Merge branch 'dev' into report-error

2025-05-17 17:29:09 +08:00 · 2023-05-31 19:15:21 +03:00 · 2023-05-31 19:15:21 +03:00 · 52b8752e62
commit 52b8752e62
parent 00dfe27f59 78a602ae8c
17 changed files with 656 additions and 22 deletions
--- a/extensions-builtin/LDSR/sd_hijack_autoencoder.py
+++ b/extensions-builtin/LDSR/sd_hijack_autoencoder.py
@ -10,7 +10,7 @@ from contextlib import contextmanager
 from torch.optim.lr_scheduler import LambdaLR

 from ldm.modules.ema import LitEma
-from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
+from vqvae_quantize import VectorQuantizer2 as VectorQuantizer
 from ldm.modules.diffusionmodules.model import Encoder, Decoder
 from ldm.util import instantiate_from_config

--- a/extensions-builtin/LDSR/vqvae_quantize.py
+++ b/extensions-builtin/LDSR/vqvae_quantize.py
@ -0,0 +1,147 @@
+# Vendored from https://raw.githubusercontent.com/CompVis/taming-transformers/24268930bf1dce879235a7fddd0b2355b84d7ea6/taming/modules/vqvae/quantize.py,
+# where the license is as follows:
+#
+# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+# OR OTHER DEALINGS IN THE SOFTWARE.
+
+import torch
+import torch.nn as nn
+import numpy as np
+from einops import rearrange
+
+
+class VectorQuantizer2(nn.Module):
+    """
+    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
+    avoids costly matrix multiplications and allows for post-hoc remapping of indices.
+    """
+
+    # NOTE: due to a bug the beta term was applied to the wrong term. For
+    # backwards compatibility we use the buggy version by default, but you can
+    # specify legacy=False to fix it.
+    def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random",
+                 sane_index_shape=False, legacy=True):
+        """
+        Build the codebook and, optionally, the index remapping table.
+
+        n_e: number of codebook entries (rows of the embedding table).
+        e_dim: size of each codebook vector.
+        beta: weight applied to one of the two loss terms in forward();
+            which term it multiplies depends on `legacy`.
+        remap: optional path handed to np.load; the loaded array is
+            registered as the buffer `used`.
+        unknown_index: "random", "extra" or an integer; decides how
+            remap_to_used() fills indices that do not occur in `used`.
+        sane_index_shape: if true, forward() returns the indices reshaped to
+            (batch, height, width).
+        legacy: if true, keep the historical placement of `beta` in the loss.
+        """
+        super().__init__()
+        self.n_e = n_e
+        self.e_dim = e_dim
+        self.beta = beta
+        self.legacy = legacy
+
+        # codebook, initialised uniformly in [-1/n_e, 1/n_e]
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+
+        self.remap = remap
+        if self.remap is not None:
+            self.register_buffer("used", torch.tensor(np.load(self.remap)))
+            # number of distinct indices after remapping
+            self.re_embed = self.used.shape[0]
+            self.unknown_index = unknown_index  # "random" or "extra" or integer
+            if self.unknown_index == "extra":
+                # reserve one additional index, placed after all used ones,
+                # for everything that is not in `used`
+                self.unknown_index = self.re_embed
+                self.re_embed = self.re_embed + 1
+            print(f"Remapping {self.n_e} indices to {self.re_embed} indices. "
+                  f"Using {self.unknown_index} for unknown indices.")
+        else:
+            self.re_embed = n_e
+
+        self.sane_index_shape = sane_index_shape
+
+    def remap_to_used(self, inds):
+        """
+        Map codebook indices to their positions in the `used` buffer.
+
+        `inds` must have at least two dimensions; the result has the same
+        shape. Indices that do not occur in `used` are replaced by a random
+        value in [0, re_embed) when unknown_index is "random", and by
+        unknown_index otherwise.
+        """
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        # match[b, i, k] is 1 where inds[b, i] equals used[k]
+        match = (inds[:, :, None] == used[None, None, ...]).long()
+        new = match.argmax(-1)
+        # positions whose index matched no entry of `used`
+        unknown = match.sum(2) < 1
+        if self.unknown_index == "random":
+            new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device)
+        else:
+            new[unknown] = self.unknown_index
+        return new.reshape(ishape)
+
+    def unmap_to_all(self, inds):
+        """
+        Translate remapped indices back to original codebook indices.
+
+        `inds` must have at least two dimensions and is left unmodified; the
+        result has the same shape and holds `used[i]` for every index i.
+        When an extra "unknown" index exists (re_embed is larger than the
+        length of `used`), indices beyond `used` are treated as index 0.
+        """
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        if self.re_embed > self.used.shape[0]:  # extra token
+            # masked_fill returns a new tensor; an in-place write here would
+            # go through the reshape view and modify the caller's tensor.
+            inds = inds.masked_fill(inds >= self.used.shape[0], 0)  # simply set to zero
+        # repeat `used` once per batch row, then look up each index
+        back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds)
+        return back.reshape(ishape)
+
+    def forward(self, z, temp=None, rescale_logits=False, return_logits=False):
+        """
+        Quantize `z` to its nearest codebook vectors.
+
+        z: tensor laid out as (batch, channel, height, width); presumably
+            channel equals e_dim, since the input is flattened to rows of
+            e_dim values.
+        temp, rescale_logits, return_logits: accepted only so the signature
+            matches the Gumbel quantizer; any non-default value fails an
+            assert.
+
+        Returns (z_q, loss, (perplexity, min_encodings, min_encoding_indices)).
+        z_q is laid out like the input; perplexity and min_encodings are
+        always None.
+        """
+        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
+        assert rescale_logits is False, "Only for interface compatible with Gumbel"
+        assert return_logits is False, "Only for interface compatible with Gumbel"
+        # reshape z -> (batch, height, width, channel) and flatten
+        z = rearrange(z, 'b c h w -> b h w c').contiguous()
+        z_flattened = z.view(-1, self.e_dim)
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+
+        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
+            torch.sum(self.embedding.weight ** 2, dim=1) - 2 * \
+            torch.einsum('bd,dn->bn', z_flattened, rearrange(self.embedding.weight, 'n d -> d n'))
+
+        # nearest codebook entry for every flattened vector
+        min_encoding_indices = torch.argmin(d, dim=1)
+        z_q = self.embedding(min_encoding_indices).view(z.shape)
+        # kept only to preserve the shape of the returned tuple
+        perplexity = None
+        min_encodings = None
+
+        # compute loss for embedding
+        if not self.legacy:
+            loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + \
+                   torch.mean((z_q - z.detach()) ** 2)
+        else:
+            # historical variant: beta multiplies the other term
+            loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \
+                   torch.mean((z_q - z.detach()) ** 2)
+
+        # preserve gradients: the value is the quantized vector, but the
+        # (z_q - z) part is detached, so gradients flow to z unchanged
+        z_q = z + (z_q - z).detach()
+
+        # reshape back to match original input shape
+        z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous()
+
+        if self.remap is not None:
+            min_encoding_indices = min_encoding_indices.reshape(z.shape[0], -1)  # add batch axis
+            min_encoding_indices = self.remap_to_used(min_encoding_indices)
+            min_encoding_indices = min_encoding_indices.reshape(-1, 1)  # flatten
+
+        if self.sane_index_shape:
+            # (batch, height, width), taken from the (b, c, h, w) output
+            min_encoding_indices = min_encoding_indices.reshape(
+                z_q.shape[0], z_q.shape[2], z_q.shape[3])
+
+        return z_q, loss, (perplexity, min_encodings, min_encoding_indices)
+
+    def get_codebook_entry(self, indices, shape):
+        """
+        Look up the codebook vectors for `indices`.
+
+        shape: (batch, height, width, channel) or None. When given, the
+            result is viewed to that shape and permuted to
+            (batch, channel, height, width); otherwise the raw embedding
+            lookup is returned.
+        """
+        # shape specifying (batch, height, width, channel)
+        if self.remap is not None:
+            # NOTE(review): this branch reads shape[0] before the
+            # `shape is not None` check below, so shape=None raises a
+            # TypeError whenever remap is set.
+            indices = indices.reshape(shape[0], -1)  # add batch axis
+            indices = self.unmap_to_all(indices)
+            indices = indices.reshape(-1)  # flatten again
+
+        # get quantized latent vectors
+        z_q = self.embedding(indices)
+
+        if shape is not None:
+            z_q = z_q.view(shape)
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+
+        return z_q
--- a/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js
+++ b/extensions-builtin/canvas-zoom-and-pan/javascript/zoom.js
@ -0,0 +1,431 @@
+// Main
+
+// Helper functions
+/**
+ * Get the active tab button of the img2img tab bar.
+ * @param {Object} elements - Object whose `img2imgTabs` entry is the tab bar element.
+ * @param {boolean} [all=false] - When true, return every tab button instead.
+ * @returns The button carrying the "selected" class, the full button list
+ *     when `all` is true, or undefined when no button is selected.
+ */
+function getActiveTab(elements, all = false) {
+    const tabs = elements.img2imgTabs.querySelectorAll("button");
+
+    if (all) return tabs;
+
+    for (let tab of tabs) {
+        if (tab.classList.contains("selected")) {
+            return tab;
+        }
+    }
+}
+
+onUiLoaded(async() => {
+    const hotkeysConfig = {
+        resetZoom: "KeyR",
+        fitToScreen: "KeyS",
+        moveKey: "KeyF",
+        overlap: "KeyO"
+    };
+
+    let isMoving = false;
+    let mouseX, mouseY;
+
+    const elementIDs = {
+        sketch: "#img2img_sketch",
+        inpaint: "#img2maskimg",
+        inpaintSketch: "#inpaint_sketch",
+        img2imgTabs: "#mode_img2img .tab-nav"
+    };
+
+    // Resolve every selector in elementIDs and return an object with the same
+    // keys, each mapped to the element found by document.querySelector
+    // (null when the selector matches nothing).
+    async function getElements() {
+        const elements = await Promise.all(
+            Object.values(elementIDs).map(id => document.querySelector(id))
+        );
+        // re-attach the keys; Object.keys and Object.values share the same order
+        return Object.fromEntries(
+            Object.keys(elementIDs).map((key, index) => [key, elements[index]])
+        );
+    }
+
+    const elements = await getElements();
+
+    function applyZoomAndPan(targetElement, elemId) {
+        targetElement.style.transformOrigin = "0 0";
+        let [zoomLevel, panX, panY] = [1, 0, 0];
+        let fullScreenMode = false;
+
+        // Workaround: the <img> tag inside the target element was found to cause
+        // white canvases while zooming, so it is hidden here. According to the
+        // original author this has no other effect on the webui.
+        // Nothing is done while the active tab is labelled "img2img".
+        function fixCanvas() {
+            const activeTab = getActiveTab(elements).textContent.trim();
+
+            if (activeTab !== "img2img") {
+                const img = targetElement.querySelector(`${elemId} img`);
+
+                if (img && img.style.display !== "none") {
+                    img.style.display = "none";
+                    img.style.visibility = "hidden";
+                }
+            }
+        }
+
+        // Reset the zoom level and pan position of the target element to their
+        // initial values, turn overlap and fullscreen mode off, and restore the
+        // element's size. When both the canvas and the target element are wider
+        // than 865px, the element is fitted to its parent instead.
+        function resetZoom() {
+            zoomLevel = 1;
+            panX = 0;
+            panY = 0;
+
+            fixCanvas();
+            targetElement.style.transform = `scale(${zoomLevel}) translate(${panX}px, ${panY}px)`;
+
+            const canvas = gradioApp().querySelector(
+                `${elemId} canvas[key="interface"]`
+            );
+
+            toggleOverlap("off");
+            fullScreenMode = false;
+
+            // wide canvas: scale it down to the parent rather than resetting the size
+            if (
+                canvas &&
+                parseFloat(canvas.style.width) > 865 &&
+                parseFloat(targetElement.style.width) > 865
+            ) {
+                fitToElement();
+                return;
+            }
+
+            // drop the inline width and follow the canvas height
+            targetElement.style.width = "";
+            if (canvas) {
+                targetElement.style.height = canvas.style.height;
+            }
+        }
+
+        // Toggle the zIndex of the target element between two values, allowing it to overlap or be overlapped by other elements.
+        // `forced` may be "off" (always end at zIndex "0") or "on" (always end at
+        // zIndex "998"); with any other value the plain toggle result is kept.
+        function toggleOverlap(forced = "") {
+            const zIndex1 = "0";
+            const zIndex2 = "998";
+
+            // the toggle always runs first ...
+            targetElement.style.zIndex =
+                targetElement.style.zIndex !== zIndex2 ? zIndex2 : zIndex1;
+
+            // ... and a forced state then overrides its result
+            if (forced === "off") {
+                targetElement.style.zIndex = zIndex1;
+            } else if (forced === "on") {
+                targetElement.style.zIndex = zIndex2;
+            }
+        }
+
+        // Adjust the brush size based on the deltaY value from a mouse wheel event.
+        //   elemId       - selector prefix of the editor (shadows the outer elemId)
+        //   deltaY       - wheel delta; a positive value shrinks the brush, otherwise it grows
+        //   withoutValue - when true, only click the control and leave its value alone
+        //   percentage   - step size as a percentage of the control's maximum
+        function adjustBrushSize(
+            elemId,
+            deltaY,
+            withoutValue = false,
+            percentage = 5
+        ) {
+            // prefer the radius slider; fall back to the "Use brush" button
+            const input =
+                gradioApp().querySelector(
+                    `${elemId} input[aria-label='Brush radius']`
+                ) ||
+                gradioApp().querySelector(
+                    `${elemId} button[aria-label="Use brush"]`
+                );
+
+            if (input) {
+                input.click();
+                if (!withoutValue) {
+                    // the maximum falls back to 100 when the "max" attribute is missing or 0
+                    const maxValue =
+                        parseFloat(input.getAttribute("max")) || 100;
+                    const changeAmount = maxValue * (percentage / 100);
+                    const newValue =
+                        parseFloat(input.value) +
+                        (deltaY > 0 ? -changeAmount : changeAmount);
+                    // clamp to [0, maxValue]
+                    input.value = Math.min(Math.max(newValue, 0), maxValue);
+                    input.dispatchEvent(new Event("change"));
+                }
+            }
+        }
+
+        // Reset zoom when uploading a new image.
+        // The selector depends on what looks like a generated class name, so it
+        // may match nothing; guard against null so that a miss does not throw
+        // and abort the rest of the setup for this element.
+        const fileInput = gradioApp().querySelector(
+            `${elemId} input[type="file"][accept="image/*"].svelte-116rqfv`
+        );
+        if (fileInput) {
+            fileInput.addEventListener("click", resetZoom);
+        }
+
+        // Apply a new zoom level around the point (mouseX, mouseY) and return the
+        // level that was actually applied.
+        //   newZoomLevel   - requested level, clamped to the range [0.5, 15]
+        //   mouseX, mouseY - point to zoom around (these parameters shadow the
+        //                    outer mouseX / mouseY variables)
+        // panX / panY are shifted by the distance that point moves under the new
+        // scale; the caller is expected to store the returned level in zoomLevel.
+        function updateZoom(newZoomLevel, mouseX, mouseY) {
+            newZoomLevel = Math.max(0.5, Math.min(newZoomLevel, 15));
+            panX += mouseX - (mouseX * newZoomLevel) / zoomLevel;
+            panY += mouseY - (mouseY * newZoomLevel) / zoomLevel;
+
+            targetElement.style.transformOrigin = "0 0";
+            targetElement.style.transform = `translate(${panX}px, ${panY}px) scale(${newZoomLevel})`;
+
+            toggleOverlap("on");
+            return newZoomLevel;
+        }
+
+        // Change the zoom level based on user interaction.
+        //   operation - "+" zooms in, anything else zooms out
+        //   e         - the triggering event; nothing happens unless Shift is held
+        function changeZoomLevel(operation, e) {
+            if (e.shiftKey) {
+                e.preventDefault();
+
+                let zoomPosX, zoomPosY;
+                // larger steps at higher zoom levels
+                let delta = 0.2;
+                if (zoomLevel > 7) {
+                    delta = 0.9;
+                } else if (zoomLevel > 2) {
+                    delta = 0.6;
+                }
+
+                zoomPosX = e.clientX;
+                zoomPosY = e.clientY;
+
+                fullScreenMode = false;
+                // zoom around the cursor, expressed relative to the element's top-left corner
+                zoomLevel = updateZoom(
+                    zoomLevel + (operation === "+" ? delta : -delta),
+                    zoomPosX - targetElement.getBoundingClientRect().left,
+                    zoomPosY - targetElement.getBoundingClientRect().top
+                );
+            }
+        }
+
+        /**
+         * This function fits the target element into its parent element by
+         * calculating the required scale and offsets. It also updates the
+         * shared variables zoomLevel, panX, and panY to reflect the new state,
+         * and turns fullscreen mode and overlap off.
+         */
+
+        function fitToElement() {
+            //Reset Zoom
+            targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`;
+
+            // Get element and parent dimensions (the "screen" here is the parent element)
+            const elementWidth = targetElement.offsetWidth;
+            const elementHeight = targetElement.offsetHeight;
+            const parentElement = targetElement.parentElement;
+            const screenWidth = parentElement.clientWidth;
+            const screenHeight = parentElement.clientHeight;
+
+            // Get element's coordinates relative to the parent element
+            // NOTE(review): elementX is computed but not used in this function
+            const elementRect = targetElement.getBoundingClientRect();
+            const parentRect = parentElement.getBoundingClientRect();
+            const elementX = elementRect.x - parentRect.x;
+
+            // Calculate scale and offsets; the smaller ratio keeps the whole element inside
+            const scaleX = screenWidth / elementWidth;
+            const scaleY = screenHeight / elementHeight;
+            const scale = Math.min(scaleX, scaleY);
+
+            const transformOrigin =
+                window.getComputedStyle(targetElement).transformOrigin;
+            const [originX, originY] = transformOrigin.split(" ");
+            const originXValue = parseFloat(originX);
+            const originYValue = parseFloat(originY);
+
+            // horizontal free space is split in half; vertical free space is
+            // divided by 2.5, which places the element above the exact centre
+            const offsetX =
+                (screenWidth - elementWidth * scale) / 2 -
+                originXValue * (1 - scale);
+            const offsetY =
+                (screenHeight - elementHeight * scale) / 2.5 -
+                originYValue * (1 - scale);
+
+            // Apply scale and offsets to the element
+            targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`;
+
+            // Update shared zoom/pan state
+            zoomLevel = scale;
+            panX = offsetX;
+            panY = offsetY;
+
+            fullScreenMode = false;
+            toggleOverlap("off");
+        }
+
+        /**
+         * Toggle fullscreen mode. When fullscreen mode is already active the
+         * zoom is reset; otherwise the target element is scaled and moved so
+         * that it fits the browser window, the shared variables zoomLevel,
+         * panX, and panY are updated, and overlap is turned on. Does nothing
+         * when the editor canvas cannot be found.
+         */
+
+        // Fullscreen mode
+        function fitToScreen() {
+            const canvas = gradioApp().querySelector(
+                `${elemId} canvas[key="interface"]`
+            );
+
+            if (!canvas) return;
+
+            // for wide canvases, pin the element width to the canvas width
+            if (canvas.offsetWidth > 862) {
+                targetElement.style.width = canvas.offsetWidth + "px";
+            }
+
+            // second press: leave fullscreen mode
+            if (fullScreenMode) {
+                resetZoom();
+                fullScreenMode = false;
+                return;
+            }
+
+            //Reset Zoom
+            targetElement.style.transform = `translate(${0}px, ${0}px) scale(${1})`;
+
+            // Get scrollbar width to right-align the image
+            const scrollbarWidth = window.innerWidth - document.documentElement.clientWidth;
+
+            // Get element and screen dimensions
+            const elementWidth = targetElement.offsetWidth;
+            const elementHeight = targetElement.offsetHeight;
+            const screenWidth = window.innerWidth - scrollbarWidth;
+            const screenHeight = window.innerHeight;
+
+            // Get element's coordinates as reported by getBoundingClientRect
+            const elementRect = targetElement.getBoundingClientRect();
+            const elementY = elementRect.y;
+            const elementX = elementRect.x;
+
+            // Calculate scale and offsets; the smaller ratio keeps the whole element visible
+            const scaleX = screenWidth / elementWidth;
+            const scaleY = screenHeight / elementHeight;
+            const scale = Math.min(scaleX, scaleY);
+
+            // Get the current transformOrigin
+            const computedStyle = window.getComputedStyle(targetElement);
+            const transformOrigin = computedStyle.transformOrigin;
+            const [originX, originY] = transformOrigin.split(" ");
+            const originXValue = parseFloat(originX);
+            const originYValue = parseFloat(originY);
+
+            // Calculate offsets with respect to the transformOrigin;
+            // subtracting elementX / elementY cancels the element's current position
+            const offsetX =
+                (screenWidth - elementWidth * scale) / 2 -
+                elementX -
+                originXValue * (1 - scale);
+            const offsetY =
+                (screenHeight - elementHeight * scale) / 2 -
+                elementY -
+                originYValue * (1 - scale);
+
+            // Apply scale and offsets to the element
+            targetElement.style.transform = `translate(${offsetX}px, ${offsetY}px) scale(${scale})`;
+
+            // Update shared zoom/pan state
+            zoomLevel = scale;
+            panX = offsetX;
+            panY = offsetY;
+
+            fullScreenMode = true;
+            toggleOverlap("on");
+        }
+
+        // Handle keydown events: look up the pressed key (event.code) in the
+        // hotkey table and, on a match, suppress the default action and run the
+        // handler. The handler receives the event as its only argument.
+        function handleKeyDown(event) {
+            const hotkeyActions = {
+                [hotkeysConfig.resetZoom]: resetZoom,
+                [hotkeysConfig.overlap]: toggleOverlap,
+                [hotkeysConfig.fitToScreen]: fitToScreen
+                // [hotkeysConfig.moveKey] : moveCanvas,
+            };
+
+            const action = hotkeyActions[event.code];
+            if (action) {
+                event.preventDefault();
+                action(event);
+            }
+        }
+
+        // Get Mouse position: store the event's offsetX / offsetY in the shared
+        // mouseX / mouseY variables.
+        function getMousePosition(e) {
+            mouseX = e.offsetX;
+            mouseY = e.offsetY;
+        }
+
+        targetElement.addEventListener("mousemove", getMousePosition);
+
+        // Handle events only inside the targetElement: the document-level keydown
+        // handler is attached while the pointer moves over the element and
+        // removed again when the pointer leaves it.
+        let isKeyDownHandlerAttached = false;
+
+        // Attach the keydown handler once; the flag prevents duplicate registration.
+        function handleMouseMove() {
+            if (!isKeyDownHandlerAttached) {
+                document.addEventListener("keydown", handleKeyDown);
+                isKeyDownHandlerAttached = true;
+            }
+        }
+
+        // Detach the keydown handler if it is currently attached.
+        function handleMouseLeave() {
+            if (isKeyDownHandlerAttached) {
+                document.removeEventListener("keydown", handleKeyDown);
+                isKeyDownHandlerAttached = false;
+            }
+        }
+
+        // Add mouse event handlers
+        targetElement.addEventListener("mousemove", handleMouseMove);
+        targetElement.addEventListener("mouseleave", handleMouseLeave);
+
+        // Reset zoom when click on another tab
+        elements.img2imgTabs.addEventListener("click", resetZoom);
+        elements.img2imgTabs.addEventListener("click", () => {
+            // targetElement.style.width = "";
+            if (parseInt(targetElement.style.width) > 865) {
+                setTimeout(fitToElement, 0);
+            }
+        });
+
+        // Mouse wheel over the element: Shift + wheel zooms (the Shift check
+        // happens inside changeZoomLevel), Ctrl/Meta + wheel changes the brush size.
+        targetElement.addEventListener("wheel", e => {
+            // change zoom level; scrolling down (positive deltaY) zooms out
+            const operation = e.deltaY > 0 ? "-" : "+";
+            changeZoomLevel(operation, e);
+
+            // Handle brush size adjustment with ctrl key pressed
+            if (e.ctrlKey || e.metaKey) {
+                e.preventDefault();
+
+                // Increase or decrease brush size based on scroll direction
+                adjustBrushSize(elemId, e.deltaY);
+            }
+        });
+
+        /**
+         * Start pan mode: set isMoving when the move hotkey goes down.
+         * Ignored when Ctrl or Meta is held together with the key.
+         * @param {KeyboardEvent} e - The keydown event.
+         */
+        function handleMoveKeyDown(e) {
+            if (e.code === hotkeysConfig.moveKey) {
+                if (!e.ctrlKey && !e.metaKey) {
+                    isMoving = true;
+                }
+            }
+        }
+
+        // End pan mode: clear isMoving when the move hotkey is released.
+        function handleMoveKeyUp(e) {
+            if (e.code === hotkeysConfig.moveKey) {
+                isMoving = false;
+            }
+        }
+
+        document.addEventListener("keydown", handleMoveKeyDown);
+        document.addEventListener("keyup", handleMoveKeyUp);
+
+        // Move the element by the given mouse movement, scaled by a pan speed
+        // that depends on the zoom level (1.5, or 2.5 above zoom level 8), then
+        // re-apply the transform and turn overlap on.
+        function updatePanPosition(movementX, movementY) {
+            let panSpeed = 1.5;
+
+            if (zoomLevel > 8) {
+                panSpeed = 2.5;
+            }
+
+            panX = panX + movementX * panSpeed;
+            panY = panY + movementY * panSpeed;
+
+            targetElement.style.transform = `translate(${panX}px, ${panY}px) scale(${zoomLevel})`;
+            toggleOverlap("on");
+        }
+
+        // Mouse-move handler for panning: while isMoving is set, pan by the mouse
+        // movement and disable pointer events on the element; otherwise restore
+        // pointer events.
+        function handleMoveByKey(e) {
+            if (isMoving) {
+                updatePanPosition(e.movementX, e.movementY);
+                targetElement.style.pointerEvents = "none";
+            } else {
+                targetElement.style.pointerEvents = "auto";
+            }
+        }
+
+        gradioApp().addEventListener("mousemove", handleMoveByKey);
+    }
+
+    applyZoomAndPan(elements.sketch, elementIDs.sketch);
+    applyZoomAndPan(elements.inpaint, elementIDs.inpaint);
+    applyZoomAndPan(elements.inpaintSketch, elementIDs.inpaintSketch);
+});
--- a/javascript/imageviewerGamepad.js
+++ b/javascript/imageviewerGamepad.js
@ -1,7 +1,9 @@
+let gamepads = [];
+
 window.addEventListener('gamepadconnected', (e) => {
    const index = e.gamepad.index;
    let isWaiting = false;
-    setInterval(async() => {
+    gamepads[index] = setInterval(async() => {
        if (!opts.js_modal_lightbox_gamepad || isWaiting) return;
        const gamepad = navigator.getGamepads()[index];
        const xValue = gamepad.axes[0];
@ -24,6 +26,10 @@ window.addEventListener('gamepadconnected', (e) => {
    }, 10);
 });

+// Stop the polling timer that was stored for this gamepad index.
+window.addEventListener('gamepaddisconnected', (e) => {
+    clearInterval(gamepads[e.gamepad.index]);
+});
+
 /*
 Primarily for vr controller type pointer devices.
 I use the wheel event because there's currently no way to do it properly with web xr.
--- a/modules/api/api.py
+++ b/modules/api/api.py
@ -24,6 +24,7 @@ from modules.textual_inversion.preprocess import preprocess
 from modules.hypernetworks.hypernetwork import create_hypernetwork, train_hypernetwork
 from PIL import PngImagePlugin,Image
 from modules.sd_models import checkpoints_list, unload_model_weights, reload_model_weights
+from modules.sd_vae import vae_dict
 from modules.sd_models_config import find_checkpoint_config_near_filename
 from modules.realesrgan_model import get_realesrgan_models
 from modules import devices
@ -190,6 +191,7 @@ class Api:
        self.add_api_route("/sdapi/v1/samplers", self.get_samplers, methods=["GET"], response_model=List[models.SamplerItem])
        self.add_api_route("/sdapi/v1/upscalers", self.get_upscalers, methods=["GET"], response_model=List[models.UpscalerItem])
        self.add_api_route("/sdapi/v1/sd-models", self.get_sd_models, methods=["GET"], response_model=List[models.SDModelItem])
+        self.add_api_route("/sdapi/v1/sd-vae", self.get_sd_vaes, methods=["GET"], response_model=List[models.SDVaeItem])
        self.add_api_route("/sdapi/v1/hypernetworks", self.get_hypernetworks, methods=["GET"], response_model=List[models.HypernetworkItem])
        self.add_api_route("/sdapi/v1/face-restorers", self.get_face_restorers, methods=["GET"], response_model=List[models.FaceRestorerItem])
        self.add_api_route("/sdapi/v1/realesrgan-models", self.get_realesrgan_models, methods=["GET"], response_model=List[models.RealesrganItem])
@ -542,6 +544,9 @@ class Api:
    def get_sd_models(self):
        return [{"title": x.title, "model_name": x.model_name, "hash": x.shorthash, "sha256": x.sha256, "filename": x.filename, "config": find_checkpoint_config_near_filename(x)} for x in checkpoints_list.values()]

+    def get_sd_vaes(self):
+        # One {"model_name", "filename"} entry per item of vae_dict.
+        return [{"model_name": name, "filename": path} for name, path in vae_dict.items()]
+
    def get_hypernetworks(self):
        return [{"name": name, "path": shared.hypernetworks[name]} for name in shared.hypernetworks]

--- a/modules/api/models.py
+++ b/modules/api/models.py
@ -249,6 +249,10 @@ class SDModelItem(BaseModel):
    filename: str = Field(title="Filename")
    config: Optional[str] = Field(title="Config file")

+# Response item of the VAE listing endpoint.
+class SDVaeItem(BaseModel):
+    # key under which the VAE is registered
+    model_name: str = Field(title="Model Name")
+    # value stored for that key; presumably the path of the VAE file
+    filename: str = Field(title="Filename")
+
 class HypernetworkItem(BaseModel):
    name: str = Field(title="Name")
    path: Optional[str] = Field(title="Path")
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@ -11,7 +11,7 @@ parser.add_argument("--skip-python-version-check", action='store_true', help="la
 parser.add_argument("--skip-torch-cuda-test", action='store_true', help="launch.py argument: do not check if CUDA is able to work properly")
 parser.add_argument("--reinstall-xformers", action='store_true', help="launch.py argument: install the appropriate version of xformers even if you have some version already installed")
 parser.add_argument("--reinstall-torch", action='store_true', help="launch.py argument: install the appropriate version of torch even if you have some version already installed")
-parser.add_argument("--update-check", action='store_true', help="launch.py argument: chck for updates at startup")
+parser.add_argument("--update-check", action='store_true', help="launch.py argument: check for updates at startup")
 parser.add_argument("--test-server", action='store_true', help="launch.py argument: configure server for testing")
 parser.add_argument("--skip-prepare-environment", action='store_true', help="launch.py argument: skip all environment preparation")
 parser.add_argument("--skip-install", action='store_true', help="launch.py argument: skip installation of packages")
--- a/modules/extensions.py
+++ b/modules/extensions.py
@ -1,10 +1,9 @@
 import os
 import threading

-import git
-
 from modules import shared
 from modules.errors import print_error
+from modules.gitpython_hack import Repo
 from modules.paths_internal import extensions_dir, extensions_builtin_dir, script_path  # noqa: F401

 extensions = []
@ -53,7 +52,7 @@ class Extension:
        repo = None
        try:
            if os.path.exists(os.path.join(self.path, ".git")):
-                repo = git.Repo(self.path)
+                repo = Repo(self.path)
        except Exception:
            print_error(f"Error reading github repository info from {self.path}", exc_info=True)

@ -92,7 +91,7 @@ class Extension:
        return res

    def check_updates(self):
-        repo = git.Repo(self.path)
+        repo = Repo(self.path)
        for fetch in repo.remote().fetch(dry_run=True):
            if fetch.flags != fetch.HEAD_UPTODATE:
                self.can_update = True
@ -114,7 +113,7 @@ class Extension:
        self.status = "latest"

    def fetch_and_reset_hard(self, commit='origin'):
-        repo = git.Repo(self.path)
+        repo = Repo(self.path)
        # Fix: `error: Your local changes to the following files would be overwritten by merge`,
        # because WSL2 Docker set 755 file permissions instead of 644, this results to the error.
        repo.git.fetch(all=True)
--- a/modules/gitpython_hack.py
+++ b/modules/gitpython_hack.py
@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import io
+import subprocess
+
+import git
+
+
+class Git(git.Git):
+    """
+    Git subclassed to never use persistent processes.
+
+    Object lookups run a fresh `git cat-file` subprocess per call, with a
+    timeout, instead of going through a long-lived helper process.
+    """
+
+    def _get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs):
+        # Fail loudly if anything still tries to obtain a persistent process.
+        raise NotImplementedError(f"Refusing to use persistent process: {attr_name} ({cmd_name} {args} {kwargs})")
+
+    def get_object_header(self, ref: str | bytes) -> tuple[str, str, int]:
+        """
+        Return the parsed header for `ref` using a one-shot
+        `git cat-file --batch-check` call.
+
+        Raises subprocess.CalledProcessError on a non-zero exit status and
+        subprocess.TimeoutExpired if git takes longer than 2 seconds.
+        """
+        # _prepare_ref / _parse_object_header are not defined here;
+        # presumably inherited from git.Git
+        ret = subprocess.check_output(
+            [self.GIT_PYTHON_GIT_EXECUTABLE, "cat-file", "--batch-check"],
+            input=self._prepare_ref(ref),
+            cwd=self._working_dir,
+            timeout=2,
+        )
+        return self._parse_object_header(ret)
+
+    def stream_object_data(self, ref: str) -> tuple[str, str, int, "Git.CatFileContentStream"]:
+        """
+        Return the parsed header for `ref` plus a stream over the object's
+        content, using a one-shot `git cat-file --batch` call.
+
+        Raises subprocess.CalledProcessError on a non-zero exit status and
+        subprocess.TimeoutExpired if git takes longer than 30 seconds.
+        """
+        # Not really streaming, per se; this buffers the entire object in memory.
+        # Shouldn't be a problem for our use case, since we're only using this for
+        # object headers (commit objects).
+        ret = subprocess.check_output(
+            [self.GIT_PYTHON_GIT_EXECUTABLE, "cat-file", "--batch"],
+            input=self._prepare_ref(ref),
+            cwd=self._working_dir,
+            timeout=30,
+        )
+        bio = io.BytesIO(ret)
+        # the first line of the output is the header; the content follows it
+        hexsha, typename, size = self._parse_object_header(bio.readline())
+        return (hexsha, typename, size, self.CatFileContentStream(size, bio))
+
+
+class Repo(git.Repo):
+    """Repo variant that sets GitCommandWrapperType to the Git subclass of this module."""
+
+    GitCommandWrapperType = Git
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@ -230,13 +230,11 @@ def prepare_environment():
    openclip_package = os.environ.get('OPENCLIP_PACKAGE', "https://github.com/mlfoundations/open_clip/archive/bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b.zip")

    stable_diffusion_repo = os.environ.get('STABLE_DIFFUSION_REPO', "https://github.com/Stability-AI/stablediffusion.git")
-    taming_transformers_repo = os.environ.get('TAMING_TRANSFORMERS_REPO', "https://github.com/CompVis/taming-transformers.git")
    k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git')
    codeformer_repo = os.environ.get('CODEFORMER_REPO', 'https://github.com/sczhou/CodeFormer.git')
    blip_repo = os.environ.get('BLIP_REPO', 'https://github.com/salesforce/BLIP.git')

    stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf")
-    taming_transformers_commit_hash = os.environ.get('TAMING_TRANSFORMERS_COMMIT_HASH', "24268930bf1dce879235a7fddd0b2355b84d7ea6")
    k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "c9fe758757e022f05ca5a53fa8fac28889e4f1cf")
    codeformer_commit_hash = os.environ.get('CODEFORMER_COMMIT_HASH', "c5b4593074ba6214284d6acd5f1719b6c5d739af")
    blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9")
@ -287,7 +285,6 @@ def prepare_environment():
    os.makedirs(os.path.join(script_path, dir_repos), exist_ok=True)

    git_clone(stable_diffusion_repo, repo_dir('stable-diffusion-stability-ai'), "Stable Diffusion", stable_diffusion_commit_hash)
-    git_clone(taming_transformers_repo, repo_dir('taming-transformers'), "Taming Transformers", taming_transformers_commit_hash)
    git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash)
    git_clone(codeformer_repo, repo_dir('CodeFormer'), "CodeFormer", codeformer_commit_hash)
    git_clone(blip_repo, repo_dir('BLIP'), "BLIP", blip_commit_hash)
--- a/modules/paths.py
+++ b/modules/paths.py
@ -20,7 +20,6 @@ assert sd_path is not None, f"Couldn't find Stable Diffusion in any of: {possibl

 path_dirs = [
    (sd_path, 'ldm', 'Stable Diffusion', []),
-    (os.path.join(sd_path, '../taming-transformers'), 'taming', 'Taming Transformers', []),
    (os.path.join(sd_path, '../CodeFormer'), 'inference_codeformer.py', 'CodeFormer', []),
    (os.path.join(sd_path, '../BLIP'), 'models/blip.py', 'BLIP', []),
    (os.path.join(sd_path, '../k-diffusion'), 'k_diffusion/sampling.py', 'k_diffusion', ["atstart"]),
--- a/modules/processing.py
+++ b/modules/processing.py
@ -321,14 +321,13 @@ class StableDiffusionProcessing:
        have been used before. The second element is where the previously
        computed result is stored.
        """
-
-        if cache[0] is not None and (required_prompts, steps) == cache[0]:
+        if cache[0] is not None and (required_prompts, steps, opts.CLIP_stop_at_last_layers, shared.sd_model.sd_checkpoint_info) == cache[0]:
            return cache[1]

        with devices.autocast():
            cache[1] = function(shared.sd_model, required_prompts, steps)

-        cache[0] = (required_prompts, steps)
+        cache[0] = (required_prompts, steps, opts.CLIP_stop_at_last_layers, shared.sd_model.sd_checkpoint_info)
        return cache[1]

    def setup_conds(self):
--- a/modules/shared.py
+++ b/modules/shared.py
@ -416,12 +416,12 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
    "comma_padding_backtrack": OptionInfo(20, "Prompt word wrap length limit", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1}).info("in tokens - for texts shorter than specified, if they don't fit into 75 token limit, move them to the next 75 token chunk"),
    "CLIP_stop_at_last_layers": OptionInfo(1, "Clip skip", gr.Slider, {"minimum": 1, "maximum": 12, "step": 1}).link("wiki", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#clip-skip").info("ignore last layers of CLIP nrtwork; 1 ignores none, 2 ignores one layer"),
    "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
-    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU"]}).info("changes seeds drastically; use CPU to produce the same picture across different vidocard vendors"),
+    "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors"),
 }))

 options_templates.update(options_section(('optimizations', "Optimizations"), {
    "cross_attention_optimization": OptionInfo("Automatic", "Cross attention optimization", gr.Dropdown, lambda: {"choices": shared_items.cross_attention_optimizations()}),
-    "s_min_uncond": OptionInfo(0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 4.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
+    "s_min_uncond": OptionInfo(0.0, "Negative Guidance minimum sigma", gr.Slider, {"minimum": 0.0, "maximum": 4.0, "step": 0.01}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9177").info("skip negative prompt for some steps when the image is almost ready; 0=disable, higher=faster"),
    "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).link("PR", "https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/9256").info("0=disable, higher=faster"),
    "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
    "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for high-res pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}).info("only applies if non-zero and overrides above"),
--- a/modules/ui.py
+++ b/modules/ui.py
@ -504,10 +504,10 @@ def create_ui():
                            with FormRow(elem_id="txt2img_hires_fix_row4", variant="compact", visible=opts.hires_fix_show_prompts) as hr_prompts_container:
                                with gr.Column(scale=80):
                                    with gr.Row():
-                                        hr_prompt = gr.Textbox(label="Prompt", elem_id="hires_prompt", show_label=False, lines=3, placeholder="Prompt for hires fix pass.\nLeave empty to use the same prompt as in first pass.", elem_classes=["prompt"])
+                                        hr_prompt = gr.Textbox(label="Hires prompt", elem_id="hires_prompt", show_label=False, lines=3, placeholder="Prompt for hires fix pass.\nLeave empty to use the same prompt as in first pass.", elem_classes=["prompt"])
                                with gr.Column(scale=80):
                                    with gr.Row():
-                                        hr_negative_prompt = gr.Textbox(label="Negative prompt", elem_id="hires_neg_prompt", show_label=False, lines=3, placeholder="Negative prompt for hires fix pass.\nLeave empty to use the same negative prompt as in first pass.", elem_classes=["prompt"])
+                                        hr_negative_prompt = gr.Textbox(label="Hires negative prompt", elem_id="hires_neg_prompt", show_label=False, lines=3, placeholder="Negative prompt for hires fix pass.\nLeave empty to use the same negative prompt as in first pass.", elem_classes=["prompt"])

                    elif category == "batch":
                        if not opts.dimensions_and_batch_together:
--- a/modules/ui_extensions.py
+++ b/modules/ui_extensions.py
@ -487,8 +487,14 @@ def refresh_available_extensions_from_data(hide_tags, sort_column, filter_text="


 def preload_extensions_git_metadata():
+    t0 = time.perf_counter()  # monotonic timer: the elapsed value stays valid even if the wall clock is adjusted mid-run
    for extension in extensions.extensions:
        extension.read_info_from_repo()
+    print(
+        f"preload_extensions_git_metadata for "
+        f"{len(extensions.extensions)} extensions took "
+        f"{time.perf_counter() - t0:.2f}s"
+    )


 def create_ui():
--- a/modules/upscaler.py
+++ b/modules/upscaler.py
@ -53,8 +53,8 @@ class Upscaler:

    def upscale(self, img: PIL.Image, scale, selected_model: str = None):
        self.scale = scale
-        dest_w = int(img.width * scale)
-        dest_h = int(img.height * scale)
+        dest_w = int((img.width * scale) // 8 * 8)  # floor to a multiple of 8; round((x - 4) / 8) ties-to-even turned 520 into 512
+        dest_h = int((img.height * scale) // 8 * 8)  # same flooring for height, so 1080 stays 1080 instead of becoming 1072

        for _ in range(3):
            shape = (img.width, img.height)
--- a/webui-user.sh
+++ b/webui-user.sh
@ -36,7 +36,6 @@

 # Fixed git commits
 #export STABLE_DIFFUSION_COMMIT_HASH=""
-#export TAMING_TRANSFORMERS_COMMIT_HASH=""
 #export CODEFORMER_COMMIT_HASH=""
 #export BLIP_COMMIT_HASH=""