Add fp8 for sd unet

2025-05-22 03:33:00 +08:00 · 2023-10-19 13:56:17 +08:00 · 2023-10-19 13:56:17 +08:00 · 7c128bbdac
commit 7c128bbdac
parent 861cbd5636
11 changed files with 36 additions and 32 deletions
--- a/extensions-builtin/Lora/network.py
+++ b/extensions-builtin/Lora/network.py
@ -137,7 +137,7 @@ class NetworkModule:
    def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
        if self.bias is not None:
            updown = updown.reshape(self.bias.shape)
-            updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown += self.bias.to(orig_weight.device, dtype=updown.dtype)
            updown = updown.reshape(output_shape)

        if len(output_shape) == 4:
--- a/extensions-builtin/Lora/network_full.py
+++ b/extensions-builtin/Lora/network_full.py
@ -18,9 +18,9 @@ class NetworkModuleFull(network.NetworkModule):

    def calc_updown(self, orig_weight):
        output_shape = self.weight.shape
-        updown = self.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+        updown = self.weight.to(orig_weight.device)
        if self.ex_bias is not None:
-            ex_bias = self.ex_bias.to(orig_weight.device, dtype=orig_weight.dtype)
+            ex_bias = self.ex_bias.to(orig_weight.device)
        else:
            ex_bias = None

--- a/extensions-builtin/Lora/network_glora.py
+++ b/extensions-builtin/Lora/network_glora.py
@ -22,12 +22,12 @@ class NetworkModuleGLora(network.NetworkModule):
        self.w2b = weights.w["b2.weight"]

    def calc_updown(self, orig_weight):
-        w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+        w1a = self.w1a.to(orig_weight.device)
+        w1b = self.w1b.to(orig_weight.device)
+        w2a = self.w2a.to(orig_weight.device)
+        w2b = self.w2b.to(orig_weight.device)

        output_shape = [w1a.size(0), w1b.size(1)]
-        updown = ((w2b @ w1b) + ((orig_weight @ w2a) @ w1a))
+        updown = ((w2b @ w1b) + ((orig_weight.to(dtype = w1a.dtype) @ w2a) @ w1a))

        return self.finalize_updown(updown, orig_weight, output_shape)
--- a/extensions-builtin/Lora/network_hada.py
+++ b/extensions-builtin/Lora/network_hada.py
@ -27,16 +27,16 @@ class NetworkModuleHada(network.NetworkModule):
        self.t2 = weights.w.get("hada_t2")

    def calc_updown(self, orig_weight):
-        w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-        w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+        w1a = self.w1a.to(orig_weight.device)
+        w1b = self.w1b.to(orig_weight.device)
+        w2a = self.w2a.to(orig_weight.device)
+        w2b = self.w2b.to(orig_weight.device)

        output_shape = [w1a.size(0), w1b.size(1)]

        if self.t1 is not None:
            output_shape = [w1a.size(1), w1b.size(1)]
-            t1 = self.t1.to(orig_weight.device, dtype=orig_weight.dtype)
+            t1 = self.t1.to(orig_weight.device)
            updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
            output_shape += t1.shape[2:]
        else:
@ -45,7 +45,7 @@ class NetworkModuleHada(network.NetworkModule):
            updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)

        if self.t2 is not None:
-            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
+            t2 = self.t2.to(orig_weight.device)
            updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
        else:
            updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)
--- a/extensions-builtin/Lora/network_ia3.py
+++ b/extensions-builtin/Lora/network_ia3.py
@ -17,7 +17,7 @@ class NetworkModuleIa3(network.NetworkModule):
        self.on_input = weights.w["on_input"].item()

    def calc_updown(self, orig_weight):
-        w = self.w.to(orig_weight.device, dtype=orig_weight.dtype)
+        w = self.w.to(orig_weight.device)

        output_shape = [w.size(0), orig_weight.size(1)]
        if self.on_input:
--- a/extensions-builtin/Lora/network_lokr.py
+++ b/extensions-builtin/Lora/network_lokr.py
@ -37,22 +37,22 @@ class NetworkModuleLokr(network.NetworkModule):

    def calc_updown(self, orig_weight):
        if self.w1 is not None:
-            w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1 = self.w1.to(orig_weight.device)
        else:
-            w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
-            w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1a = self.w1a.to(orig_weight.device)
+            w1b = self.w1b.to(orig_weight.device)
            w1 = w1a @ w1b

        if self.w2 is not None:
-            w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2 = self.w2.to(orig_weight.device)
        elif self.t2 is None:
-            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2a = self.w2a.to(orig_weight.device)
+            w2b = self.w2b.to(orig_weight.device)
            w2 = w2a @ w2b
        else:
-            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
-            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
-            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            t2 = self.t2.to(orig_weight.device)
+            w2a = self.w2a.to(orig_weight.device)
+            w2b = self.w2b.to(orig_weight.device)
            w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)

        output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
--- a/extensions-builtin/Lora/network_lora.py
+++ b/extensions-builtin/Lora/network_lora.py
@ -61,13 +61,13 @@ class NetworkModuleLora(network.NetworkModule):
        return module

    def calc_updown(self, orig_weight):
-        up = self.up_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
-        down = self.down_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+        up = self.up_model.weight.to(orig_weight.device)
+        down = self.down_model.weight.to(orig_weight.device)

        output_shape = [up.size(0), down.size(1)]
        if self.mid_model is not None:
            # cp-decomposition
-            mid = self.mid_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+            mid = self.mid_model.weight.to(orig_weight.device)
            updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid)
            output_shape += mid.shape[2:]
        else:
--- a/extensions-builtin/Lora/network_norm.py
+++ b/extensions-builtin/Lora/network_norm.py
@ -18,10 +18,10 @@ class NetworkModuleNorm(network.NetworkModule):

    def calc_updown(self, orig_weight):
        output_shape = self.w_norm.shape
-        updown = self.w_norm.to(orig_weight.device, dtype=orig_weight.dtype)
+        updown = self.w_norm.to(orig_weight.device)

        if self.b_norm is not None:
-            ex_bias = self.b_norm.to(orig_weight.device, dtype=orig_weight.dtype)
+            ex_bias = self.b_norm.to(orig_weight.device)
        else:
            ex_bias = None

--- a/extensions-builtin/Lora/networks.py
+++ b/extensions-builtin/Lora/networks.py
@ -381,12 +381,12 @@ def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn
                            # inpainting model. zero pad updown to make channel[1]  4 to 9
                            updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5))

-                        self.weight += updown
+                        self.weight.copy_((self.weight.to(dtype=updown.dtype) + updown).to(dtype=self.weight.dtype))
                        if ex_bias is not None and hasattr(self, 'bias'):
                            if self.bias is None:
-                                self.bias = torch.nn.Parameter(ex_bias)
+                                self.bias = torch.nn.Parameter(ex_bias).to(self.weight.dtype)
                            else:
-                                self.bias += ex_bias
+                                self.bias.copy_((self.bias.to(dtype=ex_bias.dtype) + ex_bias).to(dtype=self.bias.dtype))
                except RuntimeError as e:
                    logging.debug(f"Network {net.name} layer {network_layer_name}: {e}")
                    extra_network_lora.errors[net.name] = extra_network_lora.errors.get(net.name, 0) + 1
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@ -118,3 +118,4 @@ parser.add_argument('--timeout-keep-alive', type=int, default=30, help='set time
 parser.add_argument("--disable-all-extensions", action='store_true', help="prevent all extensions from running regardless of any other settings", default=False)
 parser.add_argument("--disable-extra-extensions", action='store_true', help="prevent all extensions except built-in from running regardless of any other settings", default=False)
 parser.add_argument("--skip-load-model-at-start", action='store_true', help="if load a model at web start, only take effect when --nowebui", )
+parser.add_argument("--opt-unet-fp8-storage", action='store_true', help="use fp8 for SD UNet to save vram", default=False)
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@ -391,6 +391,9 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo, state_dict, timer

        devices.dtype_unet = torch.float16
        timer.record("apply half()")
+        if shared.cmd_opts.opt_unet_fp8_storage:
+            model.model.diffusion_model = model.model.diffusion_model.to(torch.float8_e4m3fn)
+            timer.record("apply fp8 unet")

    devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16