Mirror of https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
convert/add hypertile options

parent af45872fdb
commit bcfaf3979a
@@ -332,3 +332,39 @@ def split_attention(
                 module.forward = module._original_forward_hypertile
                 del module._original_forward_hypertile
                 del module._split_sizes_hypertile
+
+def hypertile_context_vae(model:nn.Module, aspect_ratio:float, tile_size:int, opts):
+    """
+    Returns context manager for VAE
+    """
+    enabled = opts.hypertile_split_vae_attn
+    swap_size = opts.hypertile_swap_size_vae
+    max_depth = opts.hypertile_max_depth_vae
+    tile_size_max = opts.hypertile_max_tile_vae
+    return split_attention(
+        model,
+        aspect_ratio=aspect_ratio,
+        tile_size=min(tile_size, tile_size_max),
+        swap_size=swap_size,
+        disable=not enabled,
+        max_depth=max_depth,
+        is_sdxl=False,
+    )
+
+def hypertile_context_unet(model:nn.Module, aspect_ratio:float, tile_size:int, opts, is_sdxl:bool):
+    """
+    Returns context manager for U-Net
+    """
+    enabled = opts.hypertile_split_unet_attn
+    swap_size = opts.hypertile_swap_size_unet
+    max_depth = opts.hypertile_max_depth_unet
+    tile_size_max = opts.hypertile_max_tile_unet
+    return split_attention(
+        model,
+        aspect_ratio=aspect_ratio,
+        tile_size=min(tile_size, tile_size_max),
+        swap_size=swap_size,
+        disable=not enabled,
+        max_depth=max_depth,
+        is_sdxl=is_sdxl,
+    )
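
The two helpers above centralize what each call site below previously spelled out inline: they are meant to be nested, VAE context outermost and U-Net context inside, with every tuning knob read from opts instead of being hard-coded per call. A minimal usage sketch, assuming a loaded sd_model and the current job's width and height (the bare names here are assumptions for illustration, not part of the commit):

    from modules import shared
    from modules.hypertile import hypertile_context_unet, hypertile_context_vae, largest_tile_size_available

    aspect_ratio = width / height                            # width/height of the image being generated
    tile_size = largest_tile_size_available(width, height)   # largest tile the image dimensions allow

    # VAE context outermost, U-Net context inside, mirroring the call sites below.
    with hypertile_context_vae(sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
        with hypertile_context_unet(sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=sd_model.is_sdxl, opts=shared.opts):
            ...  # sampling/decoding here runs with self-attention split into tiles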
@@ -24,7 +24,7 @@ from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
 import modules.face_restoration
-from modules.hypertile import split_attention, set_hypertile_seed, largest_tile_size_available
+from modules.hypertile import set_hypertile_seed, largest_tile_size_available, hypertile_context_unet, hypertile_context_vae
 import modules.images as images
 import modules.styles
 import modules.sd_models as sd_models
@@ -874,7 +874,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             else:
                 if opts.sd_vae_decode_method != 'Full':
                     p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
-                with split_attention(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=min(largest_tile_size_available(p.width, p.height), 128), disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+                with hypertile_context_vae(p.sd_model.first_stage_model, aspect_ratio=p.width / p.height, tile_size=largest_tile_size_available(p.width, p.height), opts=shared.opts):
                     x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)

             x_samples_ddim = torch.stack(x_samples_ddim).float()
@@ -1144,8 +1144,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         aspect_ratio = self.width / self.height
         x = self.rng.next()
         tile_size = largest_tile_size_available(self.width, self.height)
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
-            with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 devices.torch_gc()
                 samples = self.sampler.sample(self, x, conditioning, unconditional_conditioning, image_conditioning=self.txt2img_image_conditioning(x))
         del x
@@ -1153,7 +1153,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             return samples

         if self.latent_scale_mode is None:
-            with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+            with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
                 decoded_samples = torch.stack(decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)).to(dtype=torch.float32)
         else:
             decoded_samples = None
@@ -1245,15 +1245,16 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
         if self.scripts is not None:
             self.scripts.before_hr(self)
         tile_size = largest_tile_size_available(target_width, target_height)
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
-            with split_attention(self.sd_model.model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=3, max_depth=1, scale_depth=True, disable=not opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+        aspect_ratio = self.width / self.height
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 samples = self.sampler.sample_img2img(self, samples, noise, self.hr_c, self.hr_uc, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning)

         sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio())

         self.sampler = None
         devices.torch_gc()
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=target_width / target_height, tile_size=min(tile_size, 256), swap_size=1, disable=not opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
             decoded_samples = decode_latent_batch(self.sd_model, samples, target_device=devices.cpu, check_for_nans=True)

         self.is_hr_pass = False
@@ -1533,8 +1534,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             x *= self.initial_noise_multiplier
         aspect_ratio = self.width / self.height
         tile_size = largest_tile_size_available(self.width, self.height)
-        with split_attention(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 128), swap_size=1, disable=not shared.opts.hypertile_split_vae_attn, is_sdxl=shared.sd_model.is_sdxl):
-            with split_attention(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=min(tile_size, 256), swap_size=2, disable=not shared.opts.hypertile_split_unet_attn, is_sdxl=shared.sd_model.is_sdxl):
+        with hypertile_context_vae(self.sd_model.first_stage_model, aspect_ratio=aspect_ratio, tile_size=tile_size, opts=shared.opts):
+            with hypertile_context_unet(self.sd_model.model, aspect_ratio=aspect_ratio, tile_size=tile_size, is_sdxl=shared.sd_model.is_sdxl, opts=shared.opts):
                 devices.torch_gc()
                 samples = self.sampler.sample_img2img(self, self.init_latent, x, conditioning, unconditional_conditioning, image_conditioning=self.image_conditioning)

@@ -202,6 +202,12 @@ options_templates.update(options_section(('optimizations', "Optimizations"), {
     "batch_cond_uncond": OptionInfo(True, "Batch cond/uncond").info("do both conditional and unconditional denoising in one batch; uses a bit more VRAM during sampling, but improves speed; previously this was controlled by --always-batch-cond-uncond commandline argument"),
     "hypertile_split_unet_attn": OptionInfo(False, "Split attention in Unet with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"),
     "hypertile_split_vae_attn": OptionInfo(False, "Split attention in VAE with HyperTile").link("Github", "https://github.com/tfernd/HyperTile").info("improves performance; changes behavior, but deterministic"),
+    "hypertile_max_depth_vae": OptionInfo(3, "Max depth for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"),
+    "hypertile_max_depth_unet": OptionInfo(3, "Max depth for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 3, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"),
+    "hypertile_max_tile_vae": OptionInfo(128, "Max tile size for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).link("Github", "https://github.com/tfernd/HyperTile"),
+    "hypertile_max_tile_unet": OptionInfo(256, "Max tile size for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}).link("Github", "https://github.com/tfernd/HyperTile"),
+    "hypertile_swap_size_unet": OptionInfo(3, "Swap size for Unet HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"),
+    "hypertile_swap_size_vae": OptionInfo(3, "Swap size for VAE HyperTile hijack", gr.Slider, {"minimum": 0, "maximum": 6, "step": 1}).link("Github", "https://github.com/tfernd/HyperTile"),
 }))

 options_templates.update(options_section(('compatibility', "Compatibility"), {
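
Together with the call sites above, these defaults keep the old tile caps: the inline calls mostly clamped the VAE tile to min(tile_size, 128) and the U-Net tile to min(tile_size, 256), which hypertile_max_tile_vae=128 and hypertile_max_tile_unet=256 reproduce, while swap size and max depth, previously hard-coded per call site, become user-tunable. A small sketch of how one option set resolves into split_attention() arguments, using a SimpleNamespace as a stand-in for shared.opts (the 192 tile size is an illustrative value, not from the commit):

    from types import SimpleNamespace

    # Stand-in for shared.opts; the field names match the option keys added above.
    opts = SimpleNamespace(
        hypertile_split_vae_attn=True,  # master switch for the VAE hijack
        hypertile_swap_size_vae=3,
        hypertile_max_depth_vae=3,
        hypertile_max_tile_vae=128,
    )

    tile_size = 192  # e.g. a value computed by largest_tile_size_available(...)

    # hypertile_context_vae(model, aspect_ratio, tile_size, opts) forwards these to split_attention():
    resolved = dict(
        tile_size=min(tile_size, opts.hypertile_max_tile_vae),  # min(192, 128) -> 128
        swap_size=opts.hypertile_swap_size_vae,                 # 3
        max_depth=opts.hypertile_max_depth_vae,                 # 3
        disable=not opts.hypertile_split_vae_attn,              # False -> hijack stays active
    )
    assert resolved["tile_size"] == 128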