diff --git a/README.md b/README.md index 2149dcc51..2ceb4d2db 100644 --- a/README.md +++ b/README.md @@ -104,8 +104,7 @@ Alternatively, use online services (like Google Colab): 1. Install [Python 3.10.6](https://www.python.org/downloads/windows/), checking "Add Python to PATH" 2. Install [git](https://git-scm.com/download/win). 3. Download the stable-diffusion-webui repository, for example by running `git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git`. -4. Place stable diffusion checkpoint (`model.ckpt`) in the `models/Stable-diffusion` directory (see [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) for where to get it). -5. Run `webui-user.bat` from Windows Explorer as normal, non-administrator, user. +4. Run `webui-user.bat` from Windows Explorer as normal, non-administrator, user. ### Automatic Installation on Linux 1. Install the dependencies: @@ -121,7 +120,7 @@ sudo pacman -S wget git python3 ```bash bash <(wget -qO- https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/webui.sh) ``` - +3. Run `webui.sh`. ### Installation on Apple Silicon Find the instructions [here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Installation-on-Apple-Silicon). diff --git a/javascript/hints.js b/javascript/hints.js index 0a0620e39..7f4101b23 100644 --- a/javascript/hints.js +++ b/javascript/hints.js @@ -9,8 +9,8 @@ titles = { "UniPC": "Unified Predictor-Corrector Framework for Fast Sampling of Diffusion Models", "DPM adaptive": "Ignores step count - uses a number of steps determined by the CFG and resolution", - "Batch count": "How many batches of images to create", - "Batch size": "How many image to create in a single batch", + "Batch count": "How many batches of images to create (has no impact on generation performance or VRAM usage)", + "Batch size": "How many image to create in a single batch (increases generation performance at cost of higher VRAM usage)", "CFG Scale": "Classifier Free Guidance Scale - how strongly the image should conform to prompt - lower values produce more creative results", "Seed": "A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result", "\u{1f3b2}\ufe0f": "Set seed to -1, which will cause a new random number to be used every time", diff --git a/launch.py b/launch.py index 9fd766d1a..a68bb3a91 100644 --- a/launch.py +++ b/launch.py @@ -242,7 +242,7 @@ def prepare_environment(): sys.argv += shlex.split(commandline_args) - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(add_help=False) parser.add_argument("--ui-settings-file", type=str, help="filename to use for ui settings", default='config.json') args, _ = parser.parse_known_args(sys.argv) diff --git a/modules/api/api.py b/modules/api/api.py index eb7b1da54..5a9ac5f1a 100644 --- a/modules/api/api.py +++ b/modules/api/api.py @@ -498,7 +498,7 @@ class Api: if not apply_optimizations: sd_hijack.undo_optimizations() try: - hypernetwork, filename = train_hypernetwork(*args) + hypernetwork, filename = train_hypernetwork(**args) except Exception as e: error = e finally: diff --git a/modules/esrgan_model_arch.py b/modules/esrgan_model_arch.py index bc9ceb2ab..1b52b0f5e 100644 --- a/modules/esrgan_model_arch.py +++ b/modules/esrgan_model_arch.py @@ -1,5 +1,6 @@ # this file is adapted from https://github.com/victorca25/iNNfer +from collections import OrderedDict import math import functools import torch diff --git a/modules/extensions.py b/modules/extensions.py index 5e12b1aaa..3eef9eaf6 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -2,6 +2,7 @@ import os import sys import traceback +import time import git from modules import paths, shared @@ -25,6 +26,7 @@ class Extension: self.status = '' self.can_update = False self.is_builtin = is_builtin + self.version = '' repo = None try: @@ -40,6 +42,10 @@ class Extension: try: self.remote = next(repo.remote().urls, None) self.status = 'unknown' + head = repo.head.commit + ts = time.asctime(time.gmtime(repo.head.commit.committed_date)) + self.version = f'{head.hexsha[:8]} ({ts})' + except Exception: self.remote = None diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py index fc9e17aa2..89dc23bff 100644 --- a/modules/generation_parameters_copypaste.py +++ b/modules/generation_parameters_copypaste.py @@ -74,8 +74,8 @@ def image_from_url_text(filedata): return image -def add_paste_fields(tabname, init_img, fields): - paste_fields[tabname] = {"init_img": init_img, "fields": fields} +def add_paste_fields(tabname, init_img, fields, override_settings_component=None): + paste_fields[tabname] = {"init_img": init_img, "fields": fields, "override_settings_component": override_settings_component} # backwards compatibility for existing extensions import modules.ui @@ -110,6 +110,7 @@ def connect_paste_params_buttons(): for binding in registered_param_bindings: destination_image_component = paste_fields[binding.tabname]["init_img"] fields = paste_fields[binding.tabname]["fields"] + override_settings_component = binding.override_settings_component or paste_fields[binding.tabname]["override_settings_component"] destination_width_component = next(iter([field for field, name in fields if name == "Size-1"] if fields else []), None) destination_height_component = next(iter([field for field, name in fields if name == "Size-2"] if fields else []), None) @@ -130,7 +131,7 @@ def connect_paste_params_buttons(): ) if binding.source_text_component is not None and fields is not None: - connect_paste(binding.paste_button, fields, binding.source_text_component, binding.override_settings_component, binding.tabname) + connect_paste(binding.paste_button, fields, binding.source_text_component, override_settings_component, binding.tabname) if binding.source_tabname is not None and fields is not None: paste_field_names = ['Prompt', 'Negative prompt', 'Steps', 'Face restoration'] + (["Seed"] if shared.opts.send_seed else []) diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 825a93b28..f6ef42d5a 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -380,8 +380,8 @@ def apply_single_hypernetwork(hypernetwork, context_k, context_v, layer=None): layer.hyper_k = hypernetwork_layers[0] layer.hyper_v = hypernetwork_layers[1] - context_k = hypernetwork_layers[0](context_k) - context_v = hypernetwork_layers[1](context_v) + context_k = devices.cond_cast_unet(hypernetwork_layers[0](devices.cond_cast_float(context_k))) + context_v = devices.cond_cast_unet(hypernetwork_layers[1](devices.cond_cast_float(context_v))) return context_k, context_v @@ -496,7 +496,7 @@ def create_hypernetwork(name, enable_sizes, overwrite_old, layer_structure=None, shared.reload_hypernetworks() -def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_hypernetwork_every, template_filename, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, use_weight, create_image_every, save_hypernetwork_every, template_filename, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): # images allows training previews to have infotext. Importing it at the top causes a circular import problem. from modules import images @@ -554,7 +554,7 @@ def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradi pin_memory = shared.opts.pin_memory - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method, varsize=varsize) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=hypernetwork_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, include_cond=True, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method, varsize=varsize, use_weight=use_weight) if shared.opts.save_training_settings_to_txt: saved_params = dict( @@ -640,13 +640,19 @@ def train_hypernetwork(id_task, hypernetwork_name, learn_rate, batch_size, gradi with devices.autocast(): x = batch.latent_sample.to(devices.device, non_blocking=pin_memory) + if use_weight: + w = batch.weight.to(devices.device, non_blocking=pin_memory) if tag_drop_out != 0 or shuffle_tags: shared.sd_model.cond_stage_model.to(devices.device) c = shared.sd_model.cond_stage_model(batch.cond_text).to(devices.device, non_blocking=pin_memory) shared.sd_model.cond_stage_model.to(devices.cpu) else: c = stack_conds(batch.cond).to(devices.device, non_blocking=pin_memory) - loss = shared.sd_model(x, c)[0] / gradient_step + if use_weight: + loss = shared.sd_model.weighted_forward(x, c, w)[0] / gradient_step + del w + else: + loss = shared.sd_model.forward(x, c)[0] / gradient_step del x del c diff --git a/modules/images.py b/modules/images.py index c2ca8849d..5b80c23e1 100644 --- a/modules/images.py +++ b/modules/images.py @@ -18,7 +18,7 @@ import string import json import hashlib -from modules import sd_samplers, shared, script_callbacks +from modules import sd_samplers, shared, script_callbacks, errors from modules.shared import opts, cmd_opts LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) @@ -553,6 +553,8 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i elif extension.lower() in (".jpg", ".jpeg", ".webp"): if image_to_save.mode == 'RGBA': image_to_save = image_to_save.convert("RGB") + elif image_to_save.mode == 'I;16': + image_to_save = image_to_save.point(lambda p: p * 0.0038910505836576).convert("RGB" if extension.lower() == ".webp" else "L") image_to_save.save(temp_file_path, format=image_format, quality=opts.jpeg_quality) @@ -575,17 +577,19 @@ def save_image(image, path, basename, seed=None, prompt=None, extension='png', i image.already_saved_as = fullfn - target_side_length = 4000 - oversize = image.width > target_side_length or image.height > target_side_length - if opts.export_for_4chan and (oversize or os.stat(fullfn).st_size > 4 * 1024 * 1024): + oversize = image.width > opts.target_side_length or image.height > opts.target_side_length + if opts.export_for_4chan and (oversize or os.stat(fullfn).st_size > opts.img_downscale_threshold * 1024 * 1024): ratio = image.width / image.height if oversize and ratio > 1: - image = image.resize((target_side_length, image.height * target_side_length // image.width), LANCZOS) + image = image.resize((round(opts.target_side_length), round(image.height * opts.target_side_length / image.width)), LANCZOS) elif oversize: - image = image.resize((image.width * target_side_length // image.height, target_side_length), LANCZOS) + image = image.resize((round(image.width * opts.target_side_length / image.height), round(opts.target_side_length)), LANCZOS) - _atomically_save_image(image, fullfn_without_extension, ".jpg") + try: + _atomically_save_image(image, fullfn_without_extension, ".jpg") + except Exception as e: + errors.display(e, "saving image as downscaled JPG") if opts.save_txt and info is not None: txt_fullfn = f"{fullfn_without_extension}.txt" diff --git a/modules/img2img.py b/modules/img2img.py index bcc158dc9..c973b7708 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -73,6 +73,8 @@ def process_batch(p, input_dir, output_dir, inpaint_mask_dir, args): if not save_normally: os.makedirs(output_dir, exist_ok=True) + if processed_image.mode == 'RGBA': + processed_image = processed_image.convert("RGB") processed_image.save(os.path.join(output_dir, filename)) diff --git a/modules/processing.py b/modules/processing.py index 0ca15491b..0b2f7e601 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -543,8 +543,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if os.path.exists(cmd_opts.embeddings_dir) and not p.do_not_reload_embeddings: model_hijack.embedding_db.load_textual_inversion_embeddings() - _, extra_network_data = extra_networks.parse_prompts(p.all_prompts[0:1]) - if p.scripts is not None: p.scripts.process(p) @@ -582,13 +580,6 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if shared.opts.live_previews_enable and opts.show_progress_type == "Approx NN": sd_vae_approx.model() - if not p.disable_extra_networks: - extra_networks.activate(p, extra_network_data) - - with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: - processed = Processed(p, [], p.seed, "") - file.write(processed.infotext(p, 0)) - if state.job_count == -1: state.job_count = p.n_iter @@ -609,11 +600,24 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: if len(prompts) == 0: break - prompts, _ = extra_networks.parse_prompts(prompts) + prompts, extra_network_data = extra_networks.parse_prompts(prompts) + + if not p.disable_extra_networks: + with devices.autocast(): + extra_networks.activate(p, extra_network_data) if p.scripts is not None: p.scripts.process_batch(p, batch_number=n, prompts=prompts, seeds=seeds, subseeds=subseeds) + # params.txt should be saved after scripts.process_batch, since the + # infotext could be modified by that callback + # Example: a wildcard processed by process_batch sets an extra model + # strength, which is saved as "Model Strength: 1.0" in the infotext + if n == 0: + with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: + processed = Processed(p, [], p.seed, "") + file.write(processed.infotext(p, 0)) + uc = get_conds_with_caching(prompt_parser.get_learned_conditioning, negative_prompts, p.steps, cached_uc) c = get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, prompts, p.steps, cached_c) diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index 4bb45ec74..edd0e2a72 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -46,6 +46,18 @@ class CFGDenoiserParams: """Total number of sampling steps planned""" +class CFGDenoisedParams: + def __init__(self, x, sampling_step, total_sampling_steps): + self.x = x + """Latent image representation in the process of being denoised""" + + self.sampling_step = sampling_step + """Current Sampling step number""" + + self.total_sampling_steps = total_sampling_steps + """Total number of sampling steps planned""" + + class UiTrainTabParams: def __init__(self, txt2img_preview_params): self.txt2img_preview_params = txt2img_preview_params @@ -68,6 +80,7 @@ callback_map = dict( callbacks_before_image_saved=[], callbacks_image_saved=[], callbacks_cfg_denoiser=[], + callbacks_cfg_denoised=[], callbacks_before_component=[], callbacks_after_component=[], callbacks_image_grid=[], @@ -150,6 +163,14 @@ def cfg_denoiser_callback(params: CFGDenoiserParams): report_exception(c, 'cfg_denoiser_callback') +def cfg_denoised_callback(params: CFGDenoisedParams): + for c in callback_map['callbacks_cfg_denoised']: + try: + c.callback(params) + except Exception: + report_exception(c, 'cfg_denoised_callback') + + def before_component_callback(component, **kwargs): for c in callback_map['callbacks_before_component']: try: @@ -283,6 +304,14 @@ def on_cfg_denoiser(callback): add_callback(callback_map['callbacks_cfg_denoiser'], callback) +def on_cfg_denoised(callback): + """register a function to be called in the kdiffussion cfg_denoiser method after building the inner model inputs. + The callback is called with one argument: + - params: CFGDenoisedParams - parameters to be passed to the inner model and sampling state details. + """ + add_callback(callback_map['callbacks_cfg_denoised'], callback) + + def on_before_component(callback): """register a function to be called before a component is created. The callback is called with arguments: diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 8fdc59909..794767831 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -1,5 +1,6 @@ import torch from torch.nn.functional import silu +from types import MethodType import modules.textual_inversion.textual_inversion from modules import devices, sd_hijack_optimizations, shared, sd_hijack_checkpoint @@ -76,6 +77,54 @@ def fix_checkpoint(): pass +def weighted_loss(sd_model, pred, target, mean=True): + #Calculate the weight normally, but ignore the mean + loss = sd_model._old_get_loss(pred, target, mean=False) + + #Check if we have weights available + weight = getattr(sd_model, '_custom_loss_weight', None) + if weight is not None: + loss *= weight + + #Return the loss, as mean if specified + return loss.mean() if mean else loss + +def weighted_forward(sd_model, x, c, w, *args, **kwargs): + try: + #Temporarily append weights to a place accessible during loss calc + sd_model._custom_loss_weight = w + + #Replace 'get_loss' with a weight-aware one. Otherwise we need to reimplement 'forward' completely + #Keep 'get_loss', but don't overwrite the previous old_get_loss if it's already set + if not hasattr(sd_model, '_old_get_loss'): + sd_model._old_get_loss = sd_model.get_loss + sd_model.get_loss = MethodType(weighted_loss, sd_model) + + #Run the standard forward function, but with the patched 'get_loss' + return sd_model.forward(x, c, *args, **kwargs) + finally: + try: + #Delete temporary weights if appended + del sd_model._custom_loss_weight + except AttributeError as e: + pass + + #If we have an old loss function, reset the loss function to the original one + if hasattr(sd_model, '_old_get_loss'): + sd_model.get_loss = sd_model._old_get_loss + del sd_model._old_get_loss + +def apply_weighted_forward(sd_model): + #Add new function 'weighted_forward' that can be called to calc weighted loss + sd_model.weighted_forward = MethodType(weighted_forward, sd_model) + +def undo_weighted_forward(sd_model): + try: + del sd_model.weighted_forward + except AttributeError as e: + pass + + class StableDiffusionModelHijack: fixes = None comments = [] @@ -104,6 +153,10 @@ class StableDiffusionModelHijack: m.cond_stage_model.model.token_embedding = EmbeddingsWithFixes(m.cond_stage_model.model.token_embedding, self) m.cond_stage_model = sd_hijack_open_clip.FrozenOpenCLIPEmbedderWithCustomWords(m.cond_stage_model, self) + apply_weighted_forward(m) + if m.cond_stage_key == "edit": + sd_hijack_unet.hijack_ddpm_edit() + self.optimization_method = apply_optimizations() self.clip = m.cond_stage_model @@ -132,6 +185,7 @@ class StableDiffusionModelHijack: m.cond_stage_model = m.cond_stage_model.wrapped undo_optimizations() + undo_weighted_forward(m) self.apply_circular(False) self.layers = None diff --git a/modules/sd_hijack_inpainting.py b/modules/sd_hijack_inpainting.py index 478cd4993..55a2ce4d1 100644 --- a/modules/sd_hijack_inpainting.py +++ b/modules/sd_hijack_inpainting.py @@ -11,6 +11,7 @@ import ldm.models.diffusion.plms from ldm.models.diffusion.ddpm import LatentDiffusion from ldm.models.diffusion.plms import PLMSSampler from ldm.models.diffusion.ddim import DDIMSampler, noise_like +from ldm.models.diffusion.sampling_util import norm_thresholding @torch.no_grad() diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index 45cf2b18e..843ab66cf 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -44,6 +44,7 @@ def apply_model(orig_func, self, x_noisy, t, cond, **kwargs): with devices.autocast(): return orig_func(self, x_noisy.to(devices.dtype_unet), t.to(devices.dtype_unet), cond, **kwargs).float() + class GELUHijack(torch.nn.GELU, torch.nn.Module): def __init__(self, *args, **kwargs): torch.nn.GELU.__init__(self, *args, **kwargs) @@ -53,6 +54,16 @@ class GELUHijack(torch.nn.GELU, torch.nn.Module): else: return torch.nn.GELU.forward(self, x) + +ddpm_edit_hijack = None +def hijack_ddpm_edit(): + global ddpm_edit_hijack + if not ddpm_edit_hijack: + CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.decode_first_stage', first_stage_sub, first_stage_cond) + CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.encode_first_stage', first_stage_sub, first_stage_cond) + ddpm_edit_hijack = CondFunc('modules.models.diffusion.ddpm_edit.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) + + unet_needs_upcast = lambda *args, **kwargs: devices.unet_needs_upcast CondFunc('ldm.models.diffusion.ddpm.LatentDiffusion.apply_model', apply_model, unet_needs_upcast) CondFunc('ldm.modules.diffusionmodules.openaimodel.timestep_embedding', lambda orig_func, timesteps, *args, **kwargs: orig_func(timesteps, *args, **kwargs).to(torch.float32 if timesteps.dtype == torch.int64 else devices.dtype_unet), unet_needs_upcast) diff --git a/modules/sd_models.py b/modules/sd_models.py index d847d3584..93959f55f 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -105,9 +105,15 @@ def checkpoint_tiles(): def list_models(): checkpoints_list.clear() checkpoint_alisases.clear() - model_list = modelloader.load_models(model_path=model_path, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"], ext_blacklist=[".vae.safetensors"]) cmd_ckpt = shared.cmd_opts.ckpt + if shared.cmd_opts.no_download_sd_model or cmd_ckpt != shared.sd_model_file or os.path.exists(cmd_ckpt): + model_url = None + else: + model_url = "https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.safetensors" + + model_list = modelloader.load_models(model_path=model_path, model_url=model_url, command_path=shared.cmd_opts.ckpt_dir, ext_filter=[".ckpt", ".safetensors"], download_name="v1-5-pruned-emaonly.safetensors", ext_blacklist=[".vae.ckpt", ".vae.safetensors"]) + if os.path.exists(cmd_ckpt): checkpoint_info = CheckpointInfo(cmd_ckpt) checkpoint_info.register() diff --git a/modules/sd_samplers_kdiffusion.py b/modules/sd_samplers_kdiffusion.py index f076fc550..528f513fe 100644 --- a/modules/sd_samplers_kdiffusion.py +++ b/modules/sd_samplers_kdiffusion.py @@ -8,6 +8,7 @@ from modules import prompt_parser, devices, sd_samplers_common from modules.shared import opts, state import modules.shared as shared from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback +from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback samplers_k_diffusion = [ ('Euler a', 'sample_euler_ancestral', ['k_euler_a', 'k_euler_ancestral'], {}), @@ -136,6 +137,9 @@ class CFGDenoiser(torch.nn.Module): x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond={"c_crossattn": [uncond], "c_concat": [image_cond_in[-uncond.shape[0]:]]}) + denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps) + cfg_denoised_callback(denoised_params) + devices.test_for_nans(x_out, "unet") if opts.live_preview_content == "Prompt": @@ -269,6 +273,16 @@ class KDiffusionSampler: return sigmas + def create_noise_sampler(self, x, sigmas, p): + """For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes""" + if shared.opts.no_dpmpp_sde_batch_determinism: + return None + + from k_diffusion.sampling import BrownianTreeNoiseSampler + sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max() + current_iter_seeds = p.all_seeds[p.iteration * p.batch_size:(p.iteration + 1) * p.batch_size] + return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=current_iter_seeds) + def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps) @@ -278,18 +292,24 @@ class KDiffusionSampler: xi = x + noise * sigma_sched[0] extra_params_kwargs = self.initialize(p) - if 'sigma_min' in inspect.signature(self.func).parameters: + parameters = inspect.signature(self.func).parameters + + if 'sigma_min' in parameters: ## last sigma is zero which isn't allowed by DPM Fast & Adaptive so taking value before last extra_params_kwargs['sigma_min'] = sigma_sched[-2] - if 'sigma_max' in inspect.signature(self.func).parameters: + if 'sigma_max' in parameters: extra_params_kwargs['sigma_max'] = sigma_sched[0] - if 'n' in inspect.signature(self.func).parameters: + if 'n' in parameters: extra_params_kwargs['n'] = len(sigma_sched) - 1 - if 'sigma_sched' in inspect.signature(self.func).parameters: + if 'sigma_sched' in parameters: extra_params_kwargs['sigma_sched'] = sigma_sched - if 'sigmas' in inspect.signature(self.func).parameters: + if 'sigmas' in parameters: extra_params_kwargs['sigmas'] = sigma_sched + if self.funcname == 'sample_dpmpp_sde': + noise_sampler = self.create_noise_sampler(x, sigmas, p) + extra_params_kwargs['noise_sampler'] = noise_sampler + self.model_wrap_cfg.init_latent = x self.last_latent = x extra_args={ @@ -303,7 +323,7 @@ class KDiffusionSampler: return samples - def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning = None): + def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None): steps = steps or p.steps sigmas = self.get_sigmas(p, steps) @@ -311,14 +331,20 @@ class KDiffusionSampler: x = x * sigmas[0] extra_params_kwargs = self.initialize(p) - if 'sigma_min' in inspect.signature(self.func).parameters: + parameters = inspect.signature(self.func).parameters + + if 'sigma_min' in parameters: extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item() extra_params_kwargs['sigma_max'] = self.model_wrap.sigmas[-1].item() - if 'n' in inspect.signature(self.func).parameters: + if 'n' in parameters: extra_params_kwargs['n'] = steps else: extra_params_kwargs['sigmas'] = sigmas + if self.funcname == 'sample_dpmpp_sde': + noise_sampler = self.create_noise_sampler(x, sigmas, p) + extra_params_kwargs['noise_sampler'] = noise_sampler + self.last_latent = x samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={ 'cond': conditioning, diff --git a/modules/shared.py b/modules/shared.py index 670d4954e..7c559fa42 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -81,6 +81,7 @@ parser.add_argument("--freeze-settings", action='store_true', help="disable edit parser.add_argument("--ui-settings-file", type=str, help="filename to use for ui settings", default=os.path.join(data_path, 'config.json')) parser.add_argument("--gradio-debug", action='store_true', help="launch gradio with --debug option") parser.add_argument("--gradio-auth", type=str, help='set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--gradio-auth-path", type=str, help='set gradio authentication file path ex. "/path/to/auth/file" same auth format as --gradio-auth', default=None) parser.add_argument("--gradio-img2img-tool", type=str, help='does not do anything') parser.add_argument("--gradio-inpaint-tool", type=str, help="does not do anything") parser.add_argument("--opt-channelslast", action='store_true', help="change memory type for stable diffusion to channels last") @@ -107,6 +108,7 @@ parser.add_argument("--server-name", type=str, help="Sets hostname of server", d parser.add_argument("--gradio-queue", action='store_true', help="Uses gradio queue; experimental option; breaks restart UI button") parser.add_argument("--skip-version-check", action='store_true', help="Do not check versions of torch and xformers") parser.add_argument("--no-hashing", action='store_true', help="disable sha256 hashing of checkpoints to help loading performance", default=False) +parser.add_argument("--no-download-sd-model", action='store_true', help="don't download SD1.5 model even if no model is found in --ckpt-dir", default=False) script_loading.preload_extensions(extensions.extensions_dir, parser) @@ -325,7 +327,9 @@ options_templates.update(options_section(('saving-images', "Saving images/grids" "save_images_before_highres_fix": OptionInfo(False, "Save a copy of image before applying highres fix."), "save_images_before_color_correction": OptionInfo(False, "Save a copy of image before applying color correction to img2img results"), "jpeg_quality": OptionInfo(80, "Quality for saved jpeg images", gr.Slider, {"minimum": 1, "maximum": 100, "step": 1}), - "export_for_4chan": OptionInfo(True, "If PNG image is larger than 4MB or any dimension is larger than 4000, downscale and save copy as JPG"), + "export_for_4chan": OptionInfo(True, "If the saved image file size is above the limit, or its either width or height are above the limit, save a downscaled copy as JPG"), + "img_downscale_threshold": OptionInfo(4.0, "File size limit for the above option, MB", gr.Number), + "target_side_length": OptionInfo(4000, "Width/height limit for the above option, in pixels", gr.Number), "use_original_name_batch": OptionInfo(True, "Use original name for output filename during batch process in extras tab"), "use_upscaler_name_as_suffix": OptionInfo(False, "Use upscaler name as filename suffix in the extras tab"), @@ -364,7 +368,7 @@ options_templates.update(options_section(('upscaling', "Upscaling"), { })) options_templates.update(options_section(('face-restoration', "Face restoration"), { - "face_restoration_model": OptionInfo(None, "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), + "face_restoration_model": OptionInfo("CodeFormer", "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), "code_former_weight": OptionInfo(0.5, "CodeFormer weight parameter; 0 = maximum effect; 1 = minimum effect", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), "face_restoration_unload": OptionInfo(False, "Move face restoration model from VRAM into RAM after processing"), })) @@ -414,6 +418,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { options_templates.update(options_section(('compatibility', "Compatibility"), { "use_old_emphasis_implementation": OptionInfo(False, "Use old emphasis implementation. Can be useful to reproduce old seeds."), "use_old_karras_scheduler_sigmas": OptionInfo(False, "Use old karras scheduler sigmas (0.1 to 10)."), + "no_dpmpp_sde_batch_determinism": OptionInfo(False, "Do not make DPM++ SDE deterministic across different batch sizes."), "use_old_hires_fix_width_height": OptionInfo(False, "For hires fix, use width/height sliders to set final resolution rather than first pass (disables Upscale by, Resize width/height to)."), })) diff --git a/modules/shared_items.py b/modules/shared_items.py index 8b5ec96dc..e792a1349 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -20,4 +20,4 @@ def sd_vae_items(): def refresh_vae_list(): import modules.sd_vae - return modules.sd_vae.refresh_vae_list + modules.sd_vae.refresh_vae_list() diff --git a/modules/textual_inversion/dataset.py b/modules/textual_inversion/dataset.py index d31963d48..af9fbcf28 100644 --- a/modules/textual_inversion/dataset.py +++ b/modules/textual_inversion/dataset.py @@ -19,9 +19,10 @@ re_numbers_at_start = re.compile(r"^[-\d]+\s*") class DatasetEntry: - def __init__(self, filename=None, filename_text=None, latent_dist=None, latent_sample=None, cond=None, cond_text=None, pixel_values=None): + def __init__(self, filename=None, filename_text=None, latent_dist=None, latent_sample=None, cond=None, cond_text=None, pixel_values=None, weight=None): self.filename = filename self.filename_text = filename_text + self.weight = weight self.latent_dist = latent_dist self.latent_sample = latent_sample self.cond = cond @@ -30,7 +31,7 @@ class DatasetEntry: class PersonalizedBase(Dataset): - def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, cond_model=None, device=None, template_file=None, include_cond=False, batch_size=1, gradient_step=1, shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once', varsize=False): + def __init__(self, data_root, width, height, repeats, flip_p=0.5, placeholder_token="*", model=None, cond_model=None, device=None, template_file=None, include_cond=False, batch_size=1, gradient_step=1, shuffle_tags=False, tag_drop_out=0, latent_sampling_method='once', varsize=False, use_weight=False): re_word = re.compile(shared.opts.dataset_filename_word_regex) if len(shared.opts.dataset_filename_word_regex) > 0 else None self.placeholder_token = placeholder_token @@ -56,10 +57,16 @@ class PersonalizedBase(Dataset): print("Preparing dataset...") for path in tqdm.tqdm(self.image_paths): + alpha_channel = None if shared.state.interrupted: raise Exception("interrupted") try: - image = Image.open(path).convert('RGB') + image = Image.open(path) + #Currently does not work for single color transparency + #We would need to read image.info['transparency'] for that + if use_weight and 'A' in image.getbands(): + alpha_channel = image.getchannel('A') + image = image.convert('RGB') if not varsize: image = image.resize((width, height), PIL.Image.BICUBIC) except Exception: @@ -87,17 +94,35 @@ class PersonalizedBase(Dataset): with devices.autocast(): latent_dist = model.encode_first_stage(torchdata.unsqueeze(dim=0)) - if latent_sampling_method == "once" or (latent_sampling_method == "deterministic" and not isinstance(latent_dist, DiagonalGaussianDistribution)): - latent_sample = model.get_first_stage_encoding(latent_dist).squeeze().to(devices.cpu) - latent_sampling_method = "once" - entry = DatasetEntry(filename=path, filename_text=filename_text, latent_sample=latent_sample) - elif latent_sampling_method == "deterministic": - # Works only for DiagonalGaussianDistribution - latent_dist.std = 0 - latent_sample = model.get_first_stage_encoding(latent_dist).squeeze().to(devices.cpu) - entry = DatasetEntry(filename=path, filename_text=filename_text, latent_sample=latent_sample) - elif latent_sampling_method == "random": - entry = DatasetEntry(filename=path, filename_text=filename_text, latent_dist=latent_dist) + #Perform latent sampling, even for random sampling. + #We need the sample dimensions for the weights + if latent_sampling_method == "deterministic": + if isinstance(latent_dist, DiagonalGaussianDistribution): + # Works only for DiagonalGaussianDistribution + latent_dist.std = 0 + else: + latent_sampling_method = "once" + latent_sample = model.get_first_stage_encoding(latent_dist).squeeze().to(devices.cpu) + + if use_weight and alpha_channel is not None: + channels, *latent_size = latent_sample.shape + weight_img = alpha_channel.resize(latent_size) + npweight = np.array(weight_img).astype(np.float32) + #Repeat for every channel in the latent sample + weight = torch.tensor([npweight] * channels).reshape([channels] + latent_size) + #Normalize the weight to a minimum of 0 and a mean of 1, that way the loss will be comparable to default. + weight -= weight.min() + weight /= weight.mean() + elif use_weight: + #If an image does not have a alpha channel, add a ones weight map anyway so we can stack it later + weight = torch.ones(latent_sample.shape) + else: + weight = None + + if latent_sampling_method == "random": + entry = DatasetEntry(filename=path, filename_text=filename_text, latent_dist=latent_dist, weight=weight) + else: + entry = DatasetEntry(filename=path, filename_text=filename_text, latent_sample=latent_sample, weight=weight) if not (self.tag_drop_out != 0 or self.shuffle_tags): entry.cond_text = self.create_text(filename_text) @@ -110,6 +135,7 @@ class PersonalizedBase(Dataset): del torchdata del latent_dist del latent_sample + del weight self.length = len(self.dataset) self.groups = list(groups.values()) @@ -195,6 +221,10 @@ class BatchLoader: self.cond_text = [entry.cond_text for entry in data] self.cond = [entry.cond for entry in data] self.latent_sample = torch.stack([entry.latent_sample for entry in data]).squeeze(1) + if all(entry.weight is not None for entry in data): + self.weight = torch.stack([entry.weight for entry in data]).squeeze(1) + else: + self.weight = None #self.emb_index = [entry.emb_index for entry in data] #print(self.latent_sample.device) diff --git a/modules/textual_inversion/textual_inversion.py b/modules/textual_inversion/textual_inversion.py index a1a406c22..c63c7d1dd 100644 --- a/modules/textual_inversion/textual_inversion.py +++ b/modules/textual_inversion/textual_inversion.py @@ -351,7 +351,7 @@ def validate_train_inputs(model_name, learn_rate, batch_size, gradient_step, dat assert log_directory, "Log directory is empty" -def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, create_image_every, save_embedding_every, template_filename, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): +def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_step, data_root, log_directory, training_width, training_height, varsize, steps, clip_grad_mode, clip_grad_value, shuffle_tags, tag_drop_out, latent_sampling_method, use_weight, create_image_every, save_embedding_every, template_filename, save_image_with_stored_embedding, preview_from_txt2img, preview_prompt, preview_negative_prompt, preview_steps, preview_sampler_index, preview_cfg_scale, preview_seed, preview_width, preview_height): save_embedding_every = save_embedding_every or 0 create_image_every = create_image_every or 0 template_file = textual_inversion_templates.get(template_filename, None) @@ -410,7 +410,7 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st pin_memory = shared.opts.pin_memory - ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method, varsize=varsize) + ds = modules.textual_inversion.dataset.PersonalizedBase(data_root=data_root, width=training_width, height=training_height, repeats=shared.opts.training_image_repeats_per_epoch, placeholder_token=embedding_name, model=shared.sd_model, cond_model=shared.sd_model.cond_stage_model, device=devices.device, template_file=template_file, batch_size=batch_size, gradient_step=gradient_step, shuffle_tags=shuffle_tags, tag_drop_out=tag_drop_out, latent_sampling_method=latent_sampling_method, varsize=varsize, use_weight=use_weight) if shared.opts.save_training_settings_to_txt: save_settings_to_file(log_directory, {**dict(model_name=checkpoint.model_name, model_hash=checkpoint.shorthash, num_of_dataset_images=len(ds), num_vectors_per_token=len(embedding.vec)), **locals()}) @@ -480,6 +480,8 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st with devices.autocast(): x = batch.latent_sample.to(devices.device, non_blocking=pin_memory) + if use_weight: + w = batch.weight.to(devices.device, non_blocking=pin_memory) c = shared.sd_model.cond_stage_model(batch.cond_text) if is_training_inpainting_model: @@ -490,7 +492,11 @@ def train_embedding(id_task, embedding_name, learn_rate, batch_size, gradient_st else: cond = c - loss = shared.sd_model(x, cond)[0] / gradient_step + if use_weight: + loss = shared.sd_model.weighted_forward(x, cond, w)[0] / gradient_step + del w + else: + loss = shared.sd_model.forward(x, cond)[0] / gradient_step del x _loss_step += loss.item() diff --git a/modules/ui.py b/modules/ui.py index f5df1ffeb..0516c6436 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -631,9 +631,9 @@ def create_ui(): (hr_resize_y, "Hires resize-2"), *modules.scripts.scripts_txt2img.infotext_fields ] - parameters_copypaste.add_paste_fields("txt2img", None, txt2img_paste_fields) + parameters_copypaste.add_paste_fields("txt2img", None, txt2img_paste_fields, override_settings) parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding( - paste_button=txt2img_paste, tabname="txt2img", source_text_component=txt2img_prompt, source_image_component=None, override_settings_component=override_settings, + paste_button=txt2img_paste, tabname="txt2img", source_text_component=txt2img_prompt, source_image_component=None, )) txt2img_preview_params = [ @@ -963,10 +963,10 @@ def create_ui(): (mask_blur, "Mask blur"), *modules.scripts.scripts_img2img.infotext_fields ] - parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields) - parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields) + parameters_copypaste.add_paste_fields("img2img", init_img, img2img_paste_fields, override_settings) + parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask, img2img_paste_fields, override_settings) parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding( - paste_button=img2img_paste, tabname="img2img", source_text_component=img2img_prompt, source_image_component=None, override_settings_component=override_settings, + paste_button=img2img_paste, tabname="img2img", source_text_component=img2img_prompt, source_image_component=None, )) modules.scripts.scripts_current = None @@ -1191,6 +1191,8 @@ def create_ui(): create_image_every = gr.Number(label='Save an image to log directory every N steps, 0 to disable', value=500, precision=0, elem_id="train_create_image_every") save_embedding_every = gr.Number(label='Save a copy of embedding to log directory every N steps, 0 to disable', value=500, precision=0, elem_id="train_save_embedding_every") + use_weight = gr.Checkbox(label="Use PNG alpha channel as loss weight", value=False, elem_id="use_weight") + save_image_with_stored_embedding = gr.Checkbox(label='Save images with embedding in PNG chunks', value=True, elem_id="train_save_image_with_stored_embedding") preview_from_txt2img = gr.Checkbox(label='Read parameters (prompt, etc...) from txt2img tab when making previews', value=False, elem_id="train_preview_from_txt2img") @@ -1304,6 +1306,7 @@ def create_ui(): shuffle_tags, tag_drop_out, latent_sampling_method, + use_weight, create_image_every, save_embedding_every, template_file, @@ -1337,6 +1340,7 @@ def create_ui(): shuffle_tags, tag_drop_out, latent_sampling_method, + use_weight, create_image_every, save_embedding_every, template_file, @@ -1782,7 +1786,7 @@ def versions_html(): return f""" python: {python_version} • -torch: {torch.__version__} +torch: {getattr(torch, '__long_version__',torch.__version__)} • xformers: {xformers_version} • diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py index 37d30e1f2..bd4308ef0 100644 --- a/modules/ui_extensions.py +++ b/modules/ui_extensions.py @@ -80,6 +80,7 @@ def extension_table():