From 387bcd8e4a825eb4c174d65bc879f4c8011295b4 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sat, 18 May 2024 19:56:55 -0400 Subject: [PATCH 1/5] Live previews run on cudastream --- modules/sd_samplers_common.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index bda578cc5..31ae46994 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -59,15 +59,14 @@ def samples_to_images_tensor(sample, approximation=None, model=None): return x_sample - +lp_stream = torch.cuda.Stream() def single_sample_to_image(sample, approximation=None): - x_sample = samples_to_images_tensor(sample.unsqueeze(0), approximation)[0] * 0.5 + 0.5 - - x_sample = torch.clamp(x_sample, min=0.0, max=1.0) - x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2) - x_sample = x_sample.astype(np.uint8) - - return Image.fromarray(x_sample) + with torch.cuda.stream(lp_stream): + x_sample = samples_to_images_tensor(sample.unsqueeze(0), approximation)[0] * 0.5 + 0.5 + x_sample = torch.clamp(x_sample, min=0.0, max=1.0) + x_sample = 255. * x_sample.permute(1, 2, 0) + x_sample = x_sample.to(device='cpu', dtype=torch.uint8, non_blocking=True) + return x_sample def decode_first_stage(model, x): @@ -81,7 +80,9 @@ def sample_to_image(samples, index=0, approximation=None): def samples_to_image_grid(samples, approximation=None): - return images.image_grid([single_sample_to_image(sample, approximation) for sample in samples]) + sample_tensors = [single_sample_to_image(sample, approximation) for sample in samples] + lp_stream.synchronize() + return images.image_grid([Image.fromarray(sample.numpy()) for sample in sample_tensors]) def images_tensor_to_samples(image, approximation=None, model=None): From 72c5966e488ae1139a6a1787d393060e0125330a Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Sat, 18 May 2024 18:15:01 -0600 Subject: [PATCH 2/5] Lint --- modules/sd_samplers_common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 31ae46994..69bf9be6d 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -1,6 +1,5 @@ import inspect from collections import namedtuple -import numpy as np import torch from PIL import Image from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared, sd_models From 044494d914f40cbbdfa32f518855cd65eebb44c9 Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sat, 18 May 2024 20:50:38 -0400 Subject: [PATCH 3/5] Only use cudastreams for live preview when cuda available --- modules/sd_samplers_common.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 69bf9be6d..8bbd8caba 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -58,14 +58,11 @@ def samples_to_images_tensor(sample, approximation=None, model=None): return x_sample -lp_stream = torch.cuda.Stream() -def single_sample_to_image(sample, approximation=None): - with torch.cuda.stream(lp_stream): - x_sample = samples_to_images_tensor(sample.unsqueeze(0), approximation)[0] * 0.5 + 0.5 - x_sample = torch.clamp(x_sample, min=0.0, max=1.0) - x_sample = 255. * x_sample.permute(1, 2, 0) - x_sample = x_sample.to(device='cpu', dtype=torch.uint8, non_blocking=True) - return x_sample +def single_sample_to_image(sample, approximation=None, non_blocking=False): + x_sample = samples_to_images_tensor(sample.unsqueeze(0), approximation)[0] * 0.5 + 0.5 + x_sample = torch.clamp(x_sample, min=0.0, max=1.0) + x_sample = 255. * x_sample.permute(1, 2, 0) + return x_sample.to(device='cpu', dtype=torch.uint8, non_blocking=non_blocking) def decode_first_stage(model, x): @@ -78,9 +75,18 @@ def sample_to_image(samples, index=0, approximation=None): return single_sample_to_image(samples[index], approximation) +if torch.cuda.is_available(): + lp_stream = torch.cuda.Stream() + live_preview_stream_context = torch.cuda.stream(lp_stream) +else: + lp_stream = None + live_preview_stream_context = nullcontext() + def samples_to_image_grid(samples, approximation=None): - sample_tensors = [single_sample_to_image(sample, approximation) for sample in samples] - lp_stream.synchronize() + with live_preview_stream_context: + sample_tensors = [single_sample_to_image(sample, approximation, non_blocking=True) for sample in samples] + if lp_stream is not None: + lp_stream.synchronize() return images.image_grid([Image.fromarray(sample.numpy()) for sample in sample_tensors]) From 4eb7cb443dbed9d3c90bb8783abb8a4318aed16a Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sat, 18 May 2024 20:53:33 -0400 Subject: [PATCH 4/5] the code runs better when you import things you need --- modules/sd_samplers_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 8bbd8caba..dd9e10a8b 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -1,5 +1,6 @@ import inspect from collections import namedtuple +from contextlib import nullcontext import torch from PIL import Image from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared, sd_models From 27e35f13faa79c90dddae45eea324baffcc13ace Mon Sep 17 00:00:00 2001 From: drhead <1313496+drhead@users.noreply.github.com> Date: Sun, 19 May 2024 18:34:09 -0400 Subject: [PATCH 5/5] handle non blocking better and case of single image --- modules/sd_samplers_common.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index dd9e10a8b..cd9b912d7 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -72,10 +72,6 @@ def decode_first_stage(model, x): return samples_to_images_tensor(x, approx_index, model) -def sample_to_image(samples, index=0, approximation=None): - return single_sample_to_image(samples[index], approximation) - - if torch.cuda.is_available(): lp_stream = torch.cuda.Stream() live_preview_stream_context = torch.cuda.stream(lp_stream) @@ -83,9 +79,17 @@ else: lp_stream = None live_preview_stream_context = nullcontext() +def sample_to_image(samples, index=0, approximation=None): + with live_preview_stream_context: + sample = single_sample_to_image(samples[index], approximation, non_blocking=lp_stream is not None) + if lp_stream is not None: + lp_stream.synchronize() + return Image.fromarray(sample.numpy()) + + def samples_to_image_grid(samples, approximation=None): with live_preview_stream_context: - sample_tensors = [single_sample_to_image(sample, approximation, non_blocking=True) for sample in samples] + sample_tensors = [single_sample_to_image(sample, approximation, non_blocking=lp_stream is not None) for sample in samples] if lp_stream is not None: lp_stream.synchronize() return images.image_grid([Image.fromarray(sample.numpy()) for sample in sample_tensors])