diff --git a/backend/headless/comfy/model_patcher.py b/backend/headless/comfy/model_patcher.py index ba505221..50b725b8 100644 --- a/backend/headless/comfy/model_patcher.py +++ b/backend/headless/comfy/model_patcher.py @@ -107,6 +107,10 @@ class ModelPatcher: for k in patch_list: if hasattr(patch_list[k], "to"): patch_list[k] = patch_list[k].to(device) + if "unet_wrapper_function" in self.model_options: + wrap_func = self.model_options["unet_wrapper_function"] + if hasattr(wrap_func, "to"): + self.model_options["unet_wrapper_function"] = wrap_func.to(device) def model_dtype(self): if hasattr(self.model, "get_dtype"): diff --git a/dev_script_build_backend.py b/dev_script_build_backend.py index 8b29b014..3682534e 100644 --- a/dev_script_build_backend.py +++ b/dev_script_build_backend.py @@ -22,7 +22,7 @@ def get_empty_folder(path): comfy_repo = "https://github.com/comfyanonymous/ComfyUI" -comfy_commit_hash = "5e885bd9c822a3cc7d50d75a40958265f497cd03" +comfy_commit_hash = "8cc75c64ff7188ce72cd4ba595119586e425c09f" comfy_temp_path = get_empty_folder(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'backend', 'comfy_temp')) comfy_core_path = get_empty_folder(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'backend', 'headless')) diff --git a/fooocus_version.py b/fooocus_version.py index 6e79c699..00b3f850 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.47' +version = '2.1.48' diff --git a/modules/advanced_parameters.py b/modules/advanced_parameters.py index 57bcec55..185af15b 100644 --- a/modules/advanced_parameters.py +++ b/modules/advanced_parameters.py @@ -3,7 +3,8 @@ adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_ overwrite_vary_strength, overwrite_upscale_strength, \ mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ - freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = [None] * 24 + refiner_swap_method, \ + freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = [None] * 25 def set_all_advanced_parameters(*args): @@ -12,6 +13,7 @@ def set_all_advanced_parameters(*args): overwrite_vary_strength, overwrite_upscale_strength, \ mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ + refiner_swap_method, \ freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \ @@ -19,6 +21,7 @@ def set_all_advanced_parameters(*args): overwrite_vary_strength, overwrite_upscale_strength, \ mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, \ debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, inpaint_engine, \ + refiner_swap_method, \ freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2 = args return diff --git a/modules/async_worker.py b/modules/async_worker.py index 97dade2d..41e95d17 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -513,7 +513,8 @@ def worker(): latent=initial_latent, denoise=denoising_strength, tiled=tiled, - cfg_scale=cfg_scale + cfg_scale=cfg_scale, + refiner_swap_method=advanced_parameters.refiner_swap_method ) del task['c'], task['uc'], positive_cond, negative_cond # Save memory diff --git a/modules/core.py b/modules/core.py index 5e1f659d..b49189a2 100644 --- a/modules/core.py +++ b/modules/core.py @@ -4,7 +4,6 @@ patch_all() import os -import random import einops import torch import numpy as np @@ -63,47 +62,6 @@ def apply_controlnet(positive, negative, control_net, image, strength, start_per image=image, strength=strength, start_percent=start_percent, end_percent=end_percent) -@torch.no_grad() -@torch.inference_mode() -def load_unet_only(unet_path): - sd_raw = comfy.utils.load_torch_file(unet_path) - sd = {} - flag = 'model.diffusion_model.' - for k in list(sd_raw.keys()): - if k.startswith(flag): - sd[k[len(flag):]] = sd_raw[k] - del sd_raw[k] - - parameters = comfy.utils.calculate_parameters(sd) - fp16 = comfy.model_management.should_use_fp16(model_params=parameters) - if "input_blocks.0.0.weight" in sd: - # ldm - model_config = comfy.model_detection.model_config_from_unet(sd, "", fp16) - if model_config is None: - raise RuntimeError("ERROR: Could not detect model type of: {}".format(unet_path)) - new_sd = sd - else: - # diffusers - model_config = comfy.model_detection.model_config_from_diffusers_unet(sd, fp16) - if model_config is None: - print("ERROR UNSUPPORTED UNET", unet_path) - return None - - diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config) - - new_sd = {} - for k in diffusers_keys: - if k in sd: - new_sd[diffusers_keys[k]] = sd.pop(k) - else: - print(diffusers_keys[k], k) - offload_device = comfy.model_management.unet_offload_device() - model = model_config.get_model(new_sd, "") - model = model.to(offload_device) - model.load_model_weights(new_sd, "") - return comfy.model_patcher.ModelPatcher(model, load_device=comfy.model_management.get_torch_device(), offload_device=offload_device) - - @torch.no_grad() @torch.inference_mode() def load_model(ckpt_filename): @@ -239,7 +197,7 @@ def get_previewer(): @torch.inference_mode() def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sampler_name='dpmpp_fooocus_2m_sde_inpaint_seamless', scheduler='karras', denoise=1.0, disable_noise=False, start_step=None, last_step=None, - force_full_denoise=False, callback_function=None, refiner=None, refiner_switch=-1): + force_full_denoise=False, callback_function=None, refiner=None, refiner_switch=-1, previewer_start=None, previewer_end=None): latent_image = latent["samples"] if disable_noise: noise = torch.zeros(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, device="cpu") @@ -253,13 +211,19 @@ def ksampler(model, positive, negative, latent, seed=None, steps=30, cfg=7.0, sa previewer = get_previewer() + if previewer_start is None: + previewer_start = 0 + + if previewer_end is None: + previewer_end = steps + def callback(step, x0, x, total_steps): comfy.model_management.throw_exception_if_processing_interrupted() y = None if previewer is not None: - y = previewer(x0, step, total_steps) + y = previewer(x0, previewer_start + step, previewer_end) if callback_function is not None: - callback_function(step, x0, x, total_steps, y) + callback_function(previewer_start + step, x0, x, previewer_end, y) disable_pbar = False modules.sample_hijack.current_refiner = refiner diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py index 6d947ef8..534b2c88 100644 --- a/modules/default_pipeline.py +++ b/modules/default_pipeline.py @@ -4,9 +4,9 @@ import torch import modules.path import comfy.model_management -from comfy.model_patcher import ModelPatcher from comfy.model_base import SDXL, SDXLRefiner from modules.expansion import FooocusExpansion +from modules.sample_hijack import clip_separate xl_base: core.StableDiffusionModel = None @@ -15,14 +15,15 @@ xl_base_hash = '' xl_base_patched: core.StableDiffusionModel = None xl_base_patched_hash = '' -xl_refiner: ModelPatcher = None +xl_refiner: core.StableDiffusionModel = None xl_refiner_hash = '' final_expansion = None final_unet = None final_clip = None final_vae = None -final_refiner = None +final_refiner_unet = None +final_refiner_vae = None loaded_ControlNets = {} @@ -60,8 +61,10 @@ def assert_model_integrity(): error_message = 'You have selected base model other than SDXL. This is not supported yet.' if xl_refiner is not None: - if not isinstance(xl_refiner.model, SDXLRefiner): - error_message = 'You have selected refiner model other than SDXL refiner. This is not supported yet.' + if xl_refiner.unet is None or xl_refiner.unet.model is None: + error_message = 'You have selected an invalid refiner!' + elif not isinstance(xl_refiner.unet.model, SDXL) and not isinstance(xl_refiner.unet.model, SDXLRefiner): + error_message = 'SD1.5 or 2.1 as refiner is not supported!' if error_message is not None: raise NotImplementedError(error_message) @@ -109,9 +112,19 @@ def refresh_refiner_model(name): print(f'Refiner unloaded.') return - xl_refiner = core.load_unet_only(filename) + xl_refiner = core.load_model(filename) xl_refiner_hash = model_hash print(f'Refiner model loaded: {model_hash}') + + if isinstance(xl_refiner.unet.model, SDXL): + xl_refiner.clip = None + xl_refiner.vae = None + elif isinstance(xl_refiner.unet.model, SDXLRefiner): + xl_refiner.clip = None + xl_refiner.vae = None + else: + xl_refiner = None # 1.5/2.1 not supported yet. + return @@ -203,15 +216,23 @@ def prepare_text_encoder(async_call=True): @torch.no_grad() @torch.inference_mode() def refresh_everything(refiner_model_name, base_model_name, loras): - global final_unet, final_clip, final_vae, final_refiner, final_expansion + global final_unet, final_clip, final_vae, final_refiner_unet, final_refiner_vae, final_expansion refresh_refiner_model(refiner_model_name) refresh_base_model(base_model_name) refresh_loras(loras) assert_model_integrity() - final_unet, final_clip, final_vae, final_refiner = \ - xl_base_patched.unet, xl_base_patched.clip, xl_base_patched.vae, xl_refiner + final_unet = xl_base_patched.unet + final_clip = xl_base_patched.clip + final_vae = xl_base_patched.vae + + if xl_refiner is None: + final_refiner_unet = None + final_refiner_vae = None + else: + final_refiner_unet = xl_refiner.unet + final_refiner_vae = xl_refiner.vae if final_expansion is None: final_expansion = FooocusExpansion() @@ -236,30 +257,141 @@ refresh_everything( @torch.no_grad() @torch.inference_mode() -def process_diffusion(positive_cond, negative_cond, steps, switch, width, height, image_seed, callback, sampler_name, scheduler_name, latent=None, denoise=1.0, tiled=False, cfg_scale=7.0): +def vae_parse(x, tiled=False): + if final_vae is None or final_refiner_vae is None: + return x + + print('VAE parsing ...') + x = core.decode_vae(vae=final_vae, latent_image=x, tiled=tiled) + x = core.encode_vae(vae=final_refiner_vae, pixels=x, tiled=tiled) + print('VAE parsed ...') + + return x + + +@torch.no_grad() +@torch.inference_mode() +def process_diffusion(positive_cond, negative_cond, steps, switch, width, height, image_seed, callback, sampler_name, scheduler_name, latent=None, denoise=1.0, tiled=False, cfg_scale=7.0, refiner_swap_method='joint'): + assert refiner_swap_method in ['joint', 'separate', 'vae'] + print(f'[Sampler] refiner_swap_method = {refiner_swap_method}') + if latent is None: empty_latent = core.generate_empty_latent(width=width, height=height, batch_size=1) else: empty_latent = latent - sampled_latent = core.ksampler( - model=final_unet, - refiner=final_refiner, - positive=positive_cond, - negative=negative_cond, - latent=empty_latent, - steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True, - seed=image_seed, - denoise=denoise, - callback_function=callback, - cfg=cfg_scale, - sampler_name=sampler_name, - scheduler=scheduler_name, - refiner_switch=switch - ) + if refiner_swap_method == 'joint': + sampled_latent = core.ksampler( + model=final_unet, + refiner=final_refiner_unet, + positive=positive_cond, + negative=negative_cond, + latent=empty_latent, + steps=steps, start_step=0, last_step=steps, disable_noise=False, force_full_denoise=True, + seed=image_seed, + denoise=denoise, + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name, + scheduler=scheduler_name, + refiner_switch=switch, + previewer_start=0, + previewer_end=steps, + ) + decoded_latent = core.decode_vae(vae=final_vae, latent_image=sampled_latent, tiled=tiled) + images = core.pytorch_to_numpy(decoded_latent) + return images - decoded_latent = core.decode_vae(vae=final_vae, latent_image=sampled_latent, tiled=tiled) - images = core.pytorch_to_numpy(decoded_latent) + if refiner_swap_method == 'separate': + sampled_latent = core.ksampler( + model=final_unet, + positive=positive_cond, + negative=negative_cond, + latent=empty_latent, + steps=steps, start_step=0, last_step=switch, disable_noise=False, force_full_denoise=False, + seed=image_seed, + denoise=denoise, + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name, + scheduler=scheduler_name, + previewer_start=0, + previewer_end=switch, + ) + print('Refiner swapped by changing ksampler. Noise preserved.') - comfy.model_management.soft_empty_cache() - return images + target_model = final_refiner_unet + if target_model is None: + target_model = final_unet + print('Use base model to refine itself - this may because of developer mode.') + + sampled_latent = core.ksampler( + model=target_model, + positive=clip_separate(positive_cond, target_model=target_model.model), + negative=clip_separate(negative_cond, target_model=target_model.model), + latent=sampled_latent, + steps=steps, start_step=switch, last_step=steps, disable_noise=True, force_full_denoise=True, + seed=image_seed, + denoise=denoise, + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name, + scheduler=scheduler_name, + previewer_start=switch, + previewer_end=steps, + ) + + target_model = final_refiner_vae + if target_model is None: + target_model = final_vae + decoded_latent = core.decode_vae(vae=target_model, latent_image=sampled_latent, tiled=tiled) + images = core.pytorch_to_numpy(decoded_latent) + return images + + if refiner_swap_method == 'vae': + sampled_latent = core.ksampler( + model=final_unet, + positive=positive_cond, + negative=negative_cond, + latent=empty_latent, + steps=steps, start_step=0, last_step=switch, disable_noise=False, force_full_denoise=True, + seed=image_seed, + denoise=denoise, + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name, + scheduler=scheduler_name, + previewer_start=0, + previewer_end=switch, + ) + print('Refiner swapped by changing ksampler. Noise is not preserved.') + + target_model = final_refiner_unet + if target_model is None: + target_model = final_unet + print('Use base model to refine itself - this may because of developer mode.') + + sampled_latent = vae_parse(sampled_latent) + + sampled_latent = core.ksampler( + model=target_model, + positive=clip_separate(positive_cond, target_model=target_model.model), + negative=clip_separate(negative_cond, target_model=target_model.model), + latent=sampled_latent, + steps=steps, start_step=switch, last_step=steps, disable_noise=False, force_full_denoise=True, + seed=image_seed, + denoise=denoise, + callback_function=callback, + cfg=cfg_scale, + sampler_name=sampler_name, + scheduler=scheduler_name, + previewer_start=switch, + previewer_end=steps, + ) + + target_model = final_refiner_vae + if target_model is None: + target_model = final_vae + decoded_latent = core.decode_vae(vae=target_model, latent_image=sampled_latent, tiled=tiled) + images = core.pytorch_to_numpy(decoded_latent) + return images diff --git a/modules/inpaint_worker.py b/modules/inpaint_worker.py index 66b642c0..4c0d1a3d 100644 --- a/modules/inpaint_worker.py +++ b/modules/inpaint_worker.py @@ -83,16 +83,13 @@ def area_abcd(a, b, c, d): return (b - a) * (d - c) -def solve_abcd(x, a, b, c, d, k, outpaint): +def solve_abcd(x, a, b, c, d, outpaint): H, W = x.shape[:2] if outpaint: return 0, H, 0, W - min_area = H * W * k - max_area = H * W + min_area = (min(H, W) ** 2) * 0.5 while True: - if area_abcd(a, b, c, d) > min_area and abs((b - a) - (d - c)) < 16: - break - if area_abcd(a, b, c, d) >= max_area: + if area_abcd(a, b, c, d) >= min_area: break add_h = (b - a) < (d - c) @@ -150,7 +147,7 @@ class InpaintWorker: # compute abcd a, b, c, d = compute_initial_abcd(self.mask_raw_bg < 127) - a, b, c, d = solve_abcd(self.mask_raw_bg, a, b, c, d, k=0.618, outpaint=is_outpaint) + a, b, c, d = solve_abcd(self.mask_raw_bg, a, b, c, d, outpaint=is_outpaint) # interested area self.interested_area = (a, b, c, d) diff --git a/modules/sample_hijack.py b/modules/sample_hijack.py index 8609c0c0..c70a74b4 100644 --- a/modules/sample_hijack.py +++ b/modules/sample_hijack.py @@ -2,7 +2,8 @@ import torch import comfy.samplers import comfy.model_management -from comfy.sample import prepare_sampling, cleanup_additional_models, get_additional_models +from comfy.model_base import SDXLRefiner, SDXL +from comfy.sample import get_additional_models from comfy.samplers import resolve_areas_and_cond_masks, wrap_model, calculate_start_end_timesteps, \ create_cond_with_same_area_if_none, pre_run_control, apply_empty_x_to_equal_area, encode_adm, \ blank_inpaint_image_like @@ -14,11 +15,18 @@ refiner_switch_step = -1 @torch.no_grad() @torch.inference_mode() -def clip_separate(cond): +def clip_separate(cond, target_model=None): c, p = cond[0] - c = c[..., -1280:].clone() - p = p["pooled_output"].clone() - return [[c, {"pooled_output": p}]] + if target_model is None or isinstance(target_model, SDXLRefiner): + c = c[..., -1280:].clone() + p = {"pooled_output": p["pooled_output"].clone()} + elif isinstance(target_model, SDXL): + c = c.clone() + p = {"pooled_output": p["pooled_output"].clone()} + else: + c = c[..., :768].clone() + p = {} + return [[c, p]] @torch.no_grad() @@ -54,8 +62,11 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas negative = encode_adm(model, negative, noise.shape[0], noise.shape[3], noise.shape[2], device, "negative") if current_refiner is not None and current_refiner.model.is_adm(): - positive_refiner = encode_adm(current_refiner.model, clip_separate(positive), noise.shape[0], noise.shape[3], noise.shape[2], device, "positive") - negative_refiner = encode_adm(current_refiner.model, clip_separate(negative), noise.shape[0], noise.shape[3], noise.shape[2], device, "negative") + positive_refiner = clip_separate(positive, target_model=current_refiner.model) + negative_refiner = clip_separate(negative, target_model=current_refiner.model) + + positive_refiner = encode_adm(current_refiner.model, positive_refiner, noise.shape[0], noise.shape[3], noise.shape[2], device, "positive") + negative_refiner = encode_adm(current_refiner.model, negative_refiner, noise.shape[0], noise.shape[3], noise.shape[2], device, "negative") positive_refiner[0][1]['adm_encoded'].to(positive[0][1]['adm_encoded']) negative_refiner[0][1]['adm_encoded'].to(negative[0][1]['adm_encoded']) diff --git a/webui.py b/webui.py index 6bdad465..ca813b29 100644 --- a/webui.py +++ b/webui.py @@ -230,6 +230,10 @@ with shared.gradio_root: adm_scaler_end = gr.Slider(label='ADM Guidance End At Step', minimum=0.0, maximum=1.0, step=0.001, value=0.3, info='When to end the guidance from positive/negative ADM. ') + + refiner_swap_method = gr.Dropdown(label='Refiner swap method', value='joint', + choices=['joint', 'separate', 'vae']) + adaptive_cfg = gr.Slider(label='CFG Mimicking from TSNR', minimum=1.0, maximum=30.0, step=0.01, value=7.0, info='Enabling Fooocus\'s implementation of CFG mimicking for TSNR ' '(effective when real CFG > mimicked CFG).') @@ -293,7 +297,7 @@ with shared.gradio_root: overwrite_vary_strength, overwrite_upscale_strength, mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, debugging_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, - inpaint_engine] + inpaint_engine, refiner_swap_method] adps += freeu_ctrls def dev_mode_checked(r):