Merge branch 'main_upstream' into hotfix/prevent-skipping-and-stopping-by-other-users

# Conflicts: # webui.py
2024-01-04 18:07:06 +01:00 · 2024-01-04 18:07:06 +01:00 · e68c8aa0a7
parent ee3d70ab87 176faf6f34
commit e68c8aa0a7
40 changed files with 553 additions and 215 deletions
--- a/extras/ip_adapter.py
+++ b/extras/ip_adapter.py
@ -2,12 +2,13 @@ import torch
 import ldm_patched.modules.clip_vision
 import safetensors.torch as sf
 import ldm_patched.modules.model_management as model_management
-import contextlib
 import ldm_patched.ldm.modules.attention as attention

 from extras.resampler import Resampler
 from ldm_patched.modules.model_patcher import ModelPatcher
 from modules.core import numpy_to_pytorch
+from modules.ops import use_patched_ops
+from ldm_patched.modules.ops import manual_cast


 SD_V12_CHANNELS = [320] * 4 + [640] * 4 + [1280] * 4 + [1280] * 6 + [640] * 6 + [320] * 6 + [1280] * 2
@ -116,14 +117,16 @@ def load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path):
        clip_extra_context_tokens = ip_state_dict["image_proj"]["proj.weight"].shape[0] // cross_attention_dim
        clip_embeddings_dim = None

-    ip_adapter = IPAdapterModel(
-        ip_state_dict,
-        plus=plus,
-        cross_attention_dim=cross_attention_dim,
-        clip_embeddings_dim=clip_embeddings_dim,
-        clip_extra_context_tokens=clip_extra_context_tokens,
-        sdxl_plus=sdxl_plus
-    )
+    with use_patched_ops(manual_cast):
+        ip_adapter = IPAdapterModel(
+            ip_state_dict,
+            plus=plus,
+            cross_attention_dim=cross_attention_dim,
+            clip_embeddings_dim=clip_embeddings_dim,
+            clip_extra_context_tokens=clip_extra_context_tokens,
+            sdxl_plus=sdxl_plus
+        )
+
    ip_adapter.sdxl = sdxl
    ip_adapter.load_device = load_device
    ip_adapter.offload_device = offload_device
--- a/extras/resampler.py
+++ b/extras/resampler.py
@ -108,8 +108,7 @@ class Resampler(nn.Module):
            )

    def forward(self, x):
-        
-        latents = self.latents.repeat(x.size(0), 1, 1)
+        latents = self.latents.repeat(x.size(0), 1, 1).to(x)
        
        x = self.proj_in(x)
        
@ -118,4 +117,4 @@ class Resampler(nn.Module):
            latents = ff(latents) + latents
            
        latents = self.proj_out(latents)
-        return self.norm_out(latents)
+        return self.norm_out(latents)
--- a/fooocus_version.py
+++ b/fooocus_version.py
@ -1 +1 @@
-version = '2.1.855'
+version = '2.1.860'
--- a/ldm_patched/contrib/external.py
+++ b/ldm_patched/contrib/external.py
@ -11,7 +11,7 @@ import math
 import time
 import random

-from PIL import Image, ImageOps
+from PIL import Image, ImageOps, ImageSequence
 from PIL.PngImagePlugin import PngInfo
 import numpy as np
 import safetensors.torch
@ -1412,17 +1412,30 @@ class LoadImage:
    FUNCTION = "load_image"
    def load_image(self, image):
        image_path = ldm_patched.utils.path_utils.get_annotated_filepath(image)
-        i = Image.open(image_path)
-        i = ImageOps.exif_transpose(i)
-        image = i.convert("RGB")
-        image = np.array(image).astype(np.float32) / 255.0
-        image = torch.from_numpy(image)[None,]
-        if 'A' in i.getbands():
-            mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
-            mask = 1. - torch.from_numpy(mask)
+        img = Image.open(image_path)
+        output_images = []
+        output_masks = []
+        for i in ImageSequence.Iterator(img):
+            i = ImageOps.exif_transpose(i)
+            image = i.convert("RGB")
+            image = np.array(image).astype(np.float32) / 255.0
+            image = torch.from_numpy(image)[None,]
+            if 'A' in i.getbands():
+                mask = np.array(i.getchannel('A')).astype(np.float32) / 255.0
+                mask = 1. - torch.from_numpy(mask)
+            else:
+                mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
+            output_images.append(image)
+            output_masks.append(mask.unsqueeze(0))
+
+        if len(output_images) > 1:
+            output_image = torch.cat(output_images, dim=0)
+            output_mask = torch.cat(output_masks, dim=0)
        else:
-            mask = torch.zeros((64,64), dtype=torch.float32, device="cpu")
-        return (image, mask.unsqueeze(0))
+            output_image = output_images[0]
+            output_mask = output_masks[0]
+
+        return (output_image, output_mask)

    @classmethod
    def IS_CHANGED(s, image):
@ -1480,13 +1493,10 @@ class LoadImageMask:
        return m.digest().hex()

    @classmethod
-    def VALIDATE_INPUTS(s, image, channel):
+    def VALIDATE_INPUTS(s, image):
        if not ldm_patched.utils.path_utils.exists_annotated_filepath(image):
            return "Invalid image file: {}".format(image)

-        if channel not in s._color_channels:
-            return "Invalid color channel: {}".format(channel)
-
        return True

 class ImageScale:
@ -1871,6 +1881,7 @@ def init_custom_nodes():
        "nodes_video_model.py",
        "nodes_sag.py",
        "nodes_perpneg.py",
+        "nodes_stable3d.py",
    ]

    for node_file in extras_files:
--- a/ldm_patched/contrib/external_custom_sampler.py
+++ b/ldm_patched/contrib/external_custom_sampler.py
@ -89,6 +89,7 @@ class SDTurboScheduler:
        return {"required":
                    {"model": ("MODEL",),
                     "steps": ("INT", {"default": 1, "min": 1, "max": 10}),
+                     "denoise": ("FLOAT", {"default": 1.0, "min": 0, "max": 1.0, "step": 0.01}),
                      }
               }
    RETURN_TYPES = ("SIGMAS",)
@ -96,8 +97,9 @@ class SDTurboScheduler:

    FUNCTION = "get_sigmas"

-    def get_sigmas(self, model, steps):
-        timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[:steps]
+    def get_sigmas(self, model, steps, denoise):
+        start_step = 10 - int(10 * denoise)
+        timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps]
        sigmas = model.model.model_sampling.sigma(timesteps)
        sigmas = torch.cat([sigmas, sigmas.new_zeros([1])])
        return (sigmas, )
--- a/ldm_patched/contrib/external_mask.py
+++ b/ldm_patched/contrib/external_mask.py
@ -8,6 +8,7 @@ import ldm_patched.modules.utils
 from ldm_patched.contrib.external import MAX_RESOLUTION

 def composite(destination, source, x, y, mask = None, multiplier = 8, resize_source = False):
+    source = source.to(destination.device)
    if resize_source:
        source = torch.nn.functional.interpolate(source, size=(destination.shape[2], destination.shape[3]), mode="bilinear")

@ -22,7 +23,7 @@ def composite(destination, source, x, y, mask = None, multiplier = 8, resize_sou
    if mask is None:
        mask = torch.ones_like(source)
    else:
-        mask = mask.clone()
+        mask = mask.to(destination.device, copy=True)
        mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(source.shape[2], source.shape[3]), mode="bilinear")
        mask = ldm_patched.modules.utils.repeat_to_batch_size(mask, source.shape[0])

--- a/ldm_patched/contrib/external_rebatch.py
+++ b/ldm_patched/contrib/external_rebatch.py
@ -101,10 +101,40 @@ class LatentRebatch:

        return (output_list,)

+class ImageRebatch:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "images": ("IMAGE",),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                              }}
+    RETURN_TYPES = ("IMAGE",)
+    INPUT_IS_LIST = True
+    OUTPUT_IS_LIST = (True, )
+
+    FUNCTION = "rebatch"
+
+    CATEGORY = "image/batch"
+
+    def rebatch(self, images, batch_size):
+        batch_size = batch_size[0]
+
+        output_list = []
+        all_images = []
+        for img in images:
+            for i in range(img.shape[0]):
+                all_images.append(img[i:i+1])
+
+        for i in range(0, len(all_images), batch_size):
+            output_list.append(torch.cat(all_images[i:i+batch_size], dim=0))
+
+        return (output_list,)
+
 NODE_CLASS_MAPPINGS = {
    "RebatchLatents": LatentRebatch,
+    "RebatchImages": ImageRebatch,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
    "RebatchLatents": "Rebatch Latents",
-}
+    "RebatchImages": "Rebatch Images",
+}
--- a/ldm_patched/contrib/external_sag.py
+++ b/ldm_patched/contrib/external_sag.py
@ -153,7 +153,7 @@ class SelfAttentionGuidance:
            (sag, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, uncond, None, degraded_noised, sigma, model_options)
            return cfg_result + (degraded - sag) * sag_scale

-        m.set_model_sampler_post_cfg_function(post_cfg_function)
+        m.set_model_sampler_post_cfg_function(post_cfg_function, disable_cfg1_optimization=True)

        # from diffusers:
        # unet.mid_block.attentions[0].transformer_blocks[0].attn1.patch
--- a/ldm_patched/contrib/external_stable3d.py
+++ b/ldm_patched/contrib/external_stable3d.py
@ -0,0 +1,60 @@
+# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py 
+
+import torch
+import ldm_patched.contrib.external
+import ldm_patched.modules.utils
+
+def camera_embeddings(elevation, azimuth):
+    elevation = torch.as_tensor([elevation])
+    azimuth = torch.as_tensor([azimuth])
+    embeddings = torch.stack(
+        [
+                torch.deg2rad(
+                    (90 - elevation) - (90)
+                ),  # Zero123 polar is 90-elevation
+                torch.sin(torch.deg2rad(azimuth)),
+                torch.cos(torch.deg2rad(azimuth)),
+                torch.deg2rad(
+                    90 - torch.full_like(elevation, 0)
+                ),
+        ], dim=-1).unsqueeze(1)
+
+    return embeddings
+
+
+class StableZero123_Conditioning:
+    @classmethod
+    def INPUT_TYPES(s):
+        return {"required": { "clip_vision": ("CLIP_VISION",),
+                              "init_image": ("IMAGE",),
+                              "vae": ("VAE",),
+                              "width": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}),
+                              "height": ("INT", {"default": 256, "min": 16, "max": ldm_patched.contrib.external.MAX_RESOLUTION, "step": 8}),
+                              "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
+                              "elevation": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}),
+                              "azimuth": ("FLOAT", {"default": 0.0, "min": -180.0, "max": 180.0}),
+                             }}
+    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "LATENT")
+    RETURN_NAMES = ("positive", "negative", "latent")
+
+    FUNCTION = "encode"
+
+    CATEGORY = "conditioning/3d_models"
+
+    def encode(self, clip_vision, init_image, vae, width, height, batch_size, elevation, azimuth):
+        output = clip_vision.encode_image(init_image)
+        pooled = output.image_embeds.unsqueeze(0)
+        pixels = ldm_patched.modules.utils.common_upscale(init_image.movedim(-1,1), width, height, "bilinear", "center").movedim(1,-1)
+        encode_pixels = pixels[:,:,:,:3]
+        t = vae.encode(encode_pixels)
+        cam_embeds = camera_embeddings(elevation, azimuth)
+        cond = torch.cat([pooled, cam_embeds.repeat((pooled.shape[0], 1, 1))], dim=-1)
+
+        positive = [[cond, {"concat_latent_image": t}]]
+        negative = [[torch.zeros_like(pooled), {"concat_latent_image": torch.zeros_like(t)}]]
+        latent = torch.zeros([batch_size, 4, height // 8, width // 8])
+        return (positive, negative, {"samples":latent})
+
+NODE_CLASS_MAPPINGS = {
+    "StableZero123_Conditioning": StableZero123_Conditioning,
+}
--- a/ldm_patched/ldm/models/autoencoder.py
+++ b/ldm_patched/ldm/models/autoencoder.py
@ -8,6 +8,7 @@ from ldm_patched.ldm.modules.distributions.distributions import DiagonalGaussian

 from ldm_patched.ldm.util import instantiate_from_config
 from ldm_patched.ldm.modules.ema import LitEma
+import ldm_patched.modules.ops

 class DiagonalGaussianRegularizer(torch.nn.Module):
    def __init__(self, sample: bool = True):
@ -161,12 +162,12 @@ class AutoencodingEngineLegacy(AutoencodingEngine):
            },
            **kwargs,
        )
-        self.quant_conv = torch.nn.Conv2d(
+        self.quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(
            (1 + ddconfig["double_z"]) * ddconfig["z_channels"],
            (1 + ddconfig["double_z"]) * embed_dim,
            1,
        )
-        self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
+        self.post_quant_conv = ldm_patched.modules.ops.disable_weight_init.Conv2d(embed_dim, ddconfig["z_channels"], 1)
        self.embed_dim = embed_dim

    def get_autoencoder_params(self) -> list:
--- a/ldm_patched/ldm/modules/diffusionmodules/model.py
+++ b/ldm_patched/ldm/modules/diffusionmodules/model.py
@ -41,7 +41,7 @@ def nonlinearity(x):


 def Normalize(in_channels, num_groups=32):
-    return torch.nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
+    return ops.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)


 class Upsample(nn.Module):
--- a/ldm_patched/ldm/modules/diffusionmodules/upscaling.py
+++ b/ldm_patched/ldm/modules/diffusionmodules/upscaling.py
@ -43,8 +43,8 @@ class AbstractLowScaleModel(nn.Module):

    def q_sample(self, x_start, t, noise=None):
        noise = default(noise, lambda: torch.randn_like(x_start))
-        return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
-                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise)
+        return (extract_into_tensor(self.sqrt_alphas_cumprod.to(x_start.device), t, x_start.shape) * x_start +
+                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod.to(x_start.device), t, x_start.shape) * noise)

    def forward(self, x):
        return x, None
--- a/ldm_patched/ldm/modules/diffusionmodules/util.py
+++ b/ldm_patched/ldm/modules/diffusionmodules/util.py
@ -51,9 +51,9 @@ class AlphaBlender(nn.Module):
        if self.merge_strategy == "fixed":
            # make shape compatible
            # alpha = repeat(self.mix_factor, '1 -> b () t  () ()', t=t, b=bs)
-            alpha = self.mix_factor
+            alpha = self.mix_factor.to(image_only_indicator.device)
        elif self.merge_strategy == "learned":
-            alpha = torch.sigmoid(self.mix_factor)
+            alpha = torch.sigmoid(self.mix_factor.to(image_only_indicator.device))
            # make shape compatible
            # alpha = repeat(alpha, '1 -> s () ()', s = t * bs)
        elif self.merge_strategy == "learned_with_images":
@ -61,7 +61,7 @@ class AlphaBlender(nn.Module):
            alpha = torch.where(
                image_only_indicator.bool(),
                torch.ones(1, 1, device=image_only_indicator.device),
-                rearrange(torch.sigmoid(self.mix_factor), "... -> ... 1"),
+                rearrange(torch.sigmoid(self.mix_factor.to(image_only_indicator.device)), "... -> ... 1"),
            )
            alpha = rearrange(alpha, self.rearrange_pattern)
            # make shape compatible
--- a/ldm_patched/ldm/modules/encoders/noise_aug_modules.py
+++ b/ldm_patched/ldm/modules/encoders/noise_aug_modules.py
@ -15,12 +15,12 @@ class CLIPEmbeddingNoiseAugmentation(ImageConcatWithNoiseAugmentation):

    def scale(self, x):
        # re-normalize to centered mean and unit variance
-        x = (x - self.data_mean) * 1. / self.data_std
+        x = (x - self.data_mean.to(x.device)) * 1. / self.data_std.to(x.device)
        return x

    def unscale(self, x):
        # back to original data stats
-        x = (x * self.data_std) + self.data_mean
+        x = (x * self.data_std.to(x.device)) + self.data_mean.to(x.device)
        return x

    def forward(self, x, noise_level=None):
--- a/ldm_patched/ldm/modules/temporal_ae.py
+++ b/ldm_patched/ldm/modules/temporal_ae.py
@ -82,14 +82,14 @@ class VideoResBlock(ResnetBlock):

            x = self.time_stack(x, temb)

-            alpha = self.get_alpha(bs=b // timesteps)
+            alpha = self.get_alpha(bs=b // timesteps).to(x.device)
            x = alpha * x + (1.0 - alpha) * x_mix

            x = rearrange(x, "b c t h w -> (b t) c h w")
        return x


-class AE3DConv(torch.nn.Conv2d):
+class AE3DConv(ops.Conv2d):
    def __init__(self, in_channels, out_channels, video_kernel_size=3, *args, **kwargs):
        super().__init__(in_channels, out_channels, *args, **kwargs)
        if isinstance(video_kernel_size, Iterable):
@ -97,7 +97,7 @@ class AE3DConv(torch.nn.Conv2d):
        else:
            padding = int(video_kernel_size // 2)

-        self.time_mix_conv = torch.nn.Conv3d(
+        self.time_mix_conv = ops.Conv3d(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=video_kernel_size,
@ -167,7 +167,7 @@ class AttnVideoBlock(AttnBlock):
        emb = emb[:, None, :]
        x_mix = x_mix + emb

-        alpha = self.get_alpha()
+        alpha = self.get_alpha().to(x.device)
        x_mix = self.time_mix_block(x_mix, timesteps=timesteps)
        x = alpha * x + (1.0 - alpha) * x_mix  # alpha merge

--- a/ldm_patched/modules/args_parser.py
+++ b/ldm_patched/modules/args_parser.py
@ -66,6 +66,8 @@ fpvae_group.add_argument("--vae-in-fp16", action="store_true")
 fpvae_group.add_argument("--vae-in-fp32", action="store_true")
 fpvae_group.add_argument("--vae-in-bf16", action="store_true")

+parser.add_argument("--vae-in-cpu", action="store_true")
+
 fpte_group = parser.add_mutually_exclusive_group()
 fpte_group.add_argument("--clip-in-fp8-e4m3fn", action="store_true")
 fpte_group.add_argument("--clip-in-fp8-e5m2", action="store_true")
--- a/ldm_patched/modules/clip_model.py
+++ b/ldm_patched/modules/clip_model.py
@ -151,7 +151,7 @@ class CLIPVisionEmbeddings(torch.nn.Module):

    def forward(self, pixel_values):
        embeds = self.patch_embedding(pixel_values).flatten(2).transpose(1, 2)
-        return torch.cat([self.class_embedding.expand(pixel_values.shape[0], 1, -1), embeds], dim=1) + self.position_embedding.weight
+        return torch.cat([self.class_embedding.to(embeds.device).expand(pixel_values.shape[0], 1, -1), embeds], dim=1) + self.position_embedding.weight.to(embeds.device)


 class CLIPVision(torch.nn.Module):
--- a/ldm_patched/modules/controlnet.py
+++ b/ldm_patched/modules/controlnet.py
@ -283,7 +283,7 @@ class ControlLora(ControlNet):
        cm = self.control_model.state_dict()

        for k in sd:
-            weight = ldm_patched.modules.model_management.resolve_lowvram_weight(sd[k], diffusion_model, k)
+            weight = sd[k]
            try:
                ldm_patched.modules.utils.set_attr(self.control_model, k, weight)
            except:
--- a/ldm_patched/modules/model_base.py
+++ b/ldm_patched/modules/model_base.py
@ -126,9 +126,15 @@ class BaseModel(torch.nn.Module):
                        cond_concat.append(blank_inpaint_image_like(noise))
            data = torch.cat(cond_concat, dim=1)
            out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(data)
+
        adm = self.encode_adm(**kwargs)
        if adm is not None:
            out['y'] = ldm_patched.modules.conds.CONDRegular(adm)
+
+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn)
+
        return out

    def load_model_weights(self, sd, unet_prefix=""):
@ -156,11 +162,7 @@ class BaseModel(torch.nn.Module):

    def state_dict_for_saving(self, clip_state_dict, vae_state_dict):
        clip_state_dict = self.model_config.process_clip_state_dict_for_saving(clip_state_dict)
-        unet_sd = self.diffusion_model.state_dict()
-        unet_state_dict = {}
-        for k in unet_sd:
-            unet_state_dict[k] = ldm_patched.modules.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)
-
+        unet_state_dict = self.diffusion_model.state_dict()
        unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
        vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict)
        if self.get_dtype() == torch.float16:
@ -322,9 +324,43 @@ class SVD_img2vid(BaseModel):

        out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(latent_image)

+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn)
+
        if "time_conditioning" in kwargs:
            out["time_context"] = ldm_patched.modules.conds.CONDCrossAttn(kwargs["time_conditioning"])

        out['image_only_indicator'] = ldm_patched.modules.conds.CONDConstant(torch.zeros((1,), device=device))
        out['num_video_frames'] = ldm_patched.modules.conds.CONDConstant(noise.shape[0])
        return out
+
+class Stable_Zero123(BaseModel):
+    def __init__(self, model_config, model_type=ModelType.EPS, device=None, cc_projection_weight=None, cc_projection_bias=None):
+        super().__init__(model_config, model_type, device=device)
+        self.cc_projection = ldm_patched.modules.ops.manual_cast.Linear(cc_projection_weight.shape[1], cc_projection_weight.shape[0], dtype=self.get_dtype(), device=device)
+        self.cc_projection.weight.copy_(cc_projection_weight)
+        self.cc_projection.bias.copy_(cc_projection_bias)
+
+    def extra_conds(self, **kwargs):
+        out = {}
+
+        latent_image = kwargs.get("concat_latent_image", None)
+        noise = kwargs.get("noise", None)
+
+        if latent_image is None:
+            latent_image = torch.zeros_like(noise)
+
+        if latent_image.shape[1:] != noise.shape[1:]:
+            latent_image = utils.common_upscale(latent_image, noise.shape[-1], noise.shape[-2], "bilinear", "center")
+
+        latent_image = utils.resize_to_batch_size(latent_image, noise.shape[0])
+
+        out['c_concat'] = ldm_patched.modules.conds.CONDNoiseShape(latent_image)
+
+        cross_attn = kwargs.get("cross_attn", None)
+        if cross_attn is not None:
+            if cross_attn.shape[-1] != 768:
+                cross_attn = self.cc_projection(cross_attn)
+            out['c_crossattn'] = ldm_patched.modules.conds.CONDCrossAttn(cross_attn)
+        return out
--- a/ldm_patched/modules/model_management.py
+++ b/ldm_patched/modules/model_management.py
@ -186,6 +186,9 @@ except:
 if is_intel_xpu():
    VAE_DTYPE = torch.bfloat16

+if args.vae_in_cpu:
+    VAE_DTYPE = torch.float32
+
 if args.vae_in_fp16:
    VAE_DTYPE = torch.float16
 elif args.vae_in_bf16:
@ -218,15 +221,8 @@ if args.all_in_fp16:
    FORCE_FP16 = True

 if lowvram_available:
-    try:
-        import accelerate
-        if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM):
-            vram_state = set_vram_to
-    except Exception as e:
-        import traceback
-        print(traceback.format_exc())
-        print("ERROR: LOW VRAM MODE NEEDS accelerate.")
-        lowvram_available = False
+    if set_vram_to in (VRAMState.LOW_VRAM, VRAMState.NO_VRAM):
+        vram_state = set_vram_to


 if cpu_state != CPUState.GPU:
@ -266,6 +262,14 @@ print("VAE dtype:", VAE_DTYPE)

 current_loaded_models = []

+def module_size(module):
+    module_mem = 0
+    sd = module.state_dict()
+    for k in sd:
+        t = sd[k]
+        module_mem += t.nelement() * t.element_size()
+    return module_mem
+
 class LoadedModel:
    def __init__(self, model):
        self.model = model
@ -298,8 +302,20 @@ class LoadedModel:

        if lowvram_model_memory > 0:
            print("loading in lowvram mode", lowvram_model_memory/(1024 * 1024))
-            device_map = accelerate.infer_auto_device_map(self.real_model, max_memory={0: "{}MiB".format(lowvram_model_memory // (1024 * 1024)), "cpu": "16GiB"})
-            accelerate.dispatch_model(self.real_model, device_map=device_map, main_device=self.device)
+            mem_counter = 0
+            for m in self.real_model.modules():
+                if hasattr(m, "ldm_patched_cast_weights"):
+                    m.prev_ldm_patched_cast_weights = m.ldm_patched_cast_weights
+                    m.ldm_patched_cast_weights = True
+                    module_mem = module_size(m)
+                    if mem_counter + module_mem < lowvram_model_memory:
+                        m.to(self.device)
+                        mem_counter += module_mem
+                elif hasattr(m, "weight"): #only modules with ldm_patched_cast_weights can be set to lowvram mode
+                    m.to(self.device)
+                    mem_counter += module_size(m)
+                    print("lowvram: loaded module regularly", m)
+
            self.model_accelerated = True

        if is_intel_xpu() and not args.disable_ipex_hijack:
@ -309,7 +325,11 @@ class LoadedModel:

    def model_unload(self):
        if self.model_accelerated:
-            accelerate.hooks.remove_hook_from_submodules(self.real_model)
+            for m in self.real_model.modules():
+                if hasattr(m, "prev_ldm_patched_cast_weights"):
+                    m.ldm_patched_cast_weights = m.prev_ldm_patched_cast_weights
+                    del m.prev_ldm_patched_cast_weights
+
            self.model_accelerated = False

        self.model.unpatch_model(self.model.offload_device)
@ -402,14 +422,14 @@ def load_models_gpu(models, memory_required=0):
        if lowvram_available and (vram_set_state == VRAMState.LOW_VRAM or vram_set_state == VRAMState.NORMAL_VRAM):
            model_size = loaded_model.model_memory_required(torch_dev)
            current_free_mem = get_free_memory(torch_dev)
-            lowvram_model_memory = int(max(256 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
+            lowvram_model_memory = int(max(64 * (1024 * 1024), (current_free_mem - 1024 * (1024 * 1024)) / 1.3 ))
            if model_size > (current_free_mem - inference_memory): #only switch to lowvram if really necessary
                vram_set_state = VRAMState.LOW_VRAM
            else:
                lowvram_model_memory = 0

        if vram_set_state == VRAMState.NO_VRAM:
-            lowvram_model_memory = 256 * 1024 * 1024
+            lowvram_model_memory = 64 * 1024 * 1024

        cur_loaded_model = loaded_model.model_load(lowvram_model_memory)
        current_loaded_models.insert(0, loaded_model)
@ -538,6 +558,8 @@ def intermediate_device():
        return torch.device("cpu")

 def vae_device():
+    if args.vae_in_cpu:
+        return torch.device("cpu")
    return get_torch_device()

 def vae_offload_device():
@ -566,6 +588,11 @@ def supports_dtype(device, dtype): #TODO
        return True
    return False

+def device_supports_non_blocking(device):
+    if is_device_mps(device):
+        return False #pytorch bug? mps doesn't support non blocking
+    return True
+
 def cast_to_device(tensor, device, dtype, copy=False):
    device_supports_cast = False
    if tensor.dtype == torch.float32 or tensor.dtype == torch.float16:
@ -576,9 +603,7 @@ def cast_to_device(tensor, device, dtype, copy=False):
        elif is_intel_xpu():
            device_supports_cast = True

-    non_blocking = True
-    if is_device_mps(device):
-        non_blocking = False #pytorch bug? mps doesn't support non blocking
+    non_blocking = device_supports_non_blocking(device)

    if device_supports_cast:
        if copy:
@ -742,11 +767,11 @@ def soft_empty_cache(force=False):
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()

-def resolve_lowvram_weight(weight, model, key):
-    if weight.device == torch.device("meta"): #lowvram NOTE: this depends on the inner working of the accelerate library so it might break.
-        key_split = key.split('.')              # I have no idea why they don't just leave the weight there instead of using the meta device.
-        op = ldm_patched.modules.utils.get_attr(model, '.'.join(key_split[:-1]))
-        weight = op._hf_hook.weights_map[key_split[-1]]
+def unload_all_models():
+    free_memory(1e30, get_torch_device())
+
+
+def resolve_lowvram_weight(weight, model, key): #TODO: remove
    return weight

 #TODO: might be cleaner to put this somewhere else
--- a/ldm_patched/modules/model_patcher.py
+++ b/ldm_patched/modules/model_patcher.py
@ -28,13 +28,9 @@ class ModelPatcher:
        if self.size > 0:
            return self.size
        model_sd = self.model.state_dict()
-        size = 0
-        for k in model_sd:
-            t = model_sd[k]
-            size += t.nelement() * t.element_size()
-        self.size = size
+        self.size = ldm_patched.modules.model_management.module_size(self.model)
        self.model_keys = set(model_sd.keys())
-        return size
+        return self.size

    def clone(self):
        n = ModelPatcher(self.model, self.load_device, self.offload_device, self.size, self.current_device, weight_inplace_update=self.weight_inplace_update)
@ -55,14 +51,18 @@ class ModelPatcher:
    def memory_required(self, input_shape):
        return self.model.memory_required(input_shape=input_shape)

-    def set_model_sampler_cfg_function(self, sampler_cfg_function):
+    def set_model_sampler_cfg_function(self, sampler_cfg_function, disable_cfg1_optimization=False):
        if len(inspect.signature(sampler_cfg_function).parameters) == 3:
            self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way
        else:
            self.model_options["sampler_cfg_function"] = sampler_cfg_function
+        if disable_cfg1_optimization:
+            self.model_options["disable_cfg1_optimization"] = True

-    def set_model_sampler_post_cfg_function(self, post_cfg_function):
+    def set_model_sampler_post_cfg_function(self, post_cfg_function, disable_cfg1_optimization=False):
        self.model_options["sampler_post_cfg_function"] = self.model_options.get("sampler_post_cfg_function", []) + [post_cfg_function]
+        if disable_cfg1_optimization:
+            self.model_options["disable_cfg1_optimization"] = True

    def set_model_unet_function_wrapper(self, unet_wrapper_function):
        self.model_options["model_function_wrapper"] = unet_wrapper_function
--- a/ldm_patched/modules/ops.py
+++ b/ldm_patched/modules/ops.py
@ -1,27 +1,93 @@
 import torch
 from contextlib import contextmanager
+import ldm_patched.modules.model_management
+
+def cast_bias_weight(s, input):
+    bias = None
+    non_blocking = ldm_patched.modules.model_management.device_supports_non_blocking(input.device)
+    if s.bias is not None:
+        bias = s.bias.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking)
+    weight = s.weight.to(device=input.device, dtype=input.dtype, non_blocking=non_blocking)
+    return weight, bias
+

 class disable_weight_init:
    class Linear(torch.nn.Linear):
+        ldm_patched_cast_weights = False
        def reset_parameters(self):
            return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return torch.nn.functional.linear(input, weight, bias)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
    class Conv2d(torch.nn.Conv2d):
+        ldm_patched_cast_weights = False
        def reset_parameters(self):
            return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return self._conv_forward(input, weight, bias)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
    class Conv3d(torch.nn.Conv3d):
+        ldm_patched_cast_weights = False
        def reset_parameters(self):
            return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return self._conv_forward(input, weight, bias)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
    class GroupNorm(torch.nn.GroupNorm):
+        ldm_patched_cast_weights = False
        def reset_parameters(self):
            return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
+
    class LayerNorm(torch.nn.LayerNorm):
+        ldm_patched_cast_weights = False
        def reset_parameters(self):
            return None

+        def forward_ldm_patched_cast_weights(self, input):
+            weight, bias = cast_bias_weight(self, input)
+            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+
+        def forward(self, *args, **kwargs):
+            if self.ldm_patched_cast_weights:
+                return self.forward_ldm_patched_cast_weights(*args, **kwargs)
+            else:
+                return super().forward(*args, **kwargs)
+
    @classmethod
    def conv_nd(s, dims, *args, **kwargs):
        if dims == 2:
@ -31,35 +97,19 @@ class disable_weight_init:
        else:
            raise ValueError(f"unsupported dimensions: {dims}")

-def cast_bias_weight(s, input):
-    bias = None
-    if s.bias is not None:
-        bias = s.bias.to(device=input.device, dtype=input.dtype)
-    weight = s.weight.to(device=input.device, dtype=input.dtype)
-    return weight, bias

 class manual_cast(disable_weight_init):
    class Linear(disable_weight_init.Linear):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.linear(input, weight, bias)
+        ldm_patched_cast_weights = True

    class Conv2d(disable_weight_init.Conv2d):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+        ldm_patched_cast_weights = True

    class Conv3d(disable_weight_init.Conv3d):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return self._conv_forward(input, weight, bias)
+        ldm_patched_cast_weights = True

    class GroupNorm(disable_weight_init.GroupNorm):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.group_norm(input, self.num_groups, weight, bias, self.eps)
+        ldm_patched_cast_weights = True

    class LayerNorm(disable_weight_init.LayerNorm):
-        def forward(self, input):
-            weight, bias = cast_bias_weight(self, input)
-            return torch.nn.functional.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+        ldm_patched_cast_weights = True
--- a/ldm_patched/modules/sample.py
+++ b/ldm_patched/modules/sample.py
@ -47,7 +47,8 @@ def convert_cond(cond):
        temp = c[1].copy()
        model_conds = temp.get("model_conds", {})
        if c[0] is not None:
-            model_conds["c_crossattn"] = ldm_patched.modules.conds.CONDCrossAttn(c[0])
+            model_conds["c_crossattn"] = ldm_patched.modules.conds.CONDCrossAttn(c[0]) #TODO: remove
+            temp["cross_attn"] = c[0]
        temp["model_conds"] = model_conds
        out.append(temp)
    return out
--- a/ldm_patched/modules/samplers.py
+++ b/ldm_patched/modules/samplers.py
@ -244,7 +244,7 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options):
 #The main sampling function shared by all the samplers
 #Returns denoised
 def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_options={}, seed=None):
-        if math.isclose(cond_scale, 1.0):
+        if math.isclose(cond_scale, 1.0) and model_options.get("disable_cfg1_optimization", False) == False:
            uncond_ = None
        else:
            uncond_ = uncond
@ -599,6 +599,13 @@ def sample(model, noise, positive, negative, cfg, device, sampler, sigmas, model
    calculate_start_end_timesteps(model, negative)
    calculate_start_end_timesteps(model, positive)

+    if latent_image is not None:
+        latent_image = model.process_latent_in(latent_image)
+
+    if hasattr(model, 'extra_conds'):
+        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
+        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
+
    #make sure each cond area has an opposite one with the same area
    for c in positive:
        create_cond_with_same_area_if_none(negative, c)
@ -610,13 +617,6 @@ def sample(model, noise, positive, negative, cfg, device, sampler, sigmas, model
    apply_empty_x_to_equal_area(list(filter(lambda c: c.get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x])
    apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

-    if latent_image is not None:
-        latent_image = model.process_latent_in(latent_image)
-
-    if hasattr(model, 'extra_conds'):
-        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
-        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
-
    extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": model_options, "seed":seed}

    samples = sampler.sample(model_wrap, sigmas, extra_args, callback, noise, latent_image, denoise_mask, disable_pbar)
--- a/ldm_patched/modules/supported_models.py
+++ b/ldm_patched/modules/supported_models.py
@ -252,5 +252,32 @@ class SVD_img2vid(supported_models_base.BASE):
    def clip_target(self):
        return None

-models = [SD15, SD20, SD21UnclipL, SD21UnclipH, SDXLRefiner, SDXL, SSD1B, Segmind_Vega]
+class Stable_Zero123(supported_models_base.BASE):
+    unet_config = {
+        "context_dim": 768,
+        "model_channels": 320,
+        "use_linear_in_transformer": False,
+        "adm_in_channels": None,
+        "use_temporal_attention": False,
+        "in_channels": 8,
+    }
+
+    unet_extra_config = {
+        "num_heads": 8,
+        "num_head_channels": -1,
+    }
+
+    clip_vision_prefix = "cond_stage_model.model.visual."
+
+    latent_format = latent_formats.SD15
+
+    def get_model(self, state_dict, prefix="", device=None):
+        out = model_base.Stable_Zero123(self, device=device, cc_projection_weight=state_dict["cc_projection.weight"], cc_projection_bias=state_dict["cc_projection.bias"])
+        return out
+
+    def clip_target(self):
+        return None
+
+
+models = [Stable_Zero123, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXLRefiner, SDXL, SSD1B, Segmind_Vega]
 models += [SVD_img2vid]
--- a/ldm_patched/taesd/taesd.py
+++ b/ldm_patched/taesd/taesd.py
@ -7,9 +7,10 @@ import torch
 import torch.nn as nn

 import ldm_patched.modules.utils
+import ldm_patched.modules.ops

 def conv(n_in, n_out, **kwargs):
-    return nn.Conv2d(n_in, n_out, 3, padding=1, **kwargs)
+    return ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 3, padding=1, **kwargs)

 class Clamp(nn.Module):
    def forward(self, x):
@ -19,7 +20,7 @@ class Block(nn.Module):
    def __init__(self, n_in, n_out):
        super().__init__()
        self.conv = nn.Sequential(conv(n_in, n_out), nn.ReLU(), conv(n_out, n_out), nn.ReLU(), conv(n_out, n_out))
-        self.skip = nn.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity()
+        self.skip = ldm_patched.modules.ops.disable_weight_init.Conv2d(n_in, n_out, 1, bias=False) if n_in != n_out else nn.Identity()
        self.fuse = nn.ReLU()
    def forward(self, x):
        return self.fuse(self.conv(x) + self.skip(x))
--- a/ldm_patched/utils/path_utils.py
+++ b/ldm_patched/utils/path_utils.py
@ -184,8 +184,7 @@ def cached_filename_list_(folder_name):
    if folder_name not in filename_list_cache:
        return None
    out = filename_list_cache[folder_name]
-    if time.perf_counter() < (out[2] + 0.5):
-        return out
+
    for x in out[1]:
        time_modified = out[1][x]
        folder = x
--- a/modules/advanced_parameters.py
+++ b/modules/advanced_parameters.py
@ -5,7 +5,8 @@ disable_preview, adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adapt
    debugging_cn_preprocessor, skipping_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, \
    refiner_swap_method, \
    freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2, \
-    debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field = [None] * 32
+    debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field, \
+    inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate = [None] * 35


 def set_all_advanced_parameters(*args):
@ -16,7 +17,8 @@ def set_all_advanced_parameters(*args):
        debugging_cn_preprocessor, skipping_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, \
        refiner_swap_method, \
        freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2, \
-        debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field
+        debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field, \
+        inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate

    disable_preview, adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, \
        scheduler_name, generate_image_grid, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, \
@ -25,6 +27,7 @@ def set_all_advanced_parameters(*args):
        debugging_cn_preprocessor, skipping_cn_preprocessor, controlnet_softness, canny_low_threshold, canny_high_threshold, \
        refiner_swap_method, \
        freeu_enabled, freeu_b1, freeu_b2, freeu_s1, freeu_s2, \
-        debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field = args
+        debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field, \
+        inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate = args

    return
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@ -42,7 +42,7 @@ def worker():
    from modules.private_logger import log
    from extras.expansion import safe_str
    from modules.util import remove_empty_str, HWC3, resize_image, \
-        get_image_shape_ceil, set_image_shape_ceil, get_shape_ceil, resample_image
+        get_image_shape_ceil, set_image_shape_ceil, get_shape_ceil, resample_image, erode_or_dilate
    from modules.upscaler import perform_upscale

    try:
@ -142,6 +142,7 @@ def worker():
        outpaint_selections = args.pop()
        inpaint_input_image = args.pop()
        inpaint_additional_prompt = args.pop()
+        inpaint_mask_image_upload = args.pop()

        cn_tasks = {x: [] for x in flags.ip_list}
        for _ in range(4):
@ -277,6 +278,22 @@ def worker():
                    and isinstance(inpaint_input_image, dict):
                inpaint_image = inpaint_input_image['image']
                inpaint_mask = inpaint_input_image['mask'][:, :, 0]
+                
+                if advanced_parameters.inpaint_mask_upload_checkbox:
+                    if isinstance(inpaint_mask_image_upload, np.ndarray):
+                        if inpaint_mask_image_upload.ndim == 3:
+                            H, W, C = inpaint_image.shape
+                            inpaint_mask_image_upload = resample_image(inpaint_mask_image_upload, width=W, height=H)
+                            inpaint_mask_image_upload = np.mean(inpaint_mask_image_upload, axis=2)
+                            inpaint_mask_image_upload = (inpaint_mask_image_upload > 127).astype(np.uint8) * 255
+                            inpaint_mask = np.maximum(inpaint_mask, inpaint_mask_image_upload)
+
+                if int(advanced_parameters.inpaint_erode_or_dilate) != 0:
+                    inpaint_mask = erode_or_dilate(inpaint_mask, advanced_parameters.inpaint_erode_or_dilate)
+
+                if advanced_parameters.invert_mask_checkbox:
+                    inpaint_mask = 255 - inpaint_mask
+
                inpaint_image = HWC3(inpaint_image)
                if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
                        and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0):
--- a/modules/config.py
+++ b/modules/config.py
@ -243,10 +243,15 @@ default_advanced_checkbox = get_config_item_or_set_default(
    default_value=False,
    validator=lambda x: isinstance(x, bool)
 )
+default_max_image_number = get_config_item_or_set_default(
+    key='default_max_image_number',
+    default_value=32,
+    validator=lambda x: isinstance(x, int) and x >= 1
+)
 default_image_number = get_config_item_or_set_default(
    key='default_image_number',
    default_value=2,
-    validator=lambda x: isinstance(x, int) and 1 <= x <= 32
+    validator=lambda x: isinstance(x, int) and 1 <= x <= default_max_image_number
 )
 checkpoint_downloads = get_config_item_or_set_default(
    key='checkpoint_downloads',
--- a/modules/meta_parser.py
+++ b/modules/meta_parser.py
@ -3,7 +3,7 @@ import gradio as gr
 import modules.config


-def load_parameter_button_click(raw_prompt_txt):
+def load_parameter_button_click(raw_prompt_txt, is_generating):
    loaded_parameter_dict = json.loads(raw_prompt_txt)
    assert isinstance(loaded_parameter_dict, dict)

@ -128,7 +128,11 @@ def load_parameter_button_click(raw_prompt_txt):
        results.append(gr.update())
        results.append(gr.update())

-    results.append(gr.update(visible=True))
+    if is_generating:
+        results.append(gr.update())
+    else:
+        results.append(gr.update(visible=True))
+    
    results.append(gr.update(visible=False))

    for i in range(1, 6):
--- a/modules/ops.py
+++ b/modules/ops.py
@ -0,0 +1,19 @@
+import torch
+import contextlib
+
+
+@contextlib.contextmanager
+def use_patched_ops(operations):
+    op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm']
+    backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names}
+
+    try:
+        for op_name in op_names:
+            setattr(torch.nn, op_name, getattr(operations, op_name))
+
+        yield
+
+    finally:
+        for op_name in op_names:
+            setattr(torch.nn, op_name, backups[op_name])
+    return
--- a/modules/patch.py
+++ b/modules/patch.py
@ -218,7 +218,7 @@ def compute_cfg(uncond, cond, cfg_scale, t):
 def patched_sampling_function(model, x, timestep, uncond, cond, cond_scale, model_options=None, seed=None):
    global eps_record

-    if math.isclose(cond_scale, 1.0):
+    if math.isclose(cond_scale, 1.0) and not model_options.get("disable_cfg1_optimization", False):
        final_x0 = calc_cond_uncond_batch(model, cond, None, x, timestep, model_options)[0]

        if eps_record is not None:
@ -480,6 +480,10 @@ def build_loaded(module, loader_name):


 def patch_all():
+    if ldm_patched.modules.model_management.directml_enabled:
+        ldm_patched.modules.model_management.lowvram_available = True
+        ldm_patched.modules.model_management.OOM_EXCEPTION = Exception
+    
    patch_all_precision()
    patch_all_clip()

--- a/modules/patch_clip.py
+++ b/modules/patch_clip.py
@ -16,30 +16,12 @@ import ldm_patched.modules.samplers
 import ldm_patched.modules.sd
 import ldm_patched.modules.sd1_clip
 import ldm_patched.modules.clip_vision
-import ldm_patched.modules.model_management as model_management
 import ldm_patched.modules.ops as ops
-import contextlib

+from modules.ops import use_patched_ops
 from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection


-@contextlib.contextmanager
-def use_patched_ops(operations):
-    op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm']
-    backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names}
-
-    try:
-        for op_name in op_names:
-            setattr(torch.nn, op_name, getattr(operations, op_name))
-
-        yield
-
-    finally:
-        for op_name in op_names:
-            setattr(torch.nn, op_name, backups[op_name])
-    return
-
-
 def patched_encode_token_weights(self, token_weight_pairs):
    to_encode = list()
    max_token_len = 0
--- a/modules/private_logger.py
+++ b/modules/private_logger.py
@ -44,13 +44,28 @@ def log(img, dic):
    )

    js = (
-        "<script>"
-        "function to_clipboard(txt) { "
-        "txt = decodeURIComponent(txt);"
-        "navigator.clipboard.writeText(txt);"
-        "alert('Copied to Clipboard!\\nPaste to prompt area to load parameters.\\nCurrent clipboard content is:\\n\\n' + txt);"
-        "}"
-        "</script>"
+        """<script>
+        function to_clipboard(txt) { 
+        txt = decodeURIComponent(txt);
+        if (navigator.clipboard && navigator.permissions) {
+            navigator.clipboard.writeText(txt)
+        } else {
+            const textArea = document.createElement('textArea')
+            textArea.value = txt
+            textArea.style.width = 0
+            textArea.style.position = 'fixed'
+            textArea.style.left = '-999px'
+            textArea.style.top = '10px'
+            textArea.setAttribute('readonly', 'readonly')
+            document.body.appendChild(textArea)
+
+            textArea.select()
+            document.execCommand('copy')
+            document.body.removeChild(textArea)
+        }
+        alert('Copied to Clipboard!\\nPaste to prompt area to load parameters.\\nCurrent clipboard content is:\\n\\n' + txt);
+        }
+        </script>"""
    )

    begin_part = f"<html><head><title>Fooocus Log {date_string}</title>{css_styles}</head><body>{js}<p>Fooocus Log {date_string} (private)</p>\n<p>All images are clean, without any hidden data/meta, and safe to share with others.</p><!--fooocus-log-split-->\n\n"
--- a/modules/sample_hijack.py
+++ b/modules/sample_hijack.py
@ -99,6 +99,13 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas
    calculate_start_end_timesteps(model, negative)
    calculate_start_end_timesteps(model, positive)

+    if latent_image is not None:
+        latent_image = model.process_latent_in(latent_image)
+
+    if hasattr(model, 'extra_conds'):
+        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
+        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
+
    #make sure each cond area has an opposite one with the same area
    for c in positive:
        create_cond_with_same_area_if_none(negative, c)
@ -111,13 +118,6 @@ def sample_hacked(model, noise, positive, negative, cfg, device, sampler, sigmas
    apply_empty_x_to_equal_area(list(filter(lambda c: c.get('control_apply_to_uncond', False) == True, positive)), negative, 'control', lambda cond_cnets, x: cond_cnets[x])
    apply_empty_x_to_equal_area(positive, negative, 'gligen', lambda cond_cnets, x: cond_cnets[x])

-    if latent_image is not None:
-        latent_image = model.process_latent_in(latent_image)
-
-    if hasattr(model, 'extra_conds'):
-        positive = encode_model_conds(model.extra_conds, positive, noise, device, "positive", latent_image=latent_image, denoise_mask=denoise_mask)
-        negative = encode_model_conds(model.extra_conds, negative, noise, device, "negative", latent_image=latent_image, denoise_mask=denoise_mask)
-
    extra_args = {"cond":positive, "uncond":negative, "cond_scale": cfg, "model_options": model_options, "seed":seed}

    if current_refiner is not None and hasattr(current_refiner.model, 'extra_conds'):
@ -174,7 +174,7 @@ def calculate_sigmas_scheduler_hacked(model, scheduler_name, steps):
    elif scheduler_name == "sgm_uniform":
        sigmas = normal_scheduler(model, steps, sgm=True)
    elif scheduler_name == "turbo":
-        sigmas = SDTurboScheduler().get_sigmas(namedtuple('Patcher', ['model'])(model=model), steps)[0]
+        sigmas = SDTurboScheduler().get_sigmas(namedtuple('Patcher', ['model'])(model=model), steps=steps, denoise=1.0)[0]
    else:
        raise TypeError("error invalid scheduler")
    return sigmas
--- a/modules/util.py
+++ b/modules/util.py
@ -3,6 +3,7 @@ import datetime
 import random
 import math
 import os
+import cv2

 from PIL import Image

@ -10,6 +11,15 @@ from PIL import Image
 LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)


+def erode_or_dilate(x, k):
+    k = int(k)
+    if k > 0:
+        return cv2.dilate(x, kernel=np.ones(shape=(3, 3), dtype=np.uint8), iterations=k)
+    if k < 0:
+        return cv2.erode(x, kernel=np.ones(shape=(3, 3), dtype=np.uint8), iterations=-k)
+    return x
+
+
 def resample_image(im, width, height):
    im = Image.fromarray(im)
    im = im.resize((int(width), int(height)), resample=LANCZOS)
--- a/readme.md
+++ b/readme.md
@ -38,7 +38,7 @@ Using Fooocus is as easy as (probably easier than) Midjourney – but this does

 | Midjourney | Fooocus |
 | - | - |
-| High-quality text-to-image without needing much prompt engineering or parameter tuning. <br> (Unknown method) | High-quality text-to-image without needing much prompt engineering or parameter tuning. <br> (Fooocus has offline GPT-2 based prompt processing engine and lots of sampling improvements so that results are always beautiful, no matter your prompt is as short as “house in garden” or as long as 1000 words) |
+| High-quality text-to-image without needing much prompt engineering or parameter tuning. <br> (Unknown method) | High-quality text-to-image without needing much prompt engineering or parameter tuning. <br> (Fooocus has an offline GPT-2 based prompt processing engine and lots of sampling improvements so that results are always beautiful, no matter if your prompt is as short as “house in garden” or as long as 1000 words) |
 | V1 V2 V3 V4 | Input Image -> Upscale or Variation -> Vary (Subtle) / Vary (Strong)|
 | U1 U2 U3 U4 | Input Image -> Upscale or Variation -> Upscale (1.5x) / Upscale (2x) |
 | Inpaint / Up / Down / Left / Right (Pan) | Input Image -> Inpaint or Outpaint -> Inpaint / Up / Down / Left / Right <br> (Fooocus uses its own inpaint algorithm and inpaint models so that results are more satisfying than all other software that uses standard SDXL inpaint method/model) |
@ -73,16 +73,16 @@ You can directly download Fooocus with:

 **[>>> Click here to download <<<](https://github.com/lllyasviel/Fooocus/releases/download/release/Fooocus_win64_2-1-831.7z)**

-After you download the file, please uncompress it, and then run the "run.bat".
+After you download the file, please uncompress it and then run the "run.bat".

 ![image](https://github.com/lllyasviel/Fooocus/assets/19834515/c49269c4-c274-4893-b368-047c401cc58c)

-In the first time you launch the software, it will automatically download models:
+The first time you launch the software, it will automatically download models:

 1. It will download [default models](#models) to the folder "Fooocus\models\checkpoints" given different presets. You can download them in advance if you do not want automatic download.
 2. Note that if you use inpaint, at the first time you inpaint an image, it will download [Fooocus's own inpaint control model from here](https://huggingface.co/lllyasviel/fooocus_inpaint/resolve/main/inpaint_v26.fooocus.patch) as the file "Fooocus\models\inpaint\inpaint_v26.fooocus.patch" (the size of this file is 1.28GB).

-After Fooocus 2.1.60, you will also have `run_anime.bat` and `run_realistic.bat`. They are different model presets (and requires different models, but they will be automatically downloaded). [Check here for more details](https://github.com/lllyasviel/Fooocus/discussions/679).
+After Fooocus 2.1.60, you will also have `run_anime.bat` and `run_realistic.bat`. They are different model presets (and require different models, but they will be automatically downloaded). [Check here for more details](https://github.com/lllyasviel/Fooocus/discussions/679).

 ![image](https://github.com/lllyasviel/Fooocus/assets/19834515/d386f817-4bd7-490c-ad89-c1e228c23447)

@ -99,7 +99,7 @@ Besides, recently many other software report that Nvidia driver above 532 is som
 Note that the minimal requirement is **4GB Nvidia GPU memory (4GB VRAM)** and **8GB system memory (8GB RAM)**. This requires using Microsoft’s Virtual Swap technique, which is automatically enabled by your Windows installation in most cases, so you often do not need to do anything about it. However, if you are not sure, or if you manually turned it off (would anyone really do that?), or **if you see any "RuntimeError: CPUAllocator"**, you can enable it here:

 <details>
-<summary>Click here to the see the image instruction. </summary>
+<summary>Click here to see the image instructions. </summary>

 ![image](https://github.com/lllyasviel/Fooocus/assets/19834515/2a06b130-fe9b-4504-94f1-2763be4476e9)

@ -123,7 +123,7 @@ See also the common problems and troubleshoots [here](troubleshoot.md).

 In Colab, you can modify the last line to `!python entry_with_update.py --share` or `!python entry_with_update.py --preset anime --share` or `!python entry_with_update.py --preset realistic --share` for Fooocus Default/Anime/Realistic Edition.

-Note that this Colab will disable refiner by default because Colab free's resource is relatively limited (and some "big" features like image prompt may cause free-tier Colab to disconnect). We make sure that basic text-to-image is always working on free-tier Colab.
+Note that this Colab will disable refiner by default because Colab free's resources are relatively limited (and some "big" features like image prompt may cause free-tier Colab to disconnect). We make sure that basic text-to-image is always working on free-tier Colab.

 Thanks to [camenduru](https://github.com/camenduru)!

@ -142,7 +142,7 @@ Then download the models: download [default models](#models) to the folder "Fooo
    conda activate fooocus
    python entry_with_update.py

-Or if you want to open a remote port, use
+Or, if you want to open a remote port, use

    conda activate fooocus
    python entry_with_update.py --listen
@ -151,7 +151,7 @@ Use `python entry_with_update.py --preset anime` or `python entry_with_update.py

 ### Linux (Using Python Venv)

-Your Linux needs to have **Python 3.10** installed, and lets say your Python can be called with command **python3** with your venv system working, you can
+Your Linux needs to have **Python 3.10** installed, and let's say your Python can be called with the command **python3** with your venv system working; you can

    git clone https://github.com/lllyasviel/Fooocus.git
    cd Fooocus
@ -164,7 +164,7 @@ See the above sections for model downloads. You can launch the software with:
    source fooocus_env/bin/activate
    python entry_with_update.py

-Or if you want to open a remote port, use
+Or, if you want to open a remote port, use

    source fooocus_env/bin/activate
    python entry_with_update.py --listen
@ -173,7 +173,7 @@ Use `python entry_with_update.py --preset anime` or `python entry_with_update.py

 ### Linux (Using native system Python)

-If you know what you are doing, and your Linux already has **Python 3.10** installed, and your Python can be called with command **python3** (and Pip with **pip3**), you can
+If you know what you are doing, and your Linux already has **Python 3.10** installed, and your Python can be called with the command **python3** (and Pip with **pip3**), you can

    git clone https://github.com/lllyasviel/Fooocus.git
    cd Fooocus
@ -183,7 +183,7 @@ See the above sections for model downloads. You can launch the software with:

    python3 entry_with_update.py

-Or if you want to open a remote port, use
+Or, if you want to open a remote port, use

    python3 entry_with_update.py --listen

@ -193,7 +193,7 @@ Use `python entry_with_update.py --preset anime` or `python entry_with_update.py

 Note that the [minimal requirement](#minimal-requirement) for different platforms is different.

-Same with the above instructions. You need to change torch to AMD version
+Same with the above instructions. You need to change torch to the AMD version

    pip uninstall torch torchvision torchaudio torchtext functorch xformers 
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6
@ -206,7 +206,7 @@ Use `python entry_with_update.py --preset anime` or `python entry_with_update.py

 Note that the [minimal requirement](#minimal-requirement) for different platforms is different.

-Same with Windows. Download the software, edit the content of `run.bat` as:
+Same with Windows. Download the software and edit the content of `run.bat` as:

    .\python_embeded\python.exe -m pip uninstall torch torchvision torchaudio torchtext functorch xformers -y
    .\python_embeded\python.exe -m pip install torch-directml
@ -217,7 +217,7 @@ Then run the `run.bat`.

 AMD is not intensively tested, however. The AMD support is in beta.

-For AMD, use `python entry_with_update.py --directml --preset anime` or `python entry_with_update.py --directml --preset realistic` for Fooocus Anime/Realistic Edition.
+For AMD, use `.\python_embeded\python.exe entry_with_update.py --directml --preset anime` or `.\python_embeded\python.exe entry_with_update.py --directml --preset realistic` for Fooocus Anime/Realistic Edition.

 ### Mac

@ -233,7 +233,7 @@ You can install Fooocus on Apple Mac silicon (M1 or M2) with macOS 'Catalina' or
 1. Create a new conda environment, `conda env create -f environment.yaml`.
 1. Activate your new conda environment, `conda activate fooocus`.
 1. Install the packages required by Fooocus, `pip install -r requirements_versions.txt`.
-1. Launch Fooocus by running `python entry_with_update.py`. (Some Mac M2 users may need `python entry_with_update.py --disable-offload-from-vram` to speed up model loading/unloading.) The first time you run Fooocus, it will automatically download the Stable Diffusion SDXL models and will take a significant time, depending on your internet connection.
+1. Launch Fooocus by running `python entry_with_update.py`. (Some Mac M2 users may need `python entry_with_update.py --disable-offload-from-vram` to speed up model loading/unloading.) The first time you run Fooocus, it will automatically download the Stable Diffusion SDXL models and will take a significant amount of time, depending on your internet connection.

 Use `python entry_with_update.py --preset anime` or `python entry_with_update.py --preset realistic` for Fooocus Anime/Realistic Edition.

@ -245,23 +245,22 @@ See the guidelines [here](https://github.com/lllyasviel/Fooocus/discussions/1405

 Below is the minimal requirement for running Fooocus locally. If your device capability is lower than this spec, you may not be able to use Fooocus locally. (Please let us know, in any case, if your device capability is lower but Fooocus still works.)

-| Operating System  | GPU                          | Minimal GPU Memory        | Minimal System Memory     | [System Swap](troubleshoot.md) | Note                                  |
-|-------------------|------------------------------|---------------------------|---------------------------|--------------------------------|---------------------------------------|
-| Windows/Linux     | Nvidia RTX 4XXX              | 4GB                       | 8GB                       | Required                       | fastest                               |
-| Windows/Linux     | Nvidia RTX 3XXX              | 4GB                       | 8GB                       | Required                       | usually faster than RTX 2XXX          |
-| Windows/Linux     | Nvidia RTX 2XXX              | 4GB                       | 8GB                       | Required                       | usually faster than GTX 1XXX          |
-| Windows/Linux     | Nvidia GTX 1XXX              | 8GB (&ast; 6GB uncertain) | 8GB                       | Required                       | only marginally faster than CPU       |
-| Windows/Linux     | Nvidia GTX 9XX               | 8GB                       | 8GB                       | Required                       | faster or slower than CPU             |
-| Windows/Linux     | Nvidia GTX < 9XX             | Not supported             | /                         | /                              | /                                     |
-| Windows           | AMD GPU                      | 16GB                      | 8GB                       | Required                       | via DirectML                          |
-| Linux             | AMD GPU                      | 8GB                       | 8GB                       | Required                       | via ROCm                              |
-| Windows           | &ast; AMD GPU ROCm (on hold) | 8GB     (on hold)         | 8GB             (on hold) | Required           (on hold)   | via ROCm     (on hold)                |
-| Mac               | M1/M2 MPS                    | Shared                    | Shared                    | Shared                         | about 9x slower than Nvidia RTX 3XXX  |
-| Windows/Linux/Mac | only use CPU                 | 0GB                       | 32GB                      | Required                       | about 17x slower than Nvidia RTX 3XXX |
+| Operating System  | GPU                          | Minimal GPU Memory           | Minimal System Memory     | [System Swap](troubleshoot.md) | Note                                                                       |
+|-------------------|------------------------------|------------------------------|---------------------------|--------------------------------|----------------------------------------------------------------------------|
+| Windows/Linux     | Nvidia RTX 4XXX              | 4GB                          | 8GB                       | Required                       | fastest                                                                    |
+| Windows/Linux     | Nvidia RTX 3XXX              | 4GB                          | 8GB                       | Required                       | usually faster than RTX 2XXX                                               |
+| Windows/Linux     | Nvidia RTX 2XXX              | 4GB                          | 8GB                       | Required                       | usually faster than GTX 1XXX                                               |
+| Windows/Linux     | Nvidia GTX 1XXX              | 8GB (&ast; 6GB uncertain)    | 8GB                       | Required                       | only marginally faster than CPU                                            |
+| Windows/Linux     | Nvidia GTX 9XX               | 8GB                          | 8GB                       | Required                       | faster or slower than CPU                                                  |
+| Windows/Linux     | Nvidia GTX < 9XX             | Not supported                | /                         | /                              | /                                                                          |
+| Windows           | AMD GPU                      | 8GB    (updated 2023 Dec 30) | 8GB                       | Required                       | via DirectML (&ast; ROCm is on hold), about 3x slower than Nvidia RTX 3XXX |
+| Linux             | AMD GPU                      | 8GB                          | 8GB                       | Required                       | via ROCm, about 1.5x slower than Nvidia RTX 3XXX                           |
+| Mac               | M1/M2 MPS                    | Shared                       | Shared                    | Shared                         | about 9x slower than Nvidia RTX 3XXX                                       |
+| Windows/Linux/Mac | only use CPU                 | 0GB                          | 32GB                      | Required                       | about 17x slower than Nvidia RTX 3XXX                                      |

 &ast; AMD GPU ROCm (on hold): The AMD is still working on supporting ROCm on Windows.

-&ast; Nvidia GTX 1XXX 6GB uncertain: Some people reports 6GB success on GTX 10XX but some other people reports failure cases.
+&ast; Nvidia GTX 1XXX 6GB uncertain: Some people report 6GB success on GTX 10XX, but some other people report failure cases.

 *Note that Fooocus is only for extremely high quality image generating. We will not support smaller models to reduce the requirement and sacrifice result quality.*

@ -272,7 +271,7 @@ See the common problems [here](troubleshoot.md).
 ## Default Models
 <a name="models"></a>

-Given different goals, the default models and configs of Fooocus is different:
+Given different goals, the default models and configs of Fooocus are different:

 | Task | Windows | Linux args | Main Model | Refiner | Config |
 | --- | --- | --- | --- | --- | --- |
@ -285,26 +284,26 @@ Note that the download is **automatic** - you do not need to do anything if the
 ## List of "Hidden" Tricks
 <a name="tech_list"></a>

-Below things are already inside the software, and **users do not need to do anything about these**.
+The below things are already inside the software, and **users do not need to do anything about these**.

 1. GPT2-based [prompt expansion as a dynamic style "Fooocus V2".](https://github.com/lllyasviel/Fooocus/discussions/117#raw) (similar to Midjourney's hidden pre-processsing and "raw" mode, or the LeonardoAI's Prompt Magic).
-2. Native refiner swap inside one single k-sampler. The advantage is that now the refiner model can reuse the base model's momentum (or ODE's history parameters) collected from k-sampling to achieve more coherent sampling. In Automatic1111's high-res fix and ComfyUI's node system, the base model and refiner use two independent k-samplers, which means the momentum is largely wasted, and the sampling continuity is broken. Fooocus uses its own advanced k-diffusion sampling that ensures seamless, native, and continuous swap in a refiner setup. (Update Aug 13: Actually I discussed this with Automatic1111 several days ago and it seems that the “native refiner swap inside one single k-sampler” is [merged]( https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12371) into the dev branch of webui. Great!)
-3. Negative ADM guidance. Because the highest resolution level of XL Base does not have cross attentions, the positive and negative signals for XL's highest resolution level cannot receive enough contrasts during the CFG sampling, causing the results look a bit plastic or overly smooth in certain cases. Fortunately, since the XL's highest resolution level is still conditioned on image aspect ratios (ADM), we can modify the adm on the positive/negative side to compensate for the lack of CFG contrast in the highest resolution level. (Update Aug 16, the IOS App [Drawing Things](https://apps.apple.com/us/app/draw-things-ai-generation/id6444050820) will support Negative ADM Guidance. Great!)
-4. We implemented a carefully tuned variation of the Section 5.1 of ["Improving Sample Quality of Diffusion Models Using Self-Attention Guidance"](https://arxiv.org/pdf/2210.00939.pdf). The weight is set to very low, but this is Fooocus's final guarantee to make sure that the XL will never yield overly smooth or plastic appearance (examples [here](https://github.com/lllyasviel/Fooocus/discussions/117#sharpness)). This can almostly eliminate all cases that XL still occasionally produce overly smooth results even with negative ADM guidance. (Update 2023 Aug 18, the Gaussian kernel of SAG is changed to an anisotropic kernel for better structure preservation and fewer artifacts.)
+2. Native refiner swap inside one single k-sampler. The advantage is that the refiner model can now reuse the base model's momentum (or ODE's history parameters) collected from k-sampling to achieve more coherent sampling. In Automatic1111's high-res fix and ComfyUI's node system, the base model and refiner use two independent k-samplers, which means the momentum is largely wasted, and the sampling continuity is broken. Fooocus uses its own advanced k-diffusion sampling that ensures seamless, native, and continuous swap in a refiner setup. (Update Aug 13: Actually, I discussed this with Automatic1111 several days ago, and it seems that the “native refiner swap inside one single k-sampler” is [merged]( https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12371) into the dev branch of webui. Great!)
+3. Negative ADM guidance. Because the highest resolution level of XL Base does not have cross attentions, the positive and negative signals for XL's highest resolution level cannot receive enough contrasts during the CFG sampling, causing the results to look a bit plastic or overly smooth in certain cases. Fortunately, since the XL's highest resolution level is still conditioned on image aspect ratios (ADM), we can modify the adm on the positive/negative side to compensate for the lack of CFG contrast in the highest resolution level. (Update Aug 16, the IOS App [Drawing Things](https://apps.apple.com/us/app/draw-things-ai-generation/id6444050820) will support Negative ADM Guidance. Great!)
+4. We implemented a carefully tuned variation of Section 5.1 of ["Improving Sample Quality of Diffusion Models Using Self-Attention Guidance"](https://arxiv.org/pdf/2210.00939.pdf). The weight is set to very low, but this is Fooocus's final guarantee to make sure that the XL will never yield an overly smooth or plastic appearance (examples [here](https://github.com/lllyasviel/Fooocus/discussions/117#sharpness)). This can almost eliminate all cases for which XL still occasionally produces overly smooth results, even with negative ADM guidance. (Update 2023 Aug 18, the Gaussian kernel of SAG is changed to an anisotropic kernel for better structure preservation and fewer artifacts.)
 5. We modified the style templates a bit and added the "cinematic-default".
 6. We tested the "sd_xl_offset_example-lora_1.0.safetensors" and it seems that when the lora weight is below 0.5, the results are always better than XL without lora.
 7. The parameters of samplers are carefully tuned.
-8. Because XL uses positional encoding for generation resolution, images generated by several fixed resolutions look a bit better than that from arbitrary resolutions (because the positional encoding is not very good at handling int numbers that are unseen during training). This suggests that the resolutions in UI may be hard coded for best results.
-9. Separated prompts for two different text encoders seem unnecessary. Separated prompts for base model and refiner may work but the effects are random, and we refrain from implement this.
-10. DPM family seems well-suited for XL, since XL sometimes generates overly smooth texture but DPM family sometimes generate overly dense detail in texture. Their joint effect looks neutral and appealing to human perception.
+8. Because XL uses positional encoding for generation resolution, images generated by several fixed resolutions look a bit better than those from arbitrary resolutions (because the positional encoding is not very good at handling int numbers that are unseen during training). This suggests that the resolutions in UI may be hard coded for best results.
+9. Separated prompts for two different text encoders seem unnecessary. Separated prompts for the base model and refiner may work, but the effects are random, and we refrain from implementing this.
+10. The DPM family seems well-suited for XL since XL sometimes generates overly smooth texture, but the DPM family sometimes generates overly dense detail in texture. Their joint effect looks neutral and appealing to human perception.
 11. A carefully designed system for balancing multiple styles as well as prompt expansion.
-12. Using automatic1111's method to normalize prompt emphasizing. This significantly improve results when users directly copy prompts from civitai.
-13. The joint swap system of refiner now also support img2img and upscale in a seamless way.
+12. Using automatic1111's method to normalize prompt emphasizing. This significantly improves results when users directly copy prompts from civitai.
+13. The joint swap system of the refiner now also supports img2img and upscale in a seamless way.
 14. CFG Scale and TSNR correction (tuned for SDXL) when CFG is bigger than 10.

 ## Customization

-After the first time you run Fooocus, a config file will be generated at `Fooocus\config.txt`. This file can be edited for changing the model path or default parameters.
+After the first time you run Fooocus, a config file will be generated at `Fooocus\config.txt`. This file can be edited to change the model path or default parameters.

 For example, an edited `Fooocus\config.txt` (this file will be generated after the first launch) may look like this:

@ -340,7 +339,7 @@ Many other keys, formats, and examples are in `Fooocus\config_modification_tutor

 Consider twice before you really change the config. If you find yourself breaking things, just delete `Fooocus\config.txt`. Fooocus will go back to default.

-A safter way is just to try "run_anime.bat" or "run_realistic.bat" - they should be already good enough for different tasks.
+A safer way is just to try "run_anime.bat" or "run_realistic.bat" - they should already be good enough for different tasks.

 ~Note that `user_path_config.txt` is deprecated and will be removed soon.~ (Edit: it is already removed.)

@ -394,7 +393,7 @@ The log is [here](update_log.md).

 ## Localization/Translation/I18N

-**We need your help!** Please help with translating Fooocus to international languages.
+**We need your help!** Please help translate Fooocus into international languages.

 You can put json files in the `language` folder to translate the user interface.

--- a/update_log.md
+++ b/update_log.md
@ -1,5 +1,13 @@
 **(2023 Dec 21) Hi all, the feature updating of Fooocus will be paused for about two or three weeks because we have some other workloads. See you soon and we will come back in mid or late Jan. However, you may still see updates if other collaborators are fixing bugs or solving problems.**

+# 2.1.860 (requested update)
+
+* Allow upload inpaint mask in developer mode.
+
+# 2.1.857 (requested update)
+
+* Begin to support 8GB AMD GPU on Windows.
+
 # 2.1.854

 * Add a button to copy parameters to clipboard in log.
--- a/webui.py
+++ b/webui.py
@ -190,7 +190,10 @@ with shared.gradio_root:
                                           outputs=ip_ad_cols + ip_types + ip_stops + ip_weights,
                                           queue=False, show_progress=False)
                    with gr.TabItem(label='Inpaint or Outpaint') as inpaint_tab:
-                        inpaint_input_image = grh.Image(label='Drag above image to here', source='upload', type='numpy', tool='sketch', height=500, brush_color="#FFFFFF", elem_id='inpaint_canvas')
+                        with gr.Row():
+                            inpaint_input_image = grh.Image(label='Drag inpaint or outpaint image to here', source='upload', type='numpy', tool='sketch', height=500, brush_color="#FFFFFF", elem_id='inpaint_canvas')
+                            inpaint_mask_image = grh.Image(label='Mask Upload', source='upload', type='numpy', height=500, visible=False)
+
                        with gr.Row():
                            inpaint_additional_prompt = gr.Textbox(placeholder="Describe what you want to inpaint.", elem_id='inpaint_additional_prompt', label='Inpaint Additional Prompt', visible=False)
                            outpaint_selections = gr.CheckboxGroup(choices=['Left', 'Right', 'Top', 'Bottom'], value=[], label='Outpaint Direction')
@ -230,7 +233,7 @@ with shared.gradio_root:
                aspect_ratios_selection = gr.Radio(label='Aspect Ratios', choices=modules.config.available_aspect_ratios,
                                                   value=modules.config.default_aspect_ratio, info='width × height',
                                                   elem_classes='aspect_ratios')
-                image_number = gr.Slider(label='Image Number', minimum=1, maximum=32, step=1, value=modules.config.default_image_number)
+                image_number = gr.Slider(label='Image Number', minimum=1, maximum=modules.config.default_max_image_number, step=1, value=modules.config.default_image_number)
                negative_prompt = gr.Textbox(label='Negative Prompt', show_label=True, placeholder="Type prompt here.",
                                             info='Describing what you do not want to see.', lines=2,
                                             elem_id='negative_prompt',
@ -423,7 +426,21 @@ with shared.gradio_root:
                                                                  'Value 1 is same as "Whole Image" in A1111. '
                                                                  'Only used in inpaint, not used in outpaint. '
                                                                  '(Outpaint always use 1.0)')
-                        inpaint_ctrls = [debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field]
+                        inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
+                                                            minimum=-64, maximum=64, step=1, value=0,
+                                                            info='Positive value will make white area in the mask larger, '
+                                                                 'negative value will make white area smaller.'
+                                                                 '(default is 0, always process before any mask invert)')
+                        inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False)
+                        invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False)
+                        
+                        inpaint_ctrls = [debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine,
+                                         inpaint_strength, inpaint_respective_field,
+                                         inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate]
+
+                        inpaint_mask_upload_checkbox.change(lambda x: gr.update(visible=x),
+                                                           inputs=inpaint_mask_upload_checkbox,
+                                                           outputs=inpaint_mask_image, queue=False, show_progress=False)

                    with gr.Tab(label='FreeU'):
                        freeu_enabled = gr.Checkbox(label='Enabled', value=False)
@ -514,10 +531,12 @@ with shared.gradio_root:
        ctrls += [base_model, refiner_model, refiner_switch] + lora_ctrls
        ctrls += [input_image_checkbox, current_tab]
        ctrls += [uov_method, uov_input_image]
-        ctrls += [outpaint_selections, inpaint_input_image, inpaint_additional_prompt]
+        ctrls += [outpaint_selections, inpaint_input_image, inpaint_additional_prompt, inpaint_mask_image]
        ctrls += ip_ctrls

-        def parse_meta(raw_prompt_txt):
+        state_is_generating = gr.State(False)
+
+        def parse_meta(raw_prompt_txt, is_generating):
            loaded_json = None
            try:
                if '{' in raw_prompt_txt:
@ -529,13 +548,16 @@ with shared.gradio_root:
                loaded_json = None

            if loaded_json is None:
-                return gr.update(), gr.update(visible=True), gr.update(visible=False)
+                if is_generating:
+                    return gr.update(), gr.update(), gr.update()
+                else:
+                    return gr.update(), gr.update(visible=True), gr.update(visible=False)

            return json.dumps(loaded_json), gr.update(visible=False), gr.update(visible=True)

-        prompt.input(parse_meta, inputs=prompt, outputs=[prompt, generate_button, load_parameter_button], queue=False, show_progress=False)
+        prompt.input(parse_meta, inputs=[prompt, state_is_generating], outputs=[prompt, generate_button, load_parameter_button], queue=False, show_progress=False)

-        load_parameter_button.click(modules.meta_parser.load_parameter_button_click, inputs=prompt, outputs=[
+        load_parameter_button.click(modules.meta_parser.load_parameter_button_click, inputs=[prompt, state_is_generating], outputs=[
            advanced_checkbox,
            image_number,
            prompt,
@ -561,12 +583,14 @@ with shared.gradio_root:
            load_parameter_button
        ] + lora_ctrls, queue=False, show_progress=False)

-        generate_button.click(lambda: (gr.update(visible=True, interactive=True), gr.update(visible=True, interactive=True), gr.update(visible=False), []), outputs=[stop_button, skip_button, generate_button, gallery]) \
+        generate_button.click(lambda: (gr.update(visible=True, interactive=True), gr.update(visible=True, interactive=True), gr.update(visible=False, interactive=False), [], True),
+                              outputs=[stop_button, skip_button, generate_button, gallery, state_is_generating]) \
            .then(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed) \
            .then(advanced_parameters.set_all_advanced_parameters, inputs=adps) \
            .then(fn=get_task, inputs=ctrls, outputs=currentTask) \
-            .then(fn=generate_clicked, inputs=currentTask, outputs=[progress_html, progress_window, progress_gallery, gallery]) \
-            .then(lambda: (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)), outputs=[generate_button, stop_button, skip_button]) \
+            .then(fn=generate_clicked, inputs=ctrls, outputs=[progress_html, progress_window, progress_gallery, gallery]) \
+            .then(lambda: (gr.update(visible=True, interactive=True), gr.update(visible=False, interactive=False), gr.update(visible=False, interactive=False), False),
+                  outputs=[generate_button, stop_button, skip_button, state_is_generating]) \
            .then(fn=lambda: None, _js='playNotification').then(fn=lambda: None, _js='refresh_grid_delayed')

        for notification_file in ['notification.ogg', 'notification.mp3']:
@ -584,7 +608,7 @@ with shared.gradio_root:
            return mode, ["Fooocus V2"]

        desc_btn.click(trigger_describe, inputs=[desc_method, desc_input_image],
-                       outputs=[prompt, style_selections], show_progress=True, queue=False)
+                       outputs=[prompt, style_selections], show_progress=True, queue=True)


 def dump_default_english_config():