From 3a727fd240151ab247fb554d47ff587a79e3bdb0 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 10:34:47 -0800 Subject: [PATCH 01/10] 2.1.844 * maintain clip vision device * update links in troubleshoot --- extras/ip_adapter.py | 9 +-- fooocus_version.py | 2 +- ldm_patched/contrib/external.py | 1 + ldm_patched/contrib/external_latent.py | 25 +++++++- .../contrib/external_model_advanced.py | 48 +++------------- ldm_patched/contrib/external_perpneg.py | 57 +++++++++++++++++++ ldm_patched/contrib/external_sag.py | 2 +- ldm_patched/modules/clip_vision.py | 31 ++++------ ldm_patched/modules/samplers.py | 3 +- troubleshoot.md | 12 ++-- 10 files changed, 113 insertions(+), 77 deletions(-) create mode 100644 ldm_patched/contrib/external_perpneg.py diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py index a145b68d..b18f0dfc 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -167,14 +167,7 @@ def preprocess(img, ip_adapter_path): ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device)) - - if clip_vision.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32): - outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2) + outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2) ip_adapter = entry['ip_adapter'] ip_layers = entry['ip_layers'] diff --git a/fooocus_version.py b/fooocus_version.py index 1a708c50..a7dac990 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.843' +version = '2.1.844' diff --git a/ldm_patched/contrib/external.py b/ldm_patched/contrib/external.py index e20b08c5..7f95f084 100644 --- a/ldm_patched/contrib/external.py +++ b/ldm_patched/contrib/external.py @@ -1870,6 +1870,7 @@ def init_custom_nodes(): "nodes_images.py", "nodes_video_model.py", "nodes_sag.py", + "nodes_perpneg.py", ] for node_file in extras_files: diff --git a/ldm_patched/contrib/external_latent.py b/ldm_patched/contrib/external_latent.py index e2364b88..c6f874e1 100644 --- a/ldm_patched/contrib/external_latent.py +++ b/ldm_patched/contrib/external_latent.py @@ -5,9 +5,7 @@ import torch def reshape_latent_to(target_shape, latent): if latent.shape[1:] != target_shape[1:]: - latent.movedim(1, -1) latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") - latent.movedim(-1, 1) return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0]) @@ -104,9 +102,32 @@ class LatentInterpolate: samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio)) return (samples_out,) +class LatentBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "batch" + + CATEGORY = "latent/batch" + + def batch(self, samples1, samples2): + samples_out = samples1.copy() + s1 = samples1["samples"] + s2 = samples2["samples"] + + if s1.shape[1:] != s2.shape[1:]: + s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center") + s = torch.cat((s1, s2), dim=0) + samples_out["samples"] = s + samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, 
s2.shape[0])]) + return (samples_out,) + NODE_CLASS_MAPPINGS = { "LatentAdd": LatentAdd, "LatentSubtract": LatentSubtract, "LatentMultiply": LatentMultiply, "LatentInterpolate": LatentInterpolate, + "LatentBatch": LatentBatch, } diff --git a/ldm_patched/contrib/external_model_advanced.py b/ldm_patched/contrib/external_model_advanced.py index 4ebd9dbf..03a2f045 100644 --- a/ldm_patched/contrib/external_model_advanced.py +++ b/ldm_patched/contrib/external_model_advanced.py @@ -19,41 +19,19 @@ class LCM(ldm_patched.modules.model_sampling.EPS): return c_out * x0 + c_skip * model_input -class ModelSamplingDiscreteDistilled(torch.nn.Module): +class ModelSamplingDiscreteDistilled(ldm_patched.modules.model_sampling.ModelSamplingDiscrete): original_timesteps = 50 - def __init__(self): - super().__init__() - self.sigma_data = 1.0 - timesteps = 1000 - beta_start = 0.00085 - beta_end = 0.012 + def __init__(self, model_config=None): + super().__init__(model_config) - betas = torch.linspace(beta_start**0.5, beta_end**0.5, timesteps, dtype=torch.float32) ** 2 - alphas = 1.0 - betas - alphas_cumprod = torch.cumprod(alphas, dim=0) + self.skip_steps = self.num_timesteps // self.original_timesteps - self.skip_steps = timesteps // self.original_timesteps - - - alphas_cumprod_valid = torch.zeros((self.original_timesteps), dtype=torch.float32) + sigmas_valid = torch.zeros((self.original_timesteps), dtype=torch.float32) for x in range(self.original_timesteps): - alphas_cumprod_valid[self.original_timesteps - 1 - x] = alphas_cumprod[timesteps - 1 - x * self.skip_steps] + sigmas_valid[self.original_timesteps - 1 - x] = self.sigmas[self.num_timesteps - 1 - x * self.skip_steps] - sigmas = ((1 - alphas_cumprod_valid) / alphas_cumprod_valid) ** 0.5 - self.set_sigmas(sigmas) - - def set_sigmas(self, sigmas): - self.register_buffer('sigmas', sigmas) - self.register_buffer('log_sigmas', sigmas.log()) - - @property - def sigma_min(self): - return self.sigmas[0] - - @property - def sigma_max(self): - return self.sigmas[-1] + self.set_sigmas(sigmas_valid) def timestep(self, sigma): log_sigma = sigma.log() @@ -68,14 +46,6 @@ class ModelSamplingDiscreteDistilled(torch.nn.Module): log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] return log_sigma.exp().to(timestep.device) - def percent_to_sigma(self, percent): - if percent <= 0.0: - return 999999999.9 - if percent >= 1.0: - return 0.0 - percent = 1.0 - percent - return self.sigma(torch.tensor(percent * 999.0)).item() - def rescale_zero_terminal_snr_sigmas(sigmas): alphas_cumprod = 1 / ((sigmas * sigmas) + 1) @@ -124,7 +94,7 @@ class ModelSamplingDiscrete: class ModelSamplingAdvanced(sampling_base, sampling_type): pass - model_sampling = ModelSamplingAdvanced() + model_sampling = ModelSamplingAdvanced(model.model.model_config) if zsnr: model_sampling.set_sigmas(rescale_zero_terminal_snr_sigmas(model_sampling.sigmas)) @@ -156,7 +126,7 @@ class ModelSamplingContinuousEDM: class ModelSamplingAdvanced(ldm_patched.modules.model_sampling.ModelSamplingContinuousEDM, sampling_type): pass - model_sampling = ModelSamplingAdvanced() + model_sampling = ModelSamplingAdvanced(model.model.model_config) model_sampling.set_sigma_range(sigma_min, sigma_max) m.add_object_patch("model_sampling", model_sampling) return (m, ) diff --git a/ldm_patched/contrib/external_perpneg.py b/ldm_patched/contrib/external_perpneg.py new file mode 100644 index 00000000..ec91681f --- /dev/null +++ b/ldm_patched/contrib/external_perpneg.py @@ -0,0 +1,57 @@ +# 
https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +import torch +import ldm_patched.modules.model_management +import ldm_patched.modules.sample +import ldm_patched.modules.samplers +import ldm_patched.modules.utils + + +class PerpNeg: + @classmethod + def INPUT_TYPES(s): + return {"required": {"model": ("MODEL", ), + "empty_conditioning": ("CONDITIONING", ), + "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing" + + def patch(self, model, empty_conditioning, neg_scale): + m = model.clone() + nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) + + def cfg_function(args): + model = args["model"] + noise_pred_pos = args["cond_denoised"] + noise_pred_neg = args["uncond_denoised"] + cond_scale = args["cond_scale"] + x = args["input"] + sigma = args["sigma"] + model_options = args["model_options"] + nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") + + (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) + + pos = noise_pred_pos - noise_pred_nocond + neg = noise_pred_neg - noise_pred_nocond + perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg + perp_neg = perp * neg_scale + cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) + cfg_result = x - cfg_result + return cfg_result + + m.set_model_sampler_cfg_function(cfg_function) + + return (m, ) + + +NODE_CLASS_MAPPINGS = { + "PerpNeg": PerpNeg, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PerpNeg": "Perp-Neg", +} diff --git a/ldm_patched/contrib/external_sag.py b/ldm_patched/contrib/external_sag.py index 3505b44e..06ca67fa 100644 --- a/ldm_patched/contrib/external_sag.py +++ b/ldm_patched/contrib/external_sag.py @@ -60,7 +60,7 @@ def create_blur_map(x0, attn, sigma=3.0, threshold=1.0): attn = attn.reshape(b, -1, hw1, hw2) # Global Average Pool mask = attn.mean(1, keepdim=False).sum(1, keepdim=False) > threshold - ratio = round(math.sqrt(lh * lw / hw1)) + ratio = math.ceil(math.sqrt(lh * lw / hw1)) mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)] # Reshape diff --git a/ldm_patched/modules/clip_vision.py b/ldm_patched/modules/clip_vision.py index eda441af..9699210d 100644 --- a/ldm_patched/modules/clip_vision.py +++ b/ldm_patched/modules/clip_vision.py @@ -19,11 +19,13 @@ class Output: def clip_preprocess(image, size=224): mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype) std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype) - scale = (size / min(image.shape[1], image.shape[2])) - image = torch.nn.functional.interpolate(image.movedim(-1, 1), size=(round(scale * image.shape[1]), round(scale * image.shape[2])), mode="bicubic", antialias=True) - h = (image.shape[2] - size)//2 - w = (image.shape[3] - size)//2 - image = image[:,:,h:h+size,w:w+size] + image = image.movedim(-1, 1) + if not (image.shape[2] == size and image.shape[3] == size): + scale = (size / min(image.shape[2], image.shape[3])) + image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True) + h = (image.shape[2] - size)//2 + w = (image.shape[3] - size)//2 + image = image[:,:,h:h+size,w:w+size] image = torch.clip((255. 
* image), 0, 255).round() / 255.0 return (image - mean.view([3,1,1])) / std.view([3,1,1]) @@ -34,11 +36,9 @@ class ClipVisionModel(): self.load_device = ldm_patched.modules.model_management.text_encoder_device() offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() - self.dtype = torch.float32 - if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): - self.dtype = torch.float16 - - self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.disable_weight_init) + self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device) + self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast) + self.model.eval() self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) def load_sd(self, sd): @@ -46,15 +46,8 @@ class ClipVisionModel(): def encode_image(self, image): ldm_patched.modules.model_management.load_model_gpu(self.patcher) - pixel_values = clip_preprocess(image.to(self.load_device)) - - if self.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32): - out = self.model(pixel_values=pixel_values, intermediate_output=-2) + pixel_values = clip_preprocess(image.to(self.load_device)).float() + out = self.model(pixel_values=pixel_values, intermediate_output=-2) outputs = Output() outputs["last_hidden_state"] = out[0].to(ldm_patched.modules.model_management.intermediate_device()) diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py index 4e13d727..bfcb3f56 100644 --- a/ldm_patched/modules/samplers.py +++ b/ldm_patched/modules/samplers.py @@ -251,7 +251,8 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option cond_pred, uncond_pred = calc_cond_uncond_batch(model, cond, uncond_, x, timestep, model_options) if "sampler_cfg_function" in model_options: - args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep} + args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep, + "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options} cfg_result = x - model_options["sampler_cfg_function"](args) else: cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale diff --git a/troubleshoot.md b/troubleshoot.md index 7be743d5..7e079742 100644 --- a/troubleshoot.md +++ b/troubleshoot.md @@ -143,19 +143,19 @@ Besides, the current support for MAC is very experimental, and we encourage user ### I am using Nvidia with 8GB VRAM, I get CUDA Out Of Memory -It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement). +It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using Nvidia with 6GB VRAM, I get CUDA Out Of Memory -It is very likely a BUG. Please let us know as soon as possible. Please make an issue. 
See also [minimal requirements](readme.md#minimal-requirement). +It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using Nvidia with 4GB VRAM with Float16 support, like RTX 3050, I get CUDA Out Of Memory -It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement). +It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using Nvidia with 4GB VRAM without Float16 support, like GTX 960, I get CUDA Out Of Memory -Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](readme.md#minimal-requirement). +Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using AMD GPU on Windows, I get CUDA Out Of Memory @@ -163,11 +163,11 @@ Current AMD support is very experimental for Windows. If you see this, then perh However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. -Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](readme.md#minimal-requirement). +Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using AMD GPU on Linux, I get CUDA Out Of Memory -Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](readme.md#minimal-requirement). +Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I tried flags like --lowvram or --gpu-only or --bf16 or so on, and things are not getting any better? 
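The PerpNeg node added above (ldm_patched/contrib/external_perpneg.py) combines the positive, negative, and empty-prompt ("nocond") predictions by projecting the positive direction onto the negative one and removing the scaled parallel part before applying CFG. A minimal standalone sketch of that combination, assuming plain tensors stand in for the denoised predictions (the function name and random inputs below are illustrative, not part of the patch):

import torch

def perp_neg_guidance(pred_pos, pred_neg, pred_nocond, cond_scale=7.0, neg_scale=1.0):
    # Differences against the empty-prompt ("nocond") prediction.
    pos = pred_pos - pred_nocond
    neg = pred_neg - pred_nocond
    # Projection of the positive direction onto the negative direction.
    perp = (torch.mul(pos, neg).sum() / (torch.norm(neg) ** 2)) * neg
    # Guide along the positive direction with the scaled parallel component removed.
    return pred_nocond + cond_scale * (pos - perp * neg_scale)

x = torch.randn(2, 4, 8, 8)
out = perp_neg_guidance(x + 0.1 * torch.randn_like(x), x - 0.1 * torch.randn_like(x), x)
print(out.shape)  # torch.Size([2, 4, 8, 8])
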
From ec5dd950a25cbbc2c6a9462101712402bf0920c7 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 15:55:53 -0800 Subject: [PATCH 02/10] Fix many precision problems Many users reported that image quality is different from 2.1.824. We reviewed all codes and fixed several precision problems in 2.1.846. --- extras/ip_adapter.py | 16 ++- fooocus_version.py | 2 +- modules/patch.py | 46 +----- modules/patch_clip.py | 279 +++++++++++++++++++++++++++++++++++++ modules/patch_precision.py | 60 ++++++++ update_log.md | 4 + 6 files changed, 363 insertions(+), 44 deletions(-) create mode 100644 modules/patch_clip.py create mode 100644 modules/patch_precision.py diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py index b18f0dfc..ac64ef1d 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -167,14 +167,26 @@ def preprocess(img, ip_adapter_path): ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device)) - outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2) + + if clip_vision.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, b: contextlib.nullcontext(a) + + with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32): + outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) ip_adapter = entry['ip_adapter'] ip_layers = entry['ip_layers'] image_proj_model = entry['image_proj_model'] ip_unconds = entry['ip_unconds'] - cond = outputs[1].to(device=ip_adapter.load_device, dtype=ip_adapter.dtype) + if ip_adapter.plus: + cond = outputs.hidden_states[-2] + else: + cond = outputs.image_embeds + + cond = cond.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype) ldm_patched.modules.model_management.load_model_gpu(image_proj_model) cond = image_proj_model.model(cond).to(device=ip_adapter.load_device, dtype=ip_adapter.dtype) diff --git a/fooocus_version.py b/fooocus_version.py index a7dac990..26b2cf04 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.844' +version = '2.1.846' diff --git a/modules/patch.py b/modules/patch.py index 0a04bafb..0ae53585 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -25,6 +25,8 @@ import modules.constants as constants from ldm_patched.modules.samplers import calc_cond_uncond_batch from ldm_patched.k_diffusion.sampling import BatchedBrownianTree from ldm_patched.ldm.modules.diffusionmodules.openaimodel import forward_timestep_embed, apply_control +from modules.patch_precision import patch_all_precision +from modules.patch_clip import patch_all_clip sharpness = 2.0 @@ -286,46 +288,6 @@ def sdxl_encode_adm_patched(self, **kwargs): return final_adm -def encode_token_weights_patched_with_a1111_method(self, token_weight_pairs): - to_encode = list() - max_token_len = 0 - has_weights = False - for x in token_weight_pairs: - tokens = list(map(lambda a: a[0], x)) - max_token_len = max(len(tokens), max_token_len) - has_weights = has_weights or not all(map(lambda a: a[1] == 1.0, x)) - to_encode.append(tokens) - - sections = len(to_encode) - if has_weights or sections == 0: - to_encode.append(ldm_patched.modules.sd1_clip.gen_empty_tokens(self.special_tokens, max_token_len)) - - out, pooled = self.encode(to_encode) - if pooled is not None: - first_pooled = pooled[0:1].to(ldm_patched.modules.model_management.intermediate_device()) - else: - first_pooled = pooled - - output = [] 
- for k in range(0, sections): - z = out[k:k + 1] - if has_weights: - original_mean = z.mean() - z_empty = out[-1] - for i in range(len(z)): - for j in range(len(z[i])): - weight = token_weight_pairs[k][j][1] - if weight != 1.0: - z[i][j] = (z[i][j] - z_empty[j]) * weight + z_empty[j] - new_mean = z.mean() - z = z * (original_mean / new_mean) - output.append(z) - - if len(output) == 0: - return out[-1:].to(ldm_patched.modules.model_management.intermediate_device()), first_pooled - return torch.cat(output, dim=-2).to(ldm_patched.modules.model_management.intermediate_device()), first_pooled - - def patched_KSamplerX0Inpaint_forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None): if inpaint_worker.current_task is not None: latent_processor = self.inner_model.inner_model.process_latent_in @@ -519,6 +481,9 @@ def build_loaded(module, loader_name): def patch_all(): + patch_all_precision() + patch_all_clip() + if not hasattr(ldm_patched.modules.model_management, 'load_models_gpu_origin'): ldm_patched.modules.model_management.load_models_gpu_origin = ldm_patched.modules.model_management.load_models_gpu @@ -527,7 +492,6 @@ def patch_all(): ldm_patched.controlnet.cldm.ControlNet.forward = patched_cldm_forward ldm_patched.ldm.modules.diffusionmodules.openaimodel.UNetModel.forward = patched_unet_forward ldm_patched.modules.model_base.SDXL.encode_adm = sdxl_encode_adm_patched - ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_patched_with_a1111_method ldm_patched.modules.samplers.KSamplerX0Inpaint.forward = patched_KSamplerX0Inpaint_forward ldm_patched.k_diffusion.sampling.BrownianTreeNoiseSampler = BrownianTreeNoiseSamplerPatched ldm_patched.modules.samplers.sampling_function = patched_sampling_function diff --git a/modules/patch_clip.py b/modules/patch_clip.py new file mode 100644 index 00000000..157f051b --- /dev/null +++ b/modules/patch_clip.py @@ -0,0 +1,279 @@ +# Consistent with Kohya/A1111 to reduce differences between model training and inference. 
+ +import os +import torch +import ldm_patched.controlnet.cldm +import ldm_patched.k_diffusion.sampling +import ldm_patched.ldm.modules.attention +import ldm_patched.ldm.modules.diffusionmodules.model +import ldm_patched.ldm.modules.diffusionmodules.openaimodel +import ldm_patched.ldm.modules.diffusionmodules.openaimodel +import ldm_patched.modules.args_parser +import ldm_patched.modules.model_base +import ldm_patched.modules.model_management +import ldm_patched.modules.model_patcher +import ldm_patched.modules.ops +import ldm_patched.modules.samplers +import ldm_patched.modules.sd +import ldm_patched.modules.sd1_clip +import ldm_patched.modules.clip_vision +import ldm_patched.modules.model_management as model_management +import contextlib + +from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection + + +@contextlib.contextmanager +def use_disable_weight_init_linear_ops(device=None, dtype=None): + old_torch_nn_linear = torch.nn.Linear + force_device = device + force_dtype = dtype + + def linear_with_dtype(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): + if force_device is not None: + device = force_device + if force_dtype is not None: + dtype = force_dtype + return ldm_patched.modules.ops.disable_weight_init.Linear(in_features, out_features, bias=bias, device=device, + dtype=dtype) + + torch.nn.Linear = linear_with_dtype + try: + yield + finally: + torch.nn.Linear = old_torch_nn_linear + return + + +def encode_token_weights_fooocus(self, token_weight_pairs): + to_encode = list() + max_token_len = 0 + has_weights = False + for x in token_weight_pairs: + tokens = list(map(lambda a: a[0], x)) + max_token_len = max(len(tokens), max_token_len) + has_weights = has_weights or not all(map(lambda a: a[1] == 1.0, x)) + to_encode.append(tokens) + + sections = len(to_encode) + if has_weights or sections == 0: + to_encode.append(ldm_patched.modules.sd1_clip.gen_empty_tokens(self.special_tokens, max_token_len)) + + out, pooled = self.encode(to_encode) + if pooled is not None: + first_pooled = pooled[0:1].to(ldm_patched.modules.model_management.intermediate_device()) + else: + first_pooled = pooled + + output = [] + for k in range(0, sections): + z = out[k:k + 1] + if has_weights: + original_mean = z.mean() + z_empty = out[-1] + for i in range(len(z)): + for j in range(len(z[i])): + weight = token_weight_pairs[k][j][1] + if weight != 1.0: + z[i][j] = (z[i][j] - z_empty[j]) * weight + z_empty[j] + new_mean = z.mean() + z = z * (original_mean / new_mean) + output.append(z) + + if len(output) == 0: + return out[-1:].to(ldm_patched.modules.model_management.intermediate_device()), first_pooled + return torch.cat(output, dim=-2).to(ldm_patched.modules.model_management.intermediate_device()), first_pooled + + +class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder): + """Uses the CLIP transformer encoder for text (from huggingface)""" + LAYERS = [ + "last", + "pooled", + "hidden" + ] + + def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77, + freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=ldm_patched.modules.clip_model.CLIPTextModel, + special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True): # clip-vit-base-patch32 + super().__init__() + assert layer in self.LAYERS + + if textmodel_json_config is None: + textmodel_json_config = 
os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), "sd1_clip_config.json") + + config = CLIPTextConfig.from_json_file(textmodel_json_config) + + self.num_layers = config.num_hidden_layers + with use_disable_weight_init_linear_ops(device, dtype): + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) + + self.inner_name = "text_model" + if dtype is not None: + self.transformer.to(dtype) + inner_model = getattr(self.transformer, self.inner_name) + if hasattr(inner_model, "embeddings"): + inner_model.embeddings.to(torch.float32) + else: + self.transformer.set_input_embeddings(self.transformer.get_input_embeddings().to(torch.float32)) + + self.max_length = max_length + if freeze: + self.freeze() + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.enable_attention_masks = False + + self.layer_norm_hidden_state = layer_norm_hidden_state + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.clip_layer(layer_idx) + self.layer_default = (self.layer, self.layer_idx) + + def freeze(self): + self.transformer = self.transformer.eval() + # self.train = disabled_train + for param in self.parameters(): + param.requires_grad = False + + def clip_layer(self, layer_idx): + if abs(layer_idx) > self.num_layers: + self.layer = "last" + else: + self.layer = "hidden" + self.layer_idx = layer_idx + + def reset_clip_layer(self): + self.layer = self.layer_default[0] + self.layer_idx = self.layer_default[1] + + def set_up_textual_embeddings(self, tokens, current_embeds): + out_tokens = [] + next_new_token = token_dict_size = current_embeds.weight.shape[0] - 1 + embedding_weights = [] + + for x in tokens: + tokens_temp = [] + for y in x: + if isinstance(y, int): + if y == token_dict_size: # EOS token + y = -1 + tokens_temp += [y] + else: + if y.shape[0] == current_embeds.weight.shape[1]: + embedding_weights += [y] + tokens_temp += [next_new_token] + next_new_token += 1 + else: + print("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored", + y.shape[0], current_embeds.weight.shape[1]) + while len(tokens_temp) < len(x): + tokens_temp += [self.special_tokens["pad"]] + out_tokens += [tokens_temp] + + n = token_dict_size + if len(embedding_weights) > 0: + new_embedding = torch.nn.Embedding(next_new_token + 1, current_embeds.weight.shape[1], + device=current_embeds.weight.device, dtype=current_embeds.weight.dtype) + new_embedding.weight[:token_dict_size] = current_embeds.weight[:-1] + for x in embedding_weights: + new_embedding.weight[n] = x + n += 1 + new_embedding.weight[n] = current_embeds.weight[-1] # EOS embedding + self.transformer.set_input_embeddings(new_embedding) + + processed_tokens = [] + for x in out_tokens: + processed_tokens += [ + list(map(lambda a: n if a == -1 else a, x))] # The EOS token should always be the largest one + + return processed_tokens + + def forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + device = backup_embeds.weight.device + tokens = self.set_up_textual_embeddings(tokens, backup_embeds) + tokens = torch.LongTensor(tokens).to(device) + + if getattr(self.transformer, self.inner_name).final_layer_norm.weight.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, 
dtype: contextlib.nullcontext(a) + + with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break + + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) + + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] + else: + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = getattr(self.transformer, self.inner_name).final_layer_norm(z) + + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None + + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() + return z.float(), pooled_output + + def encode(self, tokens): + return self(tokens) + + def load_sd(self, sd): + if "text_projection" in sd: + self.text_projection[:] = sd.pop("text_projection") + if "text_projection.weight" in sd: + self.text_projection[:] = sd.pop("text_projection.weight").transpose(0, 1) + return self.transformer.load_state_dict(sd, strict=False) + + +class ClipVisionModelFooocus: + def __init__(self, json_config): + config = CLIPVisionConfig.from_json_file(json_config) + self.load_device = ldm_patched.modules.model_management.text_encoder_device() + offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + self.dtype = torch.float32 + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): + self.dtype = torch.float16 + + with use_disable_weight_init_linear_ops(offload_device, self.dtype): + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) + self.model.to(self.dtype) + + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + + def load_sd(self, sd): + return self.model.load_state_dict(sd, strict=False) + + def encode_image(self, image): + raise NotImplementedError('wrong clip vision call!') + + +def patch_all_clip(): + ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_fooocus + ldm_patched.modules.sd1_clip.SDClipModel = SDClipModelFooocus + ldm_patched.modules.clip_vision.ClipVisionModel = ClipVisionModelFooocus + return diff --git a/modules/patch_precision.py b/modules/patch_precision.py new file mode 100644 index 00000000..83569bdd --- /dev/null +++ b/modules/patch_precision.py @@ -0,0 +1,60 @@ +# Consistent with Kohya to reduce differences between model training and inference. 
+ +import torch +import math +import einops +import numpy as np + +import ldm_patched.ldm.modules.diffusionmodules.openaimodel +import ldm_patched.modules.model_sampling +import ldm_patched.modules.sd1_clip + +from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule + + +def patched_timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): + # Consistent with Kohya to reduce differences between model training and inference. + + if not repeat_only: + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half + ).to(device=timesteps.device) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = einops.repeat(timesteps, 'b -> b d', d=dim) + return embedding + + +def patched_register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + # Consistent with Kohya to reduce differences between model training and inference. + + if given_betas is not None: + betas = given_betas + else: + betas = make_beta_schedule( + beta_schedule, + timesteps, + linear_start=linear_start, + linear_end=linear_end, + cosine_s=cosine_s) + + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32) + self.set_sigmas(sigmas) + return + + +def patch_all_precision(): + ldm_patched.ldm.modules.diffusionmodules.openaimodel.timestep_embedding = patched_timestep_embedding + ldm_patched.modules.model_sampling.ModelSamplingDiscrete._register_schedule = patched_register_schedule + return diff --git a/update_log.md b/update_log.md index 640b5665..39e4c9e5 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.846 + +* Many users reported that image quality is different from 2.1.824. We reviewed all codes and fixed several precision problems in 2.1.846. + # 2.1.843 * Many improvements to Canvas. Thanks CanvasZoom author! 
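The new modules/patch_precision.py above re-registers the discrete noise schedule so that inference matches the way training code such as Kohya's computes it: betas come from make_beta_schedule, the cumulative alphas are accumulated with numpy, and the sigmas are derived from them in a single step. A minimal sketch of that sigma computation, assuming the usual Stable Diffusion schedule parameters (the helper name and printed values are illustrative, not part of the patch):

import numpy as np
import torch

def sd_sigmas(timesteps=1000, linear_start=0.00085, linear_end=0.012):
    # "linear" schedule as in make_beta_schedule: linspace over sqrt(beta), then squared.
    betas = np.linspace(linear_start ** 0.5, linear_end ** 0.5, timesteps, dtype=np.float64) ** 2
    alphas_cumprod = np.cumprod(1.0 - betas, axis=0)
    # sigma_t = sqrt((1 - alpha_bar_t) / alpha_bar_t), stored as float32 like the patch does.
    return torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32)

sigmas = sd_sigmas()
print(round(sigmas[0].item(), 4), round(sigmas[-1].item(), 4))  # roughly 0.0292 and 14.61
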
From 085bc3aeaafee4b2310bc31be9536f869581120a Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 16:58:46 -0800 Subject: [PATCH 03/10] revise caster --- fooocus_version.py | 2 +- modules/patch.py | 4 +- modules/patch_clip.py | 91 ++++++++++++++++++------------------------- 3 files changed, 40 insertions(+), 57 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 26b2cf04..0404a937 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.846' +version = '2.1.847' diff --git a/modules/patch.py b/modules/patch.py index 0ae53585..6a7111a6 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -271,8 +271,8 @@ def sdxl_encode_adm_patched(self, **kwargs): height = float(height) * positive_adm_scale def embedder(number_list): - h = torch.tensor(number_list, dtype=torch.float32) - h = self.embedder(h) + h = [self.embedder(torch.tensor([x], dtype=torch.float32)) for x in number_list] + h = torch.cat(h) h = torch.flatten(h).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) return h diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 157f051b..8aa7468f 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -23,28 +23,6 @@ import contextlib from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection -@contextlib.contextmanager -def use_disable_weight_init_linear_ops(device=None, dtype=None): - old_torch_nn_linear = torch.nn.Linear - force_device = device - force_dtype = dtype - - def linear_with_dtype(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): - if force_device is not None: - device = force_device - if force_dtype is not None: - dtype = force_dtype - return ldm_patched.modules.ops.disable_weight_init.Linear(in_features, out_features, bias=bias, device=device, - dtype=dtype) - - torch.nn.Linear = linear_with_dtype - try: - yield - finally: - torch.nn.Linear = old_torch_nn_linear - return - - def encode_token_weights_fooocus(self, token_weight_pairs): to_encode = list() max_token_len = 0 @@ -93,34 +71,40 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken "hidden" ] - def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77, - freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=ldm_patched.modules.clip_model.CLIPTextModel, - special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True): # clip-vit-base-patch32 + def __init__(self, + device="cpu", + max_length=77, + freeze=True, + layer="last", + layer_idx=None, + textmodel_json_config=None, + dtype=None, + special_tokens=None, + layer_norm_hidden_state=True, + **kwargs): super().__init__() assert layer in self.LAYERS + if special_tokens is None: + special_tokens = {"start": 49406, "end": 49407, "pad": 49407} + if textmodel_json_config is None: textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), "sd1_clip_config.json") config = CLIPTextConfig.from_json_file(textmodel_json_config) - self.num_layers = config.num_hidden_layers - with use_disable_weight_init_linear_ops(device, dtype): - with modeling_utils.no_init_weights(): - self.transformer = CLIPTextModel(config) - self.inner_name = "text_model" + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) + if dtype is not None: self.transformer.to(dtype) - inner_model = getattr(self.transformer, 
self.inner_name) - if hasattr(inner_model, "embeddings"): - inner_model.embeddings.to(torch.float32) - else: - self.transformer.set_input_embeddings(self.transformer.get_input_embeddings().to(torch.float32)) + self.transformer.text_model.embeddings.to(torch.float32) - self.max_length = max_length if freeze: self.freeze() + + self.max_length = max_length self.layer = layer self.layer_idx = None self.special_tokens = special_tokens @@ -131,7 +115,6 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken self.layer_norm_hidden_state = layer_norm_hidden_state if layer == "hidden": assert layer_idx is not None - assert abs(layer_idx) < self.num_layers self.clip_layer(layer_idx) self.layer_default = (self.layer, self.layer_idx) @@ -142,11 +125,8 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken param.requires_grad = False def clip_layer(self, layer_idx): - if abs(layer_idx) > self.num_layers: - self.layer = "last" - else: - self.layer = "hidden" - self.layer_idx = layer_idx + self.layer = "hidden" + self.layer_idx = layer_idx def reset_clip_layer(self): self.layer = self.layer_default[0] @@ -200,7 +180,7 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken tokens = self.set_up_textual_embeddings(tokens, backup_embeds) tokens = torch.LongTensor(tokens).to(device) - if getattr(self.transformer, self.inner_name).final_layer_norm.weight.dtype != torch.float32: + if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: precision_scope = torch.autocast else: precision_scope = lambda a, dtype: contextlib.nullcontext(a) @@ -227,7 +207,7 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken else: z = outputs.hidden_states[self.layer_idx] if self.layer_norm_hidden_state: - z = getattr(self.transformer, self.inner_name).final_layer_norm(z) + z = self.transformer.text_model.final_layer_norm(z) if hasattr(outputs, "pooler_output"): pooled_output = outputs.pooler_output.float() @@ -252,25 +232,28 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken class ClipVisionModelFooocus: def __init__(self, json_config): config = CLIPVisionConfig.from_json_file(json_config) + self.load_device = ldm_patched.modules.model_management.text_encoder_device() - offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() - self.dtype = torch.float32 + self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): self.dtype = torch.float16 + else: + self.dtype = torch.float32 + + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) - with use_disable_weight_init_linear_ops(offload_device, self.dtype): - with modeling_utils.no_init_weights(): - self.model = CLIPVisionModelWithProjection(config) self.model.to(self.dtype) - - self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( + self.model, + load_device=self.load_device, + offload_device=self.offload_device + ) def load_sd(self, sd): return self.model.load_state_dict(sd, strict=False) - def encode_image(self, image): - raise NotImplementedError('wrong clip vision call!') - def patch_all_clip(): ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = 
encode_token_weights_fooocus From efb312d4951f228158f1a47ffd2c04f76a664557 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 17:16:24 -0800 Subject: [PATCH 04/10] fix autocast (#1459) --- fooocus_version.py | 2 +- modules/patch_clip.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 0404a937..709af32d 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.847' +version = '2.1.848' diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 8aa7468f..4a1e0307 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -72,7 +72,6 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken ] def __init__(self, - device="cpu", max_length=77, freeze=True, layer="last", @@ -97,6 +96,9 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken with modeling_utils.no_init_weights(): self.transformer = CLIPTextModel(config) + if 'cuda' not in model_management.text_encoder_device().type: + dtype = torch.float32 + if dtype is not None: self.transformer.to(dtype) self.transformer.text_model.embeddings.to(torch.float32) @@ -241,6 +243,9 @@ class ClipVisionModelFooocus: else: self.dtype = torch.float32 + if 'cuda' not in self.load_device.type: + self.dtype = torch.float32 + with modeling_utils.no_init_weights(): self.model = CLIPVisionModelWithProjection(config) From 67808d5ee541d3fff350980bf36d38c8e5943029 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 19:54:05 -0800 Subject: [PATCH 05/10] fix all precision issues We fixed number precision issues again. Now 2.1.849 will give 100% exactly same results as 2.1.824. --- fooocus_version.py | 2 +- modules/patch.py | 5 +- modules/patch_clip.py | 221 ++++++++++++++---------------------------- 3 files changed, 75 insertions(+), 153 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 709af32d..e1578ebb 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.848' +version = '2.1.849' diff --git a/modules/patch.py b/modules/patch.py index 6a7111a6..66b243cb 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -271,12 +271,11 @@ def sdxl_encode_adm_patched(self, **kwargs): height = float(height) * positive_adm_scale def embedder(number_list): - h = [self.embedder(torch.tensor([x], dtype=torch.float32)) for x in number_list] - h = torch.cat(h) + h = self.embedder(torch.tensor(number_list, dtype=torch.float32)) h = torch.flatten(h).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) return h - width, height = round_to_64(width), round_to_64(height) + width, height = int(width), int(height) target_width, target_height = round_to_64(target_width), round_to_64(target_height) adm_emphasized = embedder([height, width, 0, 0, target_height, target_width]) diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 4a1e0307..0ef22e8b 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -63,172 +63,94 @@ def encode_token_weights_fooocus(self, token_weight_pairs): return torch.cat(output, dim=-2).to(ldm_patched.modules.model_management.intermediate_device()), first_pooled -class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder): - """Uses the CLIP transformer encoder for text (from huggingface)""" - LAYERS = [ - "last", - "pooled", - "hidden" - ] +def patched_SDClipModel__init__(self, max_length=77, freeze=True, layer="last", layer_idx=None, + textmodel_json_config=None, 
dtype=None, special_tokens=None, + layer_norm_hidden_state=True, **kwargs): + torch.nn.Module.__init__(self) + assert layer in self.LAYERS - def __init__(self, - max_length=77, - freeze=True, - layer="last", - layer_idx=None, - textmodel_json_config=None, - dtype=None, - special_tokens=None, - layer_norm_hidden_state=True, - **kwargs): - super().__init__() - assert layer in self.LAYERS + if special_tokens is None: + special_tokens = {"start": 49406, "end": 49407, "pad": 49407} - if special_tokens is None: - special_tokens = {"start": 49406, "end": 49407, "pad": 49407} + if textmodel_json_config is None: + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), + "sd1_clip_config.json") - if textmodel_json_config is None: - textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), "sd1_clip_config.json") + config = CLIPTextConfig.from_json_file(textmodel_json_config) + self.num_layers = config.num_hidden_layers - config = CLIPTextConfig.from_json_file(textmodel_json_config) - self.num_layers = config.num_hidden_layers + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) - with modeling_utils.no_init_weights(): - self.transformer = CLIPTextModel(config) + if 'cuda' not in model_management.text_encoder_device().type: + dtype = torch.float32 - if 'cuda' not in model_management.text_encoder_device().type: - dtype = torch.float32 + if dtype is not None: + self.transformer.to(dtype) + self.transformer.text_model.embeddings.to(torch.float32) - if dtype is not None: - self.transformer.to(dtype) - self.transformer.text_model.embeddings.to(torch.float32) + if freeze: + self.freeze() - if freeze: - self.freeze() + self.max_length = max_length + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.enable_attention_masks = False - self.max_length = max_length - self.layer = layer - self.layer_idx = None - self.special_tokens = special_tokens - self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) - self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) - self.enable_attention_masks = False + self.layer_norm_hidden_state = layer_norm_hidden_state + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.clip_layer(layer_idx) + self.layer_default = (self.layer, self.layer_idx) - self.layer_norm_hidden_state = layer_norm_hidden_state - if layer == "hidden": - assert layer_idx is not None - self.clip_layer(layer_idx) - self.layer_default = (self.layer, self.layer_idx) - def freeze(self): - self.transformer = self.transformer.eval() - # self.train = disabled_train - for param in self.parameters(): - param.requires_grad = False +def patched_SDClipModel_forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + device = backup_embeds.weight.device + tokens = self.set_up_textual_embeddings(tokens, backup_embeds) + tokens = torch.LongTensor(tokens).to(device) - def clip_layer(self, layer_idx): - self.layer = "hidden" - self.layer_idx = layer_idx + if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, dtype: contextlib.nullcontext(a) - def 
reset_clip_layer(self): - self.layer = self.layer_default[0] - self.layer_idx = self.layer_default[1] + with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break - def set_up_textual_embeddings(self, tokens, current_embeds): - out_tokens = [] - next_new_token = token_dict_size = current_embeds.weight.shape[0] - 1 - embedding_weights = [] + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) - for x in tokens: - tokens_temp = [] - for y in x: - if isinstance(y, int): - if y == token_dict_size: # EOS token - y = -1 - tokens_temp += [y] - else: - if y.shape[0] == current_embeds.weight.shape[1]: - embedding_weights += [y] - tokens_temp += [next_new_token] - next_new_token += 1 - else: - print("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored", - y.shape[0], current_embeds.weight.shape[1]) - while len(tokens_temp) < len(x): - tokens_temp += [self.special_tokens["pad"]] - out_tokens += [tokens_temp] - - n = token_dict_size - if len(embedding_weights) > 0: - new_embedding = torch.nn.Embedding(next_new_token + 1, current_embeds.weight.shape[1], - device=current_embeds.weight.device, dtype=current_embeds.weight.dtype) - new_embedding.weight[:token_dict_size] = current_embeds.weight[:-1] - for x in embedding_weights: - new_embedding.weight[n] = x - n += 1 - new_embedding.weight[n] = current_embeds.weight[-1] # EOS embedding - self.transformer.set_input_embeddings(new_embedding) - - processed_tokens = [] - for x in out_tokens: - processed_tokens += [ - list(map(lambda a: n if a == -1 else a, x))] # The EOS token should always be the largest one - - return processed_tokens - - def forward(self, tokens): - backup_embeds = self.transformer.get_input_embeddings() - device = backup_embeds.weight.device - tokens = self.set_up_textual_embeddings(tokens, backup_embeds) - tokens = torch.LongTensor(tokens).to(device) - - if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: - precision_scope = torch.autocast + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] else: - precision_scope = lambda a, dtype: contextlib.nullcontext(a) + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = self.transformer.text_model.final_layer_norm(z) - with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): - attention_mask = None - if self.enable_attention_masks: - attention_mask = torch.zeros_like(tokens) - max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 - for x in range(attention_mask.shape[0]): - for y in range(attention_mask.shape[1]): - attention_mask[x, y] = 1 - if tokens[x, y] == max_token: - break + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None - outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, - output_hidden_states=self.layer == "hidden") - self.transformer.set_input_embeddings(backup_embeds) - - if self.layer == "last": - z = 
outputs.last_hidden_state - elif self.layer == "pooled": - z = outputs.pooler_output[:, None, :] - else: - z = outputs.hidden_states[self.layer_idx] - if self.layer_norm_hidden_state: - z = self.transformer.text_model.final_layer_norm(z) - - if hasattr(outputs, "pooler_output"): - pooled_output = outputs.pooler_output.float() - else: - pooled_output = None - - if self.text_projection is not None and pooled_output is not None: - pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() - return z.float(), pooled_output - - def encode(self, tokens): - return self(tokens) - - def load_sd(self, sd): - if "text_projection" in sd: - self.text_projection[:] = sd.pop("text_projection") - if "text_projection.weight" in sd: - self.text_projection[:] = sd.pop("text_projection.weight").transpose(0, 1) - return self.transformer.load_state_dict(sd, strict=False) + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() + return z.float(), pooled_output class ClipVisionModelFooocus: @@ -262,6 +184,7 @@ class ClipVisionModelFooocus: def patch_all_clip(): ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_fooocus - ldm_patched.modules.sd1_clip.SDClipModel = SDClipModelFooocus + ldm_patched.modules.sd1_clip.SDClipModel.__init__ = patched_SDClipModel__init__ + ldm_patched.modules.sd1_clip.SDClipModel.forward = patched_SDClipModel_forward ldm_patched.modules.clip_vision.ClipVisionModel = ClipVisionModelFooocus return From 1669370d2e44b789a4167b9a546a94abcedbb726 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sun, 17 Dec 2023 00:13:51 -0800 Subject: [PATCH 06/10] Refactor CLIP Vision --- fooocus_version.py | 2 +- modules/patch_clip.py | 69 ++++++++++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index e1578ebb..70a5e92a 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.849' +version = '2.1.850' diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 0ef22e8b..5a3e85df 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -23,7 +23,7 @@ import contextlib from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection -def encode_token_weights_fooocus(self, token_weight_pairs): +def patched_encode_token_weights(self, token_weight_pairs): to_encode = list() max_token_len = 0 has_weights = False @@ -153,38 +153,59 @@ def patched_SDClipModel_forward(self, tokens): return z.float(), pooled_output -class ClipVisionModelFooocus: - def __init__(self, json_config): - config = CLIPVisionConfig.from_json_file(json_config) +def patched_ClipVisionModel__init__(self, json_config): + config = CLIPVisionConfig.from_json_file(json_config) - self.load_device = ldm_patched.modules.model_management.text_encoder_device() - self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + self.load_device = ldm_patched.modules.model_management.text_encoder_device() + self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() - if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): - self.dtype = torch.float16 - else: - self.dtype = torch.float32 + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, 
prioritize_performance=False): + self.dtype = torch.float16 + else: + self.dtype = torch.float32 - if 'cuda' not in self.load_device.type: - self.dtype = torch.float32 + if 'cuda' not in self.load_device.type: + self.dtype = torch.float32 - with modeling_utils.no_init_weights(): - self.model = CLIPVisionModelWithProjection(config) + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) - self.model.to(self.dtype) - self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( - self.model, - load_device=self.load_device, - offload_device=self.offload_device - ) + self.model.to(self.dtype) + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( + self.model, + load_device=self.load_device, + offload_device=self.offload_device + ) - def load_sd(self, sd): - return self.model.load_state_dict(sd, strict=False) + +def patched_ClipVisionModel_encode_image(self, image): + ldm_patched.modules.model_management.load_model_gpu(self.patcher) + pixel_values = ldm_patched.modules.clip_vision.clip_preprocess(image.to(self.load_device)) + + if self.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, b: contextlib.nullcontext(a) + + with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32): + outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) + + for k in outputs: + t = outputs[k] + if t is not None: + if k == 'hidden_states': + outputs["penultimate_hidden_states"] = t[-2].to(ldm_patched.modules.model_management.intermediate_device()) + outputs["hidden_states"] = None + else: + outputs[k] = t.to(ldm_patched.modules.model_management.intermediate_device()) + + return outputs def patch_all_clip(): - ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_fooocus + ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = patched_encode_token_weights ldm_patched.modules.sd1_clip.SDClipModel.__init__ = patched_SDClipModel__init__ ldm_patched.modules.sd1_clip.SDClipModel.forward = patched_SDClipModel_forward - ldm_patched.modules.clip_vision.ClipVisionModel = ClipVisionModelFooocus + ldm_patched.modules.clip_vision.ClipVisionModel.__init__ = patched_ClipVisionModel__init__ + ldm_patched.modules.clip_vision.ClipVisionModel.encode_image = patched_ClipVisionModel_encode_image return From 69a23c4d60c9e627409d0cb0f8862cdb015488eb Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sun, 17 Dec 2023 00:43:00 -0800 Subject: [PATCH 07/10] python 3.11 need this (#1465) --- requirements_versions.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements_versions.txt b/requirements_versions.txt index 16050a5e..b2111c1f 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -8,7 +8,6 @@ Pillow==9.2.0 scipy==1.9.3 tqdm==4.64.1 psutil==5.9.5 -numpy==1.23.5 pytorch_lightning==1.9.4 omegaconf==2.2.3 gradio==3.41.2 From 0e1aa8d084b8a6c0bd7734f7a77eef80814cbb47 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sun, 17 Dec 2023 17:09:15 -0800 Subject: [PATCH 08/10] better caster (#1480) related to mps/rocm/cpu casting for fp16 and etc on clip --- extras/ip_adapter.py | 9 +- fooocus_version.py | 2 +- ldm_patched/contrib/external_images.py | 4 +- ldm_patched/modules/args_parser.py | 2 +- ldm_patched/modules/model_management.py | 4 + modules/patch_clip.py | 106 ++++++++++++------------ 6 files changed, 63 insertions(+), 64 deletions(-) diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py 
index ac64ef1d..cb1d366f 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -167,14 +167,7 @@ def preprocess(img, ip_adapter_path): ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device)) - - if clip_vision.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32): - outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) + outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) ip_adapter = entry['ip_adapter'] ip_layers = entry['ip_layers'] diff --git a/fooocus_version.py b/fooocus_version.py index 70a5e92a..b588f46f 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.850' +version = '2.1.851' diff --git a/ldm_patched/contrib/external_images.py b/ldm_patched/contrib/external_images.py index 3dbb3e3b..17e9c497 100644 --- a/ldm_patched/contrib/external_images.py +++ b/ldm_patched/contrib/external_images.py @@ -76,7 +76,7 @@ class SaveAnimatedWEBP: OUTPUT_NODE = True - CATEGORY = "_for_testing" + CATEGORY = "image/animation" def save_images(self, images, fps, filename_prefix, lossless, quality, method, num_frames=0, prompt=None, extra_pnginfo=None): method = self.methods.get(method) @@ -138,7 +138,7 @@ class SaveAnimatedPNG: OUTPUT_NODE = True - CATEGORY = "_for_testing" + CATEGORY = "image/animation" def save_images(self, images, fps, compress_level, filename_prefix="ldm_patched", prompt=None, extra_pnginfo=None): filename_prefix += self.prefix_append diff --git a/ldm_patched/modules/args_parser.py b/ldm_patched/modules/args_parser.py index 3931997d..7957783e 100644 --- a/ldm_patched/modules/args_parser.py +++ b/ldm_patched/modules/args_parser.py @@ -102,7 +102,7 @@ vram_group.add_argument("--always-cpu", action="store_true") parser.add_argument("--always-offload-from-vram", action="store_true") - +parser.add_argument("--pytorch-deterministic", action="store_true") parser.add_argument("--disable-server-log", action="store_true") parser.add_argument("--debug-mode", action="store_true") diff --git a/ldm_patched/modules/model_management.py b/ldm_patched/modules/model_management.py index 0e783b36..31cf95da 100644 --- a/ldm_patched/modules/model_management.py +++ b/ldm_patched/modules/model_management.py @@ -28,6 +28,10 @@ total_vram = 0 lowvram_available = True xpu_available = False +if args.pytorch_deterministic: + print("Using deterministic algorithms for pytorch") + torch.use_deterministic_algorithms(True, warn_only=True) + directml_enabled = False if args.directml is not None: import torch_directml diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 5a3e85df..74ee436a 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -12,17 +12,34 @@ import ldm_patched.modules.args_parser import ldm_patched.modules.model_base import ldm_patched.modules.model_management import ldm_patched.modules.model_patcher -import ldm_patched.modules.ops import ldm_patched.modules.samplers import ldm_patched.modules.sd import ldm_patched.modules.sd1_clip import ldm_patched.modules.clip_vision import ldm_patched.modules.model_management as model_management +import ldm_patched.modules.ops as ops import contextlib from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, 
CLIPVisionModelWithProjection +@contextlib.contextmanager +def use_patched_ops(operations): + op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] + backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} + + try: + for op_name in op_names: + setattr(torch.nn, op_name, getattr(operations, op_name)) + + yield + + finally: + for op_name in op_names: + setattr(torch.nn, op_name, backups[op_name]) + return + + def patched_encode_token_weights(self, token_weight_pairs): to_encode = list() max_token_len = 0 @@ -79,15 +96,14 @@ def patched_SDClipModel__init__(self, max_length=77, freeze=True, layer="last", config = CLIPTextConfig.from_json_file(textmodel_json_config) self.num_layers = config.num_hidden_layers - with modeling_utils.no_init_weights(): - self.transformer = CLIPTextModel(config) - - if 'cuda' not in model_management.text_encoder_device().type: - dtype = torch.float32 + with use_patched_ops(ops.manual_cast): + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) if dtype is not None: self.transformer.to(dtype) - self.transformer.text_model.embeddings.to(torch.float32) + + self.transformer.text_model.embeddings.to(torch.float32) if freeze: self.freeze() @@ -114,42 +130,37 @@ def patched_SDClipModel_forward(self, tokens): tokens = self.set_up_textual_embeddings(tokens, backup_embeds) tokens = torch.LongTensor(tokens).to(device) - if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: - precision_scope = torch.autocast + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break + + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) + + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] else: - precision_scope = lambda a, dtype: contextlib.nullcontext(a) + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = self.transformer.text_model.final_layer_norm(z) - with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): - attention_mask = None - if self.enable_attention_masks: - attention_mask = torch.zeros_like(tokens) - max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 - for x in range(attention_mask.shape[0]): - for y in range(attention_mask.shape[1]): - attention_mask[x, y] = 1 - if tokens[x, y] == max_token: - break + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None - outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, - output_hidden_states=self.layer == "hidden") - self.transformer.set_input_embeddings(backup_embeds) + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() - if self.layer == "last": - z = outputs.last_hidden_state - elif self.layer == "pooled": - z = outputs.pooler_output[:, None, :] - else: - z = outputs.hidden_states[self.layer_idx] - if self.layer_norm_hidden_state: - z = self.transformer.text_model.final_layer_norm(z) - - if 
hasattr(outputs, "pooler_output"): - pooled_output = outputs.pooler_output.float() - else: - pooled_output = None - - if self.text_projection is not None and pooled_output is not None: - pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() return z.float(), pooled_output @@ -164,11 +175,9 @@ def patched_ClipVisionModel__init__(self, json_config): else: self.dtype = torch.float32 - if 'cuda' not in self.load_device.type: - self.dtype = torch.float32 - - with modeling_utils.no_init_weights(): - self.model = CLIPVisionModelWithProjection(config) + with use_patched_ops(ops.manual_cast): + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) self.model.to(self.dtype) self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( @@ -181,14 +190,7 @@ def patched_ClipVisionModel__init__(self, json_config): def patched_ClipVisionModel_encode_image(self, image): ldm_patched.modules.model_management.load_model_gpu(self.patcher) pixel_values = ldm_patched.modules.clip_vision.clip_preprocess(image.to(self.load_device)) - - if self.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32): - outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) + outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) for k in outputs: t = outputs[k] From 7a6b46f363d0f3797d263bfdf00bd16e315da02a Mon Sep 17 00:00:00 2001 From: Huy Nguyen <111590292+peterng1618@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:20:02 +0700 Subject: [PATCH 09/10] New Log System --- fooocus_version.py | 2 +- modules/async_worker.py | 10 ++++--- modules/private_logger.py | 58 ++++++++++++++++++++++++--------------- update_log.md | 4 +++ 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index b588f46f..71457946 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.851' +version = '2.1.852' diff --git a/modules/async_worker.py b/modules/async_worker.py index 93a76579..c2c8632d 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -34,6 +34,7 @@ def worker(): import modules.advanced_parameters as advanced_parameters import extras.ip_adapter as ip_adapter import extras.face_crop + import fooocus_version from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion from modules.private_logger import log @@ -492,7 +493,7 @@ def worker(): if direct_return: d = [('Upscale (Fast)', '2x')] - log(uov_input_image, d, single_line_number=1) + log(uov_input_image, d) yield_result(async_task, uov_input_image, do_not_show_finished_images=True) return @@ -774,12 +775,13 @@ def worker(): ('Refiner Switch', refiner_switch), ('Sampler', sampler_name), ('Scheduler', scheduler_name), - ('Seed', task['task_seed']) + ('Seed', task['task_seed']), ] for n, w in loras: if n != 'None': - d.append((f'LoRA [{n}] weight', w)) - log(x, d, single_line_number=3) + d.append((f'LoRA', f'{n} : {w}')) + d.append(('Version', 'v' + fooocus_version.version)) + log(x, d) yield_result(async_task, imgs, do_not_show_finished_images=len(tasks) == 1) except ldm_patched.modules.model_management.InterruptProcessingException as e: diff --git a/modules/private_logger.py b/modules/private_logger.py index 3a992cf6..b91f68d3 100644 --- a/modules/private_logger.py +++ 
b/modules/private_logger.py @@ -16,7 +16,7 @@ def get_current_html_path(): return html_name -def log(img, dic, single_line_number=3): +def log(img, dic): if args_manager.args.disable_image_log: return @@ -25,36 +25,50 @@ def log(img, dic, single_line_number=3): Image.fromarray(img).save(local_temp_filename) html_name = os.path.join(os.path.dirname(local_temp_filename), 'log.html') - existing_log = log_cache.get(html_name, None) + css_styles = ( + "" + ) - if existing_log is None: + begin_part = f"Fooocus Log {date_string}{css_styles}

Fooocus Log {date_string} (private) \n All images are clean, without any hidden data/meta, and safe to share with others. \n\n" + end_part = f'\n' + + middle_part = log_cache.get(html_name, "") + + if middle_part == "": if os.path.exists(html_name): - existing_log = open(html_name, encoding='utf-8').read() - else: - existing_log = f' Fooocus Log {date_string} (private) \n All images do not contain any hidden data. ' + existing_split = open(html_name, 'r', encoding='utf-8').read().split('') + if len(existing_split) == 3: + middle_part = existing_split[1] + else: + middle_part = existing_split[0] div_name = only_name.replace('.', '_') - item = f' \n' - item += "" - item += f"" - item += f" {only_name} \n" - for i, (k, v) in enumerate(dic): - if i < single_line_number: - item += f" {k}: {v} \n" - else: - if (i - single_line_number) % 2 == 0: - item += f" {k}: {v}, " - else: - item += f"{k}: {v} \n" + item = f" \n" + item += f"" + item += "" - item += " {only_name} " + for key, value in dic: + item += f"\n" + item += "" item += " \n" - existing_log = item + existing_log + item += "
\n\n" + + middle_part = item + middle_part with open(html_name, 'w', encoding='utf-8') as f: - f.write(existing_log) + f.write(begin_part + middle_part + end_part) print(f'Image generated with private log at: {html_name}') - log_cache[html_name] = existing_log + log_cache[html_name] = middle_part return diff --git a/update_log.md b/update_log.md index 39e4c9e5..7ac777de 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.852 + +* New Log System: Log system now uses tables. If this is breaking some other browser extension or javascript developments, see also [use previous version](https://github.com/lllyasviel/Fooocus/discussions/1405). + # 2.1.846 * Many users reported that image quality is different from 2.1.824. We reviewed all codes and fixed several precision problems in 2.1.846. From f3084894402a4c0b7ed9e7164466bcedd5f5428d Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Mon, 18 Dec 2023 19:01:52 -0800 Subject: [PATCH 10/10] Add Marc K3nt3L's styles Thanks Marc K3nt3L ! --- fooocus_version.py | 2 +- modules/sdxl_styles.py | 3 +- modules/style_sorter.py | 13 +- readme.md | 2 +- sdxl_styles/sdxl_styles_marc_k3nt3l.json | 312 +++++++++++++++++++++++ update_log.md | 4 + 6 files changed, 328 insertions(+), 8 deletions(-) create mode 100644 sdxl_styles/sdxl_styles_marc_k3nt3l.json diff --git a/fooocus_version.py b/fooocus_version.py index 71457946..8d6fe946 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.852' +version = '2.1.853' diff --git a/modules/sdxl_styles.py b/modules/sdxl_styles.py index d7489455..f5bb6276 100644 --- a/modules/sdxl_styles.py +++ b/modules/sdxl_styles.py @@ -31,7 +31,8 @@ for x in ['sdxl_styles_fooocus.json', 'sdxl_styles_sai.json', 'sdxl_styles_mre.json', 'sdxl_styles_twri.json', - 'sdxl_styles_diva.json']: + 'sdxl_styles_diva.json', + 'sdxl_styles_marc_k3nt3l.json']: if x in styles_files: styles_files.remove(x) styles_files.append(x) diff --git a/modules/style_sorter.py b/modules/style_sorter.py index 393e441d..49142bc7 100644 --- a/modules/style_sorter.py +++ b/modules/style_sorter.py @@ -15,11 +15,14 @@ def try_load_sorted_styles(style_names, default_selected): try: if os.path.exists('sorted_styles.json'): with open('sorted_styles.json', 'rt', encoding='utf-8') as fp: - sorted_styles = json.load(fp) - if len(sorted_styles) == len(all_styles): - if all(x in all_styles for x in sorted_styles): - if all(x in sorted_styles for x in all_styles): - all_styles = sorted_styles + sorted_styles = [] + for x in json.load(fp): + if x in all_styles: + sorted_styles.append(x) + for x in all_styles: + if x not in sorted_styles: + sorted_styles.append(x) + all_styles = sorted_styles except Exception as e: print('Load style sorting failed.') print(e) diff --git a/readme.md b/readme.md index 78682fc0..b970ee12 100644 --- a/readme.md +++ b/readme.md @@ -384,7 +384,7 @@ See also [About Forking and Promotion of Forks](https://github.com/lllyasviel/Fo ## Thanks -Special thanks to [twri](https://github.com/twri) and [3Diva](https://github.com/3Diva) for creating additional SDXL styles available in Fooocus. Thanks [daswer123](https://github.com/daswer123) for contributing the Canvas Zoom! +Special thanks to [twri](https://github.com/twri) and [3Diva](https://github.com/3Diva) and [Marc K3nt3L](https://github.com/K3nt3L) for creating additional SDXL styles available in Fooocus. Thanks [daswer123](https://github.com/daswer123) for contributing the Canvas Zoom! 
## Update Log diff --git a/sdxl_styles/sdxl_styles_marc_k3nt3l.json b/sdxl_styles/sdxl_styles_marc_k3nt3l.json new file mode 100644 index 00000000..fbbe1a24 --- /dev/null +++ b/sdxl_styles/sdxl_styles_marc_k3nt3l.json @@ -0,0 +1,312 @@ +[ + { + "name": "MK Chromolithography", + "prompt": "Chromolithograph {prompt}. Vibrant colors, intricate details, rich color saturation, meticulous registration, multi-layered printing, decorative elements, historical charm, artistic reproductions, commercial posters, nostalgic, ornate compositions.", + "negative_prompt": "monochromatic, simple designs, limited color palette, imprecise registration, minimalistic, modern aesthetic, digital appearance." + }, + { + "name": "MK Cross Processing Print", + "prompt": "Cross processing print {prompt}. Experimental color shifts, unconventional tonalities, vibrant and surreal hues, heightened contrasts, unpredictable results, artistic unpredictability, retro and vintage feel, dynamic color interplay, abstract and dreamlike.", + "negative_prompt": "predictable color tones, traditional processing, realistic color representation, subdued contrasts, standard photographic aesthetics." + }, + { + "name": "MK Dufaycolor Photograph", + "prompt": "Dufaycolor photograph {prompt}. Vintage color palette, distinctive color rendering, soft and dreamy atmosphere, historical charm, unique color process, grainy texture, evocative mood, nostalgic aesthetic, hand-tinted appearance, artistic patina.", + "negative_prompt": "modern color reproduction, hyperrealistic tones, sharp and clear details, digital precision, contemporary aesthetic." + }, + { + "name": "MK Herbarium", + "prompt": "Herbarium drawing{prompt}. Botanical accuracy, old botanical book illustration, detailed illustrations, pressed plants, delicate and precise linework, scientific documentation, meticulous presentation, educational purpose, organic compositions, timeless aesthetic, naturalistic beauty.", + "negative_prompt": "abstract representation, vibrant colors, artistic interpretation, chaotic compositions, fantastical elements, digital appearance." + }, + { + "name": "MK Punk Collage", + "prompt": "punk collage style {prompt} . mixed media, papercut,textured paper, overlapping, ripped posters, safety pins, chaotic layers, graffiti-style elements, anarchy symbols, vintage photos, cut-and-paste aesthetic, bold typography, distorted images, political messages, urban decay, distressed textures, newspaper clippings, spray paint, rebellious icons, DIY spirit, vivid colors, punk band logos, edgy and raw compositions, ", + "negative_prompt": "conventional,blurry, noisy, low contrast" + }, + { + "name": "MK mosaic", + "prompt": "mosaic style {prompt} . fragmented, assembled, colorful, highly detailed", + "negative_prompt": "whole, unbroken, monochrome" + }, + { + "name": "MK Van Gogh", + "prompt": "Oil painting by Van Gogh {prompt} . Expressive, impasto, swirling brushwork, vibrant, brush strokes, Brushstroke-heavy, Textured, Impasto, Colorful, Dynamic, Bold, Distinctive, Vibrant, Whirling, Expressive, Dramatic, Swirling, Layered, Intense, Contrastive, Atmospheric, Luminous, Textural, Evocative, SpiraledVan Gogh style", + "negative_prompt": "realistic, photorealistic, calm, straight lines, signature, frame, text, watermark" + }, + { + "name": "MK Coloring Book", + "prompt": "centered black and white high contrast line drawing, coloring book style,{prompt} . 
monochrome, blank white background", + "negative_prompt": "greyscale, gradients,shadows,shadow, colored, Red, Blue, Yellow, Green, Orange, Purple, Pink, Brown, Gray, Beige, Turquoise, Lavender, Cyan, Magenta, Olive, Indigo, black background" + }, + { + "name": "MK Singer Sargent", + "prompt": "Oil painting by John Singer Sargent {prompt}. Elegant, refined, masterful technique,realistic portrayal, subtle play of light, captivating expression, rich details, harmonious colors, skillful composition, brush strokes, chiaroscuro.", + "negative_prompt": "realistic, photorealistic, abstract, overly stylized, excessive contrasts, distorted,bright colors,disorder." + }, + { + "name": "MK Pollock", + "prompt": "Oil painting by Jackson Pollock {prompt}. Abstract expressionism, drip painting, chaotic composition, energetic, spontaneous, unconventional technique, dynamic, bold, distinctive, vibrant, intense, expressive, energetic, layered, non-representational, gestural.", + "negative_prompt": "(realistic:1.5), (photorealistic:1.5), representational, calm, ordered composition, precise lines, detailed forms, subdued colors, quiet, static, traditional, figurative." + }, + { + "name": "MK Basquiat", + "prompt": "Artwork by Jean-Michel Basquiat {prompt}. Neo-expressionism, street art influence, graffiti-inspired, raw, energetic, bold colors, dynamic composition, chaotic, layered, textural, expressive, spontaneous, distinctive, symbolic,energetic brushstrokes.", + "negative_prompt": "(realistic:1.5), (photorealistic:1.5), calm, precise lines, conventional composition, subdued" + }, + { + "name": "MK Andy Warhol", + "prompt": "Artwork in the style of Andy Warhol {prompt}. Pop art, vibrant colors, bold compositions, repetition of iconic imagery, celebrity culture, commercial aesthetics, mass production influence, stylized simplicity, cultural commentary, graphical elements, distinctive portraits.", + "negative_prompt": "subdued colors, realistic, lack of repetition, minimalistic." + }, + { + "name": "MK Halftone print", + "prompt": "Halftone print of {prompt}. Dot matrix pattern, grayscale tones, vintage aesthetic, newspaper print vibe, stylized dots, visual texture, black and white contrasts, retro appearance, artistic pointillism,pop culture, (Roy Lichtenstein style:1.5).", + "negative_prompt": "smooth gradients, continuous tones, vibrant colors." + }, + { + "name": "MK Gond Painting", + "prompt": "Gond painting {prompt}. Intricate patterns, vibrant colors, detailed motifs, nature-inspired themes, tribal folklore, fine lines, intricate detailing, storytelling compositions, mystical and folkloric, cultural richness.", + "negative_prompt": "monochromatic, abstract shapes, minimalistic." + }, + { + "name": "MK Albumen Print", + "prompt": "Albumen print {prompt}. Sepia tones, fine details, subtle tonal gradations, delicate highlights, vintage aesthetic, soft and muted atmosphere, historical charm, rich textures, meticulous craftsmanship, classic photographic technique, vignetting.", + "negative_prompt": "vibrant colors, high contrast, modern, digital appearance, sharp details, contemporary style." + }, + { + "name": "MK Aquatint Print", + "prompt": "Aquatint print {prompt}. Soft tonal gradations, atmospheric effects, velvety textures, rich contrasts, fine details, etching process, delicate lines, nuanced shading, expressive and moody atmosphere, artistic depth.", + "negative_prompt": "sharp contrasts, bold lines, minimalistic." + }, + { + "name": "MK Anthotype Print", + "prompt": "Anthotype print {prompt}. 
Monochrome dye, soft and muted colors, organic textures, ephemeral and delicate appearance, low details, watercolor canvas, low contrast, overexposed, silhouette, textured paper.", + "negative_prompt": "vibrant synthetic dyes, bold and saturated colors." + }, + { + "name": "MK Inuit Carving", + "prompt": "A sculpture made of ivory, {prompt} made of . Sculptures, Inuit art style, intricate carvings, natural materials, storytelling motifs, arctic wildlife themes, symbolic representations, cultural traditions, earthy tones, harmonious compositions, spiritual and mythological elements.", + "negative_prompt": "abstract, vibrant colors." + }, + { + "name": "MK Bromoil Print", + "prompt": "Bromoil print {prompt}. Painterly effects, sepia tones, textured surfaces, rich contrasts, expressive brushwork, tonal variations, vintage aesthetic, atmospheric mood, handmade quality, artistic experimentation, darkroom craftsmanship, vignetting.", + "negative_prompt": "smooth surfaces, minimal brushwork, contemporary digital appearance." + }, + { + "name": "MK Calotype Print", + "prompt": "Calotype print {prompt}. Soft focus, subtle tonal range, paper negative process, fine details, vintage aesthetic, artistic experimentation, atmospheric mood, early photographic charm, handmade quality, vignetting.", + "negative_prompt": "sharp focus, bold contrasts, modern aesthetic, digital photography." + }, + { + "name": "MK Color Sketchnote", + "prompt": "Color sketchnote {prompt}. Hand-drawn elements, vibrant colors, visual hierarchy, playful illustrations, varied typography, graphic icons, organic and dynamic layout, personalized touches, creative expression, engaging storytelling.", + "negative_prompt": "monochromatic, geometric layout." + }, + { + "name": "MK Cibulak Porcelain", + "prompt": "A sculpture made of blue pattern porcelain of {prompt}. Classic design, blue and white color scheme, intricate detailing, floral motifs, onion-shaped elements, historical charm, rococo, white ware, cobalt blue, underglaze pattern, fine craftsmanship, traditional elegance, delicate patterns, vintage aesthetic, Meissen, Blue Onion pattern, Cibulak.", + "negative_prompt": "tea, teapot, cup, teacup,bright colors, bold and modern design, absence of intricate detailing, lack of floral motifs, non-traditional shapes." + }, + { + "name": "MK Alcohol Ink Art", + "prompt": "Alcohol ink art {prompt}. Fluid and vibrant colors, unpredictable patterns, organic textures, translucent layers, abstract compositions, ethereal and dreamy effects, free-flowing movement, expressive brushstrokes, contemporary aesthetic, wet textured paper.", + "negative_prompt": "monochromatic, controlled patterns." + }, + { + "name": "MK One Line Art", + "prompt": "One line art {prompt}. Continuous and unbroken black line, minimalistic, simplicity, economical use of space, flowing and dynamic, symbolic representations, contemporary aesthetic, evocative and abstract, white background.", + "negative_prompt": "disjointed lines, complexity, complex detailing." + }, + { + "name": "MK Blacklight Paint", + "prompt": "Blacklight paint {prompt}. Fluorescent pigments, vibrant and surreal colors, ethereal glow, otherworldly effects, dynamic and psychedelic compositions, neon aesthetics, transformative in ultraviolet light, contemporary and experimental.", + "negative_prompt": "muted colors, traditional and realistic compositions." + }, + { + "name": "MK Carnival Glass", + "prompt": "A sculpture made of Carnival glass, {prompt}. 
Iridescent surfaces, vibrant colors, intricate patterns, opalescent hues, reflective and prismatic effects, Art Nouveau and Art Deco influences, vintage charm, intricate detailing, lustrous and luminous appearance, Carnival Glass style.", + "negative_prompt": "non-iridescent surfaces, muted colors, absence of intricate patterns, lack of opalescent hues, modern and minimalist aesthetic." + }, + { + "name": "MK Cyanotype Print", + "prompt": "Cyanotype print {prompt}. Prussian blue tones, distinctive coloration, high contrast, blueprint aesthetics, atmospheric mood, sun-exposed paper, silhouette effects, delicate details, historical charm, handmade and experimental quality.", + "negative_prompt": "vibrant colors, low contrast, modern and polished appearance." + }, + { + "name": "MK Cross-Stitching", + "prompt": "Cross-stitching {prompt}. Intricate patterns, embroidery thread, sewing, fine details, precise stitches, textile artistry, symmetrical designs, varied color palette, traditional and contemporary motifs, handmade and crafted,canvas, nostalgic charm.", + "negative_prompt": "paper, paint, ink, photography." + }, + { + "name": "MK Encaustic Paint", + "prompt": "Encaustic paint {prompt}. Textured surfaces, translucent layers, luminous quality, wax medium, rich color saturation, fluid and organic shapes, contemporary and historical influences, mixed media elements, atmospheric depth.", + "negative_prompt": "flat surfaces, opaque layers, lack of wax medium, muted color palette, absence of textured surfaces, non-mixed media." + }, + { + "name": "MK Embroidery", + "prompt": "Embroidery {prompt}. Intricate stitching, embroidery thread, fine details, varied thread textures, textile artistry, embellished surfaces, diverse color palette, traditional and contemporary motifs, handmade and crafted, tactile and ornate.", + "negative_prompt": "minimalist, monochromatic." + }, + { + "name": "MK Gyotaku", + "prompt": "Gyotaku {prompt}. Fish impressions, realistic details, ink rubbings, textured surfaces, traditional Japanese art form, nature-inspired compositions, artistic representation of marine life, black and white contrasts, cultural significance.", + "negative_prompt": "photography." + }, + { + "name": "MK Luminogram", + "prompt": "Luminogram {prompt}. Photogram technique, ethereal and abstract effects, light and shadow interplay, luminous quality, experimental process, direct light exposure, unique and unpredictable results, artistic experimentation.", + "negative_prompt": "" + }, + { + "name": "MK Lite Brite Art", + "prompt": "Lite Brite art {prompt}. Luminous and colorful designs, pixelated compositions, retro aesthetic, glowing effects, creative patterns, interactive and playful, nostalgic charm, vibrant and dynamic arrangements.", + "negative_prompt": "monochromatic." + }, + { + "name": "MK Mokume-gane", + "prompt": "Mokume-gane {prompt}. Wood-grain patterns, mixed metal layers, intricate and organic designs, traditional Japanese metalwork, harmonious color combinations, artisanal craftsmanship, unique and layered textures, cultural and historical significance.", + "negative_prompt": "uniform metal surfaces." + }, + { + "name": "Pebble Art", + "prompt": "a sculpture made of peebles, {prompt}. 
Pebble art style,natural materials, textured surfaces, balanced compositions, organic forms, harmonious arrangements, tactile and 3D effects, beach-inspired aesthetic, creative storytelling, artisanal craftsmanship.", + "negative_prompt": "non-natural materials, lack of textured surfaces, imbalanced compositions, absence of organic forms, non-tactile appearance." + }, + { + "name": "MK Palekh", + "prompt": "Palekh art {prompt}. Miniature paintings, intricate details, vivid colors, folkloric themes, lacquer finish, storytelling compositions, symbolic elements, Russian folklore influence, cultural and historical significance.", + "negative_prompt": "large-scale paintings." + }, + { + "name": "MK Suminagashi", + "prompt": "Suminagashi {prompt}. Floating ink patterns, marbled effects, delicate and ethereal designs, water-based ink, fluid and unpredictable compositions, meditative process, monochromatic or subtle color palette, Japanese artistic tradition.", + "negative_prompt": "vibrant and bold color palette." + }, + { + "name": "MK Scrimshaw", + "prompt": "A Scrimshaw engraving of {prompt}. Intricate engravings on a spermwhale's teeth, marine motifs, detailed scenes, nautical themes, black and white contrasts, historical craftsmanship, artisanal carving, storytelling compositions, maritime heritage.", + "negative_prompt": "colorful, modern." + }, + { + "name": "MK Shibori", + "prompt": "Shibori {prompt}. Textured fabric, intricate patterns, resist-dyeing technique, indigo or vibrant colors, organic and flowing designs, Japanese textile art, cultural tradition, tactile and visual interest.", + "negative_prompt": "monochromatic." + }, + { + "name": "MK Vitreous Enamel", + "prompt": "A sculpture made of Vitreous enamel {prompt}. Smooth and glossy surfaces, vibrant colors, glass-like finish, durable and resilient, intricate detailing, traditional and contemporary applications, artistic craftsmanship, jewelry and decorative objects, , Vitreous enamel, colored glass.", + "negative_prompt": "rough surfaces, muted colors." + }, + { + "name": "MK Ukiyo-e", + "prompt": "Ukiyo-e {prompt}. Woodblock prints, vibrant colors, intricate details, depictions of landscapes, kabuki actors, beautiful women, cultural scenes, traditional Japanese art, artistic craftsmanship, historical significance.", + "negative_prompt": "absence of woodblock prints, muted colors, lack of intricate details, non-traditional Japanese themes, absence of cultural scenes." + }, + { + "name": "MK vintage-airline-poster", + "prompt": "vintage airline poster {prompt} . classic aviation fonts, pastel colors, elegant aircraft illustrations, scenic destinations, distressed textures, retro travel allure", + "negative_prompt": "modern fonts, bold colors, hyper-realistic, sleek design" + }, + { + "name": "MK vintage-travel-poster", + "prompt": "vintage travel poster {prompt} . retro fonts, muted colors, scenic illustrations, iconic landmarks, distressed textures, nostalgic vibes", + "negative_prompt": "modern fonts, vibrant colors, hyper-realistic, sleek design" + }, + { + "name": "MK bauhaus-style", + "prompt": "Bauhaus-inspired {prompt} . minimalism, geometric precision, primary colors, sans-serif typography, asymmetry, functional design", + "negative_prompt": "ornate, intricate, excessive detail, complex patterns, serif typography" + }, + { + "name": "MK afrofuturism", + "prompt": "Afrofuturism illustration {prompt} . 
vibrant colors, futuristic elements, cultural symbolism, cosmic imagery, dynamic patterns, empowering narratives", + "negative_prompt": "monochromatic" + }, + { + "name": "MK atompunk", + "prompt": "Atompunk illustation, {prompt} . retro-futuristic, atomic age aesthetics, sleek lines, metallic textures, futuristic technology, optimism, energy", + "negative_prompt": "organic, natural textures, rustic, dystopian" + }, + { + "name": "MK constructivism", + "prompt": "Constructivism {prompt} . geometric abstraction, bold colors, industrial aesthetics, dynamic compositions, utilitarian design, revolutionary spirit", + "negative_prompt": "organic shapes, muted colors, ornate elements, traditional" + }, + { + "name": "MK chicano-art", + "prompt": "Chicano art {prompt} . bold colors, cultural symbolism, muralism, lowrider aesthetics, barrio life, political messages, social activism, Mexico", + "negative_prompt": "monochromatic, minimalist, mainstream aesthetics" + }, + { + "name": "MK de-stijl", + "prompt": "De Stijl Art {prompt} . neoplasticism, primary colors, geometric abstraction, horizontal and vertical lines, simplicity, harmony, utopian ideals", + "negative_prompt": "complex patterns, muted colors, ornate elements, asymmetry" + }, + { + "name": "MK dayak-art", + "prompt": "Dayak art sculpture of {prompt} . intricate patterns, nature-inspired motifs, vibrant colors, traditional craftsmanship, cultural symbolism, storytelling", + "negative_prompt": "minimalist, monochromatic, modern" + }, + { + "name": "MK fayum-portrait", + "prompt": "Fayum portrait {prompt} . encaustic painting, realistic facial features, warm earth tones, serene expressions, ancient Egyptian influences", + "negative_prompt": "abstract, vibrant colors, exaggerated features, modern" + }, + { + "name": "MK illuminated-manuscript", + "prompt": "Illuminated manuscript {prompt} . intricate calligraphy, rich colors, detailed illustrations, gold leaf accents, ornate borders, religious, historical, medieval", + "negative_prompt": "modern typography, minimalist design, monochromatic, abstract themes" + }, + { + "name": "MK kalighat-painting", + "prompt": "Kalighat painting {prompt} . bold lines, vibrant colors, narrative storytelling, cultural motifs, flat compositions, expressive characters", + "negative_prompt": "subdued colors, intricate details, realistic portrayal, modern aesthetics" + }, + { + "name": "MK madhubani-painting", + "prompt": "Madhubani painting {prompt} . intricate patterns, vibrant colors, nature-inspired motifs, cultural storytelling, symmetry, folk art aesthetics", + "negative_prompt": "abstract, muted colors, minimalistic design, modern aesthetics" + }, + { + "name": "MK pictorialism", + "prompt": "Pictorialism illustration{prompt} . soft focus, atmospheric effects, artistic interpretation, tonality, muted colors, evocative storytelling", + "negative_prompt": "sharp focus, high contrast, realistic depiction, vivid colors" + }, + { + "name": "MK pichwai-painting", + "prompt": "Pichwai painting {prompt} . intricate detailing, vibrant colors, religious themes, nature motifs, devotional storytelling, gold leaf accents", + "negative_prompt": "minimalist, subdued colors, abstract design" + }, + { + "name": "MK patachitra-painting", + "prompt": "Patachitra painting {prompt} . 
bold outlines, vibrant colors, intricate detailing, mythological themes, storytelling, traditional craftsmanship", + "negative_prompt": "subdued colors, minimalistic, abstract, modern aesthetics" + }, + { + "name": "MK samoan-art-inspired", + "prompt": "Samoan art-inspired wooden sculpture {prompt} . traditional motifs, natural elements, bold colors, cultural symbolism, storytelling, craftsmanship", + "negative_prompt": "modern aesthetics, minimalist, abstract" + }, + { + "name": "MK tlingit-art", + "prompt": "Tlingit art {prompt} . formline design, natural elements, animal motifs, bold colors, cultural storytelling, traditional craftsmanship, Alaska traditional art, (totem:1.5)", + "negative_prompt": "" + }, + { + "name": "MK adnate-style", + "prompt": "Painting by Adnate {prompt} . realistic portraits, street art, large-scale murals, subdued color palette, social narratives", + "negative_prompt": "abstract, vibrant colors, small-scale art" + }, + { + "name": "MK ron-english-style", + "prompt": "Painting by Ron English {prompt} . pop-surrealism, cultural subversion, iconic mash-ups, vibrant and bold colors, satirical commentary", + "negative_prompt": "traditional, monochromatic" + }, + { + "name": "MK shepard-fairey-style", + "prompt": "Painting by Shepard Fairey {prompt} . street art, political activism, iconic stencils, bold typography, high contrast, red, black, and white color palette", + "negative_prompt": "traditional, muted colors" + } +] diff --git a/update_log.md b/update_log.md index 7ac777de..5c2efd87 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.853 + +* Add Marc K3nt3L's styles. Thanks [Marc K3nt3L](https://github.com/K3nt3L)! + # 2.1.852 * New Log System: Log system now uses tables. If this is breaking some other browser extension or javascript developments, see also [use previous version](https://github.com/lllyasviel/Fooocus/discussions/1405).
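Each entry in the new sdxl_styles/sdxl_styles_marc_k3nt3l.json file pairs a style name with a positive prompt template containing a {prompt} placeholder and an optional negative prompt. The sketch below only illustrates how such an entry could be substituted into a user prompt; it is not the actual apply_style helper in modules/sdxl_styles.py, and apply_style_entry is a hypothetical name used for illustration.

```python
# Hypothetical sketch of applying one entry from sdxl_styles_marc_k3nt3l.json.
# The real style-application helper in modules/sdxl_styles.py may differ.
import json


def apply_style_entry(entry, user_prompt):
    # Substitute the user's text into the "{prompt}" placeholder of the template.
    positive = entry["prompt"].replace("{prompt}", user_prompt)
    negative = entry.get("negative_prompt", "")
    return positive, negative


if __name__ == "__main__":
    with open("sdxl_styles/sdxl_styles_marc_k3nt3l.json", encoding="utf-8") as f:
        styles = json.load(f)

    pos, neg = apply_style_entry(styles[0], "a lighthouse on a stormy coast")
    print(pos)
    print(neg)
```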
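The 2.1.852 update-log entry above notes that the private log now renders image metadata as HTML tables. The exact markup and CSS emitted by modules/private_logger.py are not reproduced in this series as shown here, so the following is only a minimal sketch of the general idea, with render_metadata_rows as a hypothetical helper name.

```python
# Minimal illustrative sketch: render (key, value) metadata pairs as a two-column
# HTML table, similar in spirit to the table-based log described above.
# This is not the exact markup produced by modules/private_logger.py.
from html import escape


def render_metadata_rows(dic):
    # dic is a list of (key, value) pairs, e.g. [("Prompt", "..."), ("Seed", 12345)].
    rows = "".join(
        f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>\n"
        for key, value in dic
    )
    return f"<table>\n{rows}</table>\n"


if __name__ == "__main__":
    print(render_metadata_rows([("Prompt", "a cat"), ("Seed", 12345), ("Version", "v2.1.852")]))
```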