From e58d913fbb1d0fdd032ec467a45ab4711abd1f27 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 8 Jun 2024 19:55:31 +0200
Subject: [PATCH 001/101] refactor: extract async_worker handle method into
 separate functions

---
 modules/async_worker.py | 1500 ++++++++++++++++++++-------------------
 1 file changed, 760 insertions(+), 740 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 92831427..32dd5a56 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1,6 +1,4 @@
 import threading
-import os
-import re
 from modules.patch import PatchSettings, patch_settings, patch_all
 
 patch_all()
@@ -8,12 +6,105 @@ patch_all()
 
 class AsyncTask:
     def __init__(self, args):
-        self.args = args
+        from modules.flags import Performance, MetadataScheme, ip_list, controlnet_image_count
+        from modules.util import get_enabled_loras
+        from modules.config import default_max_lora_number
+        import args_manager
+
+        self.args = args.copy()  # untouched snapshot of the raw values as received
         self.yields = []
         self.results = []
         self.last_stop = False
         self.processing = False
 
+        self.performance_loras = []
+
+        if len(args) == 0:  # empty task: only the bookkeeping fields above are set
+            return
+        # Consume a reversed private copy: pop() then yields the values front-to-back and the caller's list stays intact.
+        args = args[::-1]
+        self.generate_image_grid = args.pop()
+        self.prompt = args.pop()
+        self.negative_prompt = args.pop()
+        self.translate_prompts = args.pop()
+        self.style_selections = args.pop()
+
+        self.performance_selection = Performance(args.pop())
+        self.steps = self.performance_selection.steps()  # initial value; handler may reassign it later
+
+        self.aspect_ratios_selection = args.pop()
+        self.image_number = args.pop()
+        self.output_format = args.pop()
+        self.seed = int(args.pop())
+        self.read_wildcards_in_order = args.pop()
+        self.sharpness = args.pop()
+        self.cfg_scale = float(args.pop())  # always a float, whatever numeric type was supplied
+        self.base_model_name = args.pop()
+        self.refiner_model_name = args.pop()
+        self.refiner_switch = args.pop()
+        self.loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in
+                                        range(default_max_lora_number)])
+        self.input_image_checkbox = args.pop()
+        self.current_tab = args.pop()
+        self.uov_method = args.pop()
+        self.uov_input_image = args.pop()
+        self.outpaint_selections = args.pop()
+        self.inpaint_input_image = args.pop()
+        self.inpaint_additional_prompt = args.pop()
+        self.inpaint_mask_image_upload = args.pop()
+
+        self.disable_preview = args.pop()
+        self.disable_intermediate_results = args.pop()
+        self.disable_seed_increment = args.pop()
+        self.black_out_nsfw = args.pop()
+        self.adm_scaler_positive = args.pop()
+        self.adm_scaler_negative = args.pop()
+        self.adm_scaler_end = args.pop()
+        self.adaptive_cfg = args.pop()
+        self.clip_skip = args.pop()
+        self.sampler_name = args.pop()
+        self.scheduler_name = args.pop()
+        self.vae_name = args.pop()
+        self.overwrite_step = args.pop()
+        self.overwrite_switch = args.pop()
+        self.overwrite_width = args.pop()
+        self.overwrite_height = args.pop()
+        self.overwrite_vary_strength = args.pop()
+        self.overwrite_upscale_strength = args.pop()
+        self.mixing_image_prompt_and_vary_upscale = args.pop()
+        self.mixing_image_prompt_and_inpaint = args.pop()
+        self.debugging_cn_preprocessor = args.pop()
+        self.skipping_cn_preprocessor = args.pop()
+        self.canny_low_threshold = args.pop()
+        self.canny_high_threshold = args.pop()
+        self.refiner_swap_method = args.pop()
+        self.controlnet_softness = args.pop()
+        self.freeu_enabled = args.pop()
+        self.freeu_b1 = args.pop()
+        self.freeu_b2 = args.pop()
+        self.freeu_s1 = args.pop()
+        self.freeu_s2 = args.pop()
+        self.debugging_inpaint_preprocessor = args.pop()
+        self.inpaint_disable_initial_latent = args.pop()
+        self.inpaint_engine = args.pop()
+        self.inpaint_strength = args.pop()
+        self.inpaint_respective_field = args.pop()
+        self.inpaint_mask_upload_checkbox = args.pop()
+        self.invert_mask_checkbox = args.pop()
+        self.inpaint_erode_or_dilate = args.pop()
+        self.save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False  # nothing is popped when metadata is disabled
+        self.metadata_scheme = MetadataScheme(
+            args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS
+
+        self.cn_tasks = {x: [] for x in ip_list}
+        for _ in range(controlnet_image_count):  # four values are consumed per iteration
+            cn_img = args.pop()
+            cn_stop = args.pop()
+            cn_weight = args.pop()
+            cn_type = args.pop()
+            if cn_img is not None:  # entries without an image are dropped
+                self.cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])
+
 
 async_tasks = []
 
@@ -25,7 +116,6 @@ def worker():
     import traceback
     import math
     import numpy as np
-    import cv2
     import torch
     import time
     import shared
@@ -44,18 +134,17 @@ def worker():
     import extras.ip_adapter as ip_adapter
     import extras.face_crop
     import fooocus_version
-    import args_manager
 
     from extras.censor import default_censor
     from modules.sdxl_styles import apply_style, get_random_style, fooocus_expansion, apply_arrays, random_style_name
     from modules.private_logger import log
     from extras.expansion import safe_str
     from modules.util import (remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil,
-                              get_shape_ceil, resample_image, erode_or_dilate, get_enabled_loras,
-                              parse_lora_references_from_prompt, apply_wildcards)
+                              get_shape_ceil, resample_image, erode_or_dilate, parse_lora_references_from_prompt,
+                              apply_wildcards)
     from modules.upscaler import perform_upscale
     from modules.flags import Performance
-    from modules.meta_parser import get_metadata_parser, MetadataScheme
+    from modules.meta_parser import get_metadata_parser
 
     pid = os.getpid()
     print(f'Started worker with PID {pid}')
@@ -136,207 +225,60 @@ def worker():
 
     @torch.no_grad()
     @torch.inference_mode()
-    def handler(async_task):
-        execution_start_time = time.perf_counter()
+    def handler(async_task: AsyncTask):
+        preparation_start_time = time.perf_counter()
         async_task.processing = True
 
-        args = async_task.args
-        args.reverse()
-
-        prompt = args.pop()
-        negative_prompt = args.pop()
-        translate_prompts = args.pop()
-        style_selections = args.pop()
-        performance_selection = Performance(args.pop())
-        aspect_ratios_selection = args.pop()
-        image_number = args.pop()
-        output_format = args.pop()
-        image_seed = args.pop()
-        read_wildcards_in_order = args.pop()
-        sharpness = args.pop()
-        guidance_scale = args.pop()
-        base_model_name = args.pop()
-        refiner_model_name = args.pop()
-        refiner_switch = args.pop()
-        loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in
-                                   range(modules.config.default_max_lora_number)])
-        input_image_checkbox = args.pop()
-        current_tab = args.pop()
-        uov_method = args.pop()
-        uov_input_image = args.pop()
-        outpaint_selections = args.pop()
-        inpaint_input_image = args.pop()
-        inpaint_additional_prompt = args.pop()
-        inpaint_mask_image_upload = args.pop()
-
-        disable_preview = args.pop()
-        disable_intermediate_results = args.pop()
-        disable_seed_increment = args.pop()
-        black_out_nsfw = args.pop()
-        adm_scaler_positive = args.pop()
-        adm_scaler_negative = args.pop()
-        adm_scaler_end = args.pop()
-        adaptive_cfg = args.pop()
-        clip_skip = args.pop()
-        sampler_name = args.pop()
-        scheduler_name = args.pop()
-        vae_name = args.pop()
-        overwrite_step = args.pop()
-        overwrite_switch = args.pop()
-        overwrite_width = args.pop()
-        overwrite_height = args.pop()
-        overwrite_vary_strength = args.pop()
-        overwrite_upscale_strength = args.pop()
-        mixing_image_prompt_and_vary_upscale = args.pop()
-        mixing_image_prompt_and_inpaint = args.pop()
-        debugging_cn_preprocessor = args.pop()
-        skipping_cn_preprocessor = args.pop()
-        canny_low_threshold = args.pop()
-        canny_high_threshold = args.pop()
-        refiner_swap_method = args.pop()
-        controlnet_softness = args.pop()
-        freeu_enabled = args.pop()
-        freeu_b1 = args.pop()
-        freeu_b2 = args.pop()
-        freeu_s1 = args.pop()
-        freeu_s2 = args.pop()
-        debugging_inpaint_preprocessor = args.pop()
-        inpaint_disable_initial_latent = args.pop()
-        inpaint_engine = args.pop()
-        inpaint_strength = args.pop()
-        inpaint_respective_field = args.pop()
-        inpaint_mask_upload_checkbox = args.pop()
-        invert_mask_checkbox = args.pop()
-        inpaint_erode_or_dilate = args.pop()
-
-        save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False
-        metadata_scheme = MetadataScheme(
-            args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS
-
-        cn_tasks = {x: [] for x in flags.ip_list}
-        for _ in range(flags.controlnet_image_count):
-            cn_img = args.pop()
-            cn_stop = args.pop()
-            cn_weight = args.pop()
-            cn_type = args.pop()
-            if cn_img is not None:
-                cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])
-
-        outpaint_selections = [o.lower() for o in outpaint_selections]
+        async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections]
         base_model_additional_loras = []
-        raw_style_selections = copy.deepcopy(style_selections)
-        uov_method = uov_method.lower()
+        async_task.uov_method = async_task.uov_method.lower()
 
-        if fooocus_expansion in style_selections:
+        if fooocus_expansion in async_task.style_selections:
             use_expansion = True
-            style_selections.remove(fooocus_expansion)
+            async_task.style_selections.remove(fooocus_expansion)
         else:
             use_expansion = False
 
-        use_style = len(style_selections) > 0
+        use_style = len(async_task.style_selections) > 0
 
-        if base_model_name == refiner_model_name:
+        if async_task.base_model_name == async_task.refiner_model_name:
             print(f'Refiner disabled because base model and refiner are same.')
-            refiner_model_name = 'None'
+            async_task.refiner_model_name = 'None'
 
-        steps = performance_selection.steps()
+        if async_task.performance_selection == Performance.EXTREME_SPEED:
+            set_lcm_defaults(async_task)
+        elif async_task.performance_selection == Performance.LIGHTNING:
+            set_lightning_defaults(async_task)
+        elif async_task.performance_selection == Performance.HYPER_SD:
+            set_hyper_sd_defaults(async_task)
 
-        performance_loras = []
+        if async_task.translate_prompts:
+            translate_prompts(async_task)
 
-        if performance_selection == Performance.EXTREME_SPEED:
-            print('Enter LCM mode.')
-            progressbar(async_task, 1, 'Downloading LCM components ...')
-            performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)]
-
-            if refiner_model_name != 'None':
-                print(f'Refiner disabled in LCM mode.')
-
-            refiner_model_name = 'None'
-            sampler_name = 'lcm'
-            scheduler_name = 'lcm'
-            sharpness = 0.0
-            guidance_scale = 1.0
-            adaptive_cfg = 1.0
-            refiner_switch = 1.0
-            adm_scaler_positive = 1.0
-            adm_scaler_negative = 1.0
-            adm_scaler_end = 0.0
-
-        elif performance_selection == Performance.LIGHTNING:
-            print('Enter Lightning mode.')
-            progressbar(async_task, 1, 'Downloading Lightning components ...')
-            performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)]
-
-            if refiner_model_name != 'None':
-                print(f'Refiner disabled in Lightning mode.')
-
-            refiner_model_name = 'None'
-            sampler_name = 'euler'
-            scheduler_name = 'sgm_uniform'
-            sharpness = 0.0
-            guidance_scale = 1.0
-            adaptive_cfg = 1.0
-            refiner_switch = 1.0
-            adm_scaler_positive = 1.0
-            adm_scaler_negative = 1.0
-            adm_scaler_end = 0.0
-
-        elif performance_selection == Performance.HYPER_SD:
-            print('Enter Hyper-SD mode.')
-            progressbar(async_task, 1, 'Downloading Hyper-SD components ...')
-            performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)]
-
-            if refiner_model_name != 'None':
-                print(f'Refiner disabled in Hyper-SD mode.')
-
-            refiner_model_name = 'None'
-            sampler_name = 'dpmpp_sde_gpu'
-            scheduler_name = 'karras'
-            sharpness = 0.0
-            guidance_scale = 1.0
-            adaptive_cfg = 1.0
-            refiner_switch = 1.0
-            adm_scaler_positive = 1.0
-            adm_scaler_negative = 1.0
-            adm_scaler_end = 0.0
-
-        if translate_prompts:
-            from modules.translator import translate2en
-            prompt = translate2en(prompt, 'prompt')
-            negative_prompt = translate2en(negative_prompt, 'negative prompt')
-
-        print(f'[Parameters] Adaptive CFG = {adaptive_cfg}')
-        print(f'[Parameters] CLIP Skip = {clip_skip}')
-        print(f'[Parameters] Sharpness = {sharpness}')
-        print(f'[Parameters] ControlNet Softness = {controlnet_softness}')
+        print(f'[Parameters] Adaptive CFG = {async_task.adaptive_cfg}')
+        print(f'[Parameters] CLIP Skip = {async_task.clip_skip}')
+        print(f'[Parameters] Sharpness = {async_task.sharpness}')
+        print(f'[Parameters] ControlNet Softness = {async_task.controlnet_softness}')
         print(f'[Parameters] ADM Scale = '
-              f'{adm_scaler_positive} : '
-              f'{adm_scaler_negative} : '
-              f'{adm_scaler_end}')
+              f'{async_task.adm_scaler_positive} : '
+              f'{async_task.adm_scaler_negative} : '
+              f'{async_task.adm_scaler_end}')
+        print(f'[Parameters] Seed = {async_task.seed}')
 
-        patch_settings[pid] = PatchSettings(
-            sharpness,
-            adm_scaler_end,
-            adm_scaler_positive,
-            adm_scaler_negative,
-            controlnet_softness,
-            adaptive_cfg
-        )
+        apply_patch_settings(async_task)
 
-        cfg_scale = float(guidance_scale)
-        print(f'[Parameters] CFG = {cfg_scale}')
+        print(f'[Parameters] CFG = {async_task.cfg_scale}')
 
         initial_latent = None
         denoising_strength = 1.0
         tiled = False
 
-        width, height = aspect_ratios_selection.replace('×', ' ').split(' ')[:2]
+        width, height = async_task.aspect_ratios_selection.replace('×', ' ').split(' ')[:2]
         width, height = int(width), int(height)
 
         skip_prompt_processing = False
 
-        inpaint_worker.current_task = None
-        inpaint_parameterized = inpaint_engine != 'None'
+        inpaint_parameterized = async_task.inpaint_engine != 'None'
         inpaint_image = None
         inpaint_mask = None
         inpaint_head_model_path = None
@@ -347,480 +289,135 @@ def worker():
         controlnet_cpds_path = None
         clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
 
-        seed = int(image_seed)
-        print(f'[Parameters] Seed = {seed}')
-
         goals = []
         tasks = []
 
-        if input_image_checkbox:
-            if (current_tab == 'uov' or (
-                    current_tab == 'ip' and mixing_image_prompt_and_vary_upscale)) \
-                    and uov_method != flags.disabled and uov_input_image is not None:
-                uov_input_image = HWC3(uov_input_image)
-                if 'vary' in uov_method:
+        if async_task.input_image_checkbox:
+            if (async_task.current_tab == 'uov' or (
+                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
+                    and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
+                async_task.uov_input_image = HWC3(async_task.uov_input_image)
+                if 'vary' in async_task.uov_method:
                     goals.append('vary')
-                elif 'upscale' in uov_method:
+                elif 'upscale' in async_task.uov_method:
                     goals.append('upscale')
-                    if 'fast' in uov_method:
+                    if 'fast' in async_task.uov_method:
                         skip_prompt_processing = True
                     else:
-                        steps = performance_selection.steps_uov()
+                        async_task.steps = async_task.performance_selection.steps_uov()
 
                     progressbar(async_task, 1, 'Downloading upscale models ...')
                     modules.config.downloading_upscale_model()
-            if (current_tab == 'inpaint' or (
-                    current_tab == 'ip' and mixing_image_prompt_and_inpaint)) \
-                    and isinstance(inpaint_input_image, dict):
-                inpaint_image = inpaint_input_image['image']
-                inpaint_mask = inpaint_input_image['mask'][:, :, 0]
+            if (async_task.current_tab == 'inpaint' or (
+                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
+                    and isinstance(async_task.inpaint_input_image, dict):
+                inpaint_image = async_task.inpaint_input_image['image']
+                inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
 
-                if inpaint_mask_upload_checkbox:
-                    if isinstance(inpaint_mask_image_upload, dict):
-                        if (isinstance(inpaint_mask_image_upload['image'], np.ndarray)
-                                and isinstance(inpaint_mask_image_upload['mask'], np.ndarray)
-                                and inpaint_mask_image_upload['image'].ndim == 3):
-                            inpaint_mask_image_upload = np.maximum(inpaint_mask_image_upload['image'], inpaint_mask_image_upload['mask'])
-                    if isinstance(inpaint_mask_image_upload, np.ndarray) and inpaint_mask_image_upload.ndim == 3:
+                if async_task.inpaint_mask_upload_checkbox:
+                    if isinstance(async_task.inpaint_mask_image_upload, dict):
+                        if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
+                                and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
+                                and async_task.inpaint_mask_image_upload['image'].ndim == 3):
+                            async_task.inpaint_mask_image_upload = np.maximum(async_task.inpaint_mask_image_upload['image'], async_task.inpaint_mask_image_upload['mask'])
+                    if isinstance(async_task.inpaint_mask_image_upload, np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
                         H, W, C = inpaint_image.shape
-                        inpaint_mask_image_upload = resample_image(inpaint_mask_image_upload, width=W, height=H)
-                        inpaint_mask_image_upload = np.mean(inpaint_mask_image_upload, axis=2)
-                        inpaint_mask_image_upload = (inpaint_mask_image_upload > 127).astype(np.uint8) * 255
-                        inpaint_mask = np.maximum(inpaint_mask, inpaint_mask_image_upload)
+                        async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload, width=W, height=H)
+                        async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
+                        async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(np.uint8) * 255
+                        inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)  # update the local mask: it is what the checks below read
 
-                if int(inpaint_erode_or_dilate) != 0:
-                    inpaint_mask = erode_or_dilate(inpaint_mask, inpaint_erode_or_dilate)
+                if int(async_task.inpaint_erode_or_dilate) != 0:
+                    inpaint_mask = erode_or_dilate(inpaint_mask, async_task.inpaint_erode_or_dilate)
 
-                if invert_mask_checkbox:
-                    inpaint_mask = 255 - inpaint_mask
+                if async_task.invert_mask_checkbox:
+                    inpaint_mask = 255 - inpaint_mask  # AsyncTask defines no inpaint_mask attribute; the mask lives in this local
 
                 inpaint_image = HWC3(inpaint_image)
                 if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
-                        and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0):
+                        and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
                     progressbar(async_task, 1, 'Downloading upscale models ...')
                     modules.config.downloading_upscale_model()
                     if inpaint_parameterized:
                         progressbar(async_task, 1, 'Downloading inpainter ...')
                         inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
-                            inpaint_engine)
+                            async_task.inpaint_engine)
                         base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
                         print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
-                        if refiner_model_name == 'None':
+                        if async_task.refiner_model_name == 'None':
                             use_synthetic_refiner = True
-                            refiner_switch = 0.8
+                            async_task.refiner_switch = 0.8
                     else:
                         inpaint_head_model_path, inpaint_patch_model_path = None, None
                         print(f'[Inpaint] Parameterized inpaint is disabled.')
-                    if inpaint_additional_prompt != '':
-                        if prompt == '':
-                            prompt = inpaint_additional_prompt
+                    if async_task.inpaint_additional_prompt != '':
+                        if async_task.prompt == '':
+                            async_task.prompt = async_task.inpaint_additional_prompt
                         else:
-                            prompt = inpaint_additional_prompt + '\n' + prompt
+                            async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
                     goals.append('inpaint')
-            if current_tab == 'ip' or \
-                    mixing_image_prompt_and_vary_upscale or \
-                    mixing_image_prompt_and_inpaint:
+            if async_task.current_tab == 'ip' or \
+                    async_task.mixing_image_prompt_and_vary_upscale or \
+                    async_task.mixing_image_prompt_and_inpaint:
                 goals.append('cn')
                 progressbar(async_task, 1, 'Downloading control models ...')
-                if len(cn_tasks[flags.cn_canny]) > 0:
+                if len(async_task.cn_tasks[flags.cn_canny]) > 0:
                     controlnet_canny_path = modules.config.downloading_controlnet_canny()
-                if len(cn_tasks[flags.cn_cpds]) > 0:
+                if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
                     controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
-                if len(cn_tasks[flags.cn_ip]) > 0:
+                if len(async_task.cn_tasks[flags.cn_ip]) > 0:
                     clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
-                if len(cn_tasks[flags.cn_ip_face]) > 0:
+                if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
                     clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
                         'face')
-                progressbar(async_task, 1, 'Loading control models ...')
+
 
         # Load or unload CNs
+        progressbar(async_task, 1, 'Loading control models ...')
         pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
         ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
         ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
 
-        if overwrite_step > 0:
-            steps = overwrite_step
+        height, switch, width = apply_overrides(async_task, height, width)
 
-        switch = int(round(steps * refiner_switch))
-
-        if overwrite_switch > 0:
-            switch = overwrite_switch
-
-        if overwrite_width > 0:
-            width = overwrite_width
-
-        if overwrite_height > 0:
-            height = overwrite_height
-
-        print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}')
-        print(f'[Parameters] Steps = {steps} - {switch}')
+        print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}')
+        print(f'[Parameters] Steps = {async_task.steps} - {switch}')
 
         progressbar(async_task, 1, 'Initializing ...')
 
+        tasks = []
         if not skip_prompt_processing:
-
-            prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')
-            negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='')
-
-            prompt = prompts[0]
-            negative_prompt = negative_prompts[0]
-
-            if prompt == '':
-                # disable expansion when empty since it is not meaningful and influences image prompt
-                use_expansion = False
-
-            extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
-            extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
-
-            progressbar(async_task, 2, 'Loading models ...')
-
-            lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames, performance_selection)
-            loras, prompt = parse_lora_references_from_prompt(prompt, loras, modules.config.default_max_lora_number, lora_filenames=lora_filenames)
-            loras += performance_loras
-
-            pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name,
-                                        loras=loras, base_model_additional_loras=base_model_additional_loras,
-                                        use_synthetic_refiner=use_synthetic_refiner, vae_name=vae_name)
-
-            pipeline.set_clip_skip(clip_skip)
-
-            progressbar(async_task, 3, 'Processing prompts ...')
-            tasks = []
-
-            for i in range(image_number):
-                if disable_seed_increment:
-                    task_seed = seed % (constants.MAX_SEED + 1)
-                else:
-                    task_seed = (seed + i) % (constants.MAX_SEED + 1)  # randint is inclusive, % is not
-
-                task_rng = random.Random(task_seed)  # may bind to inpaint noise in the future
-                task_prompt = apply_wildcards(prompt, task_rng, i, read_wildcards_in_order)
-                task_prompt = apply_arrays(task_prompt, i)
-                task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, read_wildcards_in_order)
-                task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in
-                                               extra_positive_prompts]
-                task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in
-                                               extra_negative_prompts]
-
-                positive_basic_workloads = []
-                negative_basic_workloads = []
-
-                task_styles = style_selections.copy()
-                if use_style:
-                    for i, s in enumerate(task_styles):
-                        if s == random_style_name:
-                            s = get_random_style(task_rng)
-                            task_styles[i] = s
-                        p, n = apply_style(s, positive=task_prompt)
-                        positive_basic_workloads = positive_basic_workloads + p
-                        negative_basic_workloads = negative_basic_workloads + n
-                else:
-                    positive_basic_workloads.append(task_prompt)
-
-                negative_basic_workloads.append(task_negative_prompt)  # Always use independent workload for negative.
-
-                positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts
-                negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts
-
-                positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt)
-                negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt)
-
-                tasks.append(dict(
-                    task_seed=task_seed,
-                    task_prompt=task_prompt,
-                    task_negative_prompt=task_negative_prompt,
-                    positive=positive_basic_workloads,
-                    negative=negative_basic_workloads,
-                    expansion='',
-                    c=None,
-                    uc=None,
-                    positive_top_k=len(positive_basic_workloads),
-                    negative_top_k=len(negative_basic_workloads),
-                    log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts),
-                    log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts),
-                    styles=task_styles
-                ))
-
-            if use_expansion:
-                for i, t in enumerate(tasks):
-                    progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...')
-                    expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed'])
-                    print(f'[Prompt Expansion] {expansion}')
-                    t['expansion'] = expansion
-                    t['positive'] = copy.deepcopy(t['positive']) + [expansion]  # Deep copy.
-
-            for i, t in enumerate(tasks):
-                progressbar(async_task, 5, f'Encoding positive #{i + 1} ...')
-                t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k'])
-
-            for i, t in enumerate(tasks):
-                if abs(float(cfg_scale) - 1.0) < 1e-4:
-                    t['uc'] = pipeline.clone_cond(t['c'])
-                else:
-                    progressbar(async_task, 6, f'Encoding negative #{i + 1} ...')
-                    t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
+            tasks, use_expansion = process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
+                                                  use_synthetic_refiner)
 
         if len(goals) > 0:
             progressbar(async_task, 7, 'Image processing ...')
 
         if 'vary' in goals:
-            if 'subtle' in uov_method:
-                denoising_strength = 0.5
-            if 'strong' in uov_method:
-                denoising_strength = 0.85
-            if overwrite_vary_strength > 0:
-                denoising_strength = overwrite_vary_strength
-
-            shape_ceil = get_image_shape_ceil(uov_input_image)
-            if shape_ceil < 1024:
-                print(f'[Vary] Image is resized because it is too small.')
-                shape_ceil = 1024
-            elif shape_ceil > 2048:
-                print(f'[Vary] Image is resized because it is too big.')
-                shape_ceil = 2048
-
-            uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
-
-            initial_pixels = core.numpy_to_pytorch(uov_input_image)
-            progressbar(async_task, 8, 'VAE encoding ...')
-
-            candidate_vae, _ = pipeline.get_candidate_vae(
-                steps=steps,
-                switch=switch,
-                denoise=denoising_strength,
-                refiner_swap_method=refiner_swap_method
-            )
-
-            initial_latent = core.encode_vae(vae=candidate_vae, pixels=initial_pixels)
-            B, C, H, W = initial_latent['samples'].shape
-            width = W * 8
-            height = H * 8
-            print(f'Final resolution is {str((height, width))}.')
+            height, initial_latent, width = apply_vary(async_task, denoising_strength, height, initial_latent, switch, width)
 
         if 'upscale' in goals:
-            H, W, C = uov_input_image.shape
-            progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...')
-            uov_input_image = perform_upscale(uov_input_image)
-            print(f'Image upscaled.')
-
-            if '1.5x' in uov_method:
-                f = 1.5
-            elif '2x' in uov_method:
-                f = 2.0
-            else:
-                f = 1.0
-
-            shape_ceil = get_shape_ceil(H * f, W * f)
-
-            if shape_ceil < 1024:
-                print(f'[Upscale] Image is resized because it is too small.')
-                uov_input_image = set_image_shape_ceil(uov_input_image, 1024)
-                shape_ceil = 1024
-            else:
-                uov_input_image = resample_image(uov_input_image, width=W * f, height=H * f)
-
-            image_is_super_large = shape_ceil > 2800
-
-            if 'fast' in uov_method:
-                direct_return = True
-            elif image_is_super_large:
-                print('Image is too large. Directly returned the SR image. '
-                      'Usually directly return SR image at 4K resolution '
-                      'yields better results than SDXL diffusion.')
-                direct_return = True
-            else:
-                direct_return = False
-
+            denoising_strength, height, initial_latent, tiled, width, direct_return = apply_upscale(async_task, switch)
             if direct_return:
-                d = [('Upscale (Fast)', 'upscale_fast', '2x')]
-                if modules.config.default_black_out_nsfw or black_out_nsfw:
-                    progressbar(async_task, 100, 'Checking for NSFW content ...')
-                    uov_input_image = default_censor(uov_input_image)
-                progressbar(async_task, 100, 'Saving image to system ...')
-                uov_input_image_path = log(uov_input_image, d, output_format=output_format)
-                yield_result(async_task, uov_input_image_path, black_out_nsfw, False, do_not_show_finished_images=True)
                 return
 
-            tiled = True
-            denoising_strength = 0.382
-
-            if overwrite_upscale_strength > 0:
-                denoising_strength = overwrite_upscale_strength
-
-            initial_pixels = core.numpy_to_pytorch(uov_input_image)
-            progressbar(async_task, 10, 'VAE encoding ...')
-
-            candidate_vae, _ = pipeline.get_candidate_vae(
-                steps=steps,
-                switch=switch,
-                denoise=denoising_strength,
-                refiner_swap_method=refiner_swap_method
-            )
-
-            initial_latent = core.encode_vae(
-                vae=candidate_vae,
-                pixels=initial_pixels, tiled=True)
-            B, C, H, W = initial_latent['samples'].shape
-            width = W * 8
-            height = H * 8
-            print(f'Final resolution is {str((height, width))}.')
-
         if 'inpaint' in goals:
-            if len(outpaint_selections) > 0:
-                H, W, C = inpaint_image.shape
-                if 'top' in outpaint_selections:
-                    inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
-                    inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
-                                          constant_values=255)
-                if 'bottom' in outpaint_selections:
-                    inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
-                    inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
-                                          constant_values=255)
-
-                H, W, C = inpaint_image.shape
-                if 'left' in outpaint_selections:
-                    inpaint_image = np.pad(inpaint_image, [[0, 0], [int(W * 0.3), 0], [0, 0]], mode='edge')
-                    inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(W * 0.3), 0]], mode='constant',
-                                          constant_values=255)
-                if 'right' in outpaint_selections:
-                    inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(W * 0.3)], [0, 0]], mode='edge')
-                    inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(W * 0.3)]], mode='constant',
-                                          constant_values=255)
-
-                inpaint_image = np.ascontiguousarray(inpaint_image.copy())
-                inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
-                inpaint_strength = 1.0
-                inpaint_respective_field = 1.0
-
-            denoising_strength = inpaint_strength
-
-            inpaint_worker.current_task = inpaint_worker.InpaintWorker(
-                image=inpaint_image,
-                mask=inpaint_mask,
-                use_fill=denoising_strength > 0.99,
-                k=inpaint_respective_field
-            )
-
-            if debugging_inpaint_preprocessor:
-                yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), black_out_nsfw, do_not_show_finished_images=True)
+            denoising_strength, height, initial_latent, width = apply_inpaint(async_task, initial_latent,
+                                                                              inpaint_head_model_path, inpaint_image,
+                                                                              inpaint_mask, inpaint_parameterized,
+                                                                              switch)
+            if async_task.debugging_inpaint_preprocessor:
                 return
 
-            progressbar(async_task, 11, 'VAE Inpaint encoding ...')
-
-            inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
-            inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
-            inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
-
-            candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae(
-                steps=steps,
-                switch=switch,
-                denoise=denoising_strength,
-                refiner_swap_method=refiner_swap_method
-            )
-
-            latent_inpaint, latent_mask = core.encode_vae_inpaint(
-                mask=inpaint_pixel_mask,
-                vae=candidate_vae,
-                pixels=inpaint_pixel_image)
-
-            latent_swap = None
-            if candidate_vae_swap is not None:
-                progressbar(async_task, 12, 'VAE SD15 encoding ...')
-                latent_swap = core.encode_vae(
-                    vae=candidate_vae_swap,
-                    pixels=inpaint_pixel_fill)['samples']
-
-            progressbar(async_task, 13, 'VAE encoding ...')
-            latent_fill = core.encode_vae(
-                vae=candidate_vae,
-                pixels=inpaint_pixel_fill)['samples']
-
-            inpaint_worker.current_task.load_latent(
-                latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap)
-
-            if inpaint_parameterized:
-                pipeline.final_unet = inpaint_worker.current_task.patch(
-                    inpaint_head_model_path=inpaint_head_model_path,
-                    inpaint_latent=latent_inpaint,
-                    inpaint_latent_mask=latent_mask,
-                    model=pipeline.final_unet
-                )
-
-            if not inpaint_disable_initial_latent:
-                initial_latent = {'samples': latent_fill}
-
-            B, C, H, W = latent_fill.shape
-            height, width = H * 8, W * 8
-            final_height, final_width = inpaint_worker.current_task.image.shape[:2]
-            print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.')
-
         if 'cn' in goals:
-            for task in cn_tasks[flags.cn_canny]:
-                cn_img, cn_stop, cn_weight = task
-                cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+            apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+            if async_task.debugging_cn_preprocessor:
+                return
 
-                if not skipping_cn_preprocessor:
-                    cn_img = preprocessors.canny_pyramid(cn_img, canny_low_threshold, canny_high_threshold)
+        if async_task.freeu_enabled:
+            apply_freeu(async_task)
 
-                cn_img = HWC3(cn_img)
-                task[0] = core.numpy_to_pytorch(cn_img)
-                if debugging_cn_preprocessor:
-                    yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
-                    return
-            for task in cn_tasks[flags.cn_cpds]:
-                cn_img, cn_stop, cn_weight = task
-                cn_img = resize_image(HWC3(cn_img), width=width, height=height)
-
-                if not skipping_cn_preprocessor:
-                    cn_img = preprocessors.cpds(cn_img)
-
-                cn_img = HWC3(cn_img)
-                task[0] = core.numpy_to_pytorch(cn_img)
-                if debugging_cn_preprocessor:
-                    yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
-                    return
-            for task in cn_tasks[flags.cn_ip]:
-                cn_img, cn_stop, cn_weight = task
-                cn_img = HWC3(cn_img)
-
-                # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
-                cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
-
-                task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
-                if debugging_cn_preprocessor:
-                    yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
-                    return
-            for task in cn_tasks[flags.cn_ip_face]:
-                cn_img, cn_stop, cn_weight = task
-                cn_img = HWC3(cn_img)
-
-                if not skipping_cn_preprocessor:
-                    cn_img = extras.face_crop.crop_image(cn_img)
-
-                # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
-                cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
-
-                task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
-                if debugging_cn_preprocessor:
-                    yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
-                    return
-
-            all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face]
-
-            if len(all_ip_tasks) > 0:
-                pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
-
-        if freeu_enabled:
-            print(f'FreeU is enabled!')
-            pipeline.final_unet = core.apply_freeu(
-                pipeline.final_unet,
-                freeu_b1,
-                freeu_b2,
-                freeu_s1,
-                freeu_s2
-            )
-
-        all_steps = steps * image_number
+        all_steps = async_task.steps * async_task.image_number
 
         print(f'[Parameters] Denoising Strength = {denoising_strength}')
 
@@ -831,163 +428,34 @@ def worker():
 
         print(f'[Parameters] Initial Latent shape: {log_shape}')
 
-        preparation_time = time.perf_counter() - execution_start_time
+        preparation_time = time.perf_counter() - preparation_start_time
         print(f'Preparation time: {preparation_time:.2f} seconds')
 
-        final_sampler_name = sampler_name
-        final_scheduler_name = scheduler_name
-
-        if scheduler_name in ['lcm', 'tcd']:
-            final_scheduler_name = 'sgm_uniform'
-
-            def patch_discrete(unet):
-                return core.opModelSamplingDiscrete.patch(
-                    pipeline.final_unet,
-                    sampling=scheduler_name,
-                    zsnr=False)[0]
-
-            if pipeline.final_unet is not None:
-                pipeline.final_unet = patch_discrete(pipeline.final_unet)
-            if pipeline.final_refiner_unet is not None:
-                pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet)
-            print(f'Using {scheduler_name} scheduler.')
-        elif scheduler_name == 'edm_playground_v2.5':
-            final_scheduler_name = 'karras'
-
-            def patch_edm(unet):
-                return core.opModelSamplingContinuousEDM.patch(
-                    unet,
-                    sampling=scheduler_name,
-                    sigma_max=120.0,
-                    sigma_min=0.002)[0]
-
-            if pipeline.final_unet is not None:
-                pipeline.final_unet = patch_edm(pipeline.final_unet)
-            if pipeline.final_refiner_unet is not None:
-                pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet)
-
-            print(f'Using {scheduler_name} scheduler.')
+        final_scheduler_name = patch_samplers(async_task)
+        print(f'Using {final_scheduler_name} scheduler.')
 
         async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)])
 
+        processing_start_time = time.perf_counter()
+
         def callback(step, x0, x, total_steps, y):
-            done_steps = current_task_id * steps + step
+            done_steps = current_task_id * async_task.steps + step
             async_task.yields.append(['preview', (
                 int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
-                f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{image_number} ...', y)])
+                f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{async_task.image_number} ...', y)])
 
         for current_task_id, task in enumerate(tasks):
-            current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(current_task_id * steps) / float(all_steps))
-            progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{image_number} ...')
+            current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
+                current_task_id * async_task.steps) / float(all_steps))
+            progressbar(async_task, current_progress,
+                        f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
             execution_start_time = time.perf_counter()
 
             try:
-                if async_task.last_stop is not False:
-                    ldm_patched.modules.model_management.interrupt_current_processing()
-                positive_cond, negative_cond = task['c'], task['uc']
-
-                if 'cn' in goals:
-                    for cn_flag, cn_path in [
-                        (flags.cn_canny, controlnet_canny_path),
-                        (flags.cn_cpds, controlnet_cpds_path)
-                    ]:
-                        for cn_img, cn_stop, cn_weight in cn_tasks[cn_flag]:
-                            positive_cond, negative_cond = core.apply_controlnet(
-                                positive_cond, negative_cond,
-                                pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop)
-
-                imgs = pipeline.process_diffusion(
-                    positive_cond=positive_cond,
-                    negative_cond=negative_cond,
-                    steps=steps,
-                    switch=switch,
-                    width=width,
-                    height=height,
-                    image_seed=task['task_seed'],
-                    callback=callback,
-                    sampler_name=final_sampler_name,
-                    scheduler_name=final_scheduler_name,
-                    latent=initial_latent,
-                    denoise=denoising_strength,
-                    tiled=tiled,
-                    cfg_scale=cfg_scale,
-                    refiner_swap_method=refiner_swap_method,
-                    disable_preview=disable_preview
-                )
-
-                del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
-
-                if inpaint_worker.current_task is not None:
-                    imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-
-                img_paths = []
-
-                current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float((current_task_id + 1) * steps) / float(all_steps))
-                if modules.config.default_black_out_nsfw or black_out_nsfw:
-                    progressbar(async_task, current_progress, 'Checking for NSFW content ...')
-                    imgs = default_censor(imgs)
-
-                progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{image_number} to system ...')
-
-                for x in imgs:
-                    d = [('Prompt', 'prompt', task['log_positive_prompt']),
-                         ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']),
-                         ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']),
-                         ('Styles', 'styles',
-                          str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])),
-                         ('Performance', 'performance', performance_selection.value)]
-
-                    if performance_selection.steps() != steps:
-                        d.append(('Steps', 'steps', steps))
-
-                    d += [('Resolution', 'resolution', str((width, height))),
-                          ('Guidance Scale', 'guidance_scale', guidance_scale),
-                          ('Sharpness', 'sharpness', sharpness),
-                          ('ADM Guidance', 'adm_guidance', str((
-                              modules.patch.patch_settings[pid].positive_adm_scale,
-                              modules.patch.patch_settings[pid].negative_adm_scale,
-                              modules.patch.patch_settings[pid].adm_scaler_end))),
-                          ('Base Model', 'base_model', base_model_name),
-                          ('Refiner Model', 'refiner_model', refiner_model_name),
-                          ('Refiner Switch', 'refiner_switch', refiner_switch)]
-
-                    if refiner_model_name != 'None':
-                        if overwrite_switch > 0:
-                            d.append(('Overwrite Switch', 'overwrite_switch', overwrite_switch))
-                        if refiner_swap_method != flags.refiner_swap_method:
-                            d.append(('Refiner Swap Method', 'refiner_swap_method', refiner_swap_method))
-                    if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr:
-                        d.append(
-                            ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg))
-
-                    if clip_skip > 1:
-                        d.append(('CLIP Skip', 'clip_skip', clip_skip))
-                    d.append(('Sampler', 'sampler', sampler_name))
-                    d.append(('Scheduler', 'scheduler', scheduler_name))
-                    d.append(('VAE', 'vae', vae_name))
-                    d.append(('Seed', 'seed', str(task['task_seed'])))
-
-                    if freeu_enabled:
-                        d.append(('FreeU', 'freeu', str((freeu_b1, freeu_b2, freeu_s1, freeu_s2))))
-
-                    for li, (n, w) in enumerate(loras):
-                        if n != 'None':
-                            d.append((f'LoRA {li + 1}', f'lora_combined_{li + 1}', f'{n} : {w}'))
-
-                    metadata_parser = None
-                    if save_metadata_to_images:
-                        metadata_parser = modules.meta_parser.get_metadata_parser(metadata_scheme)
-                        metadata_parser.set_data(task['log_positive_prompt'], task['positive'],
-                                                 task['log_negative_prompt'], task['negative'],
-                                                 steps, base_model_name, refiner_model_name, loras, vae_name)
-                    d.append(('Metadata Scheme', 'metadata_scheme',
-                              metadata_scheme.value if save_metadata_to_images else save_metadata_to_images))
-                    d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version))
-                    img_paths.append(log(x, d, metadata_parser, output_format, task))
-
-                yield_result(async_task, img_paths, black_out_nsfw, False,
-                             do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results)
-            except ldm_patched.modules.model_management.InterruptProcessingException as e:
+                process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                             current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
+                             switch, task, tasks, tiled, use_expansion, width, height)
+            except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
                     print('User skipped')
                     async_task.last_stop = False
@@ -998,18 +466,570 @@ def worker():
 
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
+
         async_task.processing = False
+
+        processing_time = time.perf_counter() - processing_start_time
+        print(f'Processing time (total): {processing_time:.2f} seconds')
         return
 
+    def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
+                     denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks,
+                     tiled, use_expansion, width, height):  # diffuse one task, then censor/save/yield its images
+        if async_task.last_stop is not False:  # last_stop was set (e.g. 'skip'): request an interrupt first
+            ldm_patched.modules.model_management.interrupt_current_processing()
+        positive_cond, negative_cond = task['c'], task['uc']
+        if 'cn' in goals:  # fold every canny and CPDS ControlNet task into the conditioning pair
+            for cn_flag, cn_path in [
+                (flags.cn_canny, controlnet_canny_path),
+                (flags.cn_cpds, controlnet_cpds_path)
+            ]:
+                for cn_img, cn_stop, cn_weight in async_task.cn_tasks[cn_flag]:
+                    positive_cond, negative_cond = core.apply_controlnet(
+                        positive_cond, negative_cond,
+                        pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop)
+        imgs = pipeline.process_diffusion(
+            positive_cond=positive_cond,
+            negative_cond=negative_cond,
+            steps=async_task.steps,
+            switch=switch,
+            width=width,
+            height=height,
+            image_seed=task['task_seed'],
+            callback=callback,
+            sampler_name=async_task.sampler_name,
+            scheduler_name=final_scheduler_name,
+            latent=initial_latent,
+            denoise=denoising_strength,
+            tiled=tiled,
+            cfg_scale=async_task.cfg_scale,
+            refiner_swap_method=async_task.refiner_swap_method,
+            disable_preview=async_task.disable_preview
+        )
+        del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
+        if inpaint_worker.current_task is not None:  # an inpaint worker is set: post-process every image with it
+            imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
+        current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
+            (current_task_id + 1) * async_task.steps) / float(all_steps))  # progress once this task's steps are done
+        if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:  # censor if config or task asks for it
+            progressbar(async_task, current_progress, 'Checking for NSFW content ...')
+            imgs = default_censor(imgs)
+        progressbar(async_task, current_progress,
+                    f'Saving image {current_task_id + 1}/{async_task.image_number} to system ...')
+        img_paths = save_and_log(async_task, height, imgs, task, use_expansion, width)
+        yield_result(async_task, img_paths, async_task.black_out_nsfw, False,
+                     do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+
+        return imgs  # images after the optional inpaint post-processing and censoring
+    def apply_patch_settings(async_task):  # build a PatchSettings from the task and store it under `pid`
+        patch_settings[pid] = PatchSettings(  # `pid` is not defined in this function; it comes from an outer scope
+            async_task.sharpness,
+            async_task.adm_scaler_end,
+            async_task.adm_scaler_positive,
+            async_task.adm_scaler_negative,
+            async_task.controlnet_softness,
+            async_task.adaptive_cfg
+        )
+
+    def save_and_log(async_task, height, imgs, task, use_expansion, width) -> list:  # log() each image, collect results
+        img_paths = []
+        for x in imgs:
+            d = [('Prompt', 'prompt', task['log_positive_prompt']),  # list of 3-tuples handed to log() below
+                 ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']),
+                 ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']),
+                 ('Styles', 'styles',  # with use_expansion, fooocus_expansion is listed as an extra leading style
+                  str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])),
+                 ('Performance', 'performance', async_task.performance_selection.value)]
+
+            if async_task.performance_selection.steps() != async_task.steps:  # only when steps differ from the preset
+                d.append(('Steps', 'steps', async_task.steps))
+
+            d += [('Resolution', 'resolution', str((width, height))),
+                  ('Guidance Scale', 'guidance_scale', async_task.cfg_scale),
+                  ('Sharpness', 'sharpness', async_task.sharpness),
+                  ('ADM Guidance', 'adm_guidance', str((
+                      modules.patch.patch_settings[pid].positive_adm_scale,
+                      modules.patch.patch_settings[pid].negative_adm_scale,
+                      modules.patch.patch_settings[pid].adm_scaler_end))),
+                  ('Base Model', 'base_model', async_task.base_model_name),
+                  ('Refiner Model', 'refiner_model', async_task.refiner_model_name),
+                  ('Refiner Switch', 'refiner_switch', async_task.refiner_switch)]
+
+            if async_task.refiner_model_name != 'None':  # refiner extras; 'None' is compared as a string here
+                if async_task.overwrite_switch > 0:
+                    d.append(('Overwrite Switch', 'overwrite_switch', async_task.overwrite_switch))
+                if async_task.refiner_swap_method != flags.refiner_swap_method:
+                    d.append(('Refiner Swap Method', 'refiner_swap_method', async_task.refiner_swap_method))
+            if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr:  # non-default only
+                d.append(
+                    ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg))
+
+            if async_task.clip_skip > 1:
+                d.append(('CLIP Skip', 'clip_skip', async_task.clip_skip))
+            d.append(('Sampler', 'sampler', async_task.sampler_name))
+            d.append(('Scheduler', 'scheduler', async_task.scheduler_name))
+            d.append(('VAE', 'vae', async_task.vae_name))
+            d.append(('Seed', 'seed', str(task['task_seed'])))
+
+            if async_task.freeu_enabled:
+                d.append(('FreeU', 'freeu',
+                          str((async_task.freeu_b1, async_task.freeu_b2, async_task.freeu_s1, async_task.freeu_s2))))
+
+            for li, (n, w) in enumerate(async_task.loras):  # one entry per LoRA whose name is not the string 'None'
+                if n != 'None':
+                    d.append((f'LoRA {li + 1}', f'lora_combined_{li + 1}', f'{n} : {w}'))
+
+            metadata_parser = None  # stays None unless save_metadata_to_images is set
+            if async_task.save_metadata_to_images:
+                metadata_parser = modules.meta_parser.get_metadata_parser(async_task.metadata_scheme)
+                metadata_parser.set_data(task['log_positive_prompt'], task['positive'],
+                                         task['log_negative_prompt'], task['negative'],
+                                         async_task.steps, async_task.base_model_name, async_task.refiner_model_name,
+                                         async_task.loras, async_task.vae_name)
+            d.append(('Metadata Scheme', 'metadata_scheme',  # scheme value, or the flag itself when saving is off
+                      async_task.metadata_scheme.value if async_task.save_metadata_to_images else async_task.save_metadata_to_images))
+            d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version))
+            img_paths.append(log(x, d, metadata_parser, async_task.output_format, task))  # presumably a saved path
+
+        return img_paths
+
+    def apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width):  # prepare all cn tasks
+        for task in async_task.cn_tasks[flags.cn_canny]:  # canny: resize to (width, height), then canny_pyramid
+            cn_img, cn_stop, cn_weight = task
+            cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+
+            if not async_task.skipping_cn_preprocessor:
+                cn_img = preprocessors.canny_pyramid(cn_img, async_task.canny_low_threshold,
+                                                     async_task.canny_high_threshold)
+
+            cn_img = HWC3(cn_img)
+            task[0] = core.numpy_to_pytorch(cn_img)  # the task's image slot is overwritten in place
+            if async_task.debugging_cn_preprocessor:  # debug: yield the preview; the caller returns afterwards
+                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+        for task in async_task.cn_tasks[flags.cn_cpds]:  # CPDS: same flow with the cpds preprocessor
+            cn_img, cn_stop, cn_weight = task
+            cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+
+            if not async_task.skipping_cn_preprocessor:
+                cn_img = preprocessors.cpds(cn_img)
+
+            cn_img = HWC3(cn_img)
+            task[0] = core.numpy_to_pytorch(cn_img)
+            if async_task.debugging_cn_preprocessor:
+                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+        for task in async_task.cn_tasks[flags.cn_ip]:  # image prompt: 224x224 resize, then ip_adapter.preprocess
+            cn_img, cn_stop, cn_weight = task
+            cn_img = HWC3(cn_img)
+
+            # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
+            cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
+
+            task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
+            if async_task.debugging_cn_preprocessor:
+                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+        for task in async_task.cn_tasks[flags.cn_ip_face]:  # face prompt: optional face crop before the resize
+            cn_img, cn_stop, cn_weight = task
+            cn_img = HWC3(cn_img)
+
+            if not async_task.skipping_cn_preprocessor:
+                cn_img = extras.face_crop.crop_image(cn_img)
+
+            # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
+            cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
+
+            task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
+            if async_task.debugging_cn_preprocessor:
+                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+        all_ip_tasks = async_task.cn_tasks[flags.cn_ip] + async_task.cn_tasks[flags.cn_ip_face]
+        if len(all_ip_tasks) > 0 and not async_task.debugging_cn_preprocessor:  # debug run ends in caller: no patch
+            pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
+
+    def apply_vary(async_task, denoising_strength, height, initial_latent, switch, width):
+        if 'subtle' in async_task.uov_method:
+            async_task.denoising_strength = 0.5
+        if 'strong' in async_task.uov_method:
+            async_task.denoising_strength = 0.85
+        if async_task.overwrite_vary_strength > 0:
+            async_task.denoising_strength = async_task.overwrite_vary_strength
+        shape_ceil = get_image_shape_ceil(async_task.uov_input_image)
+        if shape_ceil < 1024:
+            print(f'[Vary] Image is resized because it is too small.')
+            shape_ceil = 1024
+        elif shape_ceil > 2048:
+            print(f'[Vary] Image is resized because it is too big.')
+            shape_ceil = 2048
+        async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, shape_ceil)
+        initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
+        progressbar(async_task, 8, 'VAE encoding ...')
+        candidate_vae, _ = pipeline.get_candidate_vae(
+            steps=async_task.steps,
+            switch=switch,
+            denoise=denoising_strength,
+            refiner_swap_method=async_task.refiner_swap_method
+        )
+        initial_latent = core.encode_vae(vae=candidate_vae, pixels=initial_pixels)
+        B, C, H, W = initial_latent['samples'].shape
+        width = W * 8
+        height = H * 8
+        print(f'Final resolution is {str((height, width))}.')
+        return height, initial_latent, width
+
+    def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
+                inpaint_mask, inpaint_parameterized, switch):
+        if len(async_task.outpaint_selections) > 0:
+            H, W, C = inpaint_image.shape
+            if 'top' in async_task.outpaint_selections:
+                inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
+                inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
+                                      constant_values=255)
+            if 'bottom' in async_task.outpaint_selections:
+                inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
+                inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
+                                      constant_values=255)
+
+            H, W, C = inpaint_image.shape
+            if 'left' in async_task.outpaint_selections:
+                inpaint_image = np.pad(inpaint_image, [[0, 0], [int(W * 0.3), 0], [0, 0]], mode='edge')
+                inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(W * 0.3), 0]], mode='constant',
+                                      constant_values=255)
+            if 'right' in async_task.outpaint_selections:
+                inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(W * 0.3)], [0, 0]], mode='edge')
+                inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(W * 0.3)]], mode='constant',
+                                      constant_values=255)
+
+            inpaint_image = np.ascontiguousarray(inpaint_image.copy())
+            inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
+            inpaint_strength = 1.0
+            inpaint_respective_field = 1.0
+        denoising_strength = async_task.inpaint_strength
+        inpaint_worker.current_task = inpaint_worker.InpaintWorker(
+            image=inpaint_image,
+            mask=inpaint_mask,
+            use_fill=denoising_strength > 0.99,
+            k=async_task.inpaint_respective_field
+        )
+        if async_task.debugging_inpaint_preprocessor:
+            yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), async_task.black_out_nsfw,
+                         do_not_show_finished_images=True)
+            return
+
+        progressbar(async_task, 11, 'VAE Inpaint encoding ...')
+        inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
+        inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
+        inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
+        candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae(
+            steps=async_task.steps,
+            switch=switch,
+            denoise=denoising_strength,
+            refiner_swap_method=async_task.refiner_swap_method
+        )
+        latent_inpaint, latent_mask = core.encode_vae_inpaint(
+            mask=inpaint_pixel_mask,
+            vae=candidate_vae,
+            pixels=inpaint_pixel_image)
+        latent_swap = None
+        if candidate_vae_swap is not None:
+            progressbar(async_task, 12, 'VAE SD15 encoding ...')
+            latent_swap = core.encode_vae(
+                vae=candidate_vae_swap,
+                pixels=inpaint_pixel_fill)['samples']
+        progressbar(async_task, 13, 'VAE encoding ...')
+        latent_fill = core.encode_vae(
+            vae=candidate_vae,
+            pixels=inpaint_pixel_fill)['samples']
+        inpaint_worker.current_task.load_latent(
+            latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap)
+        if inpaint_parameterized:
+            pipeline.final_unet = inpaint_worker.current_task.patch(
+                inpaint_head_model_path=inpaint_head_model_path,
+                inpaint_latent=latent_inpaint,
+                inpaint_latent_mask=latent_mask,
+                model=pipeline.final_unet
+            )
+        if not async_task.inpaint_disable_initial_latent:
+            initial_latent = {'samples': latent_fill}
+        B, C, H, W = latent_fill.shape
+        height, width = H * 8, W * 8
+        final_height, final_width = inpaint_worker.current_task.image.shape[:2]
+        print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.')
+        return denoising_strength, height, initial_latent, width
+
+    def apply_upscale(async_task, switch):
+        H, W, C = async_task.uov_input_image.shape
+        progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...')
+        async_task.uov_input_image = perform_upscale(async_task.uov_input_image)
+        print(f'Image upscaled.')
+        if '1.5x' in async_task.uov_method:
+            f = 1.5
+        elif '2x' in async_task.uov_method:
+            f = 2.0
+        else:
+            f = 1.0
+        shape_ceil = get_shape_ceil(H * f, W * f)
+        if shape_ceil < 1024:
+            print(f'[Upscale] Image is resized because it is too small.')
+            async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, 1024)
+            shape_ceil = 1024
+        else:
+            async_task.uov_input_image = resample_image(async_task.uov_input_image, width=W * f, height=H * f)
+        image_is_super_large = shape_ceil > 2800
+        if 'fast' in async_task.uov_method:
+            direct_return = True
+        elif image_is_super_large:
+            print('Image is too large. Directly returned the SR image. '
+                  'Usually directly return SR image at 4K resolution '
+                  'yields better results than SDXL diffusion.')
+            direct_return = True
+        else:
+            direct_return = False
+        if direct_return:
+            d = [('Upscale (Fast)', 'upscale_fast', '2x')]
+            if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
+                progressbar(async_task, 100, 'Checking for NSFW content ...')
+                async_task.uov_input_image = default_censor(async_task.uov_input_image)
+            progressbar(async_task, 100, 'Saving image to system ...')
+            uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
+            yield_result(async_task, uov_input_image_path, async_task.black_out_nsfw, False,
+                         do_not_show_finished_images=True)
+            return direct_return
+
+        tiled = True
+        denoising_strength = 0.382
+        if async_task.overwrite_upscale_strength > 0:
+            denoising_strength = async_task.overwrite_upscale_strength
+        initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
+        progressbar(async_task, 10, 'VAE encoding ...')
+        candidate_vae, _ = pipeline.get_candidate_vae(
+            steps=async_task.steps,
+            switch=switch,
+            denoise=denoising_strength,
+            refiner_swap_method=async_task.refiner_swap_method
+        )
+        initial_latent = core.encode_vae(
+            vae=candidate_vae,
+            pixels=initial_pixels, tiled=True)
+        B, C, H, W = initial_latent['samples'].shape
+        width = W * 8
+        height = H * 8
+        print(f'Final resolution is {str((height, width))}.')
+        return denoising_strength, height, initial_latent, tiled, width, direct_return
+
+    def apply_overrides(async_task, height, width):
+        if async_task.overwrite_step > 0:
+            async_task.steps = async_task.overwrite_step
+        switch = int(round(async_task.steps * async_task.refiner_switch))
+        if async_task.overwrite_switch > 0:
+            switch = async_task.overwrite_switch
+        if async_task.overwrite_width > 0:
+            width = async_task.overwrite_width
+        if async_task.overwrite_height > 0:
+            height = async_task.overwrite_height
+        return height, switch, width
+
+    def process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
+                    use_synthetic_refiner):
+        prompts = remove_empty_str([safe_str(p) for p in async_task.prompt.splitlines()], default='')
+        negative_prompts = remove_empty_str([safe_str(p) for p in async_task.negative_prompt.splitlines()], default='')
+        prompt = prompts[0]
+        negative_prompt = negative_prompts[0]
+        if prompt == '':
+            # disable expansion when empty since it is not meaningful and influences image prompt
+            use_expansion = False
+        extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
+        extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
+        progressbar(async_task, 2, 'Loading models ...')
+        lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames,
+                                                              async_task.performance_selection)
+        loras, prompt = parse_lora_references_from_prompt(prompt, async_task.loras,
+                                                          modules.config.default_max_lora_number,
+                                                          lora_filenames=lora_filenames)
+        loras += async_task.performance_loras
+        pipeline.refresh_everything(refiner_model_name=async_task.refiner_model_name,
+                                    base_model_name=async_task.base_model_name,
+                                    loras=loras, base_model_additional_loras=base_model_additional_loras,
+                                    use_synthetic_refiner=use_synthetic_refiner, vae_name=async_task.vae_name)
+        pipeline.set_clip_skip(async_task.clip_skip)
+        progressbar(async_task, 3, 'Processing prompts ...')
+        tasks = []
+        for i in range(async_task.image_number):
+            if async_task.disable_seed_increment:
+                task_seed = async_task.seed % (constants.MAX_SEED + 1)
+            else:
+                task_seed = (async_task.seed + i) % (constants.MAX_SEED + 1)  # randint is inclusive, % is not
+
+            task_rng = random.Random(task_seed)  # may bind to inpaint noise in the future
+            task_prompt = apply_wildcards(prompt, task_rng, i, async_task.read_wildcards_in_order)
+            task_prompt = apply_arrays(task_prompt, i)
+            task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, async_task.read_wildcards_in_order)
+            task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, async_task.read_wildcards_in_order) for pmt
+                                           in
+                                           extra_positive_prompts]
+            task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, async_task.read_wildcards_in_order) for pmt
+                                           in
+                                           extra_negative_prompts]
+
+            positive_basic_workloads = []
+            negative_basic_workloads = []
+
+            task_styles = async_task.style_selections.copy()
+            if use_style:
+                for i, s in enumerate(task_styles):
+                    if s == random_style_name:
+                        s = get_random_style(task_rng)
+                        task_styles[i] = s
+                    p, n = apply_style(s, positive=task_prompt)
+                    positive_basic_workloads = positive_basic_workloads + p
+                    negative_basic_workloads = negative_basic_workloads + n
+            else:
+                positive_basic_workloads.append(task_prompt)
+
+            negative_basic_workloads.append(task_negative_prompt)  # Always use independent workload for negative.
+
+            positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts
+            negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts
+
+            positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt)
+            negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt)
+
+            tasks.append(dict(
+                task_seed=task_seed,
+                task_prompt=task_prompt,
+                task_negative_prompt=task_negative_prompt,
+                positive=positive_basic_workloads,
+                negative=negative_basic_workloads,
+                expansion='',
+                c=None,
+                uc=None,
+                positive_top_k=len(positive_basic_workloads),
+                negative_top_k=len(negative_basic_workloads),
+                log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts),
+                log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts),
+                styles=task_styles
+            ))
+        if use_expansion:
+            for i, t in enumerate(tasks):
+                progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...')
+                expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed'])
+                print(f'[Prompt Expansion] {expansion}')
+                t['expansion'] = expansion
+                t['positive'] = copy.deepcopy(t['positive']) + [expansion]  # Deep copy.
+        for i, t in enumerate(tasks):
+            progressbar(async_task, 5, f'Encoding positive #{i + 1} ...')
+            t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k'])
+        for i, t in enumerate(tasks):
+            if abs(float(async_task.cfg_scale) - 1.0) < 1e-4:
+                t['uc'] = pipeline.clone_cond(t['c'])
+            else:
+                progressbar(async_task, 6, f'Encoding negative #{i + 1} ...')
+                t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
+        return tasks, use_expansion
+
+    def apply_freeu(async_task):
+        print(f'FreeU is enabled!')
+        pipeline.final_unet = core.apply_freeu(
+            pipeline.final_unet,
+            async_task.freeu_b1,
+            async_task.freeu_b2,
+            async_task.freeu_s1,
+            async_task.freeu_s2
+        )
+
+    def patch_samplers(async_task):
+        final_scheduler_name = async_task.scheduler_name
+        if async_task.scheduler_name in ['lcm', 'tcd']:
+            final_scheduler_name = 'sgm_uniform'
+
+            def patch_discrete(unet):
+                return core.opModelSamplingDiscrete.patch(
+                    pipeline.final_unet,
+                    sampling=async_task.scheduler_name,
+                    zsnr=False)[0]
+
+            if pipeline.final_unet is not None:
+                pipeline.final_unet = patch_discrete(pipeline.final_unet)
+            if pipeline.final_refiner_unet is not None:
+                pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet)
+        elif async_task.scheduler_name == 'edm_playground_v2.5':
+            final_scheduler_name = 'karras'
+
+            def patch_edm(unet):
+                return core.opModelSamplingContinuousEDM.patch(
+                    unet,
+                    sampling=async_task.scheduler_name,
+                    sigma_max=120.0,
+                    sigma_min=0.002)[0]
+
+            if pipeline.final_unet is not None:
+                pipeline.final_unet = patch_edm(pipeline.final_unet)
+            if pipeline.final_refiner_unet is not None:
+                pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet)
+
+        return final_scheduler_name
+
+    def translate_prompts(async_task):
+        from modules.translator import translate2en
+        async_task.prompt = translate2en(async_task.prompt, 'prompt')
+        async_task.negative_prompt = translate2en(async_task.negative_prompt, 'negative prompt')
+
+    def set_hyper_sd_defaults(async_task):
+        print('Enter Hyper-SD mode.')
+        progressbar(async_task, 1, 'Downloading Hyper-SD components ...')
+        async_task.performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)]
+        if async_task.refiner_model_name != 'None':
+            print(f'Refiner disabled in Hyper-SD mode.')
+        async_task.refiner_model_name = 'None'
+        async_task.sampler_name = 'dpmpp_sde_gpu'
+        async_task.scheduler_name = 'karras'
+        async_task.sharpness = 0.0
+        async_task.cfg_scale = 1.0
+        async_task.adaptive_cfg = 1.0
+        async_task.refiner_switch = 1.0
+        async_task.adm_scaler_positive = 1.0
+        async_task.adm_scaler_negative = 1.0
+        async_task.adm_scaler_end = 0.0
+
+    def set_lightning_defaults(async_task):
+        print('Enter Lightning mode.')
+        progressbar(async_task, 1, 'Downloading Lightning components ...')
+        async_task.performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)]
+        if async_task.refiner_model_name != 'None':
+            print(f'Refiner disabled in Lightning mode.')
+        async_task.refiner_model_name = 'None'
+        async_task.sampler_name = 'euler'
+        async_task.scheduler_name = 'sgm_uniform'
+        async_task.sharpness = 0.0
+        async_task.cfg_scale = 1.0
+        async_task.adaptive_cfg = 1.0
+        async_task.refiner_switch = 1.0
+        async_task.adm_scaler_positive = 1.0
+        async_task.adm_scaler_negative = 1.0
+        async_task.adm_scaler_end = 0.0
+
+    def set_lcm_defaults(async_task):
+        print('Enter LCM mode.')
+        progressbar(async_task, 1, 'Downloading LCM components ...')
+        async_task.performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)]
+        if async_task.refiner_model_name != 'None':
+            print(f'Refiner disabled in LCM mode.')
+        async_task.refiner_model_name = 'None'
+        async_task.sampler_name = 'lcm'
+        async_task.scheduler_name = 'lcm'
+        async_task.sharpness = 0.0
+        async_task.cfg_scale = 1.0
+        async_task.adaptive_cfg = 1.0
+        async_task.refiner_switch = 1.0
+        async_task.adm_scaler_positive = 1.0
+        async_task.adm_scaler_negative = 1.0
+        async_task.adm_scaler_end = 0.0
+
     while True:
         time.sleep(0.01)
         if len(async_tasks) > 0:
             task = async_tasks.pop(0)
-            generate_image_grid = task.args.pop(0)
 
             try:
                 handler(task)
-                if generate_image_grid:
+                if task.generate_image_grid:
                     build_image_wall(task)
                 task.yields.append(['finish', task.results])
                 pipeline.prepare_text_encoder(async_call=True)

From c1f62361ba4512d83dce50adca6b1dc66826e1c7 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 8 Jun 2024 20:25:50 +0200
Subject: [PATCH 002/101] refactor: add EarlyReturnException to prevent return
 value issues

---
 modules/async_worker.py | 85 ++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 43 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 32dd5a56..527ac9fd 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -109,6 +109,10 @@ class AsyncTask:
 async_tasks = []
 
 
+class EarlyReturnException(Exception):
+    """Raised by apply_upscale/apply_inpaint after yielding a result so handler() stops early."""
+
+
 def worker():
     global async_tasks
 
@@ -394,19 +398,20 @@ def worker():
             progressbar(async_task, 7, 'Image processing ...')
 
         if 'vary' in goals:
-            height, initial_latent, width = apply_vary(async_task, denoising_strength, height, initial_latent, switch, width)
+            height, initial_latent, width = apply_vary(async_task, denoising_strength, switch)
 
         if 'upscale' in goals:
-            denoising_strength, height, initial_latent, tiled, width, direct_return = apply_upscale(async_task, switch)
-            if direct_return:
+            try:
+                denoising_strength, height, initial_latent, tiled, width = apply_upscale(async_task, switch)
+            except EarlyReturnException:
                 return
-
         if 'inpaint' in goals:
-            denoising_strength, height, initial_latent, width = apply_inpaint(async_task, initial_latent,
-                                                                              inpaint_head_model_path, inpaint_image,
-                                                                              inpaint_mask, inpaint_parameterized,
-                                                                              switch)
-            if async_task.debugging_inpaint_preprocessor:
+            try:
+                denoising_strength, initial_latent, height, width = apply_inpaint(async_task, initial_latent,
+                                                                                  inpaint_head_model_path, inpaint_image,
+                                                                                  inpaint_mask, inpaint_parameterized,
+                                                                                  switch)
+            except EarlyReturnException:
                 return
 
         if 'cn' in goals:
@@ -471,7 +476,6 @@ def worker():
 
         processing_time = time.perf_counter() - processing_start_time
         print(f'Processing time (total): {processing_time:.2f} seconds')
-        return
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks,
@@ -521,6 +525,7 @@ def worker():
                      do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
 
         return imgs
+
     def apply_patch_settings(async_task):
         patch_settings[pid] = PatchSettings(
             async_task.sharpness,
@@ -644,22 +649,22 @@ def worker():
         if len(all_ip_tasks) > 0:
             pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
 
-    def apply_vary(async_task, denoising_strength, height, initial_latent, switch, width):
+    def apply_vary(async_task, uov_input_image, denoising_strength, switch):
         if 'subtle' in async_task.uov_method:
             async_task.denoising_strength = 0.5
         if 'strong' in async_task.uov_method:
             async_task.denoising_strength = 0.85
         if async_task.overwrite_vary_strength > 0:
             async_task.denoising_strength = async_task.overwrite_vary_strength
-        shape_ceil = get_image_shape_ceil(async_task.uov_input_image)
+        shape_ceil = get_image_shape_ceil(uov_input_image)
         if shape_ceil < 1024:
             print(f'[Vary] Image is resized because it is too small.')
             shape_ceil = 1024
         elif shape_ceil > 2048:
             print(f'[Vary] Image is resized because it is too big.')
             shape_ceil = 2048
-        async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, shape_ceil)
-        initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
+        uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
+        initial_pixels = core.numpy_to_pytorch(uov_input_image)
         progressbar(async_task, 8, 'VAE encoding ...')
         candidate_vae, _ = pipeline.get_candidate_vae(
             steps=async_task.steps,
@@ -671,8 +676,8 @@ def worker():
         B, C, H, W = initial_latent['samples'].shape
         width = W * 8
         height = H * 8
-        print(f'Final resolution is {str((height, width))}.')
-        return height, initial_latent, width
+        print(f'Final resolution is {str((width, height))}.')
+        return initial_latent, width, height
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
                 inpaint_mask, inpaint_parameterized, switch):
@@ -699,8 +704,8 @@ def worker():
 
             inpaint_image = np.ascontiguousarray(inpaint_image.copy())
             inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
-            inpaint_strength = 1.0
-            inpaint_respective_field = 1.0
+            async_task.inpaint_strength = 1.0
+            async_task.inpaint_respective_field = 1.0
         denoising_strength = async_task.inpaint_strength
         inpaint_worker.current_task = inpaint_worker.InpaintWorker(
             image=inpaint_image,
@@ -711,7 +716,7 @@ def worker():
         if async_task.debugging_inpaint_preprocessor:
             yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), async_task.black_out_nsfw,
                          do_not_show_finished_images=True)
-            return
+            raise EarlyReturnException
 
         progressbar(async_task, 11, 'VAE Inpaint encoding ...')
         inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
@@ -751,8 +756,9 @@ def worker():
         B, C, H, W = latent_fill.shape
         height, width = H * 8, W * 8
         final_height, final_width = inpaint_worker.current_task.image.shape[:2]
-        print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.')
-        return denoising_strength, height, initial_latent, width
+        print(f'Final resolution is {str((final_height, final_width))}, latent is {str((width, height))}.')
+
+        return denoising_strength, initial_latent, width, height
 
     def apply_upscale(async_task, switch):
         H, W, C = async_task.uov_input_image.shape
@@ -791,7 +797,7 @@ def worker():
             uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
             yield_result(async_task, uov_input_image_path, async_task.black_out_nsfw, False,
                          do_not_show_finished_images=True)
-            return direct_return
+            raise EarlyReturnException
 
         tiled = True
         denoising_strength = 0.382
@@ -811,8 +817,8 @@ def worker():
         B, C, H, W = initial_latent['samples'].shape
         width = W * 8
         height = H * 8
-        print(f'Final resolution is {str((height, width))}.')
-        return denoising_strength, height, initial_latent, tiled, width, direct_return
+        print(f'Final resolution is {str((width, height))}.')
+        return denoising_strength, height, initial_latent, tiled, width
 
     def apply_overrides(async_task, height, width):
         if async_task.overwrite_step > 0:
@@ -934,35 +940,28 @@ def worker():
             async_task.freeu_s2
         )
 
+    def patch_discrete(unet, scheduler_name):
+        return core.opModelSamplingDiscrete.patch(unet, scheduler_name, False)[0]
+
+    def patch_edm(unet, scheduler_name):
+        return core.opModelSamplingContinuousEDM.patch(unet, scheduler_name, 120.0, 0.002)[0]
+
     def patch_samplers(async_task):
         final_scheduler_name = async_task.scheduler_name
+
         if async_task.scheduler_name in ['lcm', 'tcd']:
             final_scheduler_name = 'sgm_uniform'
-
-            def patch_discrete(unet):
-                return core.opModelSamplingDiscrete.patch(
-                    pipeline.final_unet,
-                    sampling=async_task.scheduler_name,
-                    zsnr=False)[0]
-
             if pipeline.final_unet is not None:
-                pipeline.final_unet = patch_discrete(pipeline.final_unet)
+                pipeline.final_unet = patch_discrete(pipeline.final_unet, async_task.scheduler_name)
             if pipeline.final_refiner_unet is not None:
-                pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet)
+                pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet, async_task.scheduler_name)
+
         elif async_task.scheduler_name == 'edm_playground_v2.5':
             final_scheduler_name = 'karras'
-
-            def patch_edm(unet):
-                return core.opModelSamplingContinuousEDM.patch(
-                    unet,
-                    sampling=async_task.scheduler_name,
-                    sigma_max=120.0,
-                    sigma_min=0.002)[0]
-
             if pipeline.final_unet is not None:
-                pipeline.final_unet = patch_edm(pipeline.final_unet)
+                pipeline.final_unet = patch_edm(pipeline.final_unet, async_task.scheduler_name)
             if pipeline.final_refiner_unet is not None:
-                pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet)
+                pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet, async_task.scheduler_name)
 
         return final_scheduler_name
 

From c79f6bd461e282ca86f386dc2ab132b230c23bb2 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 8 Jun 2024 20:33:47 +0200
Subject: [PATCH 003/101] refactor: move methods to handler scope

---
 modules/async_worker.py | 512 ++++++++++++++++++++--------------------
 1 file changed, 256 insertions(+), 256 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 527ac9fd..e4543edf 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -227,256 +227,6 @@ def worker():
         async_task.results = async_task.results + [wall]
         return
 
-    @torch.no_grad()
-    @torch.inference_mode()
-    def handler(async_task: AsyncTask):
-        preparation_start_time = time.perf_counter()
-        async_task.processing = True
-
-        async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections]
-        base_model_additional_loras = []
-        async_task.uov_method = async_task.uov_method.lower()
-
-        if fooocus_expansion in async_task.style_selections:
-            use_expansion = True
-            async_task.style_selections.remove(fooocus_expansion)
-        else:
-            use_expansion = False
-
-        use_style = len(async_task.style_selections) > 0
-
-        if async_task.base_model_name == async_task.refiner_model_name:
-            print(f'Refiner disabled because base model and refiner are same.')
-            async_task.refiner_model_name = 'None'
-
-        if async_task.performance_selection == Performance.EXTREME_SPEED:
-            set_lcm_defaults(async_task)
-        elif async_task.performance_selection == Performance.LIGHTNING:
-            set_lightning_defaults(async_task)
-        elif async_task.performance_selection == Performance.HYPER_SD:
-            set_hyper_sd_defaults(async_task)
-
-        if async_task.translate_prompts:
-            translate_prompts(async_task)
-
-        print(f'[Parameters] Adaptive CFG = {async_task.adaptive_cfg}')
-        print(f'[Parameters] CLIP Skip = {async_task.clip_skip}')
-        print(f'[Parameters] Sharpness = {async_task.sharpness}')
-        print(f'[Parameters] ControlNet Softness = {async_task.controlnet_softness}')
-        print(f'[Parameters] ADM Scale = '
-              f'{async_task.adm_scaler_positive} : '
-              f'{async_task.adm_scaler_negative} : '
-              f'{async_task.adm_scaler_end}')
-        print(f'[Parameters] Seed = {async_task.seed}')
-
-        apply_patch_settings(async_task)
-
-        print(f'[Parameters] CFG = {async_task.cfg_scale}')
-
-        initial_latent = None
-        denoising_strength = 1.0
-        tiled = False
-
-        width, height = async_task.aspect_ratios_selection.replace('×', ' ').split(' ')[:2]
-        width, height = int(width), int(height)
-
-        skip_prompt_processing = False
-
-        inpaint_parameterized = async_task.inpaint_engine != 'None'
-        inpaint_image = None
-        inpaint_mask = None
-        inpaint_head_model_path = None
-
-        use_synthetic_refiner = False
-
-        controlnet_canny_path = None
-        controlnet_cpds_path = None
-        clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
-
-        goals = []
-        tasks = []
-
-        if async_task.input_image_checkbox:
-            if (async_task.current_tab == 'uov' or (
-                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
-                    and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
-                async_task.uov_input_image = HWC3(async_task.uov_input_image)
-                if 'vary' in async_task.uov_method:
-                    goals.append('vary')
-                elif 'upscale' in async_task.uov_method:
-                    goals.append('upscale')
-                    if 'fast' in async_task.uov_method:
-                        skip_prompt_processing = True
-                    else:
-                        async_task.steps = async_task.performance_selection.steps_uov()
-
-                    progressbar(async_task, 1, 'Downloading upscale models ...')
-                    modules.config.downloading_upscale_model()
-            if (async_task.current_tab == 'inpaint' or (
-                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
-                    and isinstance(async_task.inpaint_input_image, dict):
-                inpaint_image = async_task.inpaint_input_image['image']
-                inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
-
-                if async_task.inpaint_mask_upload_checkbox:
-                    if isinstance(async_task.inpaint_mask_image_upload, dict):
-                        if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
-                                and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
-                                and async_task.inpaint_mask_image_upload['image'].ndim == 3):
-                            async_task.inpaint_mask_image_upload = np.maximum(async_task.inpaint_mask_image_upload['image'], async_task.inpaint_mask_image_upload['mask'])
-                    if isinstance(async_task.inpaint_mask_image_upload, np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
-                        H, W, C = inpaint_image.shape
-                        async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload, width=W, height=H)
-                        async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
-                        async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(np.uint8) * 255
-                        async_task.inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
-
-                if int(async_task.inpaint_erode_or_dilate) != 0:
-                    async_task.inpaint_mask = erode_or_dilate(async_task.inpaint_mask, async_task.inpaint_erode_or_dilate)
-
-                if async_task.invert_mask_checkbox:
-                    async_task.inpaint_mask = 255 - async_task.inpaint_mask
-
-                inpaint_image = HWC3(inpaint_image)
-                if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
-                        and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
-                    progressbar(async_task, 1, 'Downloading upscale models ...')
-                    modules.config.downloading_upscale_model()
-                    if inpaint_parameterized:
-                        progressbar(async_task, 1, 'Downloading inpainter ...')
-                        inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
-                            async_task.inpaint_engine)
-                        base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
-                        print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
-                        if async_task.refiner_model_name == 'None':
-                            use_synthetic_refiner = True
-                            async_task.refiner_switch = 0.8
-                    else:
-                        inpaint_head_model_path, inpaint_patch_model_path = None, None
-                        print(f'[Inpaint] Parameterized inpaint is disabled.')
-                    if async_task.inpaint_additional_prompt != '':
-                        if async_task.prompt == '':
-                            async_task.prompt = async_task.inpaint_additional_prompt
-                        else:
-                            async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
-                    goals.append('inpaint')
-            if async_task.current_tab == 'ip' or \
-                    async_task.mixing_image_prompt_and_vary_upscale or \
-                    async_task.mixing_image_prompt_and_inpaint:
-                goals.append('cn')
-                progressbar(async_task, 1, 'Downloading control models ...')
-                if len(async_task.cn_tasks[flags.cn_canny]) > 0:
-                    controlnet_canny_path = modules.config.downloading_controlnet_canny()
-                if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
-                    controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
-                if len(async_task.cn_tasks[flags.cn_ip]) > 0:
-                    clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
-                if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
-                    clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
-                        'face')
-
-
-        # Load or unload CNs
-        progressbar(async_task, 1, 'Loading control models ...')
-        pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
-        ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
-        ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
-
-        height, switch, width = apply_overrides(async_task, height, width)
-
-        print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}')
-        print(f'[Parameters] Steps = {async_task.steps} - {switch}')
-
-        progressbar(async_task, 1, 'Initializing ...')
-
-        tasks = []
-        if not skip_prompt_processing:
-            tasks, use_expansion = process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
-                                                  use_synthetic_refiner)
-
-        if len(goals) > 0:
-            progressbar(async_task, 7, 'Image processing ...')
-
-        if 'vary' in goals:
-            height, initial_latent, width = apply_vary(async_task, denoising_strength, switch)
-
-        if 'upscale' in goals:
-            try:
-                denoising_strength, height, initial_latent, tiled, width = apply_upscale(async_task, switch)
-            except EarlyReturnException:
-                return
-        if 'inpaint' in goals:
-            try:
-                denoising_strength, initial_latent, height, width = apply_inpaint(async_task, initial_latent,
-                                                                                  inpaint_head_model_path, inpaint_image,
-                                                                                  inpaint_mask, inpaint_parameterized,
-                                                                                  switch)
-            except EarlyReturnException:
-                return
-
-        if 'cn' in goals:
-            apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
-            if async_task.debugging_cn_preprocessor:
-                return
-
-        if async_task.freeu_enabled:
-            apply_freeu(async_task)
-
-        all_steps = async_task.steps * async_task.image_number
-
-        print(f'[Parameters] Denoising Strength = {denoising_strength}')
-
-        if isinstance(initial_latent, dict) and 'samples' in initial_latent:
-            log_shape = initial_latent['samples'].shape
-        else:
-            log_shape = f'Image Space {(height, width)}'
-
-        print(f'[Parameters] Initial Latent shape: {log_shape}')
-
-        preparation_time = time.perf_counter() - preparation_start_time
-        print(f'Preparation time: {preparation_time:.2f} seconds')
-
-        final_scheduler_name = patch_samplers(async_task)
-        print(f'Using {final_scheduler_name} scheduler.')
-
-        async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)])
-
-        processing_start_time = time.perf_counter()
-
-        def callback(step, x0, x, total_steps, y):
-            done_steps = current_task_id * async_task.steps + step
-            async_task.yields.append(['preview', (
-                int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
-                f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{async_task.image_number} ...', y)])
-
-        for current_task_id, task in enumerate(tasks):
-            current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
-                current_task_id * async_task.steps) / float(all_steps))
-            progressbar(async_task, current_progress,
-                        f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
-            execution_start_time = time.perf_counter()
-
-            try:
-                process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                             current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
-                             switch, task, tasks, tiled, use_expansion, width, height)
-            except ldm_patched.modules.model_management.InterruptProcessingException:
-                if async_task.last_stop == 'skip':
-                    print('User skipped')
-                    async_task.last_stop = False
-                    continue
-                else:
-                    print('User stopped')
-                    break
-
-            execution_time = time.perf_counter() - execution_start_time
-            print(f'Generating and saving time: {execution_time:.2f} seconds')
-
-        async_task.processing = False
-
-        processing_time = time.perf_counter() - processing_start_time
-        print(f'Processing time (total): {processing_time:.2f} seconds')
-
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks,
                      tiled, use_expansion, width, height):
@@ -649,22 +399,22 @@ def worker():
         if len(all_ip_tasks) > 0:
             pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
 
-    def apply_vary(async_task, uov_input_image, denoising_strength, switch):
+    def apply_vary(async_task, denoising_strength, switch):
         if 'subtle' in async_task.uov_method:
             async_task.denoising_strength = 0.5
         if 'strong' in async_task.uov_method:
             async_task.denoising_strength = 0.85
         if async_task.overwrite_vary_strength > 0:
             async_task.denoising_strength = async_task.overwrite_vary_strength
-        shape_ceil = get_image_shape_ceil(uov_input_image)
+        shape_ceil = get_image_shape_ceil(async_task.uov_input_image)
         if shape_ceil < 1024:
             print(f'[Vary] Image is resized because it is too small.')
             shape_ceil = 1024
         elif shape_ceil > 2048:
             print(f'[Vary] Image is resized because it is too big.')
             shape_ceil = 2048
-        uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
-        initial_pixels = core.numpy_to_pytorch(uov_input_image)
+        async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, shape_ceil)
+        initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
         progressbar(async_task, 8, 'VAE encoding ...')
         candidate_vae, _ = pipeline.get_candidate_vae(
             steps=async_task.steps,
@@ -680,7 +430,7 @@ def worker():
         return initial_latent, width, height
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
-                inpaint_mask, inpaint_parameterized, switch):
+                      inpaint_mask, inpaint_parameterized, switch):
         if len(async_task.outpaint_selections) > 0:
             H, W, C = inpaint_image.shape
             if 'top' in async_task.outpaint_selections:
@@ -833,7 +583,7 @@ def worker():
         return height, switch, width
 
     def process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
-                    use_synthetic_refiner):
+                       use_synthetic_refiner):
         prompts = remove_empty_str([safe_str(p) for p in async_task.prompt.splitlines()], default='')
         negative_prompts = remove_empty_str([safe_str(p) for p in async_task.negative_prompt.splitlines()], default='')
         prompt = prompts[0]
@@ -1021,6 +771,256 @@ def worker():
         async_task.adm_scaler_negative = 1.0
         async_task.adm_scaler_end = 0.0
 
+    @torch.no_grad()
+    @torch.inference_mode()
+    def handler(async_task: AsyncTask):
+        preparation_start_time = time.perf_counter()
+        async_task.processing = True
+
+        async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections]
+        base_model_additional_loras = []
+        async_task.uov_method = async_task.uov_method.lower()
+
+        if fooocus_expansion in async_task.style_selections:
+            use_expansion = True
+            async_task.style_selections.remove(fooocus_expansion)
+        else:
+            use_expansion = False
+
+        use_style = len(async_task.style_selections) > 0
+
+        if async_task.base_model_name == async_task.refiner_model_name:
+            print(f'Refiner disabled because base model and refiner are same.')
+            async_task.refiner_model_name = 'None'
+
+        if async_task.performance_selection == Performance.EXTREME_SPEED:
+            set_lcm_defaults(async_task)
+        elif async_task.performance_selection == Performance.LIGHTNING:
+            set_lightning_defaults(async_task)
+        elif async_task.performance_selection == Performance.HYPER_SD:
+            set_hyper_sd_defaults(async_task)
+
+        if async_task.translate_prompts:
+            translate_prompts(async_task)
+
+        print(f'[Parameters] Adaptive CFG = {async_task.adaptive_cfg}')
+        print(f'[Parameters] CLIP Skip = {async_task.clip_skip}')
+        print(f'[Parameters] Sharpness = {async_task.sharpness}')
+        print(f'[Parameters] ControlNet Softness = {async_task.controlnet_softness}')
+        print(f'[Parameters] ADM Scale = '
+              f'{async_task.adm_scaler_positive} : '
+              f'{async_task.adm_scaler_negative} : '
+              f'{async_task.adm_scaler_end}')
+        print(f'[Parameters] Seed = {async_task.seed}')
+
+        apply_patch_settings(async_task)
+
+        print(f'[Parameters] CFG = {async_task.cfg_scale}')
+
+        initial_latent = None
+        denoising_strength = 1.0
+        tiled = False
+
+        width, height = async_task.aspect_ratios_selection.replace('×', ' ').split(' ')[:2]
+        width, height = int(width), int(height)
+
+        skip_prompt_processing = False
+
+        inpaint_parameterized = async_task.inpaint_engine != 'None'
+        inpaint_image = None
+        inpaint_mask = None
+        inpaint_head_model_path = None
+
+        use_synthetic_refiner = False
+
+        controlnet_canny_path = None
+        controlnet_cpds_path = None
+        clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
+
+        goals = []
+        tasks = []
+
+        if async_task.input_image_checkbox:
+            if (async_task.current_tab == 'uov' or (
+                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
+                    and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
+                async_task.uov_input_image = HWC3(async_task.uov_input_image)
+                if 'vary' in async_task.uov_method:
+                    goals.append('vary')
+                elif 'upscale' in async_task.uov_method:
+                    goals.append('upscale')
+                    if 'fast' in async_task.uov_method:
+                        skip_prompt_processing = True
+                    else:
+                        async_task.steps = async_task.performance_selection.steps_uov()
+
+                    progressbar(async_task, 1, 'Downloading upscale models ...')
+                    modules.config.downloading_upscale_model()
+            if (async_task.current_tab == 'inpaint' or (
+                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
+                    and isinstance(async_task.inpaint_input_image, dict):
+                inpaint_image = async_task.inpaint_input_image['image']
+                inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
+
+                if async_task.inpaint_mask_upload_checkbox:
+                    if isinstance(async_task.inpaint_mask_image_upload, dict):
+                        if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
+                                and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
+                                and async_task.inpaint_mask_image_upload['image'].ndim == 3):
+                            async_task.inpaint_mask_image_upload = np.maximum(async_task.inpaint_mask_image_upload['image'], async_task.inpaint_mask_image_upload['mask'])
+                    if isinstance(async_task.inpaint_mask_image_upload, np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
+                        H, W, C = inpaint_image.shape
+                        async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload, width=W, height=H)
+                        async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
+                        async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(np.uint8) * 255
+                        async_task.inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
+
+                if int(async_task.inpaint_erode_or_dilate) != 0:
+                    async_task.inpaint_mask = erode_or_dilate(async_task.inpaint_mask, async_task.inpaint_erode_or_dilate)
+
+                if async_task.invert_mask_checkbox:
+                    async_task.inpaint_mask = 255 - async_task.inpaint_mask
+
+                inpaint_image = HWC3(inpaint_image)
+                if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
+                        and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
+                    progressbar(async_task, 1, 'Downloading upscale models ...')
+                    modules.config.downloading_upscale_model()
+                    if inpaint_parameterized:
+                        progressbar(async_task, 1, 'Downloading inpainter ...')
+                        inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
+                            async_task.inpaint_engine)
+                        base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+                        print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
+                        if async_task.refiner_model_name == 'None':
+                            use_synthetic_refiner = True
+                            async_task.refiner_switch = 0.8
+                    else:
+                        inpaint_head_model_path, inpaint_patch_model_path = None, None
+                        print(f'[Inpaint] Parameterized inpaint is disabled.')
+                    if async_task.inpaint_additional_prompt != '':
+                        if async_task.prompt == '':
+                            async_task.prompt = async_task.inpaint_additional_prompt
+                        else:
+                            async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
+                    goals.append('inpaint')
+            if async_task.current_tab == 'ip' or \
+                    async_task.mixing_image_prompt_and_vary_upscale or \
+                    async_task.mixing_image_prompt_and_inpaint:
+                goals.append('cn')
+                progressbar(async_task, 1, 'Downloading control models ...')
+                if len(async_task.cn_tasks[flags.cn_canny]) > 0:
+                    controlnet_canny_path = modules.config.downloading_controlnet_canny()
+                if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
+                    controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
+                if len(async_task.cn_tasks[flags.cn_ip]) > 0:
+                    clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
+                if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
+                    clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
+                        'face')
+
+
+        # Load or unload CNs
+        progressbar(async_task, 1, 'Loading control models ...')
+        pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
+        ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
+        ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
+
+        height, switch, width = apply_overrides(async_task, height, width)
+
+        print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}')
+        print(f'[Parameters] Steps = {async_task.steps} - {switch}')
+
+        progressbar(async_task, 1, 'Initializing ...')
+
+        if not skip_prompt_processing:
+            tasks, use_expansion = process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
+                                                  use_synthetic_refiner)
+
+        if len(goals) > 0:
+            progressbar(async_task, 7, 'Image processing ...')
+
+        if 'vary' in goals:
+            height, initial_latent, width = apply_vary(async_task, denoising_strength, switch)
+
+        if 'upscale' in goals:
+            try:
+                denoising_strength, height, initial_latent, tiled, width = apply_upscale(async_task, switch)
+            except EarlyReturnException:
+                return
+        if 'inpaint' in goals:
+            try:
+                denoising_strength, initial_latent, height, width = apply_inpaint(async_task, initial_latent,
+                                                                                  inpaint_head_model_path, inpaint_image,
+                                                                                  inpaint_mask, inpaint_parameterized,
+                                                                                  switch)
+            except EarlyReturnException:
+                return
+
+        if 'cn' in goals:
+            apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+            if async_task.debugging_cn_preprocessor:
+                return
+
+        if async_task.freeu_enabled:
+            apply_freeu(async_task)
+
+        all_steps = async_task.steps * async_task.image_number
+
+        print(f'[Parameters] Denoising Strength = {denoising_strength}')
+
+        if isinstance(initial_latent, dict) and 'samples' in initial_latent:
+            log_shape = initial_latent['samples'].shape
+        else:
+            log_shape = f'Image Space {(height, width)}'
+
+        print(f'[Parameters] Initial Latent shape: {log_shape}')
+
+        preparation_time = time.perf_counter() - preparation_start_time
+        print(f'Preparation time: {preparation_time:.2f} seconds')
+
+        final_scheduler_name = patch_samplers(async_task)
+        print(f'Using {final_scheduler_name} scheduler.')
+
+        async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)])
+
+        processing_start_time = time.perf_counter()
+
+        def callback(step, x0, x, total_steps, y):
+            done_steps = current_task_id * async_task.steps + step
+            async_task.yields.append(['preview', (
+                int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
+                f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{async_task.image_number} ...', y)])
+
+        for current_task_id, task in enumerate(tasks):
+            current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
+                current_task_id * async_task.steps) / float(all_steps))
+            progressbar(async_task, current_progress,
+                        f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
+            execution_start_time = time.perf_counter()
+
+            try:
+                process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                             current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
+                             switch, task, tasks, tiled, use_expansion, width, height)
+            except ldm_patched.modules.model_management.InterruptProcessingException:
+                if async_task.last_stop == 'skip':
+                    print('User skipped')
+                    async_task.last_stop = False
+                    continue
+                else:
+                    print('User stopped')
+                    break
+
+            execution_time = time.perf_counter() - execution_start_time
+            print(f'Generating and saving time: {execution_time:.2f} seconds')
+
+        async_task.processing = False
+
+        processing_time = time.perf_counter() - processing_start_time
+        print(f'Processing time (total): {processing_time:.2f} seconds')
+
+
     while True:
         time.sleep(0.01)
         if len(async_tasks) > 0:

From bb7293826114a594ea72bd6bde1b12dc368831d5 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 8 Jun 2024 21:41:30 +0200
Subject: [PATCH 004/101] refactor: always use format width, height

---
 extras/censor.py        |   2 +-
 modules/async_worker.py | 127 +++++++++++++++++++++-------------------
 2 files changed, 68 insertions(+), 61 deletions(-)

diff --git a/extras/censor.py b/extras/censor.py
index 45617fd8..c5dea3fd 100644
--- a/extras/censor.py
+++ b/extras/censor.py
@@ -41,7 +41,7 @@ class Censor:
         model_management.load_model_gpu(self.safety_checker_model)
 
         single = False
-        if not isinstance(images, list) or isinstance(images, np.ndarray):
+        if not isinstance(images, (list, np.ndarray)):
             images = [images]
             single = True
 
diff --git a/modules/async_worker.py b/modules/async_worker.py
index e4543edf..5e7c561f 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -274,7 +274,7 @@ def worker():
         yield_result(async_task, img_paths, async_task.black_out_nsfw, False,
                      do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
 
-        return imgs
+        return imgs, img_paths
 
     def apply_patch_settings(async_task):
         patch_settings[pid] = PatchSettings(
@@ -430,7 +430,65 @@ def worker():
         return initial_latent, width, height
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
-                      inpaint_mask, inpaint_parameterized, switch):
+                      inpaint_mask, inpaint_parameterized, denoising_strength, switch, skip_apply_outpaint=False,
+                      step_from=11):
+        """Set up inpaint_worker.current_task and encode its latents.
+        Returns (denoising_strength, initial_latent, width, height); may raise EarlyReturnException."""
+        if not skip_apply_outpaint:
+            inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask)
+            if len(async_task.outpaint_selections) > 0:  # apply_outpaint reset inpaint_strength to 1.0
+                denoising_strength = async_task.inpaint_strength
+
+        inpaint_worker.current_task = inpaint_worker.InpaintWorker(
+            image=inpaint_image,
+            mask=inpaint_mask,
+            use_fill=denoising_strength > 0.99,
+            k=async_task.inpaint_respective_field
+        )
+        if async_task.debugging_inpaint_preprocessor:
+            yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), async_task.black_out_nsfw,
+                         do_not_show_finished_images=True)
+            raise EarlyReturnException
+
+        progressbar(async_task, step_from, 'VAE Inpaint encoding ...')
+        inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
+        inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
+        inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
+        candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae(
+            steps=async_task.steps,
+            switch=switch,
+            denoise=denoising_strength,
+            refiner_swap_method=async_task.refiner_swap_method
+        )
+        latent_inpaint, latent_mask = core.encode_vae_inpaint(
+            mask=inpaint_pixel_mask,
+            vae=candidate_vae,
+            pixels=inpaint_pixel_image)
+        latent_swap = None
+        if candidate_vae_swap is not None:
+            progressbar(async_task, step_from + 1, 'VAE SD15 encoding ...')
+            latent_swap = core.encode_vae(vae=candidate_vae_swap, pixels=inpaint_pixel_fill)['samples']
+        progressbar(async_task, step_from + 2, 'VAE encoding ...')
+        latent_fill = core.encode_vae(vae=candidate_vae, pixels=inpaint_pixel_fill)['samples']
+        inpaint_worker.current_task.load_latent(
+            latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap)
+        if inpaint_parameterized:
+            pipeline.final_unet = inpaint_worker.current_task.patch(
+                inpaint_head_model_path=inpaint_head_model_path,
+                inpaint_latent=latent_inpaint,
+                inpaint_latent_mask=latent_mask,
+                model=pipeline.final_unet
+            )
+        if not async_task.inpaint_disable_initial_latent:
+            initial_latent = {'samples': latent_fill}
+        B, C, H, W = latent_fill.shape
+        height, width = H * 8, W * 8
+        final_height, final_width = inpaint_worker.current_task.image.shape[:2]
+        print(f'Final resolution is {str((final_width, final_height))}, latent is {str((width, height))}.')
+
+        return denoising_strength, initial_latent, width, height
+
+    def apply_outpaint(async_task, inpaint_image, inpaint_mask):
         if len(async_task.outpaint_selections) > 0:
             H, W, C = inpaint_image.shape
             if 'top' in async_task.outpaint_selections:
@@ -456,59 +514,7 @@ def worker():
             inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
             async_task.inpaint_strength = 1.0
             async_task.inpaint_respective_field = 1.0
-        denoising_strength = async_task.inpaint_strength
-        inpaint_worker.current_task = inpaint_worker.InpaintWorker(
-            image=inpaint_image,
-            mask=inpaint_mask,
-            use_fill=denoising_strength > 0.99,
-            k=async_task.inpaint_respective_field
-        )
-        if async_task.debugging_inpaint_preprocessor:
-            yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), async_task.black_out_nsfw,
-                         do_not_show_finished_images=True)
-            raise EarlyReturnException
-
-        progressbar(async_task, 11, 'VAE Inpaint encoding ...')
-        inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
-        inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
-        inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
-        candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae(
-            steps=async_task.steps,
-            switch=switch,
-            denoise=denoising_strength,
-            refiner_swap_method=async_task.refiner_swap_method
-        )
-        latent_inpaint, latent_mask = core.encode_vae_inpaint(
-            mask=inpaint_pixel_mask,
-            vae=candidate_vae,
-            pixels=inpaint_pixel_image)
-        latent_swap = None
-        if candidate_vae_swap is not None:
-            progressbar(async_task, 12, 'VAE SD15 encoding ...')
-            latent_swap = core.encode_vae(
-                vae=candidate_vae_swap,
-                pixels=inpaint_pixel_fill)['samples']
-        progressbar(async_task, 13, 'VAE encoding ...')
-        latent_fill = core.encode_vae(
-            vae=candidate_vae,
-            pixels=inpaint_pixel_fill)['samples']
-        inpaint_worker.current_task.load_latent(
-            latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap)
-        if inpaint_parameterized:
-            pipeline.final_unet = inpaint_worker.current_task.patch(
-                inpaint_head_model_path=inpaint_head_model_path,
-                inpaint_latent=latent_inpaint,
-                inpaint_latent_mask=latent_mask,
-                model=pipeline.final_unet
-            )
-        if not async_task.inpaint_disable_initial_latent:
-            initial_latent = {'samples': latent_fill}
-        B, C, H, W = latent_fill.shape
-        height, width = H * 8, W * 8
-        final_height, final_width = inpaint_worker.current_task.image.shape[:2]
-        print(f'Final resolution is {str((final_height, final_width))}, latent is {str((width, height))}.')
-
-        return denoising_strength, initial_latent, width, height
+        return inpaint_image, inpaint_mask
 
     def apply_upscale(async_task, switch):
         H, W, C = async_task.uov_input_image.shape
@@ -568,7 +574,7 @@ def worker():
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((width, height))}.')
-        return denoising_strength, height, initial_latent, tiled, width
+        return denoising_strength, initial_latent, tiled, width, height
 
     def apply_overrides(async_task, height, width):
         if async_task.overwrite_step > 0:
@@ -941,19 +947,20 @@ def worker():
             progressbar(async_task, 7, 'Image processing ...')
 
         if 'vary' in goals:
-            height, initial_latent, width = apply_vary(async_task, denoising_strength, switch)
+            initial_latent, width, height = apply_vary(async_task, denoising_strength, switch)
 
         if 'upscale' in goals:
             try:
-                denoising_strength, height, initial_latent, tiled, width = apply_upscale(async_task, switch)
+                denoising_strength, initial_latent, tiled, width, height = apply_upscale(async_task, switch)
             except EarlyReturnException:
                 return
+
         if 'inpaint' in goals:
             try:
-                denoising_strength, initial_latent, height, width = apply_inpaint(async_task, initial_latent,
+                denoising_strength, initial_latent, width, height = apply_inpaint(async_task, initial_latent,
                                                                                   inpaint_head_model_path, inpaint_image,
                                                                                   inpaint_mask, inpaint_parameterized,
-                                                                                  switch)
+                                                                                  async_task.inpaint_strength, switch)
             except EarlyReturnException:
                 return
 

From df70294a3e03a3111a6b483abb16545bcbac8fc7 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 8 Jun 2024 23:30:45 +0200
Subject: [PATCH 005/101] wip: add adetailer

---
 extras/adetailer/args.py                | 278 ++++++++++++++++++++++++
 extras/adetailer/common.py              | 161 ++++++++++++++
 extras/adetailer/mask.py                | 269 +++++++++++++++++++++++
 extras/adetailer/script.py              |  53 +++++
 extras/adetailer/ultralytics_predict.py |  67 ++++++
 modules/async_worker.py                 |  51 ++++-
 modules/config.py                       |   1 +
 requirements_versions.txt               |   3 +-
 8 files changed, 878 insertions(+), 5 deletions(-)
 create mode 100644 extras/adetailer/args.py
 create mode 100644 extras/adetailer/common.py
 create mode 100644 extras/adetailer/mask.py
 create mode 100644 extras/adetailer/script.py
 create mode 100644 extras/adetailer/ultralytics_predict.py

diff --git a/extras/adetailer/args.py b/extras/adetailer/args.py
new file mode 100644
index 00000000..08ad4a3a
--- /dev/null
+++ b/extras/adetailer/args.py
@@ -0,0 +1,278 @@
+from __future__ import annotations
+
+from collections import UserList
+from dataclasses import dataclass
+from functools import cached_property, partial
+from typing import Any, Literal, NamedTuple, Optional
+
+try:
+    from pydantic.v1 import (
+        BaseModel,
+        Extra,
+        NonNegativeFloat,
+        NonNegativeInt,
+        PositiveInt,
+        confloat,
+        conint,
+        validator,
+    )
+except ImportError:
+    from pydantic import (
+        BaseModel,
+        Extra,
+        NonNegativeFloat,
+        NonNegativeInt,
+        PositiveInt,
+        confloat,
+        conint,
+        validator,
+    )
+
+
+@dataclass
+class SkipImg2ImgOrig:
+    steps: int
+    sampler_name: str
+    width: int
+    height: int
+
+
+class Arg(NamedTuple):
+    attr: str
+    name: str
+
+
+class ArgsList(UserList):
+    @cached_property
+    def attrs(self) -> tuple[str, ...]:
+        return tuple(attr for attr, _ in self)
+
+    @cached_property
+    def names(self) -> tuple[str, ...]:
+        return tuple(name for _, name in self)
+
+
+class ADetailerArgs(BaseModel, extra=Extra.forbid):
+    ad_model: str = "None"
+    ad_model_classes: str = ""
+    ad_tap_enable: bool = True
+    ad_prompt: str = ""
+    ad_negative_prompt: str = ""
+    ad_confidence: confloat(ge=0.0, le=1.0) = 0.3
+    ad_mask_k_largest: NonNegativeInt = 0
+    ad_mask_min_ratio: confloat(ge=0.0, le=1.0) = 0.0
+    ad_mask_max_ratio: confloat(ge=0.0, le=1.0) = 1.0
+    ad_dilate_erode: int = 4
+    ad_x_offset: int = 0
+    ad_y_offset: int = 0
+    ad_mask_merge_invert: Literal["None", "Merge", "Merge and Invert"] = "None"
+    ad_mask_blur: NonNegativeInt = 4
+    ad_denoising_strength: confloat(ge=0.0, le=1.0) = 0.4
+    ad_inpaint_only_masked: bool = True
+    ad_inpaint_only_masked_padding: NonNegativeInt = 32
+    ad_use_inpaint_width_height: bool = False
+    ad_inpaint_width: PositiveInt = 512
+    ad_inpaint_height: PositiveInt = 512
+    ad_use_steps: bool = False
+    ad_steps: PositiveInt = 28
+    ad_use_cfg_scale: bool = False
+    ad_cfg_scale: NonNegativeFloat = 7.0
+    ad_use_checkpoint: bool = False
+    ad_checkpoint: Optional[str] = None
+    ad_use_vae: bool = False
+    ad_vae: Optional[str] = None
+    ad_use_sampler: bool = False
+    ad_sampler: str = "DPM++ 2M Karras"
+    ad_scheduler: str = "Use same scheduler"
+    ad_use_noise_multiplier: bool = False
+    ad_noise_multiplier: confloat(ge=0.5, le=1.5) = 1.0
+    ad_use_clip_skip: bool = False
+    ad_clip_skip: conint(ge=1, le=12) = 1
+    ad_restore_face: bool = False
+    ad_controlnet_model: str = "None"
+    ad_controlnet_module: str = "None"
+    ad_controlnet_weight: confloat(ge=0.0, le=1.0) = 1.0
+    ad_controlnet_guidance_start: confloat(ge=0.0, le=1.0) = 0.0
+    ad_controlnet_guidance_end: confloat(ge=0.0, le=1.0) = 1.0
+    is_api: bool = True
+
+    @validator("is_api", pre=True)
+    def is_api_validator(cls, v: Any):  # noqa: N805
+        "tuple is json serializable but cannot be made with json deserialize."
+        return type(v) is not tuple
+
+    @staticmethod
+    def ppop(
+        p: dict[str, Any],
+        key: str,
+        pops: list[str] | None = None,
+        cond: Any = None,
+    ) -> None:
+        if pops is None:
+            pops = [key]
+        if key not in p:
+            return
+        value = p[key]
+        cond = (not bool(value)) if cond is None else value == cond
+
+        if cond:
+            for k in pops:
+                p.pop(k, None)
+
+    def extra_params(self, suffix: str = "") -> dict[str, Any]:
+        if self.need_skip():
+            return {}
+
+        p = {name: getattr(self, attr) for attr, name in ALL_ARGS}
+        ppop = partial(self.ppop, p)
+
+        ppop("ADetailer model classes")
+        ppop("ADetailer prompt")
+        ppop("ADetailer negative prompt")
+        p.pop("ADetailer tap enable", None)  # always pop
+        ppop("ADetailer mask only top k largest", cond=0)
+        ppop("ADetailer mask min ratio", cond=0.0)
+        ppop("ADetailer mask max ratio", cond=1.0)
+        ppop("ADetailer x offset", cond=0)
+        ppop("ADetailer y offset", cond=0)
+        ppop("ADetailer mask merge invert", cond="None")
+        ppop("ADetailer inpaint only masked", ["ADetailer inpaint padding"])
+        ppop(
+            "ADetailer use inpaint width height",
+            [
+                "ADetailer use inpaint width height",
+                "ADetailer inpaint width",
+                "ADetailer inpaint height",
+            ],
+        )
+        ppop(
+            "ADetailer use separate steps",
+            ["ADetailer use separate steps", "ADetailer steps"],
+        )
+        ppop(
+            "ADetailer use separate CFG scale",
+            ["ADetailer use separate CFG scale", "ADetailer CFG scale"],
+        )
+        ppop(
+            "ADetailer use separate checkpoint",
+            ["ADetailer use separate checkpoint", "ADetailer checkpoint"],
+        )
+        ppop(
+            "ADetailer use separate VAE",
+            ["ADetailer use separate VAE", "ADetailer VAE"],
+        )
+        ppop(
+            "ADetailer use separate sampler",
+            [
+                "ADetailer use separate sampler",
+                "ADetailer sampler",
+                "ADetailer scheduler",
+            ],
+        )
+        ppop("ADetailer scheduler", cond="Use same scheduler")
+        ppop(
+            "ADetailer use separate noise multiplier",
+            ["ADetailer use separate noise multiplier", "ADetailer noise multiplier"],
+        )
+
+        ppop(
+            "ADetailer use separate CLIP skip",
+            ["ADetailer use separate CLIP skip", "ADetailer CLIP skip"],
+        )
+
+        ppop("ADetailer restore face")
+        ppop(
+            "ADetailer ControlNet model",
+            [
+                "ADetailer ControlNet model",
+                "ADetailer ControlNet module",
+                "ADetailer ControlNet weight",
+                "ADetailer ControlNet guidance start",
+                "ADetailer ControlNet guidance end",
+            ],
+            cond="None",
+        )
+        ppop("ADetailer ControlNet module", cond="None")
+        ppop("ADetailer ControlNet weight", cond=1.0)
+        ppop("ADetailer ControlNet guidance start", cond=0.0)
+        ppop("ADetailer ControlNet guidance end", cond=1.0)
+
+        if suffix:
+            p = {k + suffix: v for k, v in p.items()}
+
+        return p
+
+    def is_mediapipe(self) -> bool:
+        return self.ad_model.lower().startswith("mediapipe")
+
+    def need_skip(self) -> bool:
+        return self.ad_model == "None" or self.ad_tap_enable is False
+
+
+_all_args = [
+    ("ad_model", "ADetailer model"),
+    ("ad_model_classes", "ADetailer model classes"),
+    ("ad_tap_enable", "ADetailer tap enable"),
+    ("ad_prompt", "ADetailer prompt"),
+    ("ad_negative_prompt", "ADetailer negative prompt"),
+    ("ad_confidence", "ADetailer confidence"),
+    ("ad_mask_k_largest", "ADetailer mask only top k largest"),
+    ("ad_mask_min_ratio", "ADetailer mask min ratio"),
+    ("ad_mask_max_ratio", "ADetailer mask max ratio"),
+    ("ad_x_offset", "ADetailer x offset"),
+    ("ad_y_offset", "ADetailer y offset"),
+    ("ad_dilate_erode", "ADetailer dilate erode"),
+    ("ad_mask_merge_invert", "ADetailer mask merge invert"),
+    ("ad_mask_blur", "ADetailer mask blur"),
+    ("ad_denoising_strength", "ADetailer denoising strength"),
+    ("ad_inpaint_only_masked", "ADetailer inpaint only masked"),
+    ("ad_inpaint_only_masked_padding", "ADetailer inpaint padding"),
+    ("ad_use_inpaint_width_height", "ADetailer use inpaint width height"),
+    ("ad_inpaint_width", "ADetailer inpaint width"),
+    ("ad_inpaint_height", "ADetailer inpaint height"),
+    ("ad_use_steps", "ADetailer use separate steps"),
+    ("ad_steps", "ADetailer steps"),
+    ("ad_use_cfg_scale", "ADetailer use separate CFG scale"),
+    ("ad_cfg_scale", "ADetailer CFG scale"),
+    ("ad_use_checkpoint", "ADetailer use separate checkpoint"),
+    ("ad_checkpoint", "ADetailer checkpoint"),
+    ("ad_use_vae", "ADetailer use separate VAE"),
+    ("ad_vae", "ADetailer VAE"),
+    ("ad_use_sampler", "ADetailer use separate sampler"),
+    ("ad_sampler", "ADetailer sampler"),
+    ("ad_scheduler", "ADetailer scheduler"),
+    ("ad_use_noise_multiplier", "ADetailer use separate noise multiplier"),
+    ("ad_noise_multiplier", "ADetailer noise multiplier"),
+    ("ad_use_clip_skip", "ADetailer use separate CLIP skip"),
+    ("ad_clip_skip", "ADetailer CLIP skip"),
+    ("ad_restore_face", "ADetailer restore face"),
+    ("ad_controlnet_model", "ADetailer ControlNet model"),
+    ("ad_controlnet_module", "ADetailer ControlNet module"),
+    ("ad_controlnet_weight", "ADetailer ControlNet weight"),
+    ("ad_controlnet_guidance_start", "ADetailer ControlNet guidance start"),
+    ("ad_controlnet_guidance_end", "ADetailer ControlNet guidance end"),
+]
+
+_args = [Arg(*args) for args in _all_args]
+ALL_ARGS = ArgsList(_args)
+
+BBOX_SORTBY = [
+    "None",
+    "Position (left to right)",
+    "Position (center to edge)",
+    "Area (large to small)",
+]
+MASK_MERGE_INVERT = ["None", "Merge", "Merge and Invert"]
+
+_script_default = (
+    "dynamic_prompting",
+    "dynamic_thresholding",
+    "wildcard_recursive",
+    "wildcards",
+    "lora_block_weight",
+    "negpip",
+)
+SCRIPT_DEFAULT = ",".join(sorted(_script_default))
+
+_builtin_script = ("soft_inpainting", "hypertile_script")
+BUILTIN_SCRIPT = ",".join(sorted(_builtin_script))
\ No newline at end of file
diff --git a/extras/adetailer/common.py b/extras/adetailer/common.py
new file mode 100644
index 00000000..f80103fc
--- /dev/null
+++ b/extras/adetailer/common.py
@@ -0,0 +1,161 @@
+from __future__ import annotations
+
+import os
+from collections import OrderedDict
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Generic, Optional, TypeVar
+
+from huggingface_hub import hf_hub_download
+from PIL import Image, ImageDraw
+from torchvision.transforms.functional import to_pil_image
+
+REPO_ID = "Bingsu/adetailer"
+
+T = TypeVar("T", int, float)
+
+
+@dataclass
+class PredictOutput(Generic[T]):
+    bboxes: list[list[T]] = field(default_factory=list)
+    masks: list[Image.Image] = field(default_factory=list)
+    preview: Optional[Image.Image] = None
+
+
+def hf_download(file: str, repo_id: str = REPO_ID) -> str:
+    try:
+        path = hf_hub_download(repo_id, file)
+    except Exception:
+        print(f"[ADetailer] Failed to load model {file!r} from huggingface")
+        path = "INVALID"
+    return path
+
+
+def safe_mkdir(path: str | os.PathLike[str]) -> None:
+    path = Path(path)
+    if not path.exists() and path.parent.exists() and os.access(path.parent, os.W_OK):
+        path.mkdir()
+
+
+def scan_model_dir(path: Path) -> list[Path]:
+    if not path.is_dir():
+        return []
+    return [p for p in path.rglob("*") if p.is_file() and p.suffix == ".pt"]
+
+
+def download_models(*names: str) -> dict[str, str]:
+    models = OrderedDict()
+    with ThreadPoolExecutor() as executor:
+        for name in names:
+            if "-world" in name:
+                models[name] = executor.submit(
+                    hf_download, name, repo_id="Bingsu/yolo-world-mirror"
+                )
+            else:
+                models[name] = executor.submit(hf_download, name)
+    return {name: future.result() for name, future in models.items()}
+
+
+def get_models(
+    *dirs: str | os.PathLike[str], huggingface: bool = True
+) -> OrderedDict[str, str]:
+    model_paths = []
+
+    for dir_ in dirs:
+        if not dir_:
+            continue
+        model_paths.extend(scan_model_dir(Path(dir_)))
+
+    models = OrderedDict()
+    if huggingface:
+        to_download = [
+            "face_yolov8n.pt",
+            "face_yolov8s.pt",
+            "hand_yolov8n.pt",
+            "person_yolov8n-seg.pt",
+            "person_yolov8s-seg.pt",
+            "yolov8x-worldv2.pt",
+        ]
+        models.update(download_models(*to_download))
+
+    models.update(
+        {
+            "mediapipe_face_full": "mediapipe_face_full",
+            "mediapipe_face_short": "mediapipe_face_short",
+            "mediapipe_face_mesh": "mediapipe_face_mesh",
+            "mediapipe_face_mesh_eyes_only": "mediapipe_face_mesh_eyes_only",
+        }
+    )
+
+    invalid_keys = [k for k, v in models.items() if v == "INVALID"]
+    for key in invalid_keys:
+        models.pop(key)
+
+    for path in model_paths:
+        if path.name in models:
+            continue
+        models[path.name] = str(path)
+
+    return models
+
+
+def create_mask_from_bbox(
+    bboxes: list[list[float]], shape: tuple[int, int]
+) -> list[Image.Image]:
+    """
+    Parameters
+    ----------
+        bboxes: list[list[float]]
+            list of [x1, y1, x2, y2]
+            bounding boxes
+        shape: tuple[int, int]
+            shape of the image (width, height)
+
+    Returns
+    -------
+        masks: list[Image.Image]
+        A list of masks
+
+    """
+    masks = []
+    for bbox in bboxes:
+        mask = Image.new("L", shape, 0)
+        mask_draw = ImageDraw.Draw(mask)
+        mask_draw.rectangle(bbox, fill=255)
+        masks.append(mask)
+    return masks
+
+
+def create_bbox_from_mask(
+    masks: list[Image.Image], shape: tuple[int, int]
+) -> list[list[int]]:
+    """
+    Parameters
+    ----------
+        masks: list[Image.Image]
+            A list of masks
+        shape: tuple[int, int]
+            shape of the image (width, height)
+
+    Returns
+    -------
+        bboxes: list[list[int]]
+        A list of bounding boxes; masks for which getbbox() returns None are skipped
+
+    """
+    bboxes = []
+    for mask in masks:
+        mask = mask.resize(shape)
+        bbox = mask.getbbox()
+        if bbox is not None:
+            bboxes.append(list(bbox))
+    return bboxes
+
+
+def ensure_pil_image(image: Any, mode: str = "RGB") -> Image.Image:
+    if not isinstance(image, Image.Image):
+        image = to_pil_image(image)
+    if image.mode != mode:
+        image = image.convert(mode)
+    return image
\ No newline at end of file
diff --git a/extras/adetailer/mask.py b/extras/adetailer/mask.py
new file mode 100644
index 00000000..2faee71a
--- /dev/null
+++ b/extras/adetailer/mask.py
@@ -0,0 +1,269 @@
+from __future__ import annotations
+
+from enum import IntEnum
+from functools import partial, reduce
+from math import dist
+from typing import Any, TypeVar
+
+import cv2
+import numpy as np
+from PIL import Image, ImageChops
+
+from extras.adetailer.args import MASK_MERGE_INVERT
+from extras.adetailer.common import ensure_pil_image, PredictOutput
+
+
+class SortBy(IntEnum):
+    NONE = 0
+    LEFT_TO_RIGHT = 1
+    CENTER_TO_EDGE = 2
+    AREA = 3
+
+
+class MergeInvert(IntEnum):
+    NONE = 0
+    MERGE = 1
+    MERGE_INVERT = 2
+
+
+T = TypeVar("T", int, float)
+
+
+def _dilate(arr: np.ndarray, value: int) -> np.ndarray:
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
+    return cv2.dilate(arr, kernel, iterations=1)
+
+
+def _erode(arr: np.ndarray, value: int) -> np.ndarray:
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
+    return cv2.erode(arr, kernel, iterations=1)
+
+
+def dilate_erode(img: Image.Image, value: int) -> Image.Image:
+    """
+    The dilate_erode function takes an image and a value.
+    If the value is positive, it dilates the image by that amount.
+    If the value is negative, it erodes the image by that amount.
+
+    Parameters
+    ----------
+        img: PIL.Image.Image
+            the image to be processed
+        value: int
+            kernel size of dilation or erosion
+
+    Returns
+    -------
+        PIL.Image.Image
+            The image that has been dilated or eroded
+    """
+    if value == 0:
+        return img
+
+    arr = np.array(img)
+    arr = _dilate(arr, value) if value > 0 else _erode(arr, -value)
+
+    return Image.fromarray(arr)
+
+
+def offset(img: Image.Image, x: int = 0, y: int = 0) -> Image.Image:
+    """
+    The offset function takes an image and offsets it by a given x(→) and y(↑) value.
+
+    Parameters
+    ----------
+        img: Image.Image
+            The image (e.g. a mask) to offset
+        x: int
+            →
+        y: int
+            ↑
+
+    Returns
+    -------
+        PIL.Image.Image
+            A new image that is offset by x and y
+    """
+    return ImageChops.offset(img, x, -y)
+
+
+def is_all_black(img: Image.Image | np.ndarray) -> bool:
+    if isinstance(img, Image.Image):
+        img = np.array(ensure_pil_image(img, "L"))
+    return cv2.countNonZero(img) == 0
+
+
+def has_intersection(im1: Any, im2: Any) -> bool:
+    arr1 = np.array(ensure_pil_image(im1, "L"))
+    arr2 = np.array(ensure_pil_image(im2, "L"))
+    return not is_all_black(cv2.bitwise_and(arr1, arr2))
+
+
+def bbox_area(bbox: list[T]) -> T:
+    return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
+
+
+def mask_preprocess(
+    masks: list[Image.Image],
+    kernel: int = 0,
+    x_offset: int = 0,
+    y_offset: int = 0,
+    merge_invert: int | MergeInvert | str = MergeInvert.NONE,
+) -> list[Image.Image]:
+    """
+    The mask_preprocess function offsets the masks by x_offset and y_offset, dilates or erodes
+    them by kernel (dropping masks left all black), then merges/inverts them per merge_invert.
+
+    Parameters
+    ----------
+        masks: list[Image.Image]
+            A list of masks
+        kernel: int
+            kernel size of dilation (positive) or erosion (negative); 0 skips this step
+        x_offset: int
+            →
+        y_offset: int
+            ↑
+
+    Returns
+    -------
+        list[Image.Image]
+            A list of processed masks
+    """
+    if not masks:
+        return []
+
+    if x_offset != 0 or y_offset != 0:
+        masks = [offset(m, x_offset, y_offset) for m in masks]
+
+    if kernel != 0:
+        masks = [dilate_erode(m, kernel) for m in masks]
+        masks = [m for m in masks if not is_all_black(m)]
+
+    return mask_merge_invert(masks, mode=merge_invert)
+
+
+# Bbox sorting
+def _key_left_to_right(bbox: list[T]) -> T:
+    """
+    Left to right
+
+    Parameters
+    ----------
+    bbox: list[int] | list[float]
+        list of [x1, y1, x2, y2]
+    """
+    return bbox[0]
+
+
+def _key_center_to_edge(bbox: list[T], *, center: tuple[float, float]) -> float:
+    """
+    Center to edge
+
+    Parameters
+    ----------
+    bbox: list[int] | list[float]
+        list of [x1, y1, x2, y2]
+    center: tuple[float, float]
+        the (x, y) point from which the distance to the bbox center is measured
+    """
+    bbox_center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
+    return dist(center, bbox_center)
+
+
+def _key_area(bbox: list[T]) -> T:
+    """
+    Large to small
+
+    Parameters
+    ----------
+    bbox: list[int] | list[float]
+        list of [x1, y1, x2, y2]
+    """
+    return -bbox_area(bbox)
+
+
+def sort_bboxes(
+    pred: PredictOutput[T], order: int | SortBy = SortBy.NONE
+) -> PredictOutput[T]:
+    if order == SortBy.NONE or len(pred.bboxes) <= 1:
+        return pred
+
+    if order == SortBy.LEFT_TO_RIGHT:
+        key = _key_left_to_right
+    elif order == SortBy.CENTER_TO_EDGE:
+        width, height = pred.preview.size
+        center = (width / 2, height / 2)
+        key = partial(_key_center_to_edge, center=center)
+    elif order == SortBy.AREA:
+        key = _key_area
+    else:
+        raise RuntimeError
+
+    items = len(pred.bboxes)
+    idx = sorted(range(items), key=lambda i: key(pred.bboxes[i]))
+    pred.bboxes = [pred.bboxes[i] for i in idx]
+    pred.masks = [pred.masks[i] for i in idx]
+    return pred
+
+
+# Filter by ratio
+def is_in_ratio(bbox: list[T], low: float, high: float, orig_area: int) -> bool:
+    area = bbox_area(bbox)
+    return low <= area / orig_area <= high
+
+
+def filter_by_ratio(
+    pred: PredictOutput[T], low: float, high: float
+) -> PredictOutput[T]:
+    if not pred.bboxes:
+        return pred
+
+    w, h = pred.preview.size
+    orig_area = w * h
+    items = len(pred.bboxes)
+    idx = [i for i in range(items) if is_in_ratio(pred.bboxes[i], low, high, orig_area)]
+    pred.bboxes = [pred.bboxes[i] for i in idx]
+    pred.masks = [pred.masks[i] for i in idx]
+    return pred
+
+
+def filter_k_largest(pred: PredictOutput[T], k: int = 0) -> PredictOutput[T]:
+    if not pred.bboxes or k == 0:
+        return pred
+    areas = [bbox_area(bbox) for bbox in pred.bboxes]
+    idx = np.argsort(areas)[-k:]
+    idx = idx[::-1]
+    pred.bboxes = [pred.bboxes[i] for i in idx]
+    pred.masks = [pred.masks[i] for i in idx]
+    return pred
+
+
+# Merge / Invert
+def mask_merge(masks: list[Image.Image]) -> list[Image.Image]:
+    arrs = [np.array(m) for m in masks]
+    arr = reduce(cv2.bitwise_or, arrs)
+    return [Image.fromarray(arr)]
+
+
+def mask_invert(masks: list[Image.Image]) -> list[Image.Image]:
+    return [ImageChops.invert(m) for m in masks]
+
+
+def mask_merge_invert(
+    masks: list[Image.Image], mode: int | MergeInvert | str
+) -> list[Image.Image]:
+    if isinstance(mode, str):
+        mode = MASK_MERGE_INVERT.index(mode)
+
+    if mode == MergeInvert.NONE or not masks:
+        return masks
+
+    if mode == MergeInvert.MERGE:
+        return mask_merge(masks)
+
+    if mode == MergeInvert.MERGE_INVERT:
+        merged = mask_merge(masks)
+        return mask_invert(merged)
+
+    raise RuntimeError
\ No newline at end of file
diff --git a/extras/adetailer/script.py b/extras/adetailer/script.py
new file mode 100644
index 00000000..05a4110e
--- /dev/null
+++ b/extras/adetailer/script.py
@@ -0,0 +1,53 @@
+from extras.adetailer.args import ADetailerArgs
+from extras.adetailer.common import get_models, PredictOutput
+from extras.adetailer.mask import filter_by_ratio, filter_k_largest, sort_bboxes, mask_preprocess
+from modules import config
+
+# Built once at import time from the configured ADetailer model directory.
+# NOTE(review): huggingface=True presumably also enables models fetched from
+# Hugging Face — confirm against extras.adetailer.common.get_models.
+model_mapping = get_models(config.path_adetailer, huggingface=True)
+
+
+def get_ad_model(name: str):
+    # Return the model_mapping entry registered under `name`; unknown names raise ValueError.
+    if name in model_mapping:
+        return model_mapping[name]
+    raise ValueError(f"[-] ADetailer: Model {name!r} not found. Available models: {list(model_mapping.keys())}")
+
+
+def pred_preprocessing(p, pred: PredictOutput, args: ADetailerArgs, inpaint_only_masked=False):
+    """Filter and order the detections in ``pred``, then build the final masks.
+
+    ``p`` and ``inpaint_only_masked`` are accepted but not used by the current body;
+    the "inpaint only masked" mask filtering is not enabled.
+    """
+    ratio_filtered = filter_by_ratio(pred, low=args.ad_mask_min_ratio, high=args.ad_mask_max_ratio)
+    largest = filter_k_largest(ratio_filtered, k=args.ad_mask_k_largest)
+    ordered = sort_bboxes(largest)
+    # TODO: when inpaint_only_masked is set, filter the masks against the image mask
+    #       (get_image_mask / inpaint_mask_filter are not available in this module yet).
+    return mask_preprocess(
+        ordered.masks,
+        kernel=args.ad_dilate_erode,
+        x_offset=args.ad_x_offset,
+        y_offset=args.ad_y_offset,
+        merge_invert=args.ad_mask_merge_invert,
+    )
+
+
+    # def get_image_mask(p) -> Image.Image:
+    #     mask = p.image_mask
+    #     if getattr(p, "inpainting_mask_invert", False):
+    #         mask = ImageChops.invert(mask)
+    #     mask = create_binary_mask(mask)
+    #
+    #     if is_skip_img2img(p):
+    #         if hasattr(p, "init_images") and p.init_images:
+    #             width, height = p.init_images[0].size
+    #         else:
+    #             msg = "[-] ADetailer: no init_images."
+    #             raise RuntimeError(msg)
+    #     else:
+    #         width, height = p.width, p.height
+    #     return images.resize_image(p.resize_mode, mask, width, height)
\ No newline at end of file
diff --git a/extras/adetailer/ultralytics_predict.py b/extras/adetailer/ultralytics_predict.py
new file mode 100644
index 00000000..b028ea83
--- /dev/null
+++ b/extras/adetailer/ultralytics_predict.py
@@ -0,0 +1,67 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import cv2
+from PIL import Image
+from torchvision.transforms.functional import to_pil_image
+
+from extras.adetailer.common import PredictOutput, create_mask_from_bbox
+
+if TYPE_CHECKING:
+    import torch
+    from ultralytics import YOLO, YOLOWorld
+
+
+def ultralytics_predict(
+    model_path: str | Path,
+    image: Image.Image,
+    confidence: float = 0.3,
+    device: str = "",
+    classes: str = "",
+) -> PredictOutput[float]:
+    """Run a YOLO model on ``image`` and collect its boxes, masks and preview.
+
+    Returns an empty ``PredictOutput`` when no box is detected. Masks come from
+    the result's ``masks`` when present, otherwise they are built from the boxes.
+    """
+    from ultralytics import YOLO  # function-level import
+
+    detector = YOLO(model_path)
+    apply_classes(detector, model_path, classes)
+    result = detector(image, conf=confidence, device=device)[0]
+
+    xyxy = result.boxes.xyxy.cpu().numpy()
+    if xyxy.size == 0:
+        return PredictOutput()
+    bboxes = xyxy.tolist()
+    seg = result.masks
+    masks = create_mask_from_bbox(bboxes, image.size) if seg is None else mask_to_pil(seg.data, image.size)
+
+    annotated = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)  # plot() output is treated as BGR
+    return PredictOutput(bboxes=bboxes, masks=masks, preview=Image.fromarray(annotated))
+
+
+def apply_classes(model: YOLO | YOLOWorld, model_path: str | Path, classes: str):
+    # Custom classes are only set for models whose file stem contains "-world"; others stay untouched.
+    if classes and "-world" in Path(model_path).stem:
+        names = [name for name in map(str.strip, classes.split(",")) if name]
+        if names:
+            model.set_classes(names)
+
+
+def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
+    """Convert a stack of mask tensors into PIL images resized to ``shape``.
+
+    Parameters
+    ----------
+    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
+        The device can be CUDA, but `to_pil_image` takes care of that.
+    shape: tuple[int, int]
+        (W, H) of the original image
+    """
+    as_images = (to_pil_image(masks[k], mode="L") for k in range(masks.shape[0]))
+    return [img.resize(shape) for img in as_images]
+
+
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 5e7c561f..b79598fe 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -229,7 +229,7 @@ def worker():
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks,
-                     tiled, use_expansion, width, height):
+                     tiled, use_expansion, width, height, cleanup_conds=True):
         if async_task.last_stop is not False:
             ldm_patched.modules.model_management.interrupt_current_processing()
         positive_cond, negative_cond = task['c'], task['uc']
@@ -260,7 +260,8 @@ def worker():
             refiner_swap_method=async_task.refiner_swap_method,
             disable_preview=async_task.disable_preview
         )
-        del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
+        if cleanup_conds:
+            del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
         current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
@@ -1007,9 +1008,51 @@ def worker():
             execution_start_time = time.perf_counter()
 
             try:
-                process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                imgs, img_paths = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                              current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
-                             switch, task, tasks, tiled, use_expansion, width, height)
+                             switch, task, tasks, tiled, use_expansion, width, height, False)
+
+                # adetailer
+                for img in imgs:
+                    from extras.adetailer.ultralytics_predict import ultralytics_predict
+                    predictor = ultralytics_predict
+                    from extras.adetailer.script import get_ad_model
+                    ad_model = get_ad_model('face_yolov8n.pt')
+
+                    kwargs = {}
+                    kwargs["device"] = torch.device('cpu')
+                    kwargs["classes"] = ""
+                    from PIL import Image
+                    img2 = Image.fromarray(img)
+                    pred = predictor(ad_model, img2, **kwargs)
+
+                    if pred.preview is None:
+                        print(
+                            f"[-] ADetailer: nothing detected on image"
+                        )
+                        return False
+
+                    from extras.adetailer.args import ADetailerArgs
+                    args = ADetailerArgs()
+                    from extras.adetailer.script import pred_preprocessing
+                    masks = pred_preprocessing(img, pred, args)
+                    merged_masks = np.maximum(*[np.array(mask) for mask in masks])
+                    async_task.yields.append(['preview', (100, '...', merged_masks)])
+                    denoising_strength = 0.5
+                    inpaint_head_model_path = None
+                    inpaint_parameterized = False
+                    denoising_strength, initial_latent, width, height = apply_inpaint(async_task, None,
+                                                                                      inpaint_head_model_path, img,
+                                                                                      merged_masks,
+                                                                                      inpaint_parameterized,
+                                                                                      denoising_strength, switch)
+
+                    imgs, img_paths = process_task(all_steps, async_task, callback, controlnet_canny_path,
+                                                   controlnet_cpds_path,
+                                                   current_task_id, denoising_strength, final_scheduler_name, goals,
+                                                   initial_latent,
+                                                   switch, task, tasks, tiled, use_expansion, width, height)
+
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
                     print('User skipped')
diff --git a/modules/config.py b/modules/config.py
index ae00685d..d7bd2d31 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -191,6 +191,7 @@ def get_dir_or_set_default(key, default_value, as_array=False, make_directory=Fa
 paths_checkpoints = get_dir_or_set_default('path_checkpoints', ['../models/checkpoints/'], True)
 paths_loras = get_dir_or_set_default('path_loras', ['../models/loras/'], True)
 path_embeddings = get_dir_or_set_default('path_embeddings', '../models/embeddings/')
+path_adetailer = get_dir_or_set_default('path_adetailer', '../models/adetailer/')
 path_vae_approx = get_dir_or_set_default('path_vae_approx', '../models/vae_approx/')
 path_vae = get_dir_or_set_default('path_vae', '../models/vae/')
 path_upscale_models = get_dir_or_set_default('path_upscale_models', '../models/upscale_models/')
diff --git a/requirements_versions.txt b/requirements_versions.txt
index ebcd0297..d4e45e49 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -19,4 +19,5 @@ onnxruntime==1.16.3
 timm==0.9.2
 translators==5.8.9
 rembg==2.0.53
-groundingdino-py==0.4.0
\ No newline at end of file
+groundingdino-py==0.4.0
+ultralytics==8.2.28
\ No newline at end of file

From 5d8353eb74e9fa6afef2d4205ff428029c0fa199 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 01:10:11 +0200
Subject: [PATCH 006/101] fix: correctly reset pipeline

reset unet and inpaint worker after each adetailer task, improve memory efficiency
---
 modules/async_worker.py | 49 ++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index b79598fe..eada2122 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -229,7 +229,7 @@ def worker():
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks,
-                     tiled, use_expansion, width, height, cleanup_conds=True):
+                     tiled, use_expansion, width, height):
         if async_task.last_stop is not False:
             ldm_patched.modules.model_management.interrupt_current_processing()
         positive_cond, negative_cond = task['c'], task['uc']
@@ -260,8 +260,7 @@ def worker():
             refiner_swap_method=async_task.refiner_swap_method,
             disable_preview=async_task.disable_preview
         )
-        if cleanup_conds:
-            del task['c'], task['uc'], positive_cond, negative_cond  # Save memory
+        del positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
         current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
@@ -833,6 +832,7 @@ def worker():
 
         skip_prompt_processing = False
 
+        inpaint_worker.current_task = None
         inpaint_parameterized = async_task.inpaint_engine != 'None'
         inpaint_image = None
         inpaint_mask = None
@@ -1010,9 +1010,12 @@ def worker():
             try:
                 imgs, img_paths = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                              current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
-                             switch, task, tasks, tiled, use_expansion, width, height, False)
+                             switch, task, tasks, tiled, use_expansion, width, height)
 
                 # adetailer
+                progressbar(async_task, current_progress, 'Processing adetailer ...')
+                final_unet = pipeline.final_unet.clone()
+
                 for img in imgs:
                     from extras.adetailer.ultralytics_predict import ultralytics_predict
                     predictor = ultralytics_predict
@@ -1028,30 +1031,35 @@ def worker():
 
                     if pred.preview is None:
                         print(
-                            f"[-] ADetailer: nothing detected on image"
+                            f"[ADetailer] nothing detected on image"
                         )
-                        return False
+                        continue
 
                     from extras.adetailer.args import ADetailerArgs
                     args = ADetailerArgs()
                     from extras.adetailer.script import pred_preprocessing
                     masks = pred_preprocessing(img, pred, args)
                     merged_masks = np.maximum(*[np.array(mask) for mask in masks])
-                    async_task.yields.append(['preview', (100, '...', merged_masks)])
-                    denoising_strength = 0.5
-                    inpaint_head_model_path = None
-                    inpaint_parameterized = False
-                    denoising_strength, initial_latent, width, height = apply_inpaint(async_task, None,
-                                                                                      inpaint_head_model_path, img,
-                                                                                      merged_masks,
-                                                                                      inpaint_parameterized,
-                                                                                      denoising_strength, switch)
+                    async_task.yields.append(['preview', (current_progress, 'Loading ...', merged_masks)])
+                    # TODO also show do_not_show_finished_images=len(tasks) == 1 when adetailer is on
+                    yield_result(async_task, merged_masks, async_task.black_out_nsfw, False,
+                                 do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+                    denoising_strength_adetailer = 0.5
+                    inpaint_head_model_path_adetailer = None
+                    inpaint_parameterized_adetailer = False
+                    goals_adetailer = ['inpaint']
+                    denoising_strength_adetailer, initial_latent_adetailer, width_adetailer, height_adetailer = apply_inpaint(
+                        async_task, None, inpaint_head_model_path_adetailer, img, merged_masks,
+                        inpaint_parameterized_adetailer, denoising_strength_adetailer, switch)
 
-                    imgs, img_paths = process_task(all_steps, async_task, callback, controlnet_canny_path,
-                                                   controlnet_cpds_path,
-                                                   current_task_id, denoising_strength, final_scheduler_name, goals,
-                                                   initial_latent,
-                                                   switch, task, tasks, tiled, use_expansion, width, height)
+                    process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                                 current_task_id, denoising_strength_adetailer, final_scheduler_name, goals_adetailer,
+                                 initial_latent_adetailer, switch, task, tasks, tiled, use_expansion, width_adetailer,
+                                 height_adetailer)
+
+                    # reset unet and inpaint_worker
+                    pipeline.final_unet = final_unet
+                    inpaint_worker.current_task = None
 
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
@@ -1062,6 +1070,7 @@ def worker():
                     print('User stopped')
                     break
 
+            del task['c'], task['uc']  # Save memory
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 

From 85b9005fa73ec9e03afac77d786c700c4a0566e8 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 01:23:44 +0200
Subject: [PATCH 007/101] wip: adjust progressbar

---
 modules/async_worker.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index eada2122..c14f1f69 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -274,7 +274,7 @@ def worker():
         yield_result(async_task, img_paths, async_task.black_out_nsfw, False,
                      do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
 
-        return imgs, img_paths
+        return imgs, img_paths, current_progress
 
     def apply_patch_settings(async_task):
         patch_settings[pid] = PatchSettings(
@@ -430,8 +430,8 @@ def worker():
         return initial_latent, width, height
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
-                      inpaint_mask, inpaint_parameterized, denoising_strength, switch, skip_apply_outpaint=False,
-                      step_from=11):
+                      inpaint_mask, inpaint_parameterized, denoising_strength, switch, current_progress,
+                      skip_apply_outpaint=False):
         if not skip_apply_outpaint:
             inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask)
 
@@ -446,7 +446,7 @@ def worker():
                          do_not_show_finished_images=True)
             raise EarlyReturnException
 
-        progressbar(async_task, step_from, 'VAE Inpaint encoding ...')
+        progressbar(async_task, current_progress, 'VAE Inpaint encoding ...')
         inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
         inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
         inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
@@ -462,11 +462,11 @@ def worker():
             pixels=inpaint_pixel_image)
         latent_swap = None
         if candidate_vae_swap is not None:
-            progressbar(async_task, step_from + 1, 'VAE SD15 encoding ...')
+            progressbar(async_task, current_progress, 'VAE SD15 encoding ...')
             latent_swap = core.encode_vae(
                 vae=candidate_vae_swap,
                 pixels=inpaint_pixel_fill)['samples']
-        progressbar(async_task, step_from + 2, 'VAE encoding ...')
+        progressbar(async_task, current_progress, 'VAE encoding ...')
         latent_fill = core.encode_vae(
             vae=candidate_vae,
             pixels=inpaint_pixel_fill)['samples']
@@ -961,7 +961,8 @@ def worker():
                 denoising_strength, initial_latent, width, height = apply_inpaint(async_task, initial_latent,
                                                                                   inpaint_head_model_path, inpaint_image,
                                                                                   inpaint_mask, inpaint_parameterized,
-                                                                                  async_task.inpaint_strength, switch)
+                                                                                  async_task.inpaint_strength, switch,
+                                                                                  11)
             except EarlyReturnException:
                 return
 
@@ -1008,7 +1009,7 @@ def worker():
             execution_start_time = time.perf_counter()
 
             try:
-                imgs, img_paths = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                              current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
                              switch, task, tasks, tiled, use_expansion, width, height)
 
@@ -1030,9 +1031,7 @@ def worker():
                     pred = predictor(ad_model, img2, **kwargs)
 
                     if pred.preview is None:
-                        print(
-                            f"[ADetailer] nothing detected on image"
-                        )
+                        print('[ADetailer] nothing detected on image')
                         continue
 
                     from extras.adetailer.args import ADetailerArgs
@@ -1044,13 +1043,14 @@ def worker():
                     # TODO also show do_not_show_finished_images=len(tasks) == 1 when adetailer is on
                     yield_result(async_task, merged_masks, async_task.black_out_nsfw, False,
                                  do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+                    # TODO make configurable
                     denoising_strength_adetailer = 0.5
                     inpaint_head_model_path_adetailer = None
                     inpaint_parameterized_adetailer = False
                     goals_adetailer = ['inpaint']
                     denoising_strength_adetailer, initial_latent_adetailer, width_adetailer, height_adetailer = apply_inpaint(
                         async_task, None, inpaint_head_model_path_adetailer, img, merged_masks,
-                        inpaint_parameterized_adetailer, denoising_strength_adetailer, switch)
+                        inpaint_parameterized_adetailer, denoising_strength_adetailer, switch, current_progress, True)
 
                     process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                                  current_task_id, denoising_strength_adetailer, final_scheduler_name, goals_adetailer,

From 9affa32583aab1d40d50ebc04f2df96a840c39bd Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 01:36:32 +0200
Subject: [PATCH 008/101] feat: set inpaint_respective_field manually

---
 modules/async_worker.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index c14f1f69..e8d536db 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -430,8 +430,8 @@ def worker():
         return initial_latent, width, height
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
-                      inpaint_mask, inpaint_parameterized, denoising_strength, switch, current_progress,
-                      skip_apply_outpaint=False):
+                      inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch,
+                      current_progress, skip_apply_outpaint=False):
         if not skip_apply_outpaint:
             inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask)
 
@@ -439,7 +439,7 @@ def worker():
             image=inpaint_image,
             mask=inpaint_mask,
             use_fill=denoising_strength > 0.99,
-            k=async_task.inpaint_respective_field
+            k=inpaint_respective_field
         )
         if async_task.debugging_inpaint_preprocessor:
             yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), async_task.black_out_nsfw,
@@ -961,8 +961,9 @@ def worker():
                 denoising_strength, initial_latent, width, height = apply_inpaint(async_task, initial_latent,
                                                                                   inpaint_head_model_path, inpaint_image,
                                                                                   inpaint_mask, inpaint_parameterized,
-                                                                                  async_task.inpaint_strength, switch,
-                                                                                  11)
+                                                                                  async_task.inpaint_strength,
+                                                                                  async_task.inpaint_respective_field,
+                                                                                  switch, 11)
             except EarlyReturnException:
                 return
 
@@ -1044,13 +1045,15 @@ def worker():
                     yield_result(async_task, merged_masks, async_task.black_out_nsfw, False,
                                  do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
                     # TODO make configurable
-                    denoising_strength_adetailer = 0.5
+                    denoising_strength_adetailer = 0.3
+                    inpaint_respective_field_adetailer = 0.0
                     inpaint_head_model_path_adetailer = None
                     inpaint_parameterized_adetailer = False
                     goals_adetailer = ['inpaint']
                     denoising_strength_adetailer, initial_latent_adetailer, width_adetailer, height_adetailer = apply_inpaint(
                         async_task, None, inpaint_head_model_path_adetailer, img, merged_masks,
-                        inpaint_parameterized_adetailer, denoising_strength_adetailer, switch, current_progress, True)
+                        inpaint_parameterized_adetailer, denoising_strength_adetailer,
+                        inpaint_respective_field_adetailer, switch, current_progress, True)
 
                     process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                                  current_task_id, denoising_strength_adetailer, final_scheduler_name, goals_adetailer,

From ff9fa6c8372552150c0b2e7e8f0e378ac4dca64d Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 18:45:13 +0200
Subject: [PATCH 009/101] feat: add box_erode_or_dilate to
 generate_mask_from_image, expose more sam return values

---
 extras/GroundingDINO/util/inference.py |  4 ++--
 extras/inpaint_mask.py                 | 24 +++++++++++++++++++-----
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/extras/GroundingDINO/util/inference.py b/extras/GroundingDINO/util/inference.py
index 259094f2..bc8b6429 100644
--- a/extras/GroundingDINO/util/inference.py
+++ b/extras/GroundingDINO/util/inference.py
@@ -25,7 +25,7 @@ class GroundingDinoModel(Model):
             caption: str,
             box_threshold: float = 0.35,
             text_threshold: float = 0.25
-    ) -> Tuple[sv.Detections, List[str]]:
+    ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
         if self.model is None:
             filename = load_file_from_url(
                 url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
@@ -56,7 +56,7 @@ class GroundingDinoModel(Model):
             source_w=source_w,
             boxes=boxes,
             logits=logits)
-        return detections, phrases
+        return detections, boxes, logits, phrases
 
 
 def predict(
diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 4999f258..ea6e8819 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -10,17 +10,17 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold):
 
     # run grounding dino model
-    boxes, _ = default_groundingdino(
+    detections, _, _, _ = default_groundingdino(
         image=np.array(input_image),
         caption=text_prompt,
         box_threshold=box_threshold,
         text_threshold=text_threshold
     )
 
-    return boxes.xyxy
+    return detections.xyxy
 
 
-def generate_mask_from_image(image, mask_model, extras):
+def generate_mask_from_image(image, mask_model, extras, box_erode_or_dilate: int=0):
     if image is None:
         return
 
@@ -28,15 +28,29 @@ def generate_mask_from_image(image, mask_model, extras):
         image = image['image']
 
     if mask_model == 'sam':
-        boxes = run_grounded_sam(Image.fromarray(image), extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
+        img = Image.fromarray(image)
+        boxes = run_grounded_sam(img, extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
+        # use full image if no box has been found
         boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes
+
         extras['sam_prompt'] = []
+        # from PIL import ImageDraw
+        # draw = ImageDraw.Draw(img)
         for idx, box in enumerate(boxes):
-            extras['sam_prompt'] += [{"type": "rectangle", "data": box.tolist()}]
+            box_list = box.tolist()
+            if box_erode_or_dilate != 0:
+                box_list[0] -= box_erode_or_dilate
+                box_list[1] -= box_erode_or_dilate
+                box_list[2] += box_erode_or_dilate
+                box_list[3] += box_erode_or_dilate
+        #     draw.rectangle(box_list, fill=128, outline ="red")
+            extras['sam_prompt'] += [{"type": "rectangle", "data": box_list}]
+        # img.show()
 
     return remove(
         image,
         session=new_session(mask_model, **extras),
         only_mask=True,
+        # post_process_mask=True,
         **extras
     )

From 80878617436444694585b6e3660c59617d05460b Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 18:46:18 +0200
Subject: [PATCH 010/101] refactor: reorder inpaint_mask_sam_model by model
 size ascending

---
 modules/flags.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/flags.py b/modules/flags.py
index 60563c9a..6fec3663 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -76,7 +76,7 @@ output_formats = ['png', 'jpeg', 'webp']
 
 inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
 inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_h_4b8939', 'sam_vit_l_0b3195']
+inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_l_0b3195', 'sam_vit_h_4b8939']
 
 inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
 inpaint_option_default = 'Inpaint or Outpaint (default)'

From f2e7b65ed305add19b71802f9649e74a036a5902 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 18:47:29 +0200
Subject: [PATCH 011/101] wip: add experiments mask generation, install
 segment_anything v1.0

---
 experiments_mask_generation.py | 120 +++++++++++++++++++++++++++++++++
 requirements_versions.txt      |   3 +-
 2 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 experiments_mask_generation.py

diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
new file mode 100644
index 00000000..a28c66e2
--- /dev/null
+++ b/experiments_mask_generation.py
@@ -0,0 +1,120 @@
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from segment_anything.utils.amg import remove_small_regions
+
+from extras.GroundingDINO.util.inference import default_groundingdino
+from extras.adetailer.args import ADetailerArgs
+from extras.adetailer.script import get_ad_model
+from extras.adetailer.script import pred_preprocessing
+from extras.adetailer.ultralytics_predict import ultralytics_predict
+from extras.inpaint_mask import run_grounded_sam, generate_mask_from_image
+
+original_image1 = cv2.imread('cat.webp')
+original_image = Image.fromarray(original_image1)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# predictor = ultralytics_predict
+#
+# ad_model = get_ad_model('face_yolov8n.pt')
+#
+# kwargs = {}
+# kwargs["device"] = torch.device('cpu')
+# kwargs["classes"] = ""
+#
+# img2 = Image.fromarray(img)
+# pred = predictor(ad_model, img2, **kwargs)
+#
+# if pred.preview is None:
+#     print('[ADetailer] nothing detected on image')
+#
+# args = ADetailerArgs()
+#
+# masks = pred_preprocessing(img, pred, args)
+# merged_masks = np.maximum(*[np.array(mask) for mask in masks])
+#
+#
+# merged_masks_img = Image.fromarray(merged_masks)
+# merged_masks_img.show()
+
+sam_prompt = 'eye'
+sam_model = 'sam_vit_l_0b3195'
+dino_box_threshold = 0.3
+dino_text_threshold = 0.25
+box_erode_or_dilate = 0
+
+detections, boxes, logits, phrases = default_groundingdino(
+    image=np.array(original_image),
+    caption=sam_prompt,
+    box_threshold=dino_box_threshold,
+    text_threshold=dino_text_threshold
+)
+
+# for boxes.xyxy
+#boxes = run_grounded_sam(img, sam_prompt, box_threshold=dino_box_threshold, text_threshold=dino_text_threshold)
+#boxes = np.array([[0, 0, img.shape[1], img.shape[0]]]) if len(boxes) == 0 else boxes
+
+# from PIL import ImageDraw
+# draw = ImageDraw.Draw(img)
+# for idx, box in enumerate(boxes.xyxy):
+#     box_list = box.tolist()
+#     if box_erode_or_dilate != 0:
+#         box_list[0] -= box_erode_or_dilate
+#         box_list[1] -= box_erode_or_dilate
+#         box_list[2] += box_erode_or_dilate
+#         box_list[3] += box_erode_or_dilate
+#     draw.rectangle(box_list, fill=128, outline ="red")
+# img.show()
+
+H, W = original_image.size[1], original_image.size[0]
+boxes = boxes * torch.Tensor([W, H, W, H])
+boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
+boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
+
+
+from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
+
+sam_checkpoint = "./models/sam/sam_vit_l_0b3195.pth"
+model_type = "vit_l"
+sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
+sam.to(device=device)
+mask_generator = SamAutomaticMaskGenerator(sam)
+num_boxes = 2
+
+sam_predictor = SamPredictor(sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device=device))
+
+image_np = np.array(original_image, dtype=np.uint8)
+
+final_m = torch.zeros((image_np.shape[0], image_np.shape[1]))
+
+if boxes.size(0) > 0:
+    sam_predictor.set_image(image_np)
+
+    transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image_np.shape[:2])
+    masks, _, _ = sam_predictor.predict_torch(
+        point_coords=None,
+        point_labels=None,
+        boxes=transformed_boxes.to(device),
+        multimask_output=False,
+    )
+
+    # remove small disconnected regions and holes
+    fine_masks = []
+    for mask in masks.to('cpu').numpy():  # masks: [num_masks, 1, h, w]
+        fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0])
+    masks = np.stack(fine_masks, axis=0)[:, np.newaxis]
+    masks = torch.from_numpy(masks)
+
+    num_obj = min(len(logits), num_boxes)
+    for obj_ind in range(num_obj):
+        # box = boxes[obj_ind]
+
+        m = masks[obj_ind][0]
+        final_m += m
+final_m = (final_m > 0).to('cpu').numpy()
+# print(final_m.max(), final_m.min())
+mask_image = np.array(np.dstack((final_m, final_m, final_m)) * 255, dtype=np.uint8)
+
+merged_masks_img = Image.fromarray(mask_image)
+merged_masks_img.show()
diff --git a/requirements_versions.txt b/requirements_versions.txt
index d4e45e49..095452b4 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -20,4 +20,5 @@ timm==0.9.2
 translators==5.8.9
 rembg==2.0.53
 groundingdino-py==0.4.0
-ultralytics==8.2.28
\ No newline at end of file
+ultralytics==8.2.28
+segment_anything==1.0
\ No newline at end of file

From 57c049858c3a0b319153c7e4b98c5fadc2132be4 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 22:31:41 +0200
Subject: [PATCH 012/101] feat: add debug dino and mask dilate and erode

---
 extras/inpaint_mask.py | 45 +++++++++++++++++-------------------------
 language/en.json       |  5 +++++
 webui.py               | 29 ++++++++++++++-------------
 3 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index ea6e8819..8026d7e8 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -1,26 +1,9 @@
-from PIL import Image
 import numpy as np
-import torch
 from rembg import remove, new_session
 from extras.GroundingDINO.util.inference import default_groundingdino
 
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-
-def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold):
-
-    # run grounding dino model
-    detections, _, _, _ = default_groundingdino(
-        image=np.array(input_image),
-        caption=text_prompt,
-        box_threshold=box_threshold,
-        text_threshold=text_threshold
-    )
-
-    return detections.xyxy
-
-
-def generate_mask_from_image(image, mask_model, extras, box_erode_or_dilate: int=0):
+def generate_mask_from_image(image: np.ndarray, mask_model: str, extras: dict, box_erode_or_dilate: int=0, debug_dino: bool=False) -> np.ndarray | None:
     if image is None:
         return
 
@@ -28,29 +11,37 @@ def generate_mask_from_image(image, mask_model, extras, box_erode_or_dilate: int
         image = image['image']
 
     if mask_model == 'sam':
-        img = Image.fromarray(image)
-        boxes = run_grounded_sam(img, extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
+        detections, _, _, _ = default_groundingdino(
+            image=image,
+            caption=extras['sam_prompt_text'],
+            box_threshold=extras['box_threshold'],
+            text_threshold=extras['text_threshold']
+        )
+        detection_boxes = detections.xyxy
         # use full image if no box has been found
-        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes
+        detection_boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(detection_boxes) == 0 else detection_boxes
 
         extras['sam_prompt'] = []
-        # from PIL import ImageDraw
-        # draw = ImageDraw.Draw(img)
-        for idx, box in enumerate(boxes):
+        for idx, box in enumerate(detection_boxes):
             box_list = box.tolist()
             if box_erode_or_dilate != 0:
                 box_list[0] -= box_erode_or_dilate
                 box_list[1] -= box_erode_or_dilate
                 box_list[2] += box_erode_or_dilate
                 box_list[3] += box_erode_or_dilate
-        #     draw.rectangle(box_list, fill=128, outline ="red")
             extras['sam_prompt'] += [{"type": "rectangle", "data": box_list}]
-        # img.show()
+
+        if debug_dino:
+            from PIL import ImageDraw, Image
+            image_with_boxes = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+            draw = ImageDraw.Draw(image_with_boxes)
+            for box in extras['sam_prompt']:
+                draw.rectangle(box['data'], fill="white")
+            return np.array(image_with_boxes)
 
     return remove(
         image,
         session=new_session(mask_model, **extras),
         only_mask=True,
-        # post_process_mask=True,
         **extras
     )
diff --git a/language/en.json b/language/en.json
index a5f1e271..62c0ac31 100644
--- a/language/en.json
+++ b/language/en.json
@@ -377,10 +377,15 @@
     "Disable preview during generation.": "Disable preview during generation.",
     "Disable Intermediate Results": "Disable Intermediate Results",
     "Disable intermediate results during generation, only show final gallery.": "Disable intermediate results during generation, only show final gallery.",
+    "Debug Inpaint Preprocessing": "Debug Inpaint Preprocessing",
+    "Debug GroundingDINO": "Debug GroundingDINO",
+    "Used for SAM object detection and box generation": "Used for SAM object detection and box generation",
+    "GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate",
     "Inpaint Engine": "Inpaint Engine",
     "v1": "v1",
     "Version of Fooocus inpaint model": "Version of Fooocus inpaint model",
     "v2.5": "v2.5",
+    "v2.6": "v2.6",
     "Control Debug": "Control Debug",
     "Debug Preprocessors": "Debug Preprocessors",
     "Mixing Image Prompt and Vary/Upscale": "Mixing Image Prompt and Vary/Upscale",
diff --git a/webui.py b/webui.py
index db1c98d0..f929e49f 100644
--- a/webui.py
+++ b/webui.py
@@ -231,7 +231,7 @@ with shared.gradio_root:
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
-                                def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold):
+                                def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold, debug_dino, dino_erode_or_dilate):
                                     from extras.inpaint_mask import generate_mask_from_image
 
                                     extras = {}
@@ -244,19 +244,7 @@ with shared.gradio_root:
                                         extras['box_threshold'] = box_threshold
                                         extras['text_threshold'] = text_threshold
 
-                                    return generate_mask_from_image(image, mask_model, extras)
-
-                                generate_mask_button.click(fn=generate_mask,
-                                                           inputs=[
-                                                               inpaint_input_image, inpaint_mask_model,
-                                                               inpaint_mask_cloth_category,
-                                                               inpaint_mask_sam_prompt_text,
-                                                               inpaint_mask_sam_model,
-                                                               inpaint_mask_sam_quant,
-                                                               inpaint_mask_box_threshold,
-                                                               inpaint_mask_text_threshold
-                                                           ],
-                                                           outputs=inpaint_mask_image, show_progress=True, queue=True)
+                                    return generate_mask_from_image(image, mask_model, extras, dino_erode_or_dilate, debug_dino)
 
                                 inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
                                                           inputs=inpaint_mask_model,
@@ -570,6 +558,8 @@ with shared.gradio_root:
 
                     with gr.Tab(label='Inpaint'):
                         debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False)
+                        debug_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
+                                                 info='Used for SAM object detection and box generation')
                         inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
                         inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                      value=modules.config.default_inpaint_engine_version,
@@ -592,6 +582,10 @@ with shared.gradio_root:
                                                             info='Positive value will make white area in the mask larger, '
                                                                  'negative value will make white area smaller.'
                                                                  '(default is 0, always process before any mask invert)')
+                        dino_erode_or_dilate = gr.Slider(label='GroundingDINO Box Erode or Dilate',
+                                                         minimum=-64, maximum=64, step=1, value=0,
+                                                         info='Positive value will make white area in the mask larger, '
+                                                              'negative value will make white area smaller.')
                         inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False)
                         invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False)
 
@@ -741,6 +735,13 @@ with shared.gradio_root:
             inpaint_strength, inpaint_respective_field
         ], show_progress=False, queue=False)
 
+        generate_mask_button.click(fn=generate_mask,
+                                   inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
+                                           inpaint_mask_sam_prompt_text, inpaint_mask_sam_model, inpaint_mask_sam_quant,
+                                           inpaint_mask_box_threshold, inpaint_mask_text_threshold, debug_dino,
+                                           dino_erode_or_dilate],
+                                   outputs=inpaint_mask_image, show_progress=True, queue=True)
+
         ctrls = [currentTask, generate_image_grid]
         ctrls += [
             prompt, negative_prompt, translate_prompts, style_selections,

From 09e23f5509356443443a233e6a7dadfdb36c623f Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 9 Jun 2024 23:09:46 +0200
Subject: [PATCH 013/101] refactor: add info to inpaint_mask_sam_prompt_text,
 rename variable for dino debugging

---
 extras/inpaint_mask.py | 6 +++---
 language/en.json       | 1 +
 webui.py               | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 8026d7e8..3ee00cf4 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -33,11 +33,11 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str, extras: dict, b
 
         if debug_dino:
             from PIL import ImageDraw, Image
-            image_with_boxes = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
-            draw = ImageDraw.Draw(image_with_boxes)
+            debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+            draw = ImageDraw.Draw(debug_dino_image)
             for box in extras['sam_prompt']:
                 draw.rectangle(box['data'], fill="white")
-            return np.array(image_with_boxes)
+            return np.array(debug_dino_image)
 
     return remove(
         image,
diff --git a/language/en.json b/language/en.json
index 62c0ac31..dc208f3e 100644
--- a/language/en.json
+++ b/language/en.json
@@ -47,6 +47,7 @@
     "Mask generation model": "Mask generation model",
     "Cloth category": "Cloth category",
     "Segmentation prompt": "Segmentation prompt",
+    "Use singular whenever possible": "Use singular whenever possible",
     "Advanced options": "Advanced options",
     "SAM model": "SAM model",
     "Quantization": "Quantization",
diff --git a/webui.py b/webui.py
index f929e49f..a036dfb5 100644
--- a/webui.py
+++ b/webui.py
@@ -223,7 +223,7 @@ with shared.gradio_root:
                                                              choices=flags.inpaint_mask_cloth_category,
                                                              value=modules.config.default_inpaint_mask_cloth_category,
                                                              visible=False)
-                                inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False)
+                                inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False, info='Use singular whenever possible')
                                 with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)

From 8a81993940155e57b699c0e862f14515f28d3061 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 01:33:03 +0200
Subject: [PATCH 014/101] wip: remove ultralytics, always use manual sam for
 image mask instead of rembg

---
 experiments_mask_generation.py | 124 ++++------------------------
 extras/inpaint_mask.py         | 146 +++++++++++++++++++++++++--------
 requirements_versions.txt      |   1 -
 webui.py                       |  30 ++++---
 4 files changed, 146 insertions(+), 155 deletions(-)

diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
index a28c66e2..538ad712 100644
--- a/experiments_mask_generation.py
+++ b/experiments_mask_generation.py
@@ -1,120 +1,24 @@
-import cv2
+# https://github.com/sail-sg/EditAnything/blob/main/sam2groundingdino_edit.py
+
 import numpy as np
-import torch
 from PIL import Image
-from segment_anything.utils.amg import remove_small_regions
 
-from extras.GroundingDINO.util.inference import default_groundingdino
-from extras.adetailer.args import ADetailerArgs
-from extras.adetailer.script import get_ad_model
-from extras.adetailer.script import pred_preprocessing
-from extras.adetailer.ultralytics_predict import ultralytics_predict
-from extras.inpaint_mask import run_grounded_sam, generate_mask_from_image
+from extras.inpaint_mask import SAMOptions, generate_mask_from_image
 
-original_image1 = cv2.imread('cat.webp')
-original_image = Image.fromarray(original_image1)
-device = "cuda" if torch.cuda.is_available() else "cpu"
+original_image = Image.open('cat.webp')
+image = np.array(original_image, dtype=np.uint8)
 
-# predictor = ultralytics_predict
-#
-# ad_model = get_ad_model('face_yolov8n.pt')
-#
-# kwargs = {}
-# kwargs["device"] = torch.device('cpu')
-# kwargs["classes"] = ""
-#
-# img2 = Image.fromarray(img)
-# pred = predictor(ad_model, img2, **kwargs)
-#
-# if pred.preview is None:
-#     print('[ADetailer] nothing detected on image')
-#
-# args = ADetailerArgs()
-#
-# masks = pred_preprocessing(img, pred, args)
-# merged_masks = np.maximum(*[np.array(mask) for mask in masks])
-#
-#
-# merged_masks_img = Image.fromarray(merged_masks)
-# merged_masks_img.show()
-
-sam_prompt = 'eye'
-sam_model = 'sam_vit_l_0b3195'
-dino_box_threshold = 0.3
-dino_text_threshold = 0.25
-box_erode_or_dilate = 0
-
-detections, boxes, logits, phrases = default_groundingdino(
-    image=np.array(original_image),
-    caption=sam_prompt,
-    box_threshold=dino_box_threshold,
-    text_threshold=dino_text_threshold
+sam_options = SAMOptions(
+    dino_prompt='eye',
+    dino_box_threshold=0.3,
+    dino_text_threshold=0.25,
+    box_erode_or_dilate=0,
+    max_num_boxes=2,
+    sam_checkpoint="./models/sam/sam_vit_l.safetensors",
+    model_type="vit_l"
 )
 
-# for boxes.xyxy
-#boxes = run_grounded_sam(img, sam_prompt, box_threshold=dino_box_threshold, text_threshold=dino_text_threshold)
-#boxes = np.array([[0, 0, img.shape[1], img.shape[0]]]) if len(boxes) == 0 else boxes
-
-# from PIL import ImageDraw
-# draw = ImageDraw.Draw(img)
-# for idx, box in enumerate(boxes.xyxy):
-#     box_list = box.tolist()
-#     if box_erode_or_dilate != 0:
-#         box_list[0] -= box_erode_or_dilate
-#         box_list[1] -= box_erode_or_dilate
-#         box_list[2] += box_erode_or_dilate
-#         box_list[3] += box_erode_or_dilate
-#     draw.rectangle(box_list, fill=128, outline ="red")
-# img.show()
-
-H, W = original_image.size[1], original_image.size[0]
-boxes = boxes * torch.Tensor([W, H, W, H])
-boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
-boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
-
-
-from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
-
-sam_checkpoint = "./models/sam/sam_vit_l_0b3195.pth"
-model_type = "vit_l"
-sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
-sam.to(device=device)
-mask_generator = SamAutomaticMaskGenerator(sam)
-num_boxes = 2
-
-sam_predictor = SamPredictor(sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device=device))
-
-image_np = np.array(original_image, dtype=np.uint8)
-
-final_m = torch.zeros((image_np.shape[0], image_np.shape[1]))
-
-if boxes.size(0) > 0:
-    sam_predictor.set_image(image_np)
-
-    transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image_np.shape[:2])
-    masks, _, _ = sam_predictor.predict_torch(
-        point_coords=None,
-        point_labels=None,
-        boxes=transformed_boxes.to(device),
-        multimask_output=False,
-    )
-
-    # remove small disconnected regions and holes
-    fine_masks = []
-    for mask in masks.to('cpu').numpy():  # masks: [num_masks, 1, h, w]
-        fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0])
-    masks = np.stack(fine_masks, axis=0)[:, np.newaxis]
-    masks = torch.from_numpy(masks)
-
-    num_obj = min(len(logits), num_boxes)
-    for obj_ind in range(num_obj):
-        # box = boxes[obj_ind]
-
-        m = masks[obj_ind][0]
-        final_m += m
-final_m = (final_m > 0).to('cpu').numpy()
-# print(final_m.max(), final_m.min())
-mask_image = np.array(np.dstack((final_m, final_m, final_m)) * 255, dtype=np.uint8)
+mask_image = generate_mask_from_image(image, sam_options=sam_options)
 
 merged_masks_img = Image.fromarray(mask_image)
 merged_masks_img.show()
diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 3ee00cf4..85cd7fc5 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -1,47 +1,129 @@
 import numpy as np
+import torch
 from rembg import remove, new_session
+from segment_anything import sam_model_registry, SamPredictor
+from segment_anything.utils.amg import remove_small_regions
+
 from extras.GroundingDINO.util.inference import default_groundingdino
 
 
-def generate_mask_from_image(image: np.ndarray, mask_model: str, extras: dict, box_erode_or_dilate: int=0, debug_dino: bool=False) -> np.ndarray | None:
+class SAMOptions:
+    def __init__(self,
+                 # GroundingDINO
+                 dino_prompt: str = '',
+                 dino_box_threshold=0.3,
+                 dino_text_threshold=0.25,
+                 box_erode_or_dilate=0,
+
+                 # SAM
+                 max_num_boxes=2,
+                 sam_checkpoint="./models/sam/sam_vit_l_0b3195.pth",
+                 model_type="vit_l"
+                 ):
+        self.dino_prompt = dino_prompt
+        self.dino_box_threshold = dino_box_threshold
+        self.dino_text_threshold = dino_text_threshold
+        self.box_erode_or_dilate = box_erode_or_dilate
+        self.max_num_boxes = max_num_boxes
+        self.sam_checkpoint = sam_checkpoint
+        self.model_type = model_type
+
+
+def optimize_masks(masks: torch.Tensor) -> torch.Tensor:
+    """
+    removes small disconnected regions and holes
+    """
+    fine_masks = []
+    for mask in masks.to('cpu').numpy():  # masks: [num_masks, 1, h, w]
+        fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0])
+    masks = np.stack(fine_masks, axis=0)[:, np.newaxis]
+    return torch.from_numpy(masks)
+
+
+def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=None,
+                             sam_options: SAMOptions | None = SAMOptions) -> np.ndarray | None:
     if image is None:
         return
 
+    if extras is None:
+        extras = {}
+
     if 'image' in image:
         image = image['image']
 
-    if mask_model == 'sam':
-        detections, _, _, _ = default_groundingdino(
-            image=image,
-            caption=extras['sam_prompt_text'],
-            box_threshold=extras['box_threshold'],
-            text_threshold=extras['text_threshold']
+    if mask_model != 'sam' and sam_options is None:
+        return remove(
+            image,
+            session=new_session(mask_model, **extras),
+            only_mask=True,
+            **extras
         )
-        detection_boxes = detections.xyxy
-        # use full image if no box has been found
-        detection_boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(detection_boxes) == 0 else detection_boxes
 
-        extras['sam_prompt'] = []
-        for idx, box in enumerate(detection_boxes):
-            box_list = box.tolist()
-            if box_erode_or_dilate != 0:
-                box_list[0] -= box_erode_or_dilate
-                box_list[1] -= box_erode_or_dilate
-                box_list[2] += box_erode_or_dilate
-                box_list[3] += box_erode_or_dilate
-            extras['sam_prompt'] += [{"type": "rectangle", "data": box_list}]
+    assert sam_options is not None
 
-        if debug_dino:
-            from PIL import ImageDraw, Image
-            debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
-            draw = ImageDraw.Draw(debug_dino_image)
-            for box in extras['sam_prompt']:
-                draw.rectangle(box['data'], fill="white")
-            return np.array(debug_dino_image)
-
-    return remove(
-        image,
-        session=new_session(mask_model, **extras),
-        only_mask=True,
-        **extras
+    detections, boxes, logits, phrases = default_groundingdino(
+        image=image,
+        caption=sam_options.dino_prompt,
+        box_threshold=sam_options.dino_box_threshold,
+        text_threshold=sam_options.dino_text_threshold
     )
+    # detection_boxes = detections.xyxy
+    # # use full image if no box has been found
+    # detection_boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(detection_boxes) == 0 else detection_boxes
+    #
+    #
+    # for idx, box in enumerate(detection_boxes):
+    #     box_list = box.tolist()
+    #     if box_erode_or_dilate != 0:
+    #         box_list[0] -= box_erode_or_dilate
+    #         box_list[1] -= box_erode_or_dilate
+    #         box_list[2] += box_erode_or_dilate
+    #         box_list[3] += box_erode_or_dilate
+    #     extras['sam_prompt'] += [{"type": "rectangle", "data": box_list}]
+    #
+    # if debug_dino:
+    #     from PIL import ImageDraw, Image
+    #     debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+    #     draw = ImageDraw.Draw(debug_dino_image)
+    #     for box in extras['sam_prompt']:
+    #         draw.rectangle(box['data'], fill="white")
+    #     return np.array(debug_dino_image)
+
+    # TODO add support for box_erode_or_dilate again
+
+    H, W = image.shape[0], image.shape[1]
+    boxes = boxes * torch.Tensor([W, H, W, H])
+    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
+    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
+
+    # TODO add model patcher for model logic and device management
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    sam = sam_model_registry[sam_options.model_type](checkpoint=sam_options.sam_checkpoint)
+    sam.to(device=device)
+
+    sam_predictor = SamPredictor(sam)
+    final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
+
+    if boxes.size(0) > 0:
+        sam_predictor.set_image(image)
+
+        transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
+        masks, _, _ = sam_predictor.predict_torch(
+            point_coords=None,
+            point_labels=None,
+            boxes=transformed_boxes.to(device),
+            multimask_output=False,
+        )
+
+        masks = optimize_masks(masks)
+
+        num_obj = min(len(logits), sam_options.max_num_boxes)
+        for obj_ind in range(num_obj):
+            mask_tensor = masks[obj_ind][0]
+            final_mask_tensor += mask_tensor
+
+    final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy()
+    mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255
+    mask_image = np.array(mask_image, dtype=np.uint8)
+    return mask_image
diff --git a/requirements_versions.txt b/requirements_versions.txt
index 095452b4..bc86caac 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -20,5 +20,4 @@ timm==0.9.2
 translators==5.8.9
 rembg==2.0.53
 groundingdino-py==0.4.0
-ultralytics==8.2.28
 segment_anything==1.0
\ No newline at end of file
diff --git a/webui.py b/webui.py
index a036dfb5..4b63cc2f 100644
--- a/webui.py
+++ b/webui.py
@@ -16,6 +16,7 @@ import modules.meta_parser
 import args_manager
 import copy
 import launch
+from extras.inpaint_mask import SAMOptions
 
 from modules.sdxl_styles import legal_style_names
 from modules.private_logger import get_current_html_path
@@ -223,7 +224,7 @@ with shared.gradio_root:
                                                              choices=flags.inpaint_mask_cloth_category,
                                                              value=modules.config.default_inpaint_mask_cloth_category,
                                                              visible=False)
-                                inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False, info='Use singular whenever possible')
+                                inpaint_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False, info='Use singular whenever possible')
                                 with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)
@@ -231,24 +232,29 @@ with shared.gradio_root:
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
-                                def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold, debug_dino, dino_erode_or_dilate):
+                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, sam_quant, box_threshold, text_threshold, dino_erode_or_dilate, debug_dino):
                                     from extras.inpaint_mask import generate_mask_from_image
 
                                     extras = {}
+                                    sam_options = None
                                     if mask_model == 'u2net_cloth_seg':
                                         extras['cloth_category'] = cloth_category
                                     elif mask_model == 'sam':
-                                        extras['sam_prompt_text'] = sam_prompt_text
-                                        extras['sam_model'] = sam_model
-                                        extras['sam_quant'] = sam_quant
-                                        extras['box_threshold'] = box_threshold
-                                        extras['text_threshold'] = text_threshold
+                                        sam_options = SAMOptions(
+                                            dino_prompt=dino_prompt_text,
+                                            dino_box_threshold=box_threshold,
+                                            dino_text_threshold=text_threshold,
+                                            box_erode_or_dilate=dino_erode_or_dilate,
+                                            max_num_boxes=2, #TODO replace with actual value
+                                            sam_checkpoint="./models/sam/sam_vit_l_0b3195.pth", # TODO replace with actual value
+                                            model_type="vit_l"
+                                        )
 
-                                    return generate_mask_from_image(image, mask_model, extras, dino_erode_or_dilate, debug_dino)
+                                    return generate_mask_from_image(image, mask_model, extras, sam_options)
 
                                 inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
                                                           inputs=inpaint_mask_model,
-                                                          outputs=[inpaint_mask_cloth_category, inpaint_mask_sam_prompt_text, inpaint_mask_advanced_options],
+                                                          outputs=[inpaint_mask_cloth_category, inpaint_mask_dino_prompt_text, inpaint_mask_advanced_options],
                                                           queue=False, show_progress=False)
 
                     with gr.TabItem(label='Describe') as desc_tab:
@@ -737,9 +743,9 @@ with shared.gradio_root:
 
         generate_mask_button.click(fn=generate_mask,
                                    inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
-                                           inpaint_mask_sam_prompt_text, inpaint_mask_sam_model, inpaint_mask_sam_quant,
-                                           inpaint_mask_box_threshold, inpaint_mask_text_threshold, debug_dino,
-                                           dino_erode_or_dilate],
+                                           inpaint_mask_dino_prompt_text, inpaint_mask_sam_model, inpaint_mask_sam_quant,
+                                           inpaint_mask_box_threshold, inpaint_mask_text_threshold, dino_erode_or_dilate,
+                                           debug_dino],
                                    outputs=inpaint_mask_image, show_progress=True, queue=True)
 
         ctrls = [currentTask, generate_image_grid]

From 980563de9d330c484cbd37471fc11c8dce48a38a Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 20:31:32 +0200
Subject: [PATCH 015/101] feat: remove inpaint_mask_sam_quant

---
 webui.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/webui.py b/webui.py
index 4b63cc2f..0d8e2396 100644
--- a/webui.py
+++ b/webui.py
@@ -227,12 +227,11 @@ with shared.gradio_root:
                                 inpaint_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False, info='Use singular whenever possible')
                                 with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
-                                    inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)
                                     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
-                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, sam_quant, box_threshold, text_threshold, dino_erode_or_dilate, debug_dino):
+                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, dino_erode_or_dilate, debug_dino):
                                     from extras.inpaint_mask import generate_mask_from_image
 
                                     extras = {}
@@ -743,7 +742,7 @@ with shared.gradio_root:
 
         generate_mask_button.click(fn=generate_mask,
                                    inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
-                                           inpaint_mask_dino_prompt_text, inpaint_mask_sam_model, inpaint_mask_sam_quant,
+                                           inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
                                            inpaint_mask_box_threshold, inpaint_mask_text_threshold, dino_erode_or_dilate,
                                            debug_dino],
                                    outputs=inpaint_mask_image, show_progress=True, queue=True)

From ce1fb74270e6ad55ead678187a1447d9efd490a4 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 20:33:40 +0200
Subject: [PATCH 016/101] feat: add download for sam models to config

---
 extras/inpaint_mask.py |  6 +++---
 modules/config.py      | 40 ++++++++++++++++++++++++++++++++++++++++
 webui.py               |  3 +--
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 85cd7fc5..71a926d1 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -5,6 +5,7 @@ from segment_anything import sam_model_registry, SamPredictor
 from segment_anything.utils.amg import remove_small_regions
 
 from extras.GroundingDINO.util.inference import default_groundingdino
+import modules.config
 
 
 class SAMOptions:
@@ -17,7 +18,6 @@ class SAMOptions:
 
                  # SAM
                  max_num_boxes=2,
-                 sam_checkpoint="./models/sam/sam_vit_l_0b3195.pth",
                  model_type="vit_l"
                  ):
         self.dino_prompt = dino_prompt
@@ -25,7 +25,6 @@ class SAMOptions:
         self.dino_text_threshold = dino_text_threshold
         self.box_erode_or_dilate = box_erode_or_dilate
         self.max_num_boxes = max_num_boxes
-        self.sam_checkpoint = sam_checkpoint
         self.model_type = model_type
 
 
@@ -99,7 +98,8 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     # TODO add model patcher for model logic and device management
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
-    sam = sam_model_registry[sam_options.model_type](checkpoint=sam_options.sam_checkpoint)
+    sam_checkpoint = modules.config.download_sam_model(sam_options.model_type)
+    sam = sam_model_registry[sam_options.model_type](checkpoint=sam_checkpoint)
     sam.to(device=device)
 
     sam_predictor = SamPredictor(sam)
diff --git a/modules/config.py b/modules/config.py
index d7bd2d31..2833413c 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -202,6 +202,7 @@ path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../mo
 path_safety_checker_models = get_dir_or_set_default('path_safety_checker_models', '../models/safety_checker_models/')
 path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/')
 path_safety_checker = get_dir_or_set_default('path_safety_checker', '../models/safety_checker/')
+path_sam = get_dir_or_set_default('path_sam', '../models/sam/')
 path_outputs = get_path_output()
 
 
@@ -789,4 +790,43 @@ def downloading_safety_checker_model():
     return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin')
 
 
+def download_sam_model(sam_model: str) -> str:
+    match sam_model:
+        case 'default', 'vit_b':
+            return downloading_sam_vit_b()
+        case 'vit_l':
+            return downloading_sam_vit_l()
+        case 'vit_h':
+            return downloading_sam_vit_h()
+        case _:
+            raise ValueError(f"sam model {sam_model} does not exist.")
+
+
+def downloading_sam_vit_b():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_b_01ec64.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_b_01ec64.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_b_01ec64.pth')
+
+
+def downloading_sam_vit_l():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_l_0b3195.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_l_0b3195.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_l_0b3195.pth')
+
+
+def downloading_sam_vit_h():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_h_4b8939.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_h_4b8939.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_h_4b8939.pth')
+
+
 update_files()
diff --git a/webui.py b/webui.py
index 0d8e2396..a194b4c4 100644
--- a/webui.py
+++ b/webui.py
@@ -245,8 +245,7 @@ with shared.gradio_root:
                                             dino_text_threshold=text_threshold,
                                             box_erode_or_dilate=dino_erode_or_dilate,
                                             max_num_boxes=2, #TODO replace with actual value
-                                            sam_checkpoint="./models/sam/sam_vit_l_0b3195.pth", # TODO replace with actual value
-                                            model_type="vit_l"
+                                            model_type=sam_model
                                         )
 
                                     return generate_mask_from_image(image, mask_model, extras, sam_options)

From 3873892b0ad61000e540eba60d0bc44f4667c889 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 20:45:39 +0200
Subject: [PATCH 017/101] feat: change default_inpaint_mask_sam_model to match
 sam model registry

---
 extras/inpaint_mask.py | 2 +-
 modules/config.py      | 6 +++---
 modules/flags.py       | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 71a926d1..b67b81b3 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -18,7 +18,7 @@ class SAMOptions:
 
                  # SAM
                  max_num_boxes=2,
-                 model_type="vit_l"
+                 model_type='vit_b'
                  ):
         self.dino_prompt = dino_prompt
         self.dino_box_threshold = dino_box_threshold
diff --git a/modules/config.py b/modules/config.py
index 2833413c..9bd354f5 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -551,8 +551,8 @@ default_inpaint_mask_cloth_category = get_config_item_or_set_default(
 
 default_inpaint_mask_sam_model = get_config_item_or_set_default(
     key='default_inpaint_mask_sam_model',
-    default_value='sam_vit_b_01ec64',
-    validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
+    default_value='vit_b',
+    validator=lambda x: x in [y[1] for y in modules.flags.inpaint_mask_sam_model if y[1] == x],
     expected_type=str
 )
 
@@ -792,7 +792,7 @@ def downloading_safety_checker_model():
 
 def download_sam_model(sam_model: str) -> str:
     match sam_model:
-        case 'default', 'vit_b':
+        case 'vit_b':
             return downloading_sam_vit_b()
         case 'vit_l':
             return downloading_sam_vit_l()
diff --git a/modules/flags.py b/modules/flags.py
index 6fec3663..ed9a5606 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -76,7 +76,7 @@ output_formats = ['png', 'jpeg', 'webp']
 
 inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
 inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_l_0b3195', 'sam_vit_h_4b8939']
+inpaint_mask_sam_model = [('base', 'vit_b'), ('large', 'vit_l'), ('huge', 'vit_h')]
 
 inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
 inpaint_option_default = 'Inpaint or Outpaint (default)'

From b8578a080ac90c624830e79997632ea89eefdac7 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 20:46:25 +0200
Subject: [PATCH 018/101] fix: remove already declared config option

---
 modules/config.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/modules/config.py b/modules/config.py
index 9bd354f5..1fa7d87b 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -529,12 +529,6 @@ metadata_created_by = get_config_item_or_set_default(
 
 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
 
-default_black_out_nsfw = get_config_item_or_set_default(
-    key='default_black_out_nsfw',
-    default_value=False,
-    validator=lambda x: isinstance(x, bool),
-    expected_type=bool
-)
 default_inpaint_mask_model = get_config_item_or_set_default(
     key='default_inpaint_mask_model',
     default_value='isnet-general-use',

From 651f9c5cfd172a3055b9821df0d621b7b8b84e08 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 20:47:07 +0200
Subject: [PATCH 019/101] refactor: rename box_erode_or_dilate to
 dino_erode_or_dilate, add option dino_debug

---
 extras/inpaint_mask.py | 18 ++++++++++--------
 webui.py               |  3 ++-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index b67b81b3..3b4d1cb6 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -14,7 +14,8 @@ class SAMOptions:
                  dino_prompt: str = '',
                  dino_box_threshold=0.3,
                  dino_text_threshold=0.25,
-                 box_erode_or_dilate=0,
+                 dino_erode_or_dilate=0,
+                 dino_debug=False,
 
                  # SAM
                  max_num_boxes=2,
@@ -23,7 +24,7 @@ class SAMOptions:
         self.dino_prompt = dino_prompt
         self.dino_box_threshold = dino_box_threshold
         self.dino_text_threshold = dino_text_threshold
-        self.box_erode_or_dilate = box_erode_or_dilate
+        self.dino_erode_or_dilate = dino_erode_or_dilate
         self.max_num_boxes = max_num_boxes
         self.model_type = model_type
 
@@ -73,11 +74,11 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     #
     # for idx, box in enumerate(detection_boxes):
     #     box_list = box.tolist()
-    #     if box_erode_or_dilate != 0:
-    #         box_list[0] -= box_erode_or_dilate
-    #         box_list[1] -= box_erode_or_dilate
-    #         box_list[2] += box_erode_or_dilate
-    #         box_list[3] += box_erode_or_dilate
+    #     if dino_erode_or_dilate != 0:
+    #         box_list[0] -= dino_erode_or_dilate
+    #         box_list[1] -= dino_erode_or_dilate
+    #         box_list[2] += dino_erode_or_dilate
+    #         box_list[3] += dino_erode_or_dilate
     #     extras['sam_prompt'] += [{"type": "rectangle", "data": box_list}]
     #
     # if debug_dino:
@@ -88,7 +89,8 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     #         draw.rectangle(box['data'], fill="white")
     #     return np.array(debug_dino_image)
 
-    # TODO add support for box_erode_or_dilate again
+    # TODO add support for dino_erode_or_dilate again
+    # TODO add dino_debug again
 
     H, W = image.shape[0], image.shape[1]
     boxes = boxes * torch.Tensor([W, H, W, H])
diff --git a/webui.py b/webui.py
index a194b4c4..5af17a00 100644
--- a/webui.py
+++ b/webui.py
@@ -243,7 +243,8 @@ with shared.gradio_root:
                                             dino_prompt=dino_prompt_text,
                                             dino_box_threshold=box_threshold,
                                             dino_text_threshold=text_threshold,
-                                            box_erode_or_dilate=dino_erode_or_dilate,
+                                            dino_erode_or_dilate=dino_erode_or_dilate,
+                                            dino_debug=debug_dino,
                                             max_num_boxes=2, #TODO replace with actual value
                                             model_type=sam_model
                                         )

From 757863c023843ad61ea69dbec072a97b875cbc78 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 22:42:35 +0200
Subject: [PATCH 020/101] feat: wrap sam model in model patcher for predict

---
 experiments_mask_generation.py |   4 +-
 extras/inpaint_mask.py         |   9 +-
 extras/sam/predictor.py        | 288 +++++++++++++++++++++++++++++++++
 3 files changed, 293 insertions(+), 8 deletions(-)
 create mode 100644 extras/sam/predictor.py

diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
index 538ad712..8e32c29b 100644
--- a/experiments_mask_generation.py
+++ b/experiments_mask_generation.py
@@ -12,9 +12,9 @@ sam_options = SAMOptions(
     dino_prompt='eye',
     dino_box_threshold=0.3,
     dino_text_threshold=0.25,
-    box_erode_or_dilate=0,
+    dino_erode_or_dilate=0,
+    dino_debug=False,
     max_num_boxes=2,
-    sam_checkpoint="./models/sam/sam_vit_l.safetensors",
     model_type="vit_l"
 )
 
diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 3b4d1cb6..2fd776d8 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -1,7 +1,8 @@
 import numpy as np
 import torch
+from extras.sam.predictor import SamPredictor
 from rembg import remove, new_session
-from segment_anything import sam_model_registry, SamPredictor
+from segment_anything import sam_model_registry
 from segment_anything.utils.amg import remove_small_regions
 
 from extras.GroundingDINO.util.inference import default_groundingdino
@@ -97,12 +98,8 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
     boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
 
-    # TODO add model patcher for model logic and device management
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-
     sam_checkpoint = modules.config.download_sam_model(sam_options.model_type)
     sam = sam_model_registry[sam_options.model_type](checkpoint=sam_checkpoint)
-    sam.to(device=device)
 
     sam_predictor = SamPredictor(sam)
     final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
@@ -114,7 +111,7 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
         masks, _, _ = sam_predictor.predict_torch(
             point_coords=None,
             point_labels=None,
-            boxes=transformed_boxes.to(device),
+            boxes=transformed_boxes,
             multimask_output=False,
         )
 
diff --git a/extras/sam/predictor.py b/extras/sam/predictor.py
new file mode 100644
index 00000000..337c549b
--- /dev/null
+++ b/extras/sam/predictor.py
@@ -0,0 +1,288 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+from ldm_patched.modules import model_management
+from ldm_patched.modules.model_patcher import ModelPatcher
+
+from segment_anything.modeling import Sam
+
+from typing import Optional, Tuple
+
+from segment_anything.utils.transforms import ResizeLongestSide
+
+
+class SamPredictor:
+    def __init__(
+        self,
+        model: Sam,
+        load_device=model_management.text_encoder_device(),  # device images/prompts are moved to before inference
+        offload_device=model_management.text_encoder_offload_device()  # device the model is moved to at construction
+    ) -> None:
+        """
+        Uses SAM to calculate the image embedding for an image, and then
+        allows repeated, efficient mask prediction given prompts.
+
+        Arguments:
+          model (Sam): The model to use for mask prediction.
+        """
+        super().__init__()
+
+        self.load_device = load_device
+        self.offload_device = offload_device
+        # can't use model.half() here as slow_conv2d_cpu is not implemented for half
+        model.to(self.offload_device)
+
+        self.patcher = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)
+
+        self.transform = ResizeLongestSide(model.image_encoder.img_size)
+        self.reset_image()
+
+    def set_image(
+        self,
+        image: np.ndarray,
+        image_format: str = "RGB",
+    ) -> None:
+        """
+        Calculates the image embeddings for the provided image, allowing
+        masks to be predicted with the 'predict' method.
+
+        Arguments:
+          image (np.ndarray): The image for calculating masks. Expects an
+            image in HWC uint8 format, with pixel values in [0, 255].
+          image_format (str): The color format of the image, in ['RGB', 'BGR'].
+        """
+        assert image_format in [
+            "RGB",
+            "BGR",
+        ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
+        if image_format != self.patcher.model.image_format:
+            image = image[..., ::-1]  # reverse the last (channel) axis to match the model's format
+
+        # Transform the image to the form expected by the model
+        input_image = self.transform.apply_image(image)
+        input_image_torch = torch.as_tensor(input_image, device=self.load_device)
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+
+        self.set_torch_image(input_image_torch, image.shape[:2])
+
+    @torch.no_grad()
+    def set_torch_image(
+        self,
+        transformed_image: torch.Tensor,
+        original_image_size: Tuple[int, ...],
+    ) -> None:
+        """
+        Calculates the image embeddings for the provided image, allowing
+        masks to be predicted with the 'predict' method. Expects the input
+        image to be already transformed to the format expected by the model.
+
+        Arguments:
+          transformed_image (torch.Tensor): The input image, with shape
+            1x3xHxW, which has been transformed with ResizeLongestSide.
+          original_image_size (tuple(int, int)): The size of the image
+            before transformation, in (H, W) format.
+        """
+        assert (
+            len(transformed_image.shape) == 4
+            and transformed_image.shape[1] == 3
+            and max(*transformed_image.shape[2:]) == self.patcher.model.image_encoder.img_size
+        ), f"set_torch_image input must be BCHW with long side {self.patcher.model.image_encoder.img_size}."
+        self.reset_image()
+
+        self.original_size = original_image_size
+        self.input_size = tuple(transformed_image.shape[-2:])
+        model_management.load_model_gpu(self.patcher)  # NOTE(review): presumably puts the model on load_device - confirm
+        input_image = self.patcher.model.preprocess(transformed_image.to(self.load_device))
+        self.features = self.patcher.model.image_encoder(input_image)
+        self.is_image_set = True
+
+    def predict(
+        self,
+        point_coords: Optional[np.ndarray] = None,
+        point_labels: Optional[np.ndarray] = None,
+        box: Optional[np.ndarray] = None,
+        mask_input: Optional[np.ndarray] = None,
+        multimask_output: bool = True,
+        return_logits: bool = False,
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """
+        Predict masks for the given input prompts, using the currently set image.
+
+        Arguments:
+          point_coords (np.ndarray or None): A Nx2 array of point prompts to the
+            model. Each point is in (X,Y) in pixels.
+          point_labels (np.ndarray or None): A length N array of labels for the
+            point prompts. 1 indicates a foreground point and 0 indicates a
+            background point.
+          box (np.ndarray or None): A length 4 array given a box prompt to the
+            model, in XYXY format.
+          mask_input (np.ndarray): A low resolution mask input to the model, typically
+            coming from a previous prediction iteration. Has form 1xHxW, where
+            for SAM, H=W=256.
+          multimask_output (bool): If true, the model will return three masks.
+            For ambiguous input prompts (such as a single click), this will often
+            produce better masks than a single prediction. If only a single
+            mask is needed, the model's predicted quality score can be used
+            to select the best mask. For non-ambiguous prompts, such as multiple
+            input prompts, multimask_output=False can give better results.
+          return_logits (bool): If true, returns un-thresholded masks logits
+            instead of a binary mask.
+
+        Returns:
+          (np.ndarray): The output masks in CxHxW format, where C is the
+            number of masks, and (H, W) is the original image size.
+          (np.ndarray): An array of length C containing the model's
+            predictions for the quality of each mask.
+          (np.ndarray): An array of shape CxHxW, where C is the number
+            of masks and H=W=256. These low resolution logits can be passed to
+            a subsequent iteration as mask input.
+        """
+        if not self.is_image_set:
+            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+
+        # Transform input prompts
+        coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
+        if point_coords is not None:
+            assert (
+                point_labels is not None
+            ), "point_labels must be supplied if point_coords is supplied."
+            point_coords = self.transform.apply_coords(point_coords, self.original_size)
+            coords_torch = torch.as_tensor(point_coords, dtype=torch.float, device=self.load_device)
+            labels_torch = torch.as_tensor(point_labels, dtype=torch.int, device=self.load_device)
+            coords_torch, labels_torch = coords_torch[None, :, :], labels_torch[None, :]
+        if box is not None:
+            box = self.transform.apply_boxes(box, self.original_size)
+            box_torch = torch.as_tensor(box, dtype=torch.float, device=self.load_device)
+            box_torch = box_torch[None, :]
+        if mask_input is not None:
+            mask_input_torch = torch.as_tensor(mask_input, dtype=torch.float, device=self.load_device)
+            mask_input_torch = mask_input_torch[None, :, :, :]
+
+        masks, iou_predictions, low_res_masks = self.predict_torch(
+            coords_torch,
+            labels_torch,
+            box_torch,
+            mask_input_torch,
+            multimask_output,
+            return_logits=return_logits,
+        )
+
+        masks = masks[0].detach().cpu().numpy()
+        iou_predictions = iou_predictions[0].detach().cpu().numpy()
+        low_res_masks = low_res_masks[0].detach().cpu().numpy()
+        return masks, iou_predictions, low_res_masks
+
+    @torch.no_grad()
+    def predict_torch(
+        self,
+        point_coords: Optional[torch.Tensor],
+        point_labels: Optional[torch.Tensor],
+        boxes: Optional[torch.Tensor] = None,
+        mask_input: Optional[torch.Tensor] = None,
+        multimask_output: bool = True,
+        return_logits: bool = False,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Predict masks for the given input prompts, using the currently set image.
+        Input prompts are batched torch tensors and are expected to already be
+        transformed to the input frame using ResizeLongestSide.
+
+        Arguments:
+          point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
+            model. Each point is in (X,Y) in pixels.
+          point_labels (torch.Tensor or None): A BxN array of labels for the
+            point prompts. 1 indicates a foreground point and 0 indicates a
+            background point.
+          boxes (torch.Tensor or None): A Bx4 array given a box prompt to the
+            model, in XYXY format.
+          mask_input (torch.Tensor or None): A low resolution mask input to the model, typically
+            coming from a previous prediction iteration. Has form Bx1xHxW, where
+            for SAM, H=W=256. Masks returned by a previous iteration of the
+            predict method do not need further transformation.
+          multimask_output (bool): If true, the model will return three masks.
+            For ambiguous input prompts (such as a single click), this will often
+            produce better masks than a single prediction. If only a single
+            mask is needed, the model's predicted quality score can be used
+            to select the best mask. For non-ambiguous prompts, such as multiple
+            input prompts, multimask_output=False can give better results.
+          return_logits (bool): If true, returns un-thresholded masks logits
+            instead of a binary mask.
+
+        Returns:
+          (torch.Tensor): The output masks in BxCxHxW format, where C is the
+            number of masks, and (H, W) is the original image size.
+          (torch.Tensor): An array of shape BxC containing the model's
+            predictions for the quality of each mask.
+          (torch.Tensor): An array of shape BxCxHxW, where C is the number
+            of masks and H=W=256. These low res logits can be passed to
+            a subsequent iteration as mask input.
+        """
+        if not self.is_image_set:
+            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+
+        if point_coords is not None:
+            points = (point_coords.to(self.load_device), point_labels.to(self.load_device))
+        else:
+            points = None
+
+        # move box/mask prompts to load_device, then ask model_management to load the patched model
+        if boxes is not None:
+            boxes = boxes.to(self.load_device)
+        if mask_input is not None:
+            mask_input = mask_input.to(self.load_device)
+        model_management.load_model_gpu(self.patcher)
+
+        # Embed prompts
+        sparse_embeddings, dense_embeddings = self.patcher.model.prompt_encoder(
+            points=points,
+            boxes=boxes,
+            masks=mask_input,
+        )
+
+        # Predict masks
+        low_res_masks, iou_predictions = self.patcher.model.mask_decoder(
+            image_embeddings=self.features,
+            image_pe=self.patcher.model.prompt_encoder.get_dense_pe(),
+            sparse_prompt_embeddings=sparse_embeddings,
+            dense_prompt_embeddings=dense_embeddings,
+            multimask_output=multimask_output,
+        )
+
+        # Upscale the masks to the original image resolution
+        masks = self.patcher.model.postprocess_masks(low_res_masks, self.input_size, self.original_size)
+
+        if not return_logits:
+            masks = masks > self.patcher.model.mask_threshold
+
+        return masks, iou_predictions, low_res_masks
+
+    def get_image_embedding(self) -> torch.Tensor:
+        """
+        Returns the image embeddings for the currently set image, with
+        shape 1xCxHxW, where C is the embedding dimension and (H,W) are
+        the embedding spatial dimension of SAM (typically C=256, H=W=64).
+        """
+        if not self.is_image_set:
+            raise RuntimeError(
+                "An image must be set with .set_image(...) to generate an embedding."
+            )
+        assert self.features is not None, "Features must exist if an image has been set."
+        return self.features
+
+    @property
+    def device(self) -> torch.device:
+        return self.patcher.model.device
+
+    def reset_image(self) -> None:
+        """Resets the currently set image."""
+        self.is_image_set = False
+        self.features = None
+        self.orig_h = None  # NOTE: orig_h/orig_w/input_h/input_w are only assigned here; nothing in this class reads them
+        self.orig_w = None
+        self.input_h = None
+        self.input_w = None

From 29967d3a185ac65aa42de564a8899056d3fa779a Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 23:23:38 +0200
Subject: [PATCH 021/101] feat: add dino_erode_or_dilate and dino_debug again

---
 extras/inpaint_mask.py | 45 +++++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 2fd776d8..a4f0e7c4 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -1,13 +1,12 @@
+import modules.config
 import numpy as np
 import torch
+from extras.GroundingDINO.util.inference import default_groundingdino
 from extras.sam.predictor import SamPredictor
 from rembg import remove, new_session
 from segment_anything import sam_model_registry
 from segment_anything.utils.amg import remove_small_regions
 
-from extras.GroundingDINO.util.inference import default_groundingdino
-import modules.config
-
 
 class SAMOptions:
     def __init__(self,
@@ -26,6 +25,7 @@ class SAMOptions:
         self.dino_box_threshold = dino_box_threshold
         self.dino_text_threshold = dino_text_threshold
         self.dino_erode_or_dilate = dino_erode_or_dilate
+        self.dino_debug = dino_debug
         self.max_num_boxes = max_num_boxes
         self.model_type = model_type
 
@@ -68,30 +68,6 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
         box_threshold=sam_options.dino_box_threshold,
         text_threshold=sam_options.dino_text_threshold
     )
-    # detection_boxes = detections.xyxy
-    # # use full image if no box has been found
-    # detection_boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(detection_boxes) == 0 else detection_boxes
-    #
-    #
-    # for idx, box in enumerate(detection_boxes):
-    #     box_list = box.tolist()
-    #     if dino_erode_or_dilate != 0:
-    #         box_list[0] -= dino_erode_or_dilate
-    #         box_list[1] -= dino_erode_or_dilate
-    #         box_list[2] += dino_erode_or_dilate
-    #         box_list[3] += dino_erode_or_dilate
-    #     extras['sam_prompt'] += [{"type": "rectangle", "data": box_list}]
-    #
-    # if debug_dino:
-    #     from PIL import ImageDraw, Image
-    #     debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
-    #     draw = ImageDraw.Draw(debug_dino_image)
-    #     for box in extras['sam_prompt']:
-    #         draw.rectangle(box['data'], fill="white")
-    #     return np.array(debug_dino_image)
-
-    # TODO add support for dino_erode_or_dilate again
-    # TODO add dino_debug again
 
     H, W = image.shape[0], image.shape[1]
     boxes = boxes * torch.Tensor([W, H, W, H])
@@ -107,6 +83,21 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     if boxes.size(0) > 0:
         sam_predictor.set_image(image)
 
+        for index in range(boxes.size(0)):
+            assert boxes.size(1) == 4
+            boxes[index][0] -= sam_options.dino_erode_or_dilate
+            boxes[index][1] -= sam_options.dino_erode_or_dilate
+            boxes[index][2] += sam_options.dino_erode_or_dilate
+            boxes[index][3] += sam_options.dino_erode_or_dilate
+
+        if sam_options.dino_debug:
+            from PIL import ImageDraw, Image
+            debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+            draw = ImageDraw.Draw(debug_dino_image)
+            for box in boxes.numpy():
+                draw.rectangle(box.tolist(), fill="white")
+            return np.array(debug_dino_image)
+
         transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
         masks, _, _ = sam_predictor.predict_torch(
             point_coords=None,

From ead1ed617a3dbf8024e1fcb5179651453f2e236a Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 23:46:25 +0200
Subject: [PATCH 022/101] refactor: remove adetailer code

---
 extras/adetailer/args.py                | 278 ------------------------
 extras/adetailer/common.py              | 161 --------------
 extras/adetailer/mask.py                | 269 -----------------------
 extras/adetailer/script.py              |  53 -----
 extras/adetailer/ultralytics_predict.py |  67 ------
 modules/async_worker.py                 |  60 ++---
 6 files changed, 23 insertions(+), 865 deletions(-)
 delete mode 100644 extras/adetailer/args.py
 delete mode 100644 extras/adetailer/common.py
 delete mode 100644 extras/adetailer/mask.py
 delete mode 100644 extras/adetailer/script.py
 delete mode 100644 extras/adetailer/ultralytics_predict.py

diff --git a/extras/adetailer/args.py b/extras/adetailer/args.py
deleted file mode 100644
index 08ad4a3a..00000000
--- a/extras/adetailer/args.py
+++ /dev/null
@@ -1,278 +0,0 @@
-from __future__ import annotations
-
-from collections import UserList
-from dataclasses import dataclass
-from functools import cached_property, partial
-from typing import Any, Literal, NamedTuple, Optional
-
-try:
-    from pydantic.v1 import (
-        BaseModel,
-        Extra,
-        NonNegativeFloat,
-        NonNegativeInt,
-        PositiveInt,
-        confloat,
-        conint,
-        validator,
-    )
-except ImportError:
-    from pydantic import (
-        BaseModel,
-        Extra,
-        NonNegativeFloat,
-        NonNegativeInt,
-        PositiveInt,
-        confloat,
-        conint,
-        validator,
-    )
-
-
-@dataclass
-class SkipImg2ImgOrig:
-    steps: int
-    sampler_name: str
-    width: int
-    height: int
-
-
-class Arg(NamedTuple):
-    attr: str
-    name: str
-
-
-class ArgsList(UserList):
-    @cached_property
-    def attrs(self) -> tuple[str, ...]:
-        return tuple(attr for attr, _ in self)
-
-    @cached_property
-    def names(self) -> tuple[str, ...]:
-        return tuple(name for _, name in self)
-
-
-class ADetailerArgs(BaseModel, extra=Extra.forbid):
-    ad_model: str = "None"
-    ad_model_classes: str = ""
-    ad_tap_enable: bool = True
-    ad_prompt: str = ""
-    ad_negative_prompt: str = ""
-    ad_confidence: confloat(ge=0.0, le=1.0) = 0.3
-    ad_mask_k_largest: NonNegativeInt = 0
-    ad_mask_min_ratio: confloat(ge=0.0, le=1.0) = 0.0
-    ad_mask_max_ratio: confloat(ge=0.0, le=1.0) = 1.0
-    ad_dilate_erode: int = 4
-    ad_x_offset: int = 0
-    ad_y_offset: int = 0
-    ad_mask_merge_invert: Literal["None", "Merge", "Merge and Invert"] = "None"
-    ad_mask_blur: NonNegativeInt = 4
-    ad_denoising_strength: confloat(ge=0.0, le=1.0) = 0.4
-    ad_inpaint_only_masked: bool = True
-    ad_inpaint_only_masked_padding: NonNegativeInt = 32
-    ad_use_inpaint_width_height: bool = False
-    ad_inpaint_width: PositiveInt = 512
-    ad_inpaint_height: PositiveInt = 512
-    ad_use_steps: bool = False
-    ad_steps: PositiveInt = 28
-    ad_use_cfg_scale: bool = False
-    ad_cfg_scale: NonNegativeFloat = 7.0
-    ad_use_checkpoint: bool = False
-    ad_checkpoint: Optional[str] = None
-    ad_use_vae: bool = False
-    ad_vae: Optional[str] = None
-    ad_use_sampler: bool = False
-    ad_sampler: str = "DPM++ 2M Karras"
-    ad_scheduler: str = "Use same scheduler"
-    ad_use_noise_multiplier: bool = False
-    ad_noise_multiplier: confloat(ge=0.5, le=1.5) = 1.0
-    ad_use_clip_skip: bool = False
-    ad_clip_skip: conint(ge=1, le=12) = 1
-    ad_restore_face: bool = False
-    ad_controlnet_model: str = "None"
-    ad_controlnet_module: str = "None"
-    ad_controlnet_weight: confloat(ge=0.0, le=1.0) = 1.0
-    ad_controlnet_guidance_start: confloat(ge=0.0, le=1.0) = 0.0
-    ad_controlnet_guidance_end: confloat(ge=0.0, le=1.0) = 1.0
-    is_api: bool = True
-
-    @validator("is_api", pre=True)
-    def is_api_validator(cls, v: Any):  # noqa: N805
-        "tuple is json serializable but cannot be made with json deserialize."
-        return type(v) is not tuple
-
-    @staticmethod
-    def ppop(
-        p: dict[str, Any],
-        key: str,
-        pops: list[str] | None = None,
-        cond: Any = None,
-    ) -> None:
-        if pops is None:
-            pops = [key]
-        if key not in p:
-            return
-        value = p[key]
-        cond = (not bool(value)) if cond is None else value == cond
-
-        if cond:
-            for k in pops:
-                p.pop(k, None)
-
-    def extra_params(self, suffix: str = "") -> dict[str, Any]:
-        if self.need_skip():
-            return {}
-
-        p = {name: getattr(self, attr) for attr, name in ALL_ARGS}
-        ppop = partial(self.ppop, p)
-
-        ppop("ADetailer model classes")
-        ppop("ADetailer prompt")
-        ppop("ADetailer negative prompt")
-        p.pop("ADetailer tap enable", None)  # always pop
-        ppop("ADetailer mask only top k largest", cond=0)
-        ppop("ADetailer mask min ratio", cond=0.0)
-        ppop("ADetailer mask max ratio", cond=1.0)
-        ppop("ADetailer x offset", cond=0)
-        ppop("ADetailer y offset", cond=0)
-        ppop("ADetailer mask merge invert", cond="None")
-        ppop("ADetailer inpaint only masked", ["ADetailer inpaint padding"])
-        ppop(
-            "ADetailer use inpaint width height",
-            [
-                "ADetailer use inpaint width height",
-                "ADetailer inpaint width",
-                "ADetailer inpaint height",
-            ],
-        )
-        ppop(
-            "ADetailer use separate steps",
-            ["ADetailer use separate steps", "ADetailer steps"],
-        )
-        ppop(
-            "ADetailer use separate CFG scale",
-            ["ADetailer use separate CFG scale", "ADetailer CFG scale"],
-        )
-        ppop(
-            "ADetailer use separate checkpoint",
-            ["ADetailer use separate checkpoint", "ADetailer checkpoint"],
-        )
-        ppop(
-            "ADetailer use separate VAE",
-            ["ADetailer use separate VAE", "ADetailer VAE"],
-        )
-        ppop(
-            "ADetailer use separate sampler",
-            [
-                "ADetailer use separate sampler",
-                "ADetailer sampler",
-                "ADetailer scheduler",
-            ],
-        )
-        ppop("ADetailer scheduler", cond="Use same scheduler")
-        ppop(
-            "ADetailer use separate noise multiplier",
-            ["ADetailer use separate noise multiplier", "ADetailer noise multiplier"],
-        )
-
-        ppop(
-            "ADetailer use separate CLIP skip",
-            ["ADetailer use separate CLIP skip", "ADetailer CLIP skip"],
-        )
-
-        ppop("ADetailer restore face")
-        ppop(
-            "ADetailer ControlNet model",
-            [
-                "ADetailer ControlNet model",
-                "ADetailer ControlNet module",
-                "ADetailer ControlNet weight",
-                "ADetailer ControlNet guidance start",
-                "ADetailer ControlNet guidance end",
-            ],
-            cond="None",
-        )
-        ppop("ADetailer ControlNet module", cond="None")
-        ppop("ADetailer ControlNet weight", cond=1.0)
-        ppop("ADetailer ControlNet guidance start", cond=0.0)
-        ppop("ADetailer ControlNet guidance end", cond=1.0)
-
-        if suffix:
-            p = {k + suffix: v for k, v in p.items()}
-
-        return p
-
-    def is_mediapipe(self) -> bool:
-        return self.ad_model.lower().startswith("mediapipe")
-
-    def need_skip(self) -> bool:
-        return self.ad_model == "None" or self.ad_tap_enable is False
-
-
-_all_args = [
-    ("ad_model", "ADetailer model"),
-    ("ad_model_classes", "ADetailer model classes"),
-    ("ad_tap_enable", "ADetailer tap enable"),
-    ("ad_prompt", "ADetailer prompt"),
-    ("ad_negative_prompt", "ADetailer negative prompt"),
-    ("ad_confidence", "ADetailer confidence"),
-    ("ad_mask_k_largest", "ADetailer mask only top k largest"),
-    ("ad_mask_min_ratio", "ADetailer mask min ratio"),
-    ("ad_mask_max_ratio", "ADetailer mask max ratio"),
-    ("ad_x_offset", "ADetailer x offset"),
-    ("ad_y_offset", "ADetailer y offset"),
-    ("ad_dilate_erode", "ADetailer dilate erode"),
-    ("ad_mask_merge_invert", "ADetailer mask merge invert"),
-    ("ad_mask_blur", "ADetailer mask blur"),
-    ("ad_denoising_strength", "ADetailer denoising strength"),
-    ("ad_inpaint_only_masked", "ADetailer inpaint only masked"),
-    ("ad_inpaint_only_masked_padding", "ADetailer inpaint padding"),
-    ("ad_use_inpaint_width_height", "ADetailer use inpaint width height"),
-    ("ad_inpaint_width", "ADetailer inpaint width"),
-    ("ad_inpaint_height", "ADetailer inpaint height"),
-    ("ad_use_steps", "ADetailer use separate steps"),
-    ("ad_steps", "ADetailer steps"),
-    ("ad_use_cfg_scale", "ADetailer use separate CFG scale"),
-    ("ad_cfg_scale", "ADetailer CFG scale"),
-    ("ad_use_checkpoint", "ADetailer use separate checkpoint"),
-    ("ad_checkpoint", "ADetailer checkpoint"),
-    ("ad_use_vae", "ADetailer use separate VAE"),
-    ("ad_vae", "ADetailer VAE"),
-    ("ad_use_sampler", "ADetailer use separate sampler"),
-    ("ad_sampler", "ADetailer sampler"),
-    ("ad_scheduler", "ADetailer scheduler"),
-    ("ad_use_noise_multiplier", "ADetailer use separate noise multiplier"),
-    ("ad_noise_multiplier", "ADetailer noise multiplier"),
-    ("ad_use_clip_skip", "ADetailer use separate CLIP skip"),
-    ("ad_clip_skip", "ADetailer CLIP skip"),
-    ("ad_restore_face", "ADetailer restore face"),
-    ("ad_controlnet_model", "ADetailer ControlNet model"),
-    ("ad_controlnet_module", "ADetailer ControlNet module"),
-    ("ad_controlnet_weight", "ADetailer ControlNet weight"),
-    ("ad_controlnet_guidance_start", "ADetailer ControlNet guidance start"),
-    ("ad_controlnet_guidance_end", "ADetailer ControlNet guidance end"),
-]
-
-_args = [Arg(*args) for args in _all_args]
-ALL_ARGS = ArgsList(_args)
-
-BBOX_SORTBY = [
-    "None",
-    "Position (left to right)",
-    "Position (center to edge)",
-    "Area (large to small)",
-]
-MASK_MERGE_INVERT = ["None", "Merge", "Merge and Invert"]
-
-_script_default = (
-    "dynamic_prompting",
-    "dynamic_thresholding",
-    "wildcard_recursive",
-    "wildcards",
-    "lora_block_weight",
-    "negpip",
-)
-SCRIPT_DEFAULT = ",".join(sorted(_script_default))
-
-_builtin_script = ("soft_inpainting", "hypertile_script")
-BUILTIN_SCRIPT = ",".join(sorted(_builtin_script))
\ No newline at end of file
diff --git a/extras/adetailer/common.py b/extras/adetailer/common.py
deleted file mode 100644
index f80103fc..00000000
--- a/extras/adetailer/common.py
+++ /dev/null
@@ -1,161 +0,0 @@
-from __future__ import annotations
-
-import os
-from collections import OrderedDict
-from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Generic, Optional, TypeVar
-
-from huggingface_hub import hf_hub_download
-from PIL import Image, ImageDraw
-from torchvision.transforms.functional import to_pil_image
-
-REPO_ID = "Bingsu/adetailer"
-
-T = TypeVar("T", int, float)
-
-
-@dataclass
-class PredictOutput(Generic[T]):
-    bboxes: list[list[T]] = field(default_factory=list)
-    masks: list[Image.Image] = field(default_factory=list)
-    preview: Optional[Image.Image] = None
-
-
-def hf_download(file: str, repo_id: str = REPO_ID) -> str:
-    try:
-        path = hf_hub_download(repo_id, file)
-    except Exception:
-        print(f"[ADetailer] Failed to load model {file!r} from huggingface")
-        path = "INVALID"
-    return path
-
-
-def safe_mkdir(path: str | os.PathLike[str]) -> None:
-    path = Path(path)
-    if not path.exists() and path.parent.exists() and os.access(path.parent, os.W_OK):
-        path.mkdir()
-
-
-def scan_model_dir(path: Path) -> list[Path]:
-    if not path.is_dir():
-        return []
-    return [p for p in path.rglob("*") if p.is_file() and p.suffix == ".pt"]
-
-
-def download_models(*names: str) -> dict[str, str]:
-    models = OrderedDict()
-    with ThreadPoolExecutor() as executor:
-        for name in names:
-            if "-world" in name:
-                models[name] = executor.submit(
-                    hf_download, name, repo_id="Bingsu/yolo-world-mirror"
-                )
-            else:
-                models[name] = executor.submit(hf_download, name)
-    return {name: future.result() for name, future in models.items()}
-
-
-def get_models(
-    *dirs: str | os.PathLike[str], huggingface: bool = True
-) -> OrderedDict[str, str]:
-    model_paths = []
-
-    for dir_ in dirs:
-        if not dir_:
-            continue
-        model_paths.extend(scan_model_dir(Path(dir_)))
-
-    models = OrderedDict()
-    if huggingface:
-        to_download = [
-            "face_yolov8n.pt",
-            "face_yolov8s.pt",
-            "hand_yolov8n.pt",
-            "person_yolov8n-seg.pt",
-            "person_yolov8s-seg.pt",
-            "yolov8x-worldv2.pt",
-        ]
-        models.update(download_models(*to_download))
-
-    models.update(
-        {
-            "mediapipe_face_full": "mediapipe_face_full",
-            "mediapipe_face_short": "mediapipe_face_short",
-            "mediapipe_face_mesh": "mediapipe_face_mesh",
-            "mediapipe_face_mesh_eyes_only": "mediapipe_face_mesh_eyes_only",
-        }
-    )
-
-    invalid_keys = [k for k, v in models.items() if v == "INVALID"]
-    for key in invalid_keys:
-        models.pop(key)
-
-    for path in model_paths:
-        if path.name in models:
-            continue
-        models[path.name] = str(path)
-
-    return models
-
-
-def create_mask_from_bbox(
-    bboxes: list[list[float]], shape: tuple[int, int]
-) -> list[Image.Image]:
-    """
-    Parameters
-    ----------
-        bboxes: list[list[float]]
-            list of [x1, y1, x2, y2]
-            bounding boxes
-        shape: tuple[int, int]
-            shape of the image (width, height)
-
-    Returns
-    -------
-        masks: list[Image.Image]
-        A list of masks
-
-    """
-    masks = []
-    for bbox in bboxes:
-        mask = Image.new("L", shape, 0)
-        mask_draw = ImageDraw.Draw(mask)
-        mask_draw.rectangle(bbox, fill=255)
-        masks.append(mask)
-    return masks
-
-
-def create_bbox_from_mask(
-    masks: list[Image.Image], shape: tuple[int, int]
-) -> list[list[int]]:
-    """
-    Parameters
-    ----------
-        masks: list[Image.Image]
-            A list of masks
-        shape: tuple[int, int]
-            shape of the image (width, height)
-
-    Returns
-    -------
-        bboxes: list[list[float]]
-        A list of bounding boxes
-
-    """
-    bboxes = []
-    for mask in masks:
-        mask = mask.resize(shape)
-        bbox = mask.getbbox()
-        if bbox is not None:
-            bboxes.append(list(bbox))
-    return bboxes
-
-
-def ensure_pil_image(image: Any, mode: str = "RGB") -> Image.Image:
-    if not isinstance(image, Image.Image):
-        image = to_pil_image(image)
-    if image.mode != mode:
-        image = image.convert(mode)
-    return image
\ No newline at end of file
diff --git a/extras/adetailer/mask.py b/extras/adetailer/mask.py
deleted file mode 100644
index 2faee71a..00000000
--- a/extras/adetailer/mask.py
+++ /dev/null
@@ -1,269 +0,0 @@
-from __future__ import annotations
-
-from enum import IntEnum
-from functools import partial, reduce
-from math import dist
-from typing import Any, TypeVar
-
-import cv2
-import numpy as np
-from PIL import Image, ImageChops
-
-from extras.adetailer.args import MASK_MERGE_INVERT
-from extras.adetailer.common import ensure_pil_image, PredictOutput
-
-
-class SortBy(IntEnum):
-    NONE = 0
-    LEFT_TO_RIGHT = 1
-    CENTER_TO_EDGE = 2
-    AREA = 3
-
-
-class MergeInvert(IntEnum):
-    NONE = 0
-    MERGE = 1
-    MERGE_INVERT = 2
-
-
-T = TypeVar("T", int, float)
-
-
-def _dilate(arr: np.ndarray, value: int) -> np.ndarray:
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
-    return cv2.dilate(arr, kernel, iterations=1)
-
-
-def _erode(arr: np.ndarray, value: int) -> np.ndarray:
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
-    return cv2.erode(arr, kernel, iterations=1)
-
-
-def dilate_erode(img: Image.Image, value: int) -> Image.Image:
-    """
-    The dilate_erode function takes an image and a value.
-    If the value is positive, it dilates the image by that amount.
-    If the value is negative, it erodes the image by that amount.
-
-    Parameters
-    ----------
-        img: PIL.Image.Image
-            the image to be processed
-        value: int
-            kernel size of dilation or erosion
-
-    Returns
-    -------
-        PIL.Image.Image
-            The image that has been dilated or eroded
-    """
-    if value == 0:
-        return img
-
-    arr = np.array(img)
-    arr = _dilate(arr, value) if value > 0 else _erode(arr, -value)
-
-    return Image.fromarray(arr)
-
-
-def offset(img: Image.Image, x: int = 0, y: int = 0) -> Image.Image:
-    """
-    The offset function takes an image and offsets it by a given x(→) and y(↑) value.
-
-    Parameters
-    ----------
-        mask: Image.Image
-            Pass the mask image to the function
-        x: int
-            →
-        y: int
-            ↑
-
-    Returns
-    -------
-        PIL.Image.Image
-            A new image that is offset by x and y
-    """
-    return ImageChops.offset(img, x, -y)
-
-
-def is_all_black(img: Image.Image | np.ndarray) -> bool:
-    if isinstance(img, Image.Image):
-        img = np.array(ensure_pil_image(img, "L"))
-    return cv2.countNonZero(img) == 0
-
-
-def has_intersection(im1: Any, im2: Any) -> bool:
-    arr1 = np.array(ensure_pil_image(im1, "L"))
-    arr2 = np.array(ensure_pil_image(im2, "L"))
-    return not is_all_black(cv2.bitwise_and(arr1, arr2))
-
-
-def bbox_area(bbox: list[T]) -> T:
-    return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
-
-
-def mask_preprocess(
-    masks: list[Image.Image],
-    kernel: int = 0,
-    x_offset: int = 0,
-    y_offset: int = 0,
-    merge_invert: int | MergeInvert | str = MergeInvert.NONE,
-) -> list[Image.Image]:
-    """
-    The mask_preprocess function takes a list of masks and preprocesses them.
-    It dilates and erodes the masks, and offsets them by x_offset and y_offset.
-
-    Parameters
-    ----------
-        masks: list[Image.Image]
-            A list of masks
-        kernel: int
-            kernel size of dilation or erosion
-        x_offset: int
-            →
-        y_offset: int
-            ↑
-
-    Returns
-    -------
-        list[Image.Image]
-            A list of processed masks
-    """
-    if not masks:
-        return []
-
-    if x_offset != 0 or y_offset != 0:
-        masks = [offset(m, x_offset, y_offset) for m in masks]
-
-    if kernel != 0:
-        masks = [dilate_erode(m, kernel) for m in masks]
-        masks = [m for m in masks if not is_all_black(m)]
-
-    return mask_merge_invert(masks, mode=merge_invert)
-
-
-# Bbox sorting
-def _key_left_to_right(bbox: list[T]) -> T:
-    """
-    Left to right
-
-    Parameters
-    ----------
-    bbox: list[int] | list[float]
-        list of [x1, y1, x2, y2]
-    """
-    return bbox[0]
-
-
-def _key_center_to_edge(bbox: list[T], *, center: tuple[float, float]) -> float:
-    """
-    Center to edge
-
-    Parameters
-    ----------
-    bbox: list[int] | list[float]
-        list of [x1, y1, x2, y2]
-    image: Image.Image
-        the image
-    """
-    bbox_center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
-    return dist(center, bbox_center)
-
-
-def _key_area(bbox: list[T]) -> T:
-    """
-    Large to small
-
-    Parameters
-    ----------
-    bbox: list[int] | list[float]
-        list of [x1, y1, x2, y2]
-    """
-    return -bbox_area(bbox)
-
-
-def sort_bboxes(
-    pred: PredictOutput[T], order: int | SortBy = SortBy.NONE
-) -> PredictOutput[T]:
-    if order == SortBy.NONE or len(pred.bboxes) <= 1:
-        return pred
-
-    if order == SortBy.LEFT_TO_RIGHT:
-        key = _key_left_to_right
-    elif order == SortBy.CENTER_TO_EDGE:
-        width, height = pred.preview.size
-        center = (width / 2, height / 2)
-        key = partial(_key_center_to_edge, center=center)
-    elif order == SortBy.AREA:
-        key = _key_area
-    else:
-        raise RuntimeError
-
-    items = len(pred.bboxes)
-    idx = sorted(range(items), key=lambda i: key(pred.bboxes[i]))
-    pred.bboxes = [pred.bboxes[i] for i in idx]
-    pred.masks = [pred.masks[i] for i in idx]
-    return pred
-
-
-# Filter by ratio
-def is_in_ratio(bbox: list[T], low: float, high: float, orig_area: int) -> bool:
-    area = bbox_area(bbox)
-    return low <= area / orig_area <= high
-
-
-def filter_by_ratio(
-    pred: PredictOutput[T], low: float, high: float
-) -> PredictOutput[T]:
-    if not pred.bboxes:
-        return pred
-
-    w, h = pred.preview.size
-    orig_area = w * h
-    items = len(pred.bboxes)
-    idx = [i for i in range(items) if is_in_ratio(pred.bboxes[i], low, high, orig_area)]
-    pred.bboxes = [pred.bboxes[i] for i in idx]
-    pred.masks = [pred.masks[i] for i in idx]
-    return pred
-
-
-def filter_k_largest(pred: PredictOutput[T], k: int = 0) -> PredictOutput[T]:
-    if not pred.bboxes or k == 0:
-        return pred
-    areas = [bbox_area(bbox) for bbox in pred.bboxes]
-    idx = np.argsort(areas)[-k:]
-    idx = idx[::-1]
-    pred.bboxes = [pred.bboxes[i] for i in idx]
-    pred.masks = [pred.masks[i] for i in idx]
-    return pred
-
-
-# Merge / Invert
-def mask_merge(masks: list[Image.Image]) -> list[Image.Image]:
-    arrs = [np.array(m) for m in masks]
-    arr = reduce(cv2.bitwise_or, arrs)
-    return [Image.fromarray(arr)]
-
-
-def mask_invert(masks: list[Image.Image]) -> list[Image.Image]:
-    return [ImageChops.invert(m) for m in masks]
-
-
-def mask_merge_invert(
-    masks: list[Image.Image], mode: int | MergeInvert | str
-) -> list[Image.Image]:
-    if isinstance(mode, str):
-        mode = MASK_MERGE_INVERT.index(mode)
-
-    if mode == MergeInvert.NONE or not masks:
-        return masks
-
-    if mode == MergeInvert.MERGE:
-        return mask_merge(masks)
-
-    if mode == MergeInvert.MERGE_INVERT:
-        merged = mask_merge(masks)
-        return mask_invert(merged)
-
-    raise RuntimeError
\ No newline at end of file
diff --git a/extras/adetailer/script.py b/extras/adetailer/script.py
deleted file mode 100644
index 05a4110e..00000000
--- a/extras/adetailer/script.py
+++ /dev/null
@@ -1,53 +0,0 @@
-from extras.adetailer.args import ADetailerArgs
-from extras.adetailer.common import get_models, PredictOutput
-from extras.adetailer.mask import filter_by_ratio, filter_k_largest, sort_bboxes, mask_preprocess
-from modules import config
-
-model_mapping = get_models(
-    config.path_adetailer,
-    huggingface=True,
-)
-
-
-def get_ad_model(name: str):
-    if name not in model_mapping:
-        msg = f"[-] ADetailer: Model {name!r} not found. Available models: {list(model_mapping.keys())}"
-        raise ValueError(msg)
-    return model_mapping[name]
-
-
-def pred_preprocessing(p, pred: PredictOutput, args: ADetailerArgs, inpaint_only_masked=False):
-    pred = filter_by_ratio(
-        pred, low=args.ad_mask_min_ratio, high=args.ad_mask_max_ratio
-    )
-    pred = filter_k_largest(pred, k=args.ad_mask_k_largest)
-    pred = sort_bboxes(pred)
-    masks = mask_preprocess(
-        pred.masks,
-        kernel=args.ad_dilate_erode,
-        x_offset=args.ad_x_offset,
-        y_offset=args.ad_y_offset,
-        merge_invert=args.ad_mask_merge_invert,
-    )
-
-    #if inpaint_only_masked:
-    # image_mask = self.get_image_mask(p)
-    # masks = self.inpaint_mask_filter(image_mask, masks)
-    return masks
-
-
-    # def get_image_mask(p) -> Image.Image:
-    #     mask = p.image_mask
-    #     if getattr(p, "inpainting_mask_invert", False):
-    #         mask = ImageChops.invert(mask)
-    #     mask = create_binary_mask(mask)
-    #
-    #     if is_skip_img2img(p):
-    #         if hasattr(p, "init_images") and p.init_images:
-    #             width, height = p.init_images[0].size
-    #         else:
-    #             msg = "[-] ADetailer: no init_images."
-    #             raise RuntimeError(msg)
-    #     else:
-    #         width, height = p.width, p.height
-    #     return images.resize_image(p.resize_mode, mask, width, height)
\ No newline at end of file
diff --git a/extras/adetailer/ultralytics_predict.py b/extras/adetailer/ultralytics_predict.py
deleted file mode 100644
index b028ea83..00000000
--- a/extras/adetailer/ultralytics_predict.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-import cv2
-from PIL import Image
-from torchvision.transforms.functional import to_pil_image
-
-from extras.adetailer.common import PredictOutput, create_mask_from_bbox
-
-if TYPE_CHECKING:
-    import torch
-    from ultralytics import YOLO, YOLOWorld
-
-
-def ultralytics_predict(
-    model_path: str | Path,
-    image: Image.Image,
-    confidence: float = 0.3,
-    device: str = "",
-    classes: str = "",
-) -> PredictOutput[float]:
-    from ultralytics import YOLO
-
-    model = YOLO(model_path)
-    apply_classes(model, model_path, classes)
-    pred = model(image, conf=confidence, device=device)
-
-    bboxes = pred[0].boxes.xyxy.cpu().numpy()
-    if bboxes.size == 0:
-        return PredictOutput()
-    bboxes = bboxes.tolist()
-
-    if pred[0].masks is None:
-        masks = create_mask_from_bbox(bboxes, image.size)
-    else:
-        masks = mask_to_pil(pred[0].masks.data, image.size)
-    preview = pred[0].plot()
-    preview = cv2.cvtColor(preview, cv2.COLOR_BGR2RGB)
-    preview = Image.fromarray(preview)
-
-    return PredictOutput(bboxes=bboxes, masks=masks, preview=preview)
-
-
-def apply_classes(model: YOLO | YOLOWorld, model_path: str | Path, classes: str):
-    if not classes or "-world" not in Path(model_path).stem:
-        return
-    parsed = [c.strip() for c in classes.split(",") if c.strip()]
-    if parsed:
-        model.set_classes(parsed)
-
-
-def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
-    """
-    Parameters
-    ----------
-    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
-        The device can be CUDA, but `to_pil_image` takes care of that.
-
-    shape: tuple[int, int]
-        (W, H) of the original image
-    """
-    n = masks.shape[0]
-    return [to_pil_image(masks[i], mode="L").resize(shape) for i in range(n)]
-
-
diff --git a/modules/async_worker.py b/modules/async_worker.py
index e8d536db..c7a2afa4 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1,4 +1,6 @@
 import threading
+
+from extras.inpaint_mask import generate_mask_from_image, SAMOptions
 from modules.patch import PatchSettings, patch_settings, patch_all
 
 patch_all()
@@ -1014,51 +1016,35 @@ def worker():
                              current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
                              switch, task, tasks, tiled, use_expansion, width, height)
 
-                # adetailer
-                progressbar(async_task, current_progress, 'Processing adetailer ...')
+                # stage2
+                progressbar(async_task, current_progress, 'Processing stage2 ...')
                 final_unet = pipeline.final_unet.clone()
 
                 for img in imgs:
-                    from extras.adetailer.ultralytics_predict import ultralytics_predict
-                    predictor = ultralytics_predict
-                    from extras.adetailer.script import get_ad_model
-                    ad_model = get_ad_model('face_yolov8n.pt')
+                    # TODO add stage2 check and options from inputs here
+                    mask = generate_mask_from_image(img, sam_options=SAMOptions(
+                        dino_prompt='eye'
+                    ))
 
-                    kwargs = {}
-                    kwargs["device"] = torch.device('cpu')
-                    kwargs["classes"] = ""
-                    from PIL import Image
-                    img2 = Image.fromarray(img)
-                    pred = predictor(ad_model, img2, **kwargs)
-
-                    if pred.preview is None:
-                        print('[ADetailer] nothing detected on image')
-                        continue
-
-                    from extras.adetailer.args import ADetailerArgs
-                    args = ADetailerArgs()
-                    from extras.adetailer.script import pred_preprocessing
-                    masks = pred_preprocessing(img, pred, args)
-                    merged_masks = np.maximum(*[np.array(mask) for mask in masks])
-                    async_task.yields.append(['preview', (current_progress, 'Loading ...', merged_masks)])
-                    # TODO also show do_not_show_finished_images=len(tasks) == 1 when adetailer is on
-                    yield_result(async_task, merged_masks, async_task.black_out_nsfw, False,
+                    async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
+                    # TODO also show do_not_show_finished_images=len(tasks) == 1
+                    yield_result(async_task, mask, async_task.black_out_nsfw, False,
                                  do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
                     # TODO make configurable
-                    denoising_strength_adetailer = 0.3
-                    inpaint_respective_field_adetailer = 0.0
-                    inpaint_head_model_path_adetailer = None
-                    inpaint_parameterized_adetailer = False
-                    goals_adetailer = ['inpaint']
-                    denoising_strength_adetailer, initial_latent_adetailer, width_adetailer, height_adetailer = apply_inpaint(
-                        async_task, None, inpaint_head_model_path_adetailer, img, merged_masks,
-                        inpaint_parameterized_adetailer, denoising_strength_adetailer,
-                        inpaint_respective_field_adetailer, switch, current_progress, True)
+                    denoising_strength_stage2 = 0.3
+                    inpaint_respective_field_stage2 = 0.0
+                    inpaint_head_model_path_stage2 = None
+                    inpaint_parameterized_stage2 = False
+                    goals_stage2 = ['inpaint']
+                    denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
+                        async_task, None, inpaint_head_model_path_stage2, img, mask,
+                        inpaint_parameterized_stage2, denoising_strength_stage2,
+                        inpaint_respective_field_stage2, switch, current_progress, True)
 
                     process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                                 current_task_id, denoising_strength_adetailer, final_scheduler_name, goals_adetailer,
-                                 initial_latent_adetailer, switch, task, tasks, tiled, use_expansion, width_adetailer,
-                                 height_adetailer)
+                                 current_task_id, denoising_strength_stage2, final_scheduler_name, goals_stage2,
+                                 initial_latent_stage2, switch, task, tasks, tiled, use_expansion, width_stage2,
+                                 height_stage2)
 
                     # reset unet and inpaint_worker
                     pipeline.final_unet = final_unet

From ab3f4df93581a2f05c2447e9f17cc2aa0422c6b7 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 23:46:50 +0200
Subject: [PATCH 023/101] feat: use vit_b as default for experiment

---
 experiments_mask_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
index 8e32c29b..272adfc5 100644
--- a/experiments_mask_generation.py
+++ b/experiments_mask_generation.py
@@ -15,7 +15,7 @@ sam_options = SAMOptions(
     dino_erode_or_dilate=0,
     dino_debug=False,
     max_num_boxes=2,
-    model_type="vit_l"
+    model_type='vit_b'
 )
 
 mask_image = generate_mask_from_image(image, sam_options=sam_options)

From b5b33361c96c7ba8d845264408798b91def37172 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 10 Jun 2024 23:47:09 +0200
Subject: [PATCH 024/101] feat: only process sam_options.dino_erode_or_dilate
 when set

---
 extras/inpaint_mask.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index a4f0e7c4..7bb671f5 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -83,12 +83,13 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     if boxes.size(0) > 0:
         sam_predictor.set_image(image)
 
-        for index in range(boxes.size(0)):
-            assert boxes.size(1) == 4
-            boxes[index][0] -= sam_options.dino_erode_or_dilate
-            boxes[index][1] -= sam_options.dino_erode_or_dilate
-            boxes[index][2] += sam_options.dino_erode_or_dilate
-            boxes[index][3] += sam_options.dino_erode_or_dilate
+        if sam_options.dino_erode_or_dilate != 0:
+            for index in range(boxes.size(0)):
+                assert boxes.size(1) == 4
+                boxes[index][0] -= sam_options.dino_erode_or_dilate
+                boxes[index][1] -= sam_options.dino_erode_or_dilate
+                boxes[index][2] += sam_options.dino_erode_or_dilate
+                boxes[index][3] += sam_options.dino_erode_or_dilate
 
         if sam_options.dino_debug:
             from PIL import ImageDraw, Image

From e055510f1bffd73692be99ec7f59952ebd0d9f94 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 11 Jun 2024 00:12:02 +0200
Subject: [PATCH 025/101] fix: use correct mask dimensions

---
 modules/async_worker.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index c7a2afa4..200b2875 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1025,16 +1025,17 @@ def worker():
                     mask = generate_mask_from_image(img, sam_options=SAMOptions(
                         dino_prompt='eye'
                     ))
+                    mask = mask[:, :, 0]
 
                     async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
                     # TODO also show do_not_show_finished_images=len(tasks) == 1
                     yield_result(async_task, mask, async_task.black_out_nsfw, False,
                                  do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
                     # TODO make configurable
-                    denoising_strength_stage2 = 0.3
+                    denoising_strength_stage2 = 0.5
                     inpaint_respective_field_stage2 = 0.0
                     inpaint_head_model_path_stage2 = None
-                    inpaint_parameterized_stage2 = False
+                    inpaint_parameterized_stage2 = False  # inpaint_engine = None, improve detail
                     goals_stage2 = ['inpaint']
                     denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
                         async_task, None, inpaint_head_model_path_stage2, img, mask,

From 190c4b0a6f1df829abff9e1ce0171ccb89f7ebaf Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 12 Jun 2024 21:51:49 +0200
Subject: [PATCH 026/101] fix: do not set denoising_strength to async_worker
 dynamically

use parameter input and output
---
 modules/async_worker.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 200b2875..7343d903 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -403,11 +403,11 @@ def worker():
 
     def apply_vary(async_task, denoising_strength, switch):
         if 'subtle' in async_task.uov_method:
-            async_task.denoising_strength = 0.5
+            denoising_strength = 0.5
         if 'strong' in async_task.uov_method:
-            async_task.denoising_strength = 0.85
+            denoising_strength = 0.85
         if async_task.overwrite_vary_strength > 0:
-            async_task.denoising_strength = async_task.overwrite_vary_strength
+            denoising_strength = async_task.overwrite_vary_strength
         shape_ceil = get_image_shape_ceil(async_task.uov_input_image)
         if shape_ceil < 1024:
             print(f'[Vary] Image is resized because it is too small.')
@@ -429,7 +429,7 @@ def worker():
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((width, height))}.')
-        return initial_latent, width, height
+        return denoising_strength, initial_latent, width, height
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
                       inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch,
@@ -950,7 +950,7 @@ def worker():
             progressbar(async_task, 7, 'Image processing ...')
 
         if 'vary' in goals:
-            initial_latent, width, height = apply_vary(async_task, denoising_strength, switch)
+            denoising_strength, initial_latent, width, height = apply_vary(async_task, denoising_strength, switch)
 
         if 'upscale' in goals:
             try:

From 9998b52dd2dc1406f2e91e033200de61fea35b61 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 12 Jun 2024 21:52:48 +0200
Subject: [PATCH 027/101] feat: add gradio elements for input

---
 css/style.css           |  2 +-
 modules/async_worker.py | 78 +++++++++++++++++++++++++++--------------
 modules/config.py       | 15 ++++++++
 modules/flags.py        |  2 +-
 webui.py                | 36 +++++++++++++++++++
 5 files changed, 104 insertions(+), 29 deletions(-)

diff --git a/css/style.css b/css/style.css
index 6ed0f628..b10e644b 100644
--- a/css/style.css
+++ b/css/style.css
@@ -99,7 +99,7 @@ div:has(> #positive_prompt) {
 }
 
 .advanced_check_row {
-  width: 250px !important;
+  width: 310px !important;
 }
 
 .min_check {
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 7343d903..0367d12a 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -2,6 +2,7 @@ import threading
 
 from extras.inpaint_mask import generate_mask_from_image, SAMOptions
 from modules.patch import PatchSettings, patch_settings, patch_all
+import modules.config
 
 patch_all()
 
@@ -107,6 +108,23 @@ class AsyncTask:
             if cn_img is not None:
                 self.cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])
 
+        self.stage2_ctrls = []
+        for _ in range(modules.config.default_max_stage2_tabs):
+            stage2_enabled = args.pop()
+            # stage2_mode = args.pop()
+            stage2_mask_dino_prompt_text = args.pop()
+            stage2_mask_sam_model = args.pop()
+            stage2_mask_box_threshold = args.pop()
+            stage2_mask_text_threshold = args.pop()
+            if stage2_enabled:
+                self.stage2_ctrls.append([
+                    # stage2_mode,
+                    stage2_mask_dino_prompt_text,
+                    stage2_mask_sam_model,
+                    stage2_mask_box_threshold,
+                    stage2_mask_text_threshold
+                ])
+
 
 async_tasks = []
 
@@ -131,7 +149,6 @@ def worker():
     import modules.default_pipeline as pipeline
     import modules.core as core
     import modules.flags as flags
-    import modules.config
     import modules.patch
     import ldm_patched.modules.model_management
     import extras.preprocessors as preprocessors
@@ -1019,37 +1036,44 @@ def worker():
                 # stage2
                 progressbar(async_task, current_progress, 'Processing stage2 ...')
                 final_unet = pipeline.final_unet.clone()
+                if len(async_task.stage2_ctrls) == 0:
+                    continue
 
                 for img in imgs:
-                    # TODO add stage2 check and options from inputs here
-                    mask = generate_mask_from_image(img, sam_options=SAMOptions(
-                        dino_prompt='eye'
-                    ))
-                    mask = mask[:, :, 0]
+                    for stage2_mask_dino_prompt_text, stage2_mask_sam_model, stage2_mask_box_threshold, stage2_mask_text_threshold in async_task.stage2_ctrls:
+                        mask = generate_mask_from_image(img, sam_options=SAMOptions(
+                            dino_prompt=stage2_mask_dino_prompt_text,
+                            model_type=stage2_mask_sam_model,
+                            dino_box_threshold=stage2_mask_box_threshold,
+                            dino_text_threshold=stage2_mask_text_threshold,
+                            dino_debug=True
+                        ))
+                        mask = mask[:, :, 0]
 
-                    async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
-                    # TODO also show do_not_show_finished_images=len(tasks) == 1
-                    yield_result(async_task, mask, async_task.black_out_nsfw, False,
-                                 do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
-                    # TODO make configurable
-                    denoising_strength_stage2 = 0.5
-                    inpaint_respective_field_stage2 = 0.0
-                    inpaint_head_model_path_stage2 = None
-                    inpaint_parameterized_stage2 = False  # inpaint_engine = None, improve detail
-                    goals_stage2 = ['inpaint']
-                    denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
-                        async_task, None, inpaint_head_model_path_stage2, img, mask,
-                        inpaint_parameterized_stage2, denoising_strength_stage2,
-                        inpaint_respective_field_stage2, switch, current_progress, True)
+                        async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
+                        # TODO also show do_not_show_finished_images=len(tasks) == 1
+                        yield_result(async_task, mask, async_task.black_out_nsfw, False,
+                                     do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+                        # TODO make configurable
+                        denoising_strength_stage2 = 0.5
+                        inpaint_respective_field_stage2 = 0.0
+                        inpaint_head_model_path_stage2 = None
+                        inpaint_parameterized_stage2 = False  # inpaint_engine = None, improve detail
+                        goals_stage2 = ['inpaint']
+                        denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
+                            async_task, None, inpaint_head_model_path_stage2, img, mask,
+                            inpaint_parameterized_stage2, denoising_strength_stage2,
+                            inpaint_respective_field_stage2, switch, current_progress, True)
 
-                    process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                                 current_task_id, denoising_strength_stage2, final_scheduler_name, goals_stage2,
-                                 initial_latent_stage2, switch, task, tasks, tiled, use_expansion, width_stage2,
-                                 height_stage2)
+                        imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                                     current_task_id, denoising_strength_stage2, final_scheduler_name, goals_stage2,
+                                     initial_latent_stage2, switch, task, tasks, tiled, use_expansion, width_stage2,
+                                     height_stage2)
 
-                    # reset unet and inpaint_worker
-                    pipeline.final_unet = final_unet
-                    inpaint_worker.current_task = None
+                        # reset and prepare next iteration
+                        img = imgs2[0]
+                        pipeline.final_unet = final_unet
+                        inpaint_worker.current_task = None
 
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
diff --git a/modules/config.py b/modules/config.py
index 1fa7d87b..16e3043a 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -502,6 +502,20 @@ example_inpaint_prompts = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
+example_stage2_prompts = get_config_item_or_set_default(
+    key='example_stage2_prompts',
+    default_value=[
+        'face', 'eye', 'mouth', 'hair', 'hand', 'body'
+    ],
+    validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
+    expected_type=list
+)
+default_max_stage2_tabs = get_config_item_or_set_default(
+    key='default_max_stage2_tabs',
+    default_value=3,
+    validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
+    expected_type=int
+)
 default_black_out_nsfw = get_config_item_or_set_default(
     key='default_black_out_nsfw',
     default_value=False,
@@ -528,6 +542,7 @@ metadata_created_by = get_config_item_or_set_default(
 )
 
 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
+example_stage2_prompts = [[x] for x in example_stage2_prompts]
 
 default_inpaint_mask_model = get_config_item_or_set_default(
     key='default_inpaint_mask_model',
diff --git a/modules/flags.py b/modules/flags.py
index ed9a5606..1169bd5b 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -76,7 +76,7 @@ output_formats = ['png', 'jpeg', 'webp']
 
 inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
 inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = [('base', 'vit_b'), ('large', 'vit_l'), ('huge', 'vit_h')]
+inpaint_mask_sam_model = ['vit_b', 'vit_l', 'vit_h']
 
 inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
 inpaint_option_default = 'Inpaint or Outpaint (default)'
diff --git a/webui.py b/webui.py
index 5af17a00..2f0c8aa9 100644
--- a/webui.py
+++ b/webui.py
@@ -147,6 +147,7 @@ with shared.gradio_root:
                     skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
             with gr.Row(elem_classes='advanced_check_row'):
                 input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
+                stage2_checkbox = gr.Checkbox(label='Stage2', value=False, container=False, elem_classes='min_check')
                 advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
             with gr.Row(visible=False) as image_input_panel:
                 with gr.Tabs():
@@ -297,6 +298,37 @@ with shared.gradio_root:
                         metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image,
                                                     outputs=metadata_json, queue=False, show_progress=True)
 
+            with gr.Row(visible=False) as stage2_input_panel:
+                with gr.Tabs():
+                    stage2_ctrls = []
+                    for index in range(modules.config.default_max_stage2_tabs):
+                        with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
+                            stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
+                            with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
+                                # stage2_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_detail, label='Method', interactive=True)
+                                stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', info='Use singular whenever possible', interactive=True)
+                                example_stage2_mask_dino_prompt_text = gr.Dataset(samples=modules.config.example_stage2_prompts,
+                                                                                  label='Additional Prompt Quick List',
+                                                                                  components=[stage2_mask_dino_prompt_text],
+                                                                                  visible=True)
+                                example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
+
+                                with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
+                                    stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
+                                    stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
+                                    stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
+
+                        stage2_ctrls += [
+                            stage2_enabled,
+                            # stage2_mode,
+                            stage2_mask_dino_prompt_text,
+                            stage2_mask_sam_model,
+                            stage2_mask_box_threshold,
+                            stage2_mask_text_threshold
+                        ]
+
+                        stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
+                                              outputs=stage2_accordion, queue=False, show_progress=False)
             switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
             down_js = "() => {viewer_to_bottom();}"
 
@@ -311,6 +343,9 @@ with shared.gradio_root:
             desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
 
+            stage2_checkbox.change(lambda x: gr.update(visible=x), inputs=stage2_checkbox,
+                                        outputs=stage2_input_panel, queue=False, show_progress=False, _js=switch_js)
+
         with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column:
             with gr.Tab(label='Settings'):
                 if not args_manager.args.disable_preset_selection:
@@ -772,6 +807,7 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
+        ctrls += stage2_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):
             loaded_json = None

From dbc844804b3eb3fbf0e779b54b3e40e87ed69393 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 12 Jun 2024 22:16:02 +0200
Subject: [PATCH 028/101] feat: add handling for stage2_mask_sam_max_num_boxes
 and config

---
 modules/async_worker.py | 21 ++++++++++++++-------
 modules/config.py       | 10 ++++++++--
 webui.py                | 20 +++++++++++---------
 3 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 0367d12a..3191895f 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -108,21 +108,26 @@ class AsyncTask:
             if cn_img is not None:
                 self.cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])
 
+        self.debugging_dino = args.pop()
+        self.dino_erode_or_dilate = args.pop()
+
         self.stage2_ctrls = []
-        for _ in range(modules.config.default_max_stage2_tabs):
+        for _ in range(modules.config.default_stage2_tabs):
             stage2_enabled = args.pop()
             # stage2_mode = args.pop()
             stage2_mask_dino_prompt_text = args.pop()
-            stage2_mask_sam_model = args.pop()
             stage2_mask_box_threshold = args.pop()
             stage2_mask_text_threshold = args.pop()
+            stage2_mask_sam_max_num_boxes = args.pop()
+            stage2_mask_sam_model = args.pop()
             if stage2_enabled:
                 self.stage2_ctrls.append([
                     # stage2_mode,
                     stage2_mask_dino_prompt_text,
-                    stage2_mask_sam_model,
                     stage2_mask_box_threshold,
-                    stage2_mask_text_threshold
+                    stage2_mask_text_threshold,
+                    stage2_mask_sam_max_num_boxes,
+                    stage2_mask_sam_model,
                 ])
 
 
@@ -1040,13 +1045,15 @@ def worker():
                     continue
 
                 for img in imgs:
-                    for stage2_mask_dino_prompt_text, stage2_mask_sam_model, stage2_mask_box_threshold, stage2_mask_text_threshold in async_task.stage2_ctrls:
+                    for stage2_mask_dino_prompt_text, stage2_mask_box_threshold, stage2_mask_text_threshold, stage2_mask_sam_max_num_boxes, stage2_mask_sam_model in async_task.stage2_ctrls:
                         mask = generate_mask_from_image(img, sam_options=SAMOptions(
                             dino_prompt=stage2_mask_dino_prompt_text,
-                            model_type=stage2_mask_sam_model,
                             dino_box_threshold=stage2_mask_box_threshold,
                             dino_text_threshold=stage2_mask_text_threshold,
-                            dino_debug=True
+                            dino_erode_or_dilate=async_task.dino_erode_or_dilate,
+                            dino_debug=async_task.debugging_dino,
+                            max_num_boxes=stage2_mask_sam_max_num_boxes,
+                            model_type=stage2_mask_sam_model
                         ))
                         mask = mask[:, :, 0]
 
diff --git a/modules/config.py b/modules/config.py
index 16e3043a..ef8e9576 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -510,12 +510,18 @@ example_stage2_prompts = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
-default_max_stage2_tabs = get_config_item_or_set_default(
-    key='default_max_stage2_tabs',
+default_stage2_tabs = get_config_item_or_set_default(
+    key='default_stage2_tabs',
     default_value=3,
     validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
     expected_type=int
 )
+default_sam_max_num_boxes = get_config_item_or_set_default(
+    key='default_sam_max_num_boxes',
+    default_value=2,
+    validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
+    expected_type=int
+)
 default_black_out_nsfw = get_config_item_or_set_default(
     key='default_black_out_nsfw',
     default_value=False,
diff --git a/webui.py b/webui.py
index 2f0c8aa9..839cd1db 100644
--- a/webui.py
+++ b/webui.py
@@ -232,7 +232,7 @@ with shared.gradio_root:
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
-                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, dino_erode_or_dilate, debug_dino):
+                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, dino_erode_or_dilate, dino_debug):
                                     from extras.inpaint_mask import generate_mask_from_image
 
                                     extras = {}
@@ -245,7 +245,7 @@ with shared.gradio_root:
                                             dino_box_threshold=box_threshold,
                                             dino_text_threshold=text_threshold,
                                             dino_erode_or_dilate=dino_erode_or_dilate,
-                                            dino_debug=debug_dino,
+                                            dino_debug=dino_debug,
                                             max_num_boxes=2, #TODO replace with actual value
                                             model_type=sam_model
                                         )
@@ -301,7 +301,7 @@ with shared.gradio_root:
             with gr.Row(visible=False) as stage2_input_panel:
                 with gr.Tabs():
                     stage2_ctrls = []
-                    for index in range(modules.config.default_max_stage2_tabs):
+                    for index in range(modules.config.default_stage2_tabs):
                         with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
                             stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
                             with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
@@ -317,14 +317,16 @@ with shared.gradio_root:
                                     stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
                                     stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
                                     stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
+                                    stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
 
                         stage2_ctrls += [
                             stage2_enabled,
                             # stage2_mode,
                             stage2_mask_dino_prompt_text,
-                            stage2_mask_sam_model,
                             stage2_mask_box_threshold,
-                            stage2_mask_text_threshold
+                            stage2_mask_text_threshold,
+                            stage2_mask_sam_max_num_boxes,
+                            stage2_mask_sam_model,
                         ]
 
                         stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
@@ -598,8 +600,8 @@ with shared.gradio_root:
 
                     with gr.Tab(label='Inpaint'):
                         debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False)
-                        debug_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
-                                                 info='Used for SAM object detection and box generation')
+                        debugging_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
+                                                     info='Used for SAM object detection and box generation')
                         inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
                         inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                      value=modules.config.default_inpaint_engine_version,
@@ -779,7 +781,7 @@ with shared.gradio_root:
                                    inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
                                            inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
                                            inpaint_mask_box_threshold, inpaint_mask_text_threshold, dino_erode_or_dilate,
-                                           debug_dino],
+                                           debugging_dino],
                                    outputs=inpaint_mask_image, show_progress=True, queue=True)
 
         ctrls = [currentTask, generate_image_grid]
@@ -807,7 +809,7 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
-        ctrls += stage2_ctrls
+        ctrls += [debugging_dino, dino_erode_or_dilate] + stage2_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):
             loaded_json = None

From f8f36828c79229b69571a153a6cebcd606d67c35 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Thu, 13 Jun 2024 01:13:21 +0200
Subject: [PATCH 029/101] feat: add disclaimer + skipping, stage2 won't
 work properly when used with inpaint or outpaint

---
 experiments_mask_generation.py |  2 +-
 extras/inpaint_mask.py         | 26 +++++++++-----
 modules/async_worker.py        | 42 +++++++++++++++++++---
 webui.py                       | 65 ++++++++++++++++++----------------
 4 files changed, 91 insertions(+), 44 deletions(-)

diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
index 272adfc5..0f6b960d 100644
--- a/experiments_mask_generation.py
+++ b/experiments_mask_generation.py
@@ -18,7 +18,7 @@ sam_options = SAMOptions(
     model_type='vit_b'
 )
 
-mask_image = generate_mask_from_image(image, sam_options=sam_options)
+mask_image, _, _, _ = generate_mask_from_image(image, sam_options=sam_options)
 
 merged_masks_img = Image.fromarray(mask_image)
 merged_masks_img.show()
diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 7bb671f5..f9025ef2 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -42,9 +42,13 @@ def optimize_masks(masks: torch.Tensor) -> torch.Tensor:
 
 
 def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=None,
-                             sam_options: SAMOptions | None = SAMOptions) -> np.ndarray | None:
+                             sam_options: SAMOptions | None = SAMOptions) -> tuple[np.ndarray | None, int | None, int | None, int | None]:
+    dino_detection_count = 0
+    sam_detection_count = 0
+    sam_detection_on_mask_count = 0
+
     if image is None:
-        return
+        return None, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
 
     if extras is None:
         extras = {}
@@ -53,13 +57,15 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
         image = image['image']
 
     if mask_model != 'sam' and sam_options is None:
-        return remove(
+        result = remove(
             image,
             session=new_session(mask_model, **extras),
             only_mask=True,
             **extras
         )
 
+        return result, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
     assert sam_options is not None
 
     detections, boxes, logits, phrases = default_groundingdino(
@@ -80,7 +86,11 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     sam_predictor = SamPredictor(sam)
     final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
 
-    if boxes.size(0) > 0:
+    dino_detection_count = boxes.size(0)
+    sam_detection_count = 0
+    sam_detection_on_mask_count = 0
+
+    if dino_detection_count > 0:
         sam_predictor.set_image(image)
 
         if sam_options.dino_erode_or_dilate != 0:
@@ -97,7 +107,7 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
             draw = ImageDraw.Draw(debug_dino_image)
             for box in boxes.numpy():
                 draw.rectangle(box.tolist(), fill="white")
-            return np.array(debug_dino_image)
+            return np.array(debug_dino_image), dino_detection_count, sam_detection_count, sam_detection_on_mask_count
 
         transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
         masks, _, _ = sam_predictor.predict_torch(
@@ -109,12 +119,12 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
 
         masks = optimize_masks(masks)
 
-        num_obj = min(len(logits), sam_options.max_num_boxes)
-        for obj_ind in range(num_obj):
+        sam_objects = min(len(logits), sam_options.max_num_boxes)
+        for obj_ind in range(sam_objects):
             mask_tensor = masks[obj_ind][0]
             final_mask_tensor += mask_tensor
 
     final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy()
     mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255
     mask_image = np.array(mask_image, dtype=np.uint8)
-    return mask_image
+    return mask_image, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 3191895f..1c288e16 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1040,13 +1040,15 @@ def worker():
 
                 # stage2
                 progressbar(async_task, current_progress, 'Processing stage2 ...')
-                final_unet = pipeline.final_unet.clone()
-                if len(async_task.stage2_ctrls) == 0:
+                final_unet = pipeline.final_unet
+                if len(async_task.stage2_ctrls) == 0 or 'inpaint' in goals:
+                    print(f'[Stage2] Skipping, preconditions aren\'t met')
                     continue
 
                 for img in imgs:
                     for stage2_mask_dino_prompt_text, stage2_mask_box_threshold, stage2_mask_text_threshold, stage2_mask_sam_max_num_boxes, stage2_mask_sam_model in async_task.stage2_ctrls:
-                        mask = generate_mask_from_image(img, sam_options=SAMOptions(
+                        print(f'[Stage2] Searching for "{stage2_mask_dino_prompt_text}"')
+                        mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(img, sam_options=SAMOptions(
                             dino_prompt=stage2_mask_dino_prompt_text,
                             dino_box_threshold=stage2_mask_box_threshold,
                             dino_text_threshold=stage2_mask_text_threshold,
@@ -1060,12 +1062,43 @@ def worker():
                         async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
                         # TODO also show do_not_show_finished_images=len(tasks) == 1
                         yield_result(async_task, mask, async_task.black_out_nsfw, False,
-                                     do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+                                     do_not_show_finished_images=len(
+                                         tasks) == 1 or async_task.disable_intermediate_results)
+
+                        print(f'[Stage2] {dino_detection_count} boxes detected')
+                        print(f'[Stage2] {sam_detection_count} segments detected in boxes')
+                        print(f'[Stage2] {sam_detection_on_mask_count} segments applied to mask')
+
+                        if dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0:
+                            print(f'[Stage2] Skipping')
+                            continue
+
                         # TODO make configurable
+
+                        # # do not apply loras / controlnets / etc. twice (samplers are needed though)
+                        # pipeline.final_unet = pipeline.model_base.unet.clone()
+
+                        # pipeline.refresh_everything(refiner_model_name=async_task.refiner_model_name,
+                        #                             base_model_name=async_task.base_model_name,
+                        #                             loras=[],
+                        #                             base_model_additional_loras=[],
+                        #                             use_synthetic_refiner=use_synthetic_refiner,
+                        #                             vae_name=async_task.vae_name)
+                        # pipeline.set_clip_skip(async_task.clip_skip)
+                        #
+                        # # patch everything again except original inpainting
+                        # if 'cn' in goals:
+                        #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+                        # if async_task.freeu_enabled:
+                        #     apply_freeu(async_task)
+                        # patch_samplers(async_task)
+
+                        # defaults from inpaint mode improve details
                         denoising_strength_stage2 = 0.5
                         inpaint_respective_field_stage2 = 0.0
                         inpaint_head_model_path_stage2 = None
                         inpaint_parameterized_stage2 = False  # inpaint_engine = None, improve detail
+
                         goals_stage2 = ['inpaint']
                         denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
                             async_task, None, inpaint_head_model_path_stage2, img, mask,
@@ -1080,7 +1113,6 @@ def worker():
                         # reset and prepare next iteration
                         img = imgs2[0]
                         pipeline.final_unet = final_unet
-                        inpaint_worker.current_task = None
 
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
diff --git a/webui.py b/webui.py
index 839cd1db..83a2296e 100644
--- a/webui.py
+++ b/webui.py
@@ -250,7 +250,9 @@ with shared.gradio_root:
                                             model_type=sam_model
                                         )
 
-                                    return generate_mask_from_image(image, mask_model, extras, sam_options)
+                                    mask, _, _, _ = generate_mask_from_image(image, mask_model, extras, sam_options)
+
+                                    return mask
 
                                 inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
                                                           inputs=inpaint_mask_model,
@@ -299,38 +301,41 @@ with shared.gradio_root:
                                                     outputs=metadata_json, queue=False, show_progress=True)
 
             with gr.Row(visible=False) as stage2_input_panel:
-                with gr.Tabs():
-                    stage2_ctrls = []
-                    for index in range(modules.config.default_stage2_tabs):
-                        with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
-                            stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
-                            with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
-                                # stage2_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_detail, label='Method', interactive=True)
-                                stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', info='Use singular whenever possible', interactive=True)
-                                example_stage2_mask_dino_prompt_text = gr.Dataset(samples=modules.config.example_stage2_prompts,
-                                                                                  label='Additional Prompt Quick List',
-                                                                                  components=[stage2_mask_dino_prompt_text],
-                                                                                  visible=True)
-                                example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
+                with gr.Column():
+                    gr.HTML('DISCLAIMER: Stage2 will be skipped when used in combination with Inpaint or Outpaint!')
+                    with gr.Row():
+                        with gr.Tabs():
+                            stage2_ctrls = []
+                            for index in range(modules.config.default_stage2_tabs):
+                                with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
+                                    stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
+                                    with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
+                                        # stage2_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_detail, label='Method', interactive=True)
+                                        stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', info='Use singular whenever possible', interactive=True)
+                                        example_stage2_mask_dino_prompt_text = gr.Dataset(samples=modules.config.example_stage2_prompts,
+                                                                                          label='Additional Prompt Quick List',
+                                                                                          components=[stage2_mask_dino_prompt_text],
+                                                                                          visible=True)
+                                        example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
 
-                                with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
-                                    stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
-                                    stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
-                                    stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
-                                    stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
+                                        with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
+                                            stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
+                                            stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
+                                            stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
+                                            stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
 
-                        stage2_ctrls += [
-                            stage2_enabled,
-                            # stage2_mode,
-                            stage2_mask_dino_prompt_text,
-                            stage2_mask_box_threshold,
-                            stage2_mask_text_threshold,
-                            stage2_mask_sam_max_num_boxes,
-                            stage2_mask_sam_model,
-                        ]
+                                stage2_ctrls += [
+                                    stage2_enabled,
+                                    # stage2_mode,
+                                    stage2_mask_dino_prompt_text,
+                                    stage2_mask_box_threshold,
+                                    stage2_mask_text_threshold,
+                                    stage2_mask_sam_max_num_boxes,
+                                    stage2_mask_sam_model,
+                                ]
 
-                        stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
-                                              outputs=stage2_accordion, queue=False, show_progress=False)
+                                stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
+                                                      outputs=stage2_accordion, queue=False, show_progress=False)
             switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
             down_js = "() => {viewer_to_bottom();}"
 

From f89d5a97b0a848803390bd7b509092798dad579c Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Thu, 13 Jun 2024 01:17:20 +0200
Subject: [PATCH 030/101] refactor: change wording of stage2 disclaimer, adjust
 gradio structure accordingly

---
 webui.py | 62 +++++++++++++++++++++++++++-----------------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/webui.py b/webui.py
index 83a2296e..5864b296 100644
--- a/webui.py
+++ b/webui.py
@@ -301,41 +301,39 @@ with shared.gradio_root:
                                                     outputs=metadata_json, queue=False, show_progress=True)
 
             with gr.Row(visible=False) as stage2_input_panel:
-                with gr.Column():
-                    gr.HTML('DISCLAIMER: Stage2 will be skipped when used in combination with Inpaint or Outpaint!')
-                    with gr.Row():
-                        with gr.Tabs():
-                            stage2_ctrls = []
-                            for index in range(modules.config.default_stage2_tabs):
-                                with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
-                                    stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
-                                    with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
-                                        # stage2_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_detail, label='Method', interactive=True)
-                                        stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', info='Use singular whenever possible', interactive=True)
-                                        example_stage2_mask_dino_prompt_text = gr.Dataset(samples=modules.config.example_stage2_prompts,
-                                                                                          label='Additional Prompt Quick List',
-                                                                                          components=[stage2_mask_dino_prompt_text],
-                                                                                          visible=True)
-                                        example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
+                with gr.Tabs():
+                    stage2_ctrls = []
+                    for index in range(modules.config.default_stage2_tabs):
+                        with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
+                            stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
+                            gr.HTML('DISCLAIMER: Stage2 does not work with Inpaint or Outpaint and will be skipped.')
+                            with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
+                                # stage2_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_detail, label='Method', interactive=True)
+                                stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', info='Use singular whenever possible', interactive=True)
+                                example_stage2_mask_dino_prompt_text = gr.Dataset(samples=modules.config.example_stage2_prompts,
+                                                                                  label='Additional Prompt Quick List',
+                                                                                  components=[stage2_mask_dino_prompt_text],
+                                                                                  visible=True)
+                                example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
 
-                                        with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
-                                            stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
-                                            stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
-                                            stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
-                                            stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
+                                with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
+                                    stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
+                                    stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
+                                    stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
+                                    stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
 
-                                stage2_ctrls += [
-                                    stage2_enabled,
-                                    # stage2_mode,
-                                    stage2_mask_dino_prompt_text,
-                                    stage2_mask_box_threshold,
-                                    stage2_mask_text_threshold,
-                                    stage2_mask_sam_max_num_boxes,
-                                    stage2_mask_sam_model,
-                                ]
+                        stage2_ctrls += [
+                            stage2_enabled,
+                            # stage2_mode,
+                            stage2_mask_dino_prompt_text,
+                            stage2_mask_box_threshold,
+                            stage2_mask_text_threshold,
+                            stage2_mask_sam_max_num_boxes,
+                            stage2_mask_sam_model,
+                        ]
 
-                                stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
-                                                      outputs=stage2_accordion, queue=False, show_progress=False)
+                        stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
+                                              outputs=stage2_accordion, queue=False, show_progress=False)
             switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
             down_js = "() => {viewer_to_bottom();}"
 

From a71739026bd5040dc2f42d4d0899e25b309b7141 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Thu, 13 Jun 2024 01:19:28 +0200
Subject: [PATCH 031/101] refactor: change wording of hint when nothing has
 been detected

---
 modules/async_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 1c288e16..04a2705a 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1070,7 +1070,7 @@ def worker():
                         print(f'[Stage2] {sam_detection_on_mask_count} segments applied to mask')
 
                         if dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0:
-                            print(f'[Stage2] Skipping')
+                            print(f'[Stage2] No "{stage2_mask_dino_prompt_text}" detected, skipping')
                             continue
 
                         # TODO make configurable

From db0e0aaf59ec86b46669cd27dd52e5a5d06cf899 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Fri, 14 Jun 2024 23:58:59 +0200
Subject: [PATCH 032/101] wip: add inpaint options to stage2

---
 webui.py | 87 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 60 insertions(+), 27 deletions(-)

diff --git a/webui.py b/webui.py
index 5864b296..a4751ede 100644
--- a/webui.py
+++ b/webui.py
@@ -90,6 +90,34 @@ def generate_clicked(task: worker.AsyncTask):
     return
 
 
+def inpaint_mode_change(mode):
+    assert mode in modules.flags.inpaint_options
+
+    # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+    # inpaint_disable_initial_latent, inpaint_engine,
+    # inpaint_strength, inpaint_respective_field
+
+    if mode == modules.flags.inpaint_option_detail:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
+            False, 'None', 0.5, 0.0
+        ]
+
+    if mode == modules.flags.inpaint_option_modify:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+            True, modules.config.default_inpaint_engine_version, 1.0, 0.0
+        ]
+
+    return [
+        gr.update(visible=False, value=''), gr.update(visible=True),
+        gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+        False, modules.config.default_inpaint_engine_version, 1.0, 0.618
+    ]
+
+
 reload_javascript()
 
 title = f'Fooocus {fooocus_version.version}'
@@ -316,11 +344,33 @@ with shared.gradio_root:
                                                                                   visible=True)
                                 example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
 
+                                stage2_mask_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
+                                                           value=modules.flags.inpaint_option_default, label='Method')
+
                                 with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
                                     stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
                                     stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
                                     stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
                                     stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
+                                    stage2_mask_denoising_strength = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
+                                    stage2_mask_inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
+                                    stage2_mask_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
+                                                                             value=modules.config.default_inpaint_engine_version,
+                                                                             choices=flags.inpaint_engine_versions,
+                                                                             info='Version of Fooocus inpaint model')
+                                    stage2_mask_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
+                                                                             minimum=0.0, maximum=1.0, step=0.001, value=1.0,
+                                                                             info='Same as the denoising strength in A1111 inpaint. '
+                                                                                  'Only used in inpaint, not used in outpaint. '
+                                                                                  '(Outpaint always use 1.0)')
+                                    stage2_mask_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
+                                                                                     minimum=0.0, maximum=1.0, step=0.001,
+                                                                                     value=0.618,
+                                                                                     info='The area to inpaint. '
+                                                                                          'Value 0 is same as "Only Masked" in A1111. '
+                                                                                          'Value 1 is same as "Whole Image" in A1111. '
+                                                                                          'Only used in inpaint, not used in outpaint. '
+                                                                                          '(Outpaint always use 1.0)')
 
                         stage2_ctrls += [
                             stage2_enabled,
@@ -330,10 +380,20 @@ with shared.gradio_root:
                             stage2_mask_text_threshold,
                             stage2_mask_sam_max_num_boxes,
                             stage2_mask_sam_model,
+                            stage2_mask_inpaint_disable_initial_latent,
+                            stage2_mask_inpaint_engine,
+                            stage2_mask_inpaint_strength,
+                            stage2_mask_inpaint_respective_field
                         ]
 
                         stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
                                               outputs=stage2_accordion, queue=False, show_progress=False)
+
+                        inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[
+                            inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+                            stage2_mask_inpaint_disable_initial_latent, stage2_mask_inpaint_engine,
+                            stage2_mask_inpaint_strength, stage2_mask_inpaint_respective_field
+                        ], show_progress=False, queue=False)
             switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
             down_js = "() => {viewer_to_bottom();}"
 
@@ -747,33 +807,6 @@ with shared.gradio_root:
                                  queue=False, show_progress=False) \
             .then(fn=lambda: None, _js='refresh_grid_delayed', queue=False, show_progress=False)
 
-        def inpaint_mode_change(mode):
-            assert mode in modules.flags.inpaint_options
-
-            # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
-            # inpaint_disable_initial_latent, inpaint_engine,
-            # inpaint_strength, inpaint_respective_field
-
-            if mode == modules.flags.inpaint_option_detail:
-                return [
-                    gr.update(visible=True), gr.update(visible=False, value=[]),
-                    gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
-                    False, 'None', 0.5, 0.0
-                ]
-
-            if mode == modules.flags.inpaint_option_modify:
-                return [
-                    gr.update(visible=True), gr.update(visible=False, value=[]),
-                    gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
-                    True, modules.config.default_inpaint_engine_version, 1.0, 0.0
-                ]
-
-            return [
-                gr.update(visible=False, value=''), gr.update(visible=True),
-                gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
-                False, modules.config.default_inpaint_engine_version, 1.0, 0.618
-            ]
-
         inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[
             inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
             inpaint_disable_initial_latent, inpaint_engine,

From 8b21303542a6e3f64652449785168d4bde5fe606 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 15 Jun 2024 01:26:18 +0200
Subject: [PATCH 033/101] wip: refactor UI for stage2

---
 css/style.css     |   2 +-
 modules/config.py |   7 +++
 webui.py          | 106 +++++++++++++++++++++++++++++-----------------
 3 files changed, 74 insertions(+), 41 deletions(-)

diff --git a/css/style.css b/css/style.css
index b10e644b..ad9de6f2 100644
--- a/css/style.css
+++ b/css/style.css
@@ -99,7 +99,7 @@ div:has(> #positive_prompt) {
 }
 
 .advanced_check_row {
-  width: 310px !important;
+  width: 330px !important;
 }
 
 .min_check {
diff --git a/modules/config.py b/modules/config.py
index ef8e9576..3c5389b9 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -557,6 +557,13 @@ default_inpaint_mask_model = get_config_item_or_set_default(
     expected_type=str
 )
 
+default_stage2_inpaint_mask_model = get_config_item_or_set_default(
+    key='default_stage2_inpaint_mask_model',
+    default_value='sam',
+    validator=lambda x: x in modules.flags.inpaint_mask_models,
+    expected_type=str
+)
+
 default_inpaint_mask_cloth_category = get_config_item_or_set_default(
     key='default_inpaint_mask_cloth_category',
     default_value='full',
diff --git a/webui.py b/webui.py
index a4751ede..c9b703d2 100644
--- a/webui.py
+++ b/webui.py
@@ -258,9 +258,10 @@ with shared.gradio_root:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
+                                    inpaint_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
-                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, dino_erode_or_dilate, dino_debug):
+                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_num_boxes, dino_erode_or_dilate, dino_debug):
                                     from extras.inpaint_mask import generate_mask_from_image
 
                                     extras = {}
@@ -274,7 +275,7 @@ with shared.gradio_root:
                                             dino_text_threshold=text_threshold,
                                             dino_erode_or_dilate=dino_erode_or_dilate,
                                             dino_debug=dino_debug,
-                                            max_num_boxes=2, #TODO replace with actual value
+                                            max_num_boxes=sam_max_num_boxes,
                                             model_type=sam_model
                                         )
 
@@ -332,49 +333,67 @@ with shared.gradio_root:
                 with gr.Tabs():
                     stage2_ctrls = []
                     for index in range(modules.config.default_stage2_tabs):
-                        with gr.TabItem(label=f'Iteration #{index + 1}') as stage2_tab_item:
+                        with gr.TabItem(label=f'#{index + 1}') as stage2_tab_item:
                             stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
                             gr.HTML('DISCLAIMER: Stage2 does not work with Inpaint or Outpaint and will be skipped.')
-                            with gr.Accordion('Options', visible=True, open=False) as stage2_accordion:
-                                # stage2_mode = gr.Dropdown(choices=modules.flags.inpaint_options, value=modules.flags.inpaint_option_detail, label='Method', interactive=True)
-                                stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', info='Use singular whenever possible', interactive=True)
-                                example_stage2_mask_dino_prompt_text = gr.Dataset(samples=modules.config.example_stage2_prompts,
-                                                                                  label='Additional Prompt Quick List',
-                                                                                  components=[stage2_mask_dino_prompt_text],
-                                                                                  visible=True)
-                                example_stage2_mask_dino_prompt_text.click(lambda x: x[0], inputs=example_stage2_mask_dino_prompt_text, outputs=stage2_mask_dino_prompt_text, show_progress=False, queue=False)
 
-                                stage2_mask_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
-                                                           value=modules.flags.inpaint_option_default, label='Method')
+                            # TODO add positive and negative prompt here
 
-                                with gr.Accordion("Advanced options", visible=True, open=False) as inpaint_mask_advanced_options:
+                            with gr.Accordion("Detection", open=False):
+                                # TODO check if limiting to SAM is better
+                                stage2_mask_inpaint_mask_model = gr.Dropdown(label='Mask generation model',
+                                                                             choices=flags.inpaint_mask_models,
+                                                                             value=modules.config.default_stage2_inpaint_mask_model)
+                                stage2_mask_inpaint_mask_cloth_category = gr.Dropdown(label='Cloth category',
+                                                                                      choices=flags.inpaint_mask_cloth_category,
+                                                                                      value=modules.config.default_inpaint_mask_cloth_category,
+                                                                                      visible=modules.config.default_stage2_inpaint_mask_model == 'u2net_cloth_seg',
+                                                                                      interactive=True)
+                                stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt',
+                                                                          info='Use singular whenever possible',
+                                                                          interactive=True,
+                                                                          visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
+                                example_stage2_mask_dino_prompt_text = gr.Dataset(
+                                    samples=modules.config.example_stage2_prompts,
+                                    label='Additional Prompt Quick List',
+                                    components=[stage2_mask_dino_prompt_text],
+                                    visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
+                                example_stage2_mask_dino_prompt_text.click(lambda x: x[0],
+                                                                           inputs=example_stage2_mask_dino_prompt_text,
+                                                                           outputs=stage2_mask_dino_prompt_text,
+                                                                           show_progress=False, queue=False)
+
+                                with gr.Accordion("SAM Options", visible=modules.config.default_stage2_inpaint_mask_model == 'sam', open=False) as sam_options:
                                     stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
                                     stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
                                     stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
                                     stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
-                                    stage2_mask_denoising_strength = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
-                                    stage2_mask_inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
-                                    stage2_mask_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
-                                                                             value=modules.config.default_inpaint_engine_version,
-                                                                             choices=flags.inpaint_engine_versions,
-                                                                             info='Version of Fooocus inpaint model')
-                                    stage2_mask_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
-                                                                             minimum=0.0, maximum=1.0, step=0.001, value=1.0,
-                                                                             info='Same as the denoising strength in A1111 inpaint. '
-                                                                                  'Only used in inpaint, not used in outpaint. '
-                                                                                  '(Outpaint always use 1.0)')
-                                    stage2_mask_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
-                                                                                     minimum=0.0, maximum=1.0, step=0.001,
-                                                                                     value=0.618,
-                                                                                     info='The area to inpaint. '
-                                                                                          'Value 0 is same as "Only Masked" in A1111. '
-                                                                                          'Value 1 is same as "Whole Image" in A1111. '
-                                                                                          'Only used in inpaint, not used in outpaint. '
-                                                                                          '(Outpaint always use 1.0)')
+
+                            with gr.Accordion("Inpaint", visible=True, open=False):
+                                stage2_mask_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
+                                                                       value=modules.flags.inpaint_option_default, # TODO test
+                                                                       label='Method', interactive=True)
+                                stage2_mask_inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
+                                stage2_mask_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
+                                                                         value=modules.config.default_inpaint_engine_version,
+                                                                         choices=flags.inpaint_engine_versions,
+                                                                         info='Version of Fooocus inpaint model')
+                                stage2_mask_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
+                                                                         minimum=0.0, maximum=1.0, step=0.001, value=1.0,
+                                                                         info='Same as the denoising strength in A1111 inpaint. '
+                                                                              'Only used in inpaint, not used in outpaint. '
+                                                                              '(Outpaint always use 1.0)')
+                                stage2_mask_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
+                                                                                 minimum=0.0, maximum=1.0, step=0.001,
+                                                                                 value=0.618,
+                                                                                 info='The area to inpaint. '
+                                                                                      'Value 0 is same as "Only Masked" in A1111. '
+                                                                                      'Value 1 is same as "Whole Image" in A1111. '
+                                                                                      'Only used in inpaint, not used in outpaint. '
+                                                                                      '(Outpaint always use 1.0)')
 
                         stage2_ctrls += [
                             stage2_enabled,
-                            # stage2_mode,
                             stage2_mask_dino_prompt_text,
                             stage2_mask_box_threshold,
                             stage2_mask_text_threshold,
@@ -386,14 +405,21 @@ with shared.gradio_root:
                             stage2_mask_inpaint_respective_field
                         ]
 
-                        stage2_enabled.change(lambda x: gr.update(open=x), inputs=stage2_enabled,
-                                              outputs=stage2_accordion, queue=False, show_progress=False)
-
-                        inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[
+                        stage2_mask_inpaint_mode.input(inpaint_mode_change, inputs=stage2_mask_inpaint_mode, outputs=[
                             inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
                             stage2_mask_inpaint_disable_initial_latent, stage2_mask_inpaint_engine,
                             stage2_mask_inpaint_strength, stage2_mask_inpaint_respective_field
                         ], show_progress=False, queue=False)
+
+                        stage2_mask_inpaint_mask_model.change(
+                            lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+                                      [gr.update(visible=x == 'sam')] * 2 +
+                                      [gr.Dataset.update(visible=x == 'sam', samples=modules.config.example_stage2_prompts)],
+                            inputs=stage2_mask_inpaint_mask_model,
+                            outputs=[stage2_mask_inpaint_mask_cloth_category, stage2_mask_dino_prompt_text, sam_options,
+                                     example_stage2_mask_dino_prompt_text],
+                            queue=False, show_progress=False)
+
             switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
             down_js = "() => {viewer_to_bottom();}"
 
@@ -816,8 +842,8 @@ with shared.gradio_root:
         generate_mask_button.click(fn=generate_mask,
                                    inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
                                            inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
-                                           inpaint_mask_box_threshold, inpaint_mask_text_threshold, dino_erode_or_dilate,
-                                           debugging_dino],
+                                           inpaint_mask_box_threshold, inpaint_mask_text_threshold,
+                                           inpaint_mask_sam_max_num_boxes, dino_erode_or_dilate, debugging_dino],
                                    outputs=inpaint_mask_image, show_progress=True, queue=True)
 
         ctrls = [currentTask, generate_image_grid]

From 71f5e70085d286b107ed924cda7c8974d53d5a15 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 12:30:49 +0200
Subject: [PATCH 034/101] refactor: rename segmentation prompt to detection
 prompt

Improve wording: the prompt is only applied to GroundingDINO (detection), not to the segmentation step itself.
---
 language/en.json | 2 +-
 webui.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/language/en.json b/language/en.json
index dc208f3e..f80ee4b2 100644
--- a/language/en.json
+++ b/language/en.json
@@ -46,7 +46,7 @@
     "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)",
     "Mask generation model": "Mask generation model",
     "Cloth category": "Cloth category",
-    "Segmentation prompt": "Segmentation prompt",
+    "Detection prompt": "Detection prompt",
     "Use singular whenever possible": "Use singular whenever possible",
     "Advanced options": "Advanced options",
     "SAM model": "SAM model",
diff --git a/webui.py b/webui.py
index c9b703d2..877d7939 100644
--- a/webui.py
+++ b/webui.py
@@ -253,7 +253,7 @@ with shared.gradio_root:
                                                              choices=flags.inpaint_mask_cloth_category,
                                                              value=modules.config.default_inpaint_mask_cloth_category,
                                                              visible=False)
-                                inpaint_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False, info='Use singular whenever possible')
+                                inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible')
                                 with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)

From a3dcd6e1fd609dbed425714d1a7b63ad4df6d8b7 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 12:38:45 +0200
Subject: [PATCH 035/101] wip: add inputs for positive and negative prompt for
 stage2

---
 webui.py | 85 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 54 insertions(+), 31 deletions(-)

diff --git a/webui.py b/webui.py
index 877d7939..8cf5a087 100644
--- a/webui.py
+++ b/webui.py
@@ -334,10 +334,28 @@ with shared.gradio_root:
                     stage2_ctrls = []
                     for index in range(modules.config.default_stage2_tabs):
                         with gr.TabItem(label=f'#{index + 1}') as stage2_tab_item:
-                            stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', container=False)
+                            stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
+                                                         container=False)
                             gr.HTML('DISCLAIMER: Stage2 does not work with Inpaint or Outpaint and will be skipped.')
 
-                            # TODO add positive and negative prompt here
+                            stage2_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
+                                                                      info='Use singular whenever possible',
+                                                                      interactive=True,
+                                                                      visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
+                            example_stage2_mask_dino_prompt_text = gr.Dataset(
+                                samples=modules.config.example_stage2_prompts,
+                                label='Additional Prompt Quick List',
+                                components=[stage2_mask_dino_prompt_text],
+                                visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
+                            example_stage2_mask_dino_prompt_text.click(lambda x: x[0],
+                                                                       inputs=example_stage2_mask_dino_prompt_text,
+                                                                       outputs=stage2_mask_dino_prompt_text,
+                                                                       show_progress=False, queue=False)
+
+                            stage2_prompt = gr.Textbox(label="Enhancement positive prompt",
+                                                       placeholder="Uses original prompt instead if empty.")
+                            stage2_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
+                                                                placeholder="Uses original negative prompt instead if empty.")
 
                             with gr.Accordion("Detection", open=False):
                                 # TODO check if limiting to SAM is better
@@ -349,37 +367,39 @@ with shared.gradio_root:
                                                                                       value=modules.config.default_inpaint_mask_cloth_category,
                                                                                       visible=modules.config.default_stage2_inpaint_mask_model == 'u2net_cloth_seg',
                                                                                       interactive=True)
-                                stage2_mask_dino_prompt_text = gr.Textbox(label='Segmentation prompt',
-                                                                          info='Use singular whenever possible',
-                                                                          interactive=True,
-                                                                          visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
-                                example_stage2_mask_dino_prompt_text = gr.Dataset(
-                                    samples=modules.config.example_stage2_prompts,
-                                    label='Additional Prompt Quick List',
-                                    components=[stage2_mask_dino_prompt_text],
-                                    visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
-                                example_stage2_mask_dino_prompt_text.click(lambda x: x[0],
-                                                                           inputs=example_stage2_mask_dino_prompt_text,
-                                                                           outputs=stage2_mask_dino_prompt_text,
-                                                                           show_progress=False, queue=False)
 
-                                with gr.Accordion("SAM Options", visible=modules.config.default_stage2_inpaint_mask_model == 'sam', open=False) as sam_options:
-                                    stage2_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model, interactive=True)
-                                    stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05, interactive=True)
-                                    stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05, interactive=True)
-                                    stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
+                                with gr.Accordion("SAM Options",
+                                                  visible=modules.config.default_stage2_inpaint_mask_model == 'sam',
+                                                  open=False) as sam_options:
+                                    stage2_mask_sam_model = gr.Dropdown(label='SAM model',
+                                                                        choices=flags.inpaint_mask_sam_model,
+                                                                        value=modules.config.default_inpaint_mask_sam_model,
+                                                                        interactive=True)
+                                    stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0,
+                                                                          maximum=1.0, value=0.3, step=0.05,
+                                                                          interactive=True)
+                                    stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
+                                                                           maximum=1.0, value=0.25, step=0.05,
+                                                                           interactive=True)
+                                    stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections",
+                                                                              minimum=1, maximum=5,
+                                                                              value=modules.config.default_sam_max_num_boxes,
+                                                                              step=1, interactive=True)
 
                             with gr.Accordion("Inpaint", visible=True, open=False):
                                 stage2_mask_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
-                                                                       value=modules.flags.inpaint_option_default, # TODO test
+                                                                       value=modules.flags.inpaint_option_default,
+                                                                       # TODO test
                                                                        label='Method', interactive=True)
-                                stage2_mask_inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
+                                stage2_mask_inpaint_disable_initial_latent = gr.Checkbox(
+                                    label='Disable initial latent in inpaint', value=False)
                                 stage2_mask_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                                          value=modules.config.default_inpaint_engine_version,
                                                                          choices=flags.inpaint_engine_versions,
                                                                          info='Version of Fooocus inpaint model')
                                 stage2_mask_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
-                                                                         minimum=0.0, maximum=1.0, step=0.001, value=1.0,
+                                                                         minimum=0.0, maximum=1.0, step=0.001,
+                                                                         value=1.0,
                                                                          info='Same as the denoising strength in A1111 inpaint. '
                                                                               'Only used in inpaint, not used in outpaint. '
                                                                               '(Outpaint always use 1.0)')
@@ -395,10 +415,12 @@ with shared.gradio_root:
                         stage2_ctrls += [
                             stage2_enabled,
                             stage2_mask_dino_prompt_text,
-                            stage2_mask_box_threshold,
-                            stage2_mask_text_threshold,
-                            stage2_mask_sam_max_num_boxes,
+                            stage2_prompt,
+                            stage2_negative_prompt,
                             stage2_mask_sam_model,
+                            stage2_mask_text_threshold,
+                            stage2_mask_box_threshold,
+                            stage2_mask_sam_max_num_boxes,
                             stage2_mask_inpaint_disable_initial_latent,
                             stage2_mask_inpaint_engine,
                             stage2_mask_inpaint_strength,
@@ -414,7 +436,8 @@ with shared.gradio_root:
                         stage2_mask_inpaint_mask_model.change(
                             lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
                                       [gr.update(visible=x == 'sam')] * 2 +
-                                      [gr.Dataset.update(visible=x == 'sam', samples=modules.config.example_stage2_prompts)],
+                                      [gr.Dataset.update(visible=x == 'sam',
+                                                         samples=modules.config.example_stage2_prompts)],
                             inputs=stage2_mask_inpaint_mask_model,
                             outputs=[stage2_mask_inpaint_mask_cloth_category, stage2_mask_dino_prompt_text, sam_options,
                                      example_stage2_mask_dino_prompt_text],
@@ -498,7 +521,7 @@ with shared.gradio_root:
                 def update_history_link():
                     if args_manager.args.disable_image_log:
                         return gr.update(value='')
-                    
+
                     return gr.update(value=f'<a href="file={get_current_html_path(output_format)}" target="_blank">\U0001F4DA History Log</a>')
 
                 history_link = gr.HTML()
@@ -663,7 +686,7 @@ with shared.gradio_root:
                                                        info='Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.',
                                                        visible=modules.config.default_save_metadata_to_images)
 
-                            save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme], 
+                            save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme],
                                                            queue=False, show_progress=False)
 
                     with gr.Tab(label='Control'):
@@ -826,9 +849,9 @@ with shared.gradio_root:
                                          adm_scaler_negative, refiner_switch, refiner_model, sampler_name,
                                          scheduler_name, adaptive_cfg, refiner_swap_method, negative_prompt, disable_intermediate_results
                                      ], queue=False, show_progress=False)
-        
+
         output_format.input(lambda x: gr.update(output_format=x), inputs=output_format)
-        
+
         advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column,
                                  queue=False, show_progress=False) \
             .then(fn=lambda: None, _js='refresh_grid_delayed', queue=False, show_progress=False)

From ef9fd293ff33f83338153a1a93c64014d067e3ce Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 12:43:23 +0200
Subject: [PATCH 036/101] wip: rename stage2 to enhance

---
 modules/async_worker.py |  88 +++++++++----------
 modules/config.py       |  14 +--
 webui.py                | 187 ++++++++++++++++++++--------------------
 3 files changed, 145 insertions(+), 144 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 04a2705a..8d6004f8 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -111,23 +111,23 @@ class AsyncTask:
         self.debugging_dino = args.pop()
         self.dino_erode_or_dilate = args.pop()
 
-        self.stage2_ctrls = []
-        for _ in range(modules.config.default_stage2_tabs):
-            stage2_enabled = args.pop()
-            # stage2_mode = args.pop()
-            stage2_mask_dino_prompt_text = args.pop()
-            stage2_mask_box_threshold = args.pop()
-            stage2_mask_text_threshold = args.pop()
-            stage2_mask_sam_max_num_boxes = args.pop()
-            stage2_mask_sam_model = args.pop()
-            if stage2_enabled:
-                self.stage2_ctrls.append([
-                    # stage2_mode,
-                    stage2_mask_dino_prompt_text,
-                    stage2_mask_box_threshold,
-                    stage2_mask_text_threshold,
-                    stage2_mask_sam_max_num_boxes,
-                    stage2_mask_sam_model,
+        self.enhance_ctrls = []
+        for _ in range(modules.config.default_enhance_tabs):
+            enhance_enabled = args.pop()
+            # enhance_mode = args.pop()
+            enhance_mask_dino_prompt_text = args.pop()
+            enhance_mask_box_threshold = args.pop()
+            enhance_mask_text_threshold = args.pop()
+            enhance_mask_sam_max_num_boxes = args.pop()
+            enhance_mask_sam_model = args.pop()
+            if enhance_enabled:
+                self.enhance_ctrls.append([
+                    # enhance_mode,
+                    enhance_mask_dino_prompt_text,
+                    enhance_mask_box_threshold,
+                    enhance_mask_text_threshold,
+                    enhance_mask_sam_max_num_boxes,
+                    enhance_mask_sam_model,
                 ])
 
 
@@ -1038,24 +1038,24 @@ def worker():
                              current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
                              switch, task, tasks, tiled, use_expansion, width, height)
 
-                # stage2
-                progressbar(async_task, current_progress, 'Processing stage2 ...')
+                # enhance
+                progressbar(async_task, current_progress, 'Processing enhance ...')
                 final_unet = pipeline.final_unet
-                if len(async_task.stage2_ctrls) == 0 or 'inpaint' in goals:
-                    print(f'[Stage2] Skipping, preconditions aren\'t met')
+                if len(async_task.enhance_ctrls) == 0 or 'inpaint' in goals:
+                    print(f'[Enhance] Skipping, preconditions aren\'t met')
                     continue
 
                 for img in imgs:
-                    for stage2_mask_dino_prompt_text, stage2_mask_box_threshold, stage2_mask_text_threshold, stage2_mask_sam_max_num_boxes, stage2_mask_sam_model in async_task.stage2_ctrls:
-                        print(f'[Stage2] Searching for "{stage2_mask_dino_prompt_text}"')
+                    for enhance_mask_dino_prompt_text, enhance_mask_box_threshold, enhance_mask_text_threshold, enhance_mask_sam_max_num_boxes, enhance_mask_sam_model in async_task.enhance_ctrls:
+                        print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
                         mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(img, sam_options=SAMOptions(
-                            dino_prompt=stage2_mask_dino_prompt_text,
-                            dino_box_threshold=stage2_mask_box_threshold,
-                            dino_text_threshold=stage2_mask_text_threshold,
+                            dino_prompt=enhance_mask_dino_prompt_text,
+                            dino_box_threshold=enhance_mask_box_threshold,
+                            dino_text_threshold=enhance_mask_text_threshold,
                             dino_erode_or_dilate=async_task.dino_erode_or_dilate,
                             dino_debug=async_task.debugging_dino,
-                            max_num_boxes=stage2_mask_sam_max_num_boxes,
-                            model_type=stage2_mask_sam_model
+                            max_num_boxes=enhance_mask_sam_max_num_boxes,
+                            model_type=enhance_mask_sam_model
                         ))
                         mask = mask[:, :, 0]
 
@@ -1065,12 +1065,12 @@ def worker():
                                      do_not_show_finished_images=len(
                                          tasks) == 1 or async_task.disable_intermediate_results)
 
-                        print(f'[Stage2] {dino_detection_count} boxes detected')
-                        print(f'[Stage2] {sam_detection_count} segments detected in boxes')
-                        print(f'[Stage2] {sam_detection_on_mask_count} segments applied to mask')
+                        print(f'[Enhance] {dino_detection_count} boxes detected')
+                        print(f'[Enhance] {sam_detection_count} segments detected in boxes')
+                        print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
 
                         if dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0:
-                            print(f'[Stage2] No "{stage2_mask_dino_prompt_text}" detected, skipping')
+                            print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
                             continue
 
                         # TODO make configurable
@@ -1094,21 +1094,21 @@ def worker():
                         # patch_samplers(async_task)
 
                         # defaults from inpaint mode improve details
-                        denoising_strength_stage2 = 0.5
-                        inpaint_respective_field_stage2 = 0.0
-                        inpaint_head_model_path_stage2 = None
-                        inpaint_parameterized_stage2 = False  # inpaint_engine = None, improve detail
+                        denoising_strength_enhance = 0.5
+                        inpaint_respective_field_enhance = 0.0
+                        inpaint_head_model_path_enhance = None
+                        inpaint_parameterized_enhance = False  # inpaint_engine = None, improve detail
 
-                        goals_stage2 = ['inpaint']
-                        denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
-                            async_task, None, inpaint_head_model_path_stage2, img, mask,
-                            inpaint_parameterized_stage2, denoising_strength_stage2,
-                            inpaint_respective_field_stage2, switch, current_progress, True)
+                        goals_enhance = ['inpaint']
+                        denoising_strength_enhance, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
+                            async_task, None, inpaint_head_model_path_enhance, img, mask,
+                            inpaint_parameterized_enhance, denoising_strength_enhance,
+                            inpaint_respective_field_enhance, switch, current_progress, True)
 
                         imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                                     current_task_id, denoising_strength_stage2, final_scheduler_name, goals_stage2,
-                                     initial_latent_stage2, switch, task, tasks, tiled, use_expansion, width_stage2,
-                                     height_stage2)
+                                     current_task_id, denoising_strength_enhance, final_scheduler_name, goals_enhance,
+                                     initial_latent_enhance, switch, task, tasks, tiled, use_expansion, width_enhance,
+                                     height_enhance)
 
                         # reset and prepare next iteration
                         img = imgs2[0]
diff --git a/modules/config.py b/modules/config.py
index 3c5389b9..929dd9ce 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -502,16 +502,16 @@ example_inpaint_prompts = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
-example_stage2_prompts = get_config_item_or_set_default(
-    key='example_stage2_prompts',
+example_enhance_prompts = get_config_item_or_set_default(
+    key='example_enhance_prompts',
     default_value=[
         'face', 'eye', 'mouth', 'hair', 'hand', 'body'
     ],
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
-default_stage2_tabs = get_config_item_or_set_default(
-    key='default_stage2_tabs',
+default_enhance_tabs = get_config_item_or_set_default(
+    key='default_enhance_tabs',
     default_value=3,
     validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
     expected_type=int
@@ -548,7 +548,7 @@ metadata_created_by = get_config_item_or_set_default(
 )
 
 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
-example_stage2_prompts = [[x] for x in example_stage2_prompts]
+example_enhance_prompts = [[x] for x in example_enhance_prompts]
 
 default_inpaint_mask_model = get_config_item_or_set_default(
     key='default_inpaint_mask_model',
@@ -557,8 +557,8 @@ default_inpaint_mask_model = get_config_item_or_set_default(
     expected_type=str
 )
 
-default_stage2_inpaint_mask_model = get_config_item_or_set_default(
-    key='default_stage2_inpaint_mask_model',
+default_enhance_inpaint_mask_model = get_config_item_or_set_default(
+    key='default_enhance_inpaint_mask_model',
     default_value='sam',
     validator=lambda x: x in modules.flags.inpaint_mask_models,
     expected_type=str
diff --git a/webui.py b/webui.py
index 8cf5a087..4043eddf 100644
--- a/webui.py
+++ b/webui.py
@@ -175,7 +175,7 @@ with shared.gradio_root:
                     skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
             with gr.Row(elem_classes='advanced_check_row'):
                 input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
-                stage2_checkbox = gr.Checkbox(label='Stage2', value=False, container=False, elem_classes='min_check')
+                enhance_checkbox = gr.Checkbox(label='Enhance', value=False, container=False, elem_classes='min_check')
                 advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
             with gr.Row(visible=False) as image_input_panel:
                 with gr.Tabs():
@@ -329,118 +329,119 @@ with shared.gradio_root:
                         metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image,
                                                     outputs=metadata_json, queue=False, show_progress=True)
 
-            with gr.Row(visible=False) as stage2_input_panel:
+            with gr.Row(visible=False) as enhance_input_panel:
                 with gr.Tabs():
-                    stage2_ctrls = []
-                    for index in range(modules.config.default_stage2_tabs):
-                        with gr.TabItem(label=f'#{index + 1}') as stage2_tab_item:
-                            stage2_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
-                                                         container=False)
-                            gr.HTML('DISCLAIMER: Stage2 does not work with Inpaint or Outpaint and will be skipped.')
+                    enhance_ctrls = []
+                    for index in range(modules.config.default_enhance_tabs):
+                        with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
+                            enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
+                                                          container=False)
+                            gr.HTML(
+                                'DISCLAIMER: The enhance feature does not work with Inpaint or Outpaint and will be skipped.')
 
-                            stage2_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
-                                                                      info='Use singular whenever possible',
-                                                                      interactive=True,
-                                                                      visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
-                            example_stage2_mask_dino_prompt_text = gr.Dataset(
-                                samples=modules.config.example_stage2_prompts,
+                            enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
+                                                                       info='Use singular whenever possible',
+                                                                       interactive=True,
+                                                                       visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+                            example_enhance_mask_dino_prompt_text = gr.Dataset(
+                                samples=modules.config.example_enhance_prompts,
                                 label='Additional Prompt Quick List',
-                                components=[stage2_mask_dino_prompt_text],
-                                visible=modules.config.default_stage2_inpaint_mask_model == 'sam')
-                            example_stage2_mask_dino_prompt_text.click(lambda x: x[0],
-                                                                       inputs=example_stage2_mask_dino_prompt_text,
-                                                                       outputs=stage2_mask_dino_prompt_text,
-                                                                       show_progress=False, queue=False)
+                                components=[enhance_mask_dino_prompt_text],
+                                visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+                            example_enhance_mask_dino_prompt_text.click(lambda x: x[0],
+                                                                        inputs=example_enhance_mask_dino_prompt_text,
+                                                                        outputs=enhance_mask_dino_prompt_text,
+                                                                        show_progress=False, queue=False)
 
-                            stage2_prompt = gr.Textbox(label="Enhancement positive prompt",
-                                                       placeholder="Uses original prompt instead if empty.")
-                            stage2_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
-                                                                placeholder="Uses original negative prompt instead if empty.")
+                            enhance_prompt = gr.Textbox(label="Enhancement positive prompt",
+                                                        placeholder="Uses original prompt instead if empty.")
+                            enhance_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
+                                                                 placeholder="Uses original negative prompt instead if empty.")
 
                             with gr.Accordion("Detection", open=False):
                                 # TODO check if limiting to SAM is better
-                                stage2_mask_inpaint_mask_model = gr.Dropdown(label='Mask generation model',
-                                                                             choices=flags.inpaint_mask_models,
-                                                                             value=modules.config.default_stage2_inpaint_mask_model)
-                                stage2_mask_inpaint_mask_cloth_category = gr.Dropdown(label='Cloth category',
-                                                                                      choices=flags.inpaint_mask_cloth_category,
-                                                                                      value=modules.config.default_inpaint_mask_cloth_category,
-                                                                                      visible=modules.config.default_stage2_inpaint_mask_model == 'u2net_cloth_seg',
-                                                                                      interactive=True)
+                                enhance_mask_model = gr.Dropdown(label='Mask generation model',
+                                                                 choices=flags.inpaint_mask_models,
+                                                                 value=modules.config.default_enhance_inpaint_mask_model)
+                                enhance_mask_cloth_category = gr.Dropdown(label='Cloth category',
+                                                                          choices=flags.inpaint_mask_cloth_category,
+                                                                          value=modules.config.default_inpaint_mask_cloth_category,
+                                                                          visible=modules.config.default_enhance_inpaint_mask_model == 'u2net_cloth_seg',
+                                                                          interactive=True)
 
                                 with gr.Accordion("SAM Options",
-                                                  visible=modules.config.default_stage2_inpaint_mask_model == 'sam',
+                                                  visible=modules.config.default_enhance_inpaint_mask_model == 'sam',
                                                   open=False) as sam_options:
-                                    stage2_mask_sam_model = gr.Dropdown(label='SAM model',
-                                                                        choices=flags.inpaint_mask_sam_model,
-                                                                        value=modules.config.default_inpaint_mask_sam_model,
-                                                                        interactive=True)
-                                    stage2_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0,
-                                                                          maximum=1.0, value=0.3, step=0.05,
-                                                                          interactive=True)
-                                    stage2_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
-                                                                           maximum=1.0, value=0.25, step=0.05,
+                                    enhance_mask_sam_model = gr.Dropdown(label='SAM model',
+                                                                         choices=flags.inpaint_mask_sam_model,
+                                                                         value=modules.config.default_inpaint_mask_sam_model,
+                                                                         interactive=True)
+                                    enhance_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0,
+                                                                           maximum=1.0, value=0.3, step=0.05,
                                                                            interactive=True)
-                                    stage2_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections",
-                                                                              minimum=1, maximum=5,
-                                                                              value=modules.config.default_sam_max_num_boxes,
-                                                                              step=1, interactive=True)
+                                    enhance_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
+                                                                            maximum=1.0, value=0.25, step=0.05,
+                                                                            interactive=True)
+                                    enhance_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections",
+                                                                               minimum=1, maximum=5,
+                                                                               value=modules.config.default_sam_max_num_boxes,
+                                                                               step=1, interactive=True)
 
                             with gr.Accordion("Inpaint", visible=True, open=False):
-                                stage2_mask_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
-                                                                       value=modules.flags.inpaint_option_default,
-                                                                       # TODO test
-                                                                       label='Method', interactive=True)
-                                stage2_mask_inpaint_disable_initial_latent = gr.Checkbox(
+                                enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
+                                                                   value=modules.flags.inpaint_option_default,
+                                                                   # TODO test
+                                                                   label='Method', interactive=True)
+                                enhance_inpaint_disable_initial_latent = gr.Checkbox(
                                     label='Disable initial latent in inpaint', value=False)
-                                stage2_mask_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
-                                                                         value=modules.config.default_inpaint_engine_version,
-                                                                         choices=flags.inpaint_engine_versions,
-                                                                         info='Version of Fooocus inpaint model')
-                                stage2_mask_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
-                                                                         minimum=0.0, maximum=1.0, step=0.001,
-                                                                         value=1.0,
-                                                                         info='Same as the denoising strength in A1111 inpaint. '
-                                                                              'Only used in inpaint, not used in outpaint. '
-                                                                              '(Outpaint always use 1.0)')
-                                stage2_mask_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
-                                                                                 minimum=0.0, maximum=1.0, step=0.001,
-                                                                                 value=0.618,
-                                                                                 info='The area to inpaint. '
-                                                                                      'Value 0 is same as "Only Masked" in A1111. '
-                                                                                      'Value 1 is same as "Whole Image" in A1111. '
-                                                                                      'Only used in inpaint, not used in outpaint. '
-                                                                                      '(Outpaint always use 1.0)')
+                                enhance_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
+                                                                     value=modules.config.default_inpaint_engine_version,
+                                                                     choices=flags.inpaint_engine_versions,
+                                                                     info='Version of Fooocus inpaint model')
+                                enhance_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
+                                                                     minimum=0.0, maximum=1.0, step=0.001,
+                                                                     value=1.0,
+                                                                     info='Same as the denoising strength in A1111 inpaint. '
+                                                                          'Only used in inpaint, not used in outpaint. '
+                                                                          '(Outpaint always use 1.0)')
+                                enhance_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
+                                                                             minimum=0.0, maximum=1.0, step=0.001,
+                                                                             value=0.618,
+                                                                             info='The area to inpaint. '
+                                                                                  'Value 0 is same as "Only Masked" in A1111. '
+                                                                                  'Value 1 is same as "Whole Image" in A1111. '
+                                                                                  'Only used in inpaint, not used in outpaint. '
+                                                                                  '(Outpaint always use 1.0)')
 
-                        stage2_ctrls += [
-                            stage2_enabled,
-                            stage2_mask_dino_prompt_text,
-                            stage2_prompt,
-                            stage2_negative_prompt,
-                            stage2_mask_sam_model,
-                            stage2_mask_text_threshold,
-                            stage2_mask_box_threshold,
-                            stage2_mask_sam_max_num_boxes,
-                            stage2_mask_inpaint_disable_initial_latent,
-                            stage2_mask_inpaint_engine,
-                            stage2_mask_inpaint_strength,
-                            stage2_mask_inpaint_respective_field
+                        enhance_ctrls += [
+                            enhance_enabled,
+                            enhance_mask_dino_prompt_text,
+                            enhance_prompt,
+                            enhance_negative_prompt,
+                            enhance_mask_sam_model,
+                            enhance_mask_text_threshold,
+                            enhance_mask_box_threshold,
+                            enhance_mask_sam_max_num_boxes,
+                            enhance_inpaint_disable_initial_latent,
+                            enhance_inpaint_engine,
+                            enhance_inpaint_strength,
+                            enhance_inpaint_respective_field
                         ]
 
-                        stage2_mask_inpaint_mode.input(inpaint_mode_change, inputs=stage2_mask_inpaint_mode, outputs=[
+                        enhance_inpaint_mode.input(inpaint_mode_change, inputs=enhance_inpaint_mode, outputs=[
                             inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
-                            stage2_mask_inpaint_disable_initial_latent, stage2_mask_inpaint_engine,
-                            stage2_mask_inpaint_strength, stage2_mask_inpaint_respective_field
+                            enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
+                            enhance_inpaint_strength, enhance_inpaint_respective_field
                         ], show_progress=False, queue=False)
 
-                        stage2_mask_inpaint_mask_model.change(
+                        enhance_mask_model.change(
                             lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
                                       [gr.update(visible=x == 'sam')] * 2 +
                                       [gr.Dataset.update(visible=x == 'sam',
-                                                         samples=modules.config.example_stage2_prompts)],
-                            inputs=stage2_mask_inpaint_mask_model,
-                            outputs=[stage2_mask_inpaint_mask_cloth_category, stage2_mask_dino_prompt_text, sam_options,
-                                     example_stage2_mask_dino_prompt_text],
+                                                         samples=modules.config.example_enhance_prompts)],
+                            inputs=enhance_mask_model,
+                            outputs=[enhance_mask_cloth_category, enhance_mask_dino_prompt_text, sam_options,
+                                     example_enhance_mask_dino_prompt_text],
                             queue=False, show_progress=False)
 
             switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
@@ -457,8 +458,8 @@ with shared.gradio_root:
             desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
 
-            stage2_checkbox.change(lambda x: gr.update(visible=x), inputs=stage2_checkbox,
-                                        outputs=stage2_input_panel, queue=False, show_progress=False, _js=switch_js)
+            enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox,
+                                        outputs=enhance_input_panel, queue=False, show_progress=False, _js=switch_js)
 
         with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column:
             with gr.Tab(label='Settings'):
@@ -894,7 +895,7 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
-        ctrls += [debugging_dino, dino_erode_or_dilate] + stage2_ctrls
+        ctrls += [debugging_dino, dino_erode_or_dilate] + enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):
             loaded_json = None

From b585d9dfa78dadefcf04866b2e60a2595098f681 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 15:57:53 +0200
Subject: [PATCH 037/101] fix: correctly count sam masks

---
 extras/inpaint_mask.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index f9025ef2..f8ecd2c8 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -85,10 +85,7 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
 
     sam_predictor = SamPredictor(sam)
     final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
-
     dino_detection_count = boxes.size(0)
-    sam_detection_count = 0
-    sam_detection_on_mask_count = 0
 
     if dino_detection_count > 0:
         sam_predictor.set_image(image)
@@ -118,11 +115,12 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
         )
 
         masks = optimize_masks(masks)
-
+        sam_detection_count = len(masks)
         sam_objects = min(len(logits), sam_options.max_num_boxes)
         for obj_ind in range(sam_objects):
             mask_tensor = masks[obj_ind][0]
             final_mask_tensor += mask_tensor
+            sam_detection_on_mask_count += 1
 
     final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy()
     mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255

From 541fb2d445d2b46710a96094afd9cdd9b4d8a621 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 15:58:27 +0200
Subject: [PATCH 038/101] feat: add support for enhance prompts

---
 modules/async_worker.py | 101 +++++++++++++++++++++++++++-------------
 1 file changed, 69 insertions(+), 32 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 8d6004f8..a1fe521a 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -114,20 +114,30 @@ class AsyncTask:
         self.enhance_ctrls = []
         for _ in range(modules.config.default_enhance_tabs):
             enhance_enabled = args.pop()
-            # enhance_mode = args.pop()
             enhance_mask_dino_prompt_text = args.pop()
-            enhance_mask_box_threshold = args.pop()
-            enhance_mask_text_threshold = args.pop()
-            enhance_mask_sam_max_num_boxes = args.pop()
+            enhance_prompt = args.pop()
+            enhance_negative_prompt = args.pop()
             enhance_mask_sam_model = args.pop()
+            enhance_mask_text_threshold = args.pop()
+            enhance_mask_box_threshold = args.pop()
+            enhance_mask_sam_max_num_boxes = args.pop()
+            enhance_inpaint_disable_initial_latent = args.pop()
+            enhance_inpaint_engine = args.pop()
+            enhance_inpaint_strength = args.pop()
+            enhance_inpaint_respective_field = args.pop()
             if enhance_enabled:
                 self.enhance_ctrls.append([
-                    # enhance_mode,
                     enhance_mask_dino_prompt_text,
-                    enhance_mask_box_threshold,
-                    enhance_mask_text_threshold,
-                    enhance_mask_sam_max_num_boxes,
+                    enhance_prompt,
+                    enhance_negative_prompt,
                     enhance_mask_sam_model,
+                    enhance_mask_text_threshold,
+                    enhance_mask_box_threshold,
+                    enhance_mask_sam_max_num_boxes,
+                    enhance_inpaint_disable_initial_latent,
+                    enhance_inpaint_engine,
+                    enhance_inpaint_strength,
+                    enhance_inpaint_respective_field
                 ])
 
 
@@ -252,11 +262,10 @@ def worker():
         return
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
-                     denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks,
-                     tiled, use_expansion, width, height):
+                     denoising_strength, final_scheduler_name, goals, initial_latent, switch, positive_cond,
+                     negative_cond, task, tasks, tiled, use_expansion, width, height):
         if async_task.last_stop is not False:
             ldm_patched.modules.model_management.interrupt_current_processing()
-        positive_cond, negative_cond = task['c'], task['uc']
         if 'cn' in goals:
             for cn_flag, cn_path in [
                 (flags.cn_canny, controlnet_canny_path),
@@ -455,7 +464,7 @@ def worker():
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
                       inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch,
-                      current_progress, skip_apply_outpaint=False):
+                      inpaint_disable_initial_latent, current_progress, skip_apply_outpaint=False):
         if not skip_apply_outpaint:
             inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask)
 
@@ -503,7 +512,7 @@ def worker():
                 inpaint_latent_mask=latent_mask,
                 model=pipeline.final_unet
             )
-        if not async_task.inpaint_disable_initial_latent:
+        if not inpaint_disable_initial_latent:
             initial_latent = {'samples': latent_fill}
         B, C, H, W = latent_fill.shape
         height, width = H * 8, W * 8
@@ -708,7 +717,7 @@ def worker():
             else:
                 progressbar(async_task, 6, f'Encoding negative #{i + 1} ...')
                 t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
-        return tasks, use_expansion
+        return tasks, use_expansion, loras
 
     def apply_freeu(async_task):
         print(f'FreeU is enabled!')
@@ -965,7 +974,7 @@ def worker():
         progressbar(async_task, 1, 'Initializing ...')
 
         if not skip_prompt_processing:
-            tasks, use_expansion = process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
+            tasks, use_expansion, loras = process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
                                                   use_synthetic_refiner)
 
         if len(goals) > 0:
@@ -983,11 +992,14 @@ def worker():
         if 'inpaint' in goals:
             try:
                 denoising_strength, initial_latent, width, height = apply_inpaint(async_task, initial_latent,
-                                                                                  inpaint_head_model_path, inpaint_image,
+                                                                                  inpaint_head_model_path,
+                                                                                  inpaint_image,
                                                                                   inpaint_mask, inpaint_parameterized,
                                                                                   async_task.inpaint_strength,
                                                                                   async_task.inpaint_respective_field,
-                                                                                  switch, 11)
+                                                                                  switch,
+                                                                                  async_task.inpaint_disable_initial_latent,
+                                                                                  11)
             except EarlyReturnException:
                 return
 
@@ -1034,9 +1046,12 @@ def worker():
             execution_start_time = time.perf_counter()
 
             try:
-                imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                             current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
-                             switch, task, tasks, tiled, use_expansion, width, height)
+                imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
+                                                                 controlnet_cpds_path,
+                                                                 current_task_id, denoising_strength,
+                                                                 final_scheduler_name, goals, initial_latent,
+                                                                 switch, task['c'], task['uc'], task,
+                                                                 tasks, tiled, use_expansion, width, height)
 
                 # enhance
                 progressbar(async_task, current_progress, 'Processing enhance ...')
@@ -1046,7 +1061,7 @@ def worker():
                     continue
 
                 for img in imgs:
-                    for enhance_mask_dino_prompt_text, enhance_mask_box_threshold, enhance_mask_text_threshold, enhance_mask_sam_max_num_boxes, enhance_mask_sam_model in async_task.enhance_ctrls:
+                    for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
                         print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
                         mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(img, sam_options=SAMOptions(
                             dino_prompt=enhance_mask_dino_prompt_text,
@@ -1093,22 +1108,44 @@ def worker():
                         #     apply_freeu(async_task)
                         # patch_samplers(async_task)
 
-                        # defaults from inpaint mode improve details
-                        denoising_strength_enhance = 0.5
-                        inpaint_respective_field_enhance = 0.0
+                        positive_cond = task['c']
+                        if enhance_prompt is not '':
+                            progressbar(async_task, current_progress, f'Encoding positive ...')
+                            positive_cond = pipeline.clip_encode(texts=[enhance_prompt], pool_top_k=1)
+
+                        negative_cond = task['uc']
+                        if abs(float(async_task.cfg_scale) - 1.0) < 1e-4:
+                            negative_cond = pipeline.clone_cond(positive_cond)
+                        elif enhance_negative_prompt is not '':
+                            progressbar(async_task, current_progress, f'Encoding negative ...')
+                            negative_cond = pipeline.clip_encode(texts=[enhance_negative_prompt], pool_top_k=1)
+
                         inpaint_head_model_path_enhance = None
-                        inpaint_parameterized_enhance = False  # inpaint_engine = None, improve detail
+                        inpaint_parameterized_enhance = enhance_inpaint_engine != 'None'  # inpaint_engine = None, improve detail
+
+                        if inpaint_parameterized_enhance:
+                            progressbar(async_task, current_progress, 'Downloading inpainter ...')
+                            inpaint_head_model_path_enhance, inpaint_patch_model_path_enhance = modules.config.downloading_inpaint_models(
+                                async_task.inpaint_engine)
+                            if inpaint_patch_model_path_enhance not in base_model_additional_loras:
+                                base_model_additional_loras += [(inpaint_patch_model_path_enhance, 1.0)]
+                                pipeline.refresh_loras(loras, base_model_additional_loras=base_model_additional_loras)
 
                         goals_enhance = ['inpaint']
-                        denoising_strength_enhance, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
+                        enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
                             async_task, None, inpaint_head_model_path_enhance, img, mask,
-                            inpaint_parameterized_enhance, denoising_strength_enhance,
-                            inpaint_respective_field_enhance, switch, current_progress, True)
+                            inpaint_parameterized_enhance, enhance_inpaint_strength,
+                            enhance_inpaint_respective_field, switch, enhance_inpaint_disable_initial_latent,
+                            current_progress, True)
 
-                        imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                                     current_task_id, denoising_strength_enhance, final_scheduler_name, goals_enhance,
-                                     initial_latent_enhance, switch, task, tasks, tiled, use_expansion, width_enhance,
-                                     height_enhance)
+                        imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback,
+                                                                          controlnet_canny_path, controlnet_cpds_path,
+                                                                          current_task_id, enhance_inpaint_strength,
+                                                                          final_scheduler_name, goals_enhance,
+                                                                          initial_latent_enhance, switch, positive_cond,
+                                                                          negative_cond, task, tasks,
+                                                                          tiled, use_expansion, width_enhance,
+                                                                          height_enhance)
 
                         # reset and prepare next iteration
                         img = imgs2[0]

From b3e4506009799f6e19b9a55506bc5ca6aa2b85fa Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 16:48:12 +0200
Subject: [PATCH 039/101] fix: correctly identify and remove performance LoRA

---
 modules/meta_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/meta_parser.py b/modules/meta_parser.py
index 0c9cde54..7ec8f379 100644
--- a/modules/meta_parser.py
+++ b/modules/meta_parser.py
@@ -61,7 +61,7 @@ def load_parameter_button_click(raw_metadata: dict | str, is_generating: bool):
 
     # prevent performance LoRAs to be added twice, by performance and by lora
     performance_filename = None
-    if performance is not None and performance in Performance.list():
+    if performance is not None and performance in Performance.values():
         performance = Performance(performance)
         performance_filename = performance.lora_filename()
 
@@ -232,7 +232,7 @@ def parse_meta_from_preset(preset_content):
             loras = getattr(modules.config, settings_key)
             if settings_key in items:
                 loras = items[settings_key]
-            for index, lora in enumerate(loras[:5]):
+            for index, lora in enumerate(loras[:modules.config.default_max_lora_number]):
                 preset_prepared[f'lora_combined_{index + 1}'] = ' : '.join(map(str, lora))
         elif settings_key == "default_aspect_ratio":
             if settings_key in items and items[settings_key] is not None:

From ea1562d078d2d7cd3bc65b4d4f8e0bb9c49ae263 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 17:46:48 +0200
Subject: [PATCH 040/101] fix: add workaround for same value in Steps IntEnum

---
 modules/flags.py       | 4 ++++
 modules/meta_parser.py | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/modules/flags.py b/modules/flags.py
index 1169bd5b..4e72450e 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -135,6 +135,10 @@ class Steps(IntEnum):
     LIGHTNING = 4
     HYPER_SD = 4
 
+    @classmethod
+    def keys(cls) -> list:
+        return list(map(lambda c: c, Steps.__members__))
+
 
 class StepsUOV(IntEnum):
     QUALITY = 36
diff --git a/modules/meta_parser.py b/modules/meta_parser.py
index 7ec8f379..8997a878 100644
--- a/modules/meta_parser.py
+++ b/modules/meta_parser.py
@@ -119,8 +119,9 @@ def get_steps(key: str, fallback: str | None, source_dict: dict, results: list,
         assert h is not None
         h = int(h)
         # if not in steps or in steps and performance is not the same
-        if h not in iter(Steps) or Steps(h).name.casefold() != source_dict.get('performance', '').replace(' ',
-                                                                                                          '_').casefold():
+        performance_name = source_dict.get('performance', '').replace(' ', '_').replace('-', '_').casefold()
+        performance_candidates = [key for key in Steps.keys() if key.casefold() == performance_name and Steps[key] == h]
+        if len(performance_candidates) == 0:
             results.append(h)
             return
         results.append(-1)

From bf6820812f051204dbdb9113e08c11692cc4285e Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 18:39:40 +0200
Subject: [PATCH 041/101] feat: only process enhance when checkbox is set

Enhance steps are now skipped unless the enhance checkbox is ticked,
similar to how the input image is handled.
---
 modules/async_worker.py | 16 +++++++++-------
 webui.py                |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index a1fe521a..ba84bda1 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -111,6 +111,7 @@ class AsyncTask:
         self.debugging_dino = args.pop()
         self.dino_erode_or_dilate = args.pop()
 
+        self.enhance_checkbox = args.pop()
         self.enhance_ctrls = []
         for _ in range(modules.config.default_enhance_tabs):
             enhance_enabled = args.pop()
@@ -913,13 +914,13 @@ def worker():
                         async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload, width=W, height=H)
                         async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
                         async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(np.uint8) * 255
-                        async_task.inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
+                        inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
 
                 if int(async_task.inpaint_erode_or_dilate) != 0:
-                    async_task.inpaint_mask = erode_or_dilate(async_task.inpaint_mask, async_task.inpaint_erode_or_dilate)
+                    inpaint_mask = erode_or_dilate(inpaint_mask, async_task.inpaint_erode_or_dilate)
 
                 if async_task.invert_mask_checkbox:
-                    async_task.inpaint_mask = 255 - async_task.inpaint_mask
+                    inpaint_mask = 255 - inpaint_mask
 
                 inpaint_image = HWC3(inpaint_image)
                 if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
@@ -1053,13 +1054,14 @@ def worker():
                                                                  switch, task['c'], task['uc'], task,
                                                                  tasks, tiled, use_expansion, width, height)
 
-                # enhance
-                progressbar(async_task, current_progress, 'Processing enhance ...')
-                final_unet = pipeline.final_unet
-                if len(async_task.enhance_ctrls) == 0 or 'inpaint' in goals:
+                if not async_task.enhance_checkbox or len(async_task.enhance_ctrls) == 0 or 'inpaint' in goals:
                     print(f'[Enhance] Skipping, preconditions aren\'t met')
                     continue
 
+                # enhance
+                progressbar(async_task, current_progress, 'Processing enhance ...')
+                final_unet = pipeline.final_unet.clone()
+
                 for img in imgs:
                     for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
                         print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
diff --git a/webui.py b/webui.py
index 4043eddf..7bd8cd76 100644
--- a/webui.py
+++ b/webui.py
@@ -895,7 +895,7 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
-        ctrls += [debugging_dino, dino_erode_or_dilate] + enhance_ctrls
+        ctrls += [debugging_dino, dino_erode_or_dilate, enhance_checkbox] + enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):
             loaded_json = None

From b64d6d393cf76103d147ef84e11f02f7b18f8282 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 18:49:52 +0200
Subject: [PATCH 042/101] fix: use != for literal comparison

---
 modules/async_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index ba84bda1..8c35d7af 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1118,7 +1118,7 @@ def worker():
                         negative_cond = task['uc']
                         if abs(float(async_task.cfg_scale) - 1.0) < 1e-4:
                             negative_cond = pipeline.clone_cond(positive_cond)
-                        elif enhance_negative_prompt is not '':
+                        elif enhance_negative_prompt != '':
                             progressbar(async_task, current_progress, f'Encoding negative ...')
                             negative_cond = pipeline.clip_encode(texts=[enhance_negative_prompt], pool_top_k=1)
 

From ff3418876de908699c81ee8f94a50bf13f3b1ea8 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 19:27:31 +0200
Subject: [PATCH 043/101] feat: refresh the whole pipeline, allows usage of
 inpaint and enhancement prompts

---
 modules/async_worker.py | 260 ++++++++++++++++++++--------------------
 webui.py                |   2 -
 2 files changed, 133 insertions(+), 129 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 8c35d7af..8925431b 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -622,10 +622,10 @@ def worker():
             height = async_task.overwrite_height
         return height, switch, width
 
-    def process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
+    def process_prompt(async_task, prompt, negative_prompt, base_model_additional_loras, image_number, disable_seed_increment, use_expansion, use_style,
                        use_synthetic_refiner):
-        prompts = remove_empty_str([safe_str(p) for p in async_task.prompt.splitlines()], default='')
-        negative_prompts = remove_empty_str([safe_str(p) for p in async_task.negative_prompt.splitlines()], default='')
+        prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')
+        negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='')
         prompt = prompts[0]
         negative_prompt = negative_prompts[0]
         if prompt == '':
@@ -647,8 +647,8 @@ def worker():
         pipeline.set_clip_skip(async_task.clip_skip)
         progressbar(async_task, 3, 'Processing prompts ...')
         tasks = []
-        for i in range(async_task.image_number):
-            if async_task.disable_seed_increment:
+        for i in range(image_number):
+            if disable_seed_increment:
                 task_seed = async_task.seed % (constants.MAX_SEED + 1)
             else:
                 task_seed = (async_task.seed + i) % (constants.MAX_SEED + 1)  # randint is inclusive, % is not
@@ -811,6 +811,95 @@ def worker():
         async_task.adm_scaler_negative = 1.0
         async_task.adm_scaler_end = 0.0
 
+    def apply_image_input(async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path,
+                          controlnet_cpds_path, goals, inpaint_head_model_path, inpaint_mask, inpaint_parameterized,
+                          ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing,
+                          use_synthetic_refiner):
+        """Resolve goals and download models for the active image-input tab (uov / inpaint / image prompt).
+
+        Mutates ``goals``, ``base_model_additional_loras`` and several ``async_task`` fields in place; the other
+        arguments are passed back, replaced where a branch sets them. ``inpaint_image`` is None if inpaint is skipped.
+        """
+        inpaint_image = None  # local, not a parameter: must be bound even when the inpaint branch is skipped
+        if (async_task.current_tab == 'uov' or (
+                async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
+                and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
+            async_task.uov_input_image = HWC3(async_task.uov_input_image)
+            if 'vary' in async_task.uov_method:
+                goals.append('vary')
+            elif 'upscale' in async_task.uov_method:
+                goals.append('upscale')
+                if 'fast' in async_task.uov_method:
+                    skip_prompt_processing = True
+                else:
+                    async_task.steps = async_task.performance_selection.steps_uov()
+
+                progressbar(async_task, 1, 'Downloading upscale models ...')
+                modules.config.downloading_upscale_model()
+        if (async_task.current_tab == 'inpaint' or (
+                async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
+                and isinstance(async_task.inpaint_input_image, dict):
+            inpaint_image = async_task.inpaint_input_image['image']
+            inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
+
+            if async_task.inpaint_mask_upload_checkbox:
+                if isinstance(async_task.inpaint_mask_image_upload, dict):
+                    if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
+                            and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
+                            and async_task.inpaint_mask_image_upload['image'].ndim == 3):
+                        async_task.inpaint_mask_image_upload = np.maximum(async_task.inpaint_mask_image_upload['image'], async_task.inpaint_mask_image_upload['mask'])
+                if isinstance(async_task.inpaint_mask_image_upload, np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
+                    H, W, C = inpaint_image.shape
+                    async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload, width=W, height=H)
+                    async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
+                    async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(np.uint8) * 255
+                    inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
+
+            if int(async_task.inpaint_erode_or_dilate) != 0:
+                inpaint_mask = erode_or_dilate(inpaint_mask, async_task.inpaint_erode_or_dilate)
+
+            if async_task.invert_mask_checkbox:
+                inpaint_mask = 255 - inpaint_mask
+
+            inpaint_image = HWC3(inpaint_image)
+            if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
+                    and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
+                progressbar(async_task, 1, 'Downloading upscale models ...')
+                modules.config.downloading_upscale_model()
+                if inpaint_parameterized:
+                    progressbar(async_task, 1, 'Downloading inpainter ...')
+                    inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(async_task.inpaint_engine)
+                    base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+                    print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
+                    if async_task.refiner_model_name == 'None':
+                        use_synthetic_refiner = True
+                        async_task.refiner_switch = 0.8
+                else:
+                    inpaint_head_model_path, inpaint_patch_model_path = None, None
+                    print(f'[Inpaint] Parameterized inpaint is disabled.')
+                if async_task.inpaint_additional_prompt != '':
+                    if async_task.prompt == '':
+                        async_task.prompt = async_task.inpaint_additional_prompt
+                    else:
+                        async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
+                goals.append('inpaint')
+        if async_task.current_tab == 'ip' or \
+                async_task.mixing_image_prompt_and_vary_upscale or \
+                async_task.mixing_image_prompt_and_inpaint:
+            goals.append('cn')
+            progressbar(async_task, 1, 'Downloading control models ...')
+            if len(async_task.cn_tasks[flags.cn_canny]) > 0:
+                controlnet_canny_path = modules.config.downloading_controlnet_canny()
+            if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
+                controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
+            if len(async_task.cn_tasks[flags.cn_ip]) > 0:
+                clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
+            if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
+                clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
+                    'face')
+        return base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner
+
+
     @torch.no_grad()
     @torch.inference_mode()
     def handler(async_task: AsyncTask):
@@ -882,83 +971,10 @@ def worker():
         tasks = []
 
         if async_task.input_image_checkbox:
-            if (async_task.current_tab == 'uov' or (
-                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
-                    and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
-                async_task.uov_input_image = HWC3(async_task.uov_input_image)
-                if 'vary' in async_task.uov_method:
-                    goals.append('vary')
-                elif 'upscale' in async_task.uov_method:
-                    goals.append('upscale')
-                    if 'fast' in async_task.uov_method:
-                        skip_prompt_processing = True
-                    else:
-                        async_task.steps = async_task.performance_selection.steps_uov()
-
-                    progressbar(async_task, 1, 'Downloading upscale models ...')
-                    modules.config.downloading_upscale_model()
-            if (async_task.current_tab == 'inpaint' or (
-                    async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
-                    and isinstance(async_task.inpaint_input_image, dict):
-                inpaint_image = async_task.inpaint_input_image['image']
-                inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
-
-                if async_task.inpaint_mask_upload_checkbox:
-                    if isinstance(async_task.inpaint_mask_image_upload, dict):
-                        if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
-                                and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
-                                and async_task.inpaint_mask_image_upload['image'].ndim == 3):
-                            async_task.inpaint_mask_image_upload = np.maximum(async_task.inpaint_mask_image_upload['image'], async_task.inpaint_mask_image_upload['mask'])
-                    if isinstance(async_task.inpaint_mask_image_upload, np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
-                        H, W, C = inpaint_image.shape
-                        async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload, width=W, height=H)
-                        async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
-                        async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(np.uint8) * 255
-                        inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
-
-                if int(async_task.inpaint_erode_or_dilate) != 0:
-                    inpaint_mask = erode_or_dilate(inpaint_mask, async_task.inpaint_erode_or_dilate)
-
-                if async_task.invert_mask_checkbox:
-                    inpaint_mask = 255 - inpaint_mask
-
-                inpaint_image = HWC3(inpaint_image)
-                if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
-                        and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
-                    progressbar(async_task, 1, 'Downloading upscale models ...')
-                    modules.config.downloading_upscale_model()
-                    if inpaint_parameterized:
-                        progressbar(async_task, 1, 'Downloading inpainter ...')
-                        inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
-                            async_task.inpaint_engine)
-                        base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
-                        print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
-                        if async_task.refiner_model_name == 'None':
-                            use_synthetic_refiner = True
-                            async_task.refiner_switch = 0.8
-                    else:
-                        inpaint_head_model_path, inpaint_patch_model_path = None, None
-                        print(f'[Inpaint] Parameterized inpaint is disabled.')
-                    if async_task.inpaint_additional_prompt != '':
-                        if async_task.prompt == '':
-                            async_task.prompt = async_task.inpaint_additional_prompt
-                        else:
-                            async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
-                    goals.append('inpaint')
-            if async_task.current_tab == 'ip' or \
-                    async_task.mixing_image_prompt_and_vary_upscale or \
-                    async_task.mixing_image_prompt_and_inpaint:
-                goals.append('cn')
-                progressbar(async_task, 1, 'Downloading control models ...')
-                if len(async_task.cn_tasks[flags.cn_canny]) > 0:
-                    controlnet_canny_path = modules.config.downloading_controlnet_canny()
-                if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
-                    controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
-                if len(async_task.cn_tasks[flags.cn_ip]) > 0:
-                    clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
-                if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
-                    clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
-                        'face')
+            base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner = apply_image_input(
+                async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path,
+                goals, inpaint_head_model_path, inpaint_mask, inpaint_parameterized, ip_adapter_face_path, ip_adapter_path,
+                ip_negative_path, skip_prompt_processing, use_synthetic_refiner)
 
 
         # Load or unload CNs
@@ -975,8 +991,10 @@ def worker():
         progressbar(async_task, 1, 'Initializing ...')
 
         if not skip_prompt_processing:
-            tasks, use_expansion, loras = process_prompt(async_task, base_model_additional_loras, use_expansion, use_style,
-                                                  use_synthetic_refiner)
+            tasks, use_expansion, loras = process_prompt(async_task, async_task.prompt, async_task.negative_prompt,
+                                                         base_model_additional_loras, async_task.image_number,
+                                                         async_task.disable_seed_increment, use_expansion, use_style,
+                                                         use_synthetic_refiner)
 
         if len(goals) > 0:
             progressbar(async_task, 7, 'Image processing ...')
@@ -1054,13 +1072,12 @@ def worker():
                                                                  switch, task['c'], task['uc'], task,
                                                                  tasks, tiled, use_expansion, width, height)
 
-                if not async_task.enhance_checkbox or len(async_task.enhance_ctrls) == 0 or 'inpaint' in goals:
+                if not async_task.enhance_checkbox or len(async_task.enhance_ctrls) == 0:
                     print(f'[Enhance] Skipping, preconditions aren\'t met')
                     continue
 
                 # enhance
                 progressbar(async_task, current_progress, 'Processing enhance ...')
-                final_unet = pipeline.final_unet.clone()
 
                 for img in imgs:
                     for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
@@ -1076,6 +1093,9 @@ def worker():
                         ))
                         mask = mask[:, :, 0]
 
+                        if int(async_task.inpaint_erode_or_dilate) != 0:
+                            mask = erode_or_dilate(mask, async_task.inpaint_erode_or_dilate)
+
                         async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
                         # TODO also show do_not_show_finished_images=len(tasks) == 1
                         yield_result(async_task, mask, async_task.black_out_nsfw, False,
@@ -1090,38 +1110,7 @@ def worker():
                             print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
                             continue
 
-                        # TODO make configurable
-
-                        # # do not apply loras / controlnets / etc. twice (samplers are needed though)
-                        # pipeline.final_unet = pipeline.model_base.unet.clone()
-
-                        # pipeline.refresh_everything(refiner_model_name=async_task.refiner_model_name,
-                        #                             base_model_name=async_task.base_model_name,
-                        #                             loras=[],
-                        #                             base_model_additional_loras=[],
-                        #                             use_synthetic_refiner=use_synthetic_refiner,
-                        #                             vae_name=async_task.vae_name)
-                        # pipeline.set_clip_skip(async_task.clip_skip)
-                        #
-                        # # patch everything again except original inpainting
-                        # if 'cn' in goals:
-                        #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
-                        # if async_task.freeu_enabled:
-                        #     apply_freeu(async_task)
-                        # patch_samplers(async_task)
-
-                        positive_cond = task['c']
-                        if enhance_prompt is not '':
-                            progressbar(async_task, current_progress, f'Encoding positive ...')
-                            positive_cond = pipeline.clip_encode(texts=[enhance_prompt], pool_top_k=1)
-
-                        negative_cond = task['uc']
-                        if abs(float(async_task.cfg_scale) - 1.0) < 1e-4:
-                            negative_cond = pipeline.clone_cond(positive_cond)
-                        elif enhance_negative_prompt != '':
-                            progressbar(async_task, current_progress, f'Encoding negative ...')
-                            negative_cond = pipeline.clip_encode(texts=[enhance_negative_prompt], pool_top_k=1)
-
+                        base_model_additional_loras_enhance = []
                         inpaint_head_model_path_enhance = None
                         inpaint_parameterized_enhance = enhance_inpaint_engine != 'None'  # inpaint_engine = None, improve detail
 
@@ -1129,9 +1118,28 @@ def worker():
                             progressbar(async_task, current_progress, 'Downloading inpainter ...')
                             inpaint_head_model_path_enhance, inpaint_patch_model_path_enhance = modules.config.downloading_inpaint_models(
                                 async_task.inpaint_engine)
-                            if inpaint_patch_model_path_enhance not in base_model_additional_loras:
-                                base_model_additional_loras += [(inpaint_patch_model_path_enhance, 1.0)]
-                                pipeline.refresh_loras(loras, base_model_additional_loras=base_model_additional_loras)
+                            if inpaint_patch_model_path_enhance not in base_model_additional_loras_enhance:
+                                base_model_additional_loras_enhance += [(inpaint_patch_model_path_enhance, 1.0)]
+
+                        if len(remove_empty_str([safe_str(p) for p in enhance_prompt.splitlines()], default='')) == 0:
+                            enhance_prompt = async_task.prompt
+                        if len(remove_empty_str([safe_str(p) for p in enhance_negative_prompt.splitlines()], default='')) == 0:
+                            enhance_negative_prompt = async_task.negative_prompt
+
+                        tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
+                                                                             enhance_negative_prompt,
+                                                                             base_model_additional_loras_enhance,
+                                                                             1, True,
+                                                                             use_expansion, use_style,
+                                                                             use_synthetic_refiner)
+                        task_enhance = tasks_enhance[0]
+
+                        # TODO could support vary, upscale and CN in the future
+                        # if 'cn' in goals:
+                        #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+                        if async_task.freeu_enabled:
+                            apply_freeu(async_task)
+                        patch_samplers(async_task)
 
                         goals_enhance = ['inpaint']
                         enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
@@ -1144,14 +1152,13 @@ def worker():
                                                                           controlnet_canny_path, controlnet_cpds_path,
                                                                           current_task_id, enhance_inpaint_strength,
                                                                           final_scheduler_name, goals_enhance,
-                                                                          initial_latent_enhance, switch, positive_cond,
-                                                                          negative_cond, task, tasks,
-                                                                          tiled, use_expansion, width_enhance,
-                                                                          height_enhance)
+                                                                          initial_latent_enhance, switch,
+                                                                          task_enhance['c'], task_enhance['uc'],
+                                                                          task_enhance, tasks_enhance, tiled,
+                                                                          use_expansion, width_enhance, height_enhance)
 
                         # reset and prepare next iteration
                         img = imgs2[0]
-                        pipeline.final_unet = final_unet
 
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
@@ -1171,7 +1178,6 @@ def worker():
         processing_time = time.perf_counter() - processing_start_time
         print(f'Processing time (total): {processing_time:.2f} seconds')
 
-
     while True:
         time.sleep(0.01)
         if len(async_tasks) > 0:
diff --git a/webui.py b/webui.py
index 7bd8cd76..8f2f86ad 100644
--- a/webui.py
+++ b/webui.py
@@ -336,8 +336,6 @@ with shared.gradio_root:
                         with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
                             enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
                                                           container=False)
-                            gr.HTML(
-                                'DISCLAIMER: The enhance feature does not work with Inpaint or Outpaint and will be skipped.')
 
                             enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
                                                                        info='Use singular whenever possible',

From 9c93c18d0b0c8075fc269a8d07c736b37fcaada8 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 21:05:10 +0200
Subject: [PATCH 044/101] fix: process other models than sam when using enhance

---
 extras/inpaint_mask.py  |  4 +---
 modules/async_worker.py | 15 ++++++++++-----
 webui.py                |  1 +
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index f8ecd2c8..1d04d86c 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -56,7 +56,7 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
     if 'image' in image:
         image = image['image']
 
-    if mask_model != 'sam' and sam_options is None:
+    if mask_model != 'sam' or sam_options is None:
         result = remove(
             image,
             session=new_session(mask_model, **extras),
@@ -66,8 +66,6 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
 
         return result, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
 
-    assert sam_options is not None
-
     detections, boxes, logits, phrases = default_groundingdino(
         image=image,
         caption=sam_options.dino_prompt,
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 8925431b..92918c73 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -118,6 +118,7 @@ class AsyncTask:
             enhance_mask_dino_prompt_text = args.pop()
             enhance_prompt = args.pop()
             enhance_negative_prompt = args.pop()
+            enhance_mask_model = args.pop()
             enhance_mask_sam_model = args.pop()
             enhance_mask_text_threshold = args.pop()
             enhance_mask_box_threshold = args.pop()
@@ -131,6 +132,7 @@ class AsyncTask:
                     enhance_mask_dino_prompt_text,
                     enhance_prompt,
                     enhance_negative_prompt,
+                    enhance_mask_model,
                     enhance_mask_sam_model,
                     enhance_mask_text_threshold,
                     enhance_mask_box_threshold,
@@ -1080,9 +1082,11 @@ def worker():
                 progressbar(async_task, current_progress, 'Processing enhance ...')
 
                 for img in imgs:
-                    for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
-                        print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
-                        mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(img, sam_options=SAMOptions(
+                    for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
+                        if enhance_mask_model == 'sam':
+                            print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
+
+                        mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(img, mask_model=enhance_mask_model, sam_options=SAMOptions(
                             dino_prompt=enhance_mask_dino_prompt_text,
                             dino_box_threshold=enhance_mask_box_threshold,
                             dino_text_threshold=enhance_mask_text_threshold,
@@ -1091,7 +1095,8 @@ def worker():
                             max_num_boxes=enhance_mask_sam_max_num_boxes,
                             model_type=enhance_mask_sam_model
                         ))
-                        mask = mask[:, :, 0]
+                        if len(mask.shape) == 3:
+                            mask = mask[:, :, 0]
 
                         if int(async_task.inpaint_erode_or_dilate) != 0:
                             mask = erode_or_dilate(mask, async_task.inpaint_erode_or_dilate)
@@ -1106,7 +1111,7 @@ def worker():
                         print(f'[Enhance] {sam_detection_count} segments detected in boxes')
                         print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
 
-                        if dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0:
+                        if enhance_mask_model == 'sam' and (dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0):
                             print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
                             continue
 
diff --git a/webui.py b/webui.py
index 8f2f86ad..2e650aa4 100644
--- a/webui.py
+++ b/webui.py
@@ -416,6 +416,7 @@ with shared.gradio_root:
                             enhance_mask_dino_prompt_text,
                             enhance_prompt,
                             enhance_negative_prompt,
+                            enhance_mask_model,
                             enhance_mask_sam_model,
                             enhance_mask_text_threshold,
                             enhance_mask_box_threshold,

From eeb1b79baa66d28b931ca1d68aa1d40a74754c1e Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 21:40:07 +0200
Subject: [PATCH 045/101] feat: optimize prompt translation

---
 modules/async_worker.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 92918c73..45db9be5 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -186,6 +186,7 @@ def worker():
     from modules.upscaler import perform_upscale
     from modules.flags import Performance
     from modules.meta_parser import get_metadata_parser
+    from modules.translator import translate2en
 
     pid = os.getpid()
     print(f'Started worker with PID {pid}')
@@ -757,11 +758,6 @@ def worker():
 
         return final_scheduler_name
 
-    def translate_prompts(async_task):
-        from modules.translator import translate2en
-        async_task.prompt = translate2en(async_task.prompt, 'prompt')
-        async_task.negative_prompt = translate2en(async_task.negative_prompt, 'negative prompt')
-
     def set_hyper_sd_defaults(async_task):
         print('Enter Hyper-SD mode.')
         progressbar(async_task, 1, 'Downloading Hyper-SD components ...')
@@ -901,6 +897,15 @@ def worker():
                     'face')
         return base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner
 
+    def prepare_enhance_prompt(prompt: str, fallback_prompt: str, translate: bool, type: str):
+        if len(remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')) == 0:
+            prompt = fallback_prompt
+        else:
+            if translate:
+                prompt = translate2en(prompt, type)
+            prompt = prompt + '\n' + fallback_prompt
+        return prompt
+
 
     @torch.no_grad()
     @torch.inference_mode()
@@ -932,7 +937,8 @@ def worker():
             set_hyper_sd_defaults(async_task)
 
         if async_task.translate_prompts:
-            translate_prompts(async_task)
+            async_task.prompt = translate2en(async_task.prompt, 'prompt')
+            async_task.negative_prompt = translate2en(async_task.negative_prompt, 'negative prompt')
 
         print(f'[Parameters] Adaptive CFG = {async_task.adaptive_cfg}')
         print(f'[Parameters] CLIP Skip = {async_task.clip_skip}')
@@ -1103,9 +1109,7 @@ def worker():
 
                         async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
                         # TODO also show do_not_show_finished_images=len(tasks) == 1
-                        yield_result(async_task, mask, async_task.black_out_nsfw, False,
-                                     do_not_show_finished_images=len(
-                                         tasks) == 1 or async_task.disable_intermediate_results)
+                        yield_result(async_task, mask, async_task.black_out_nsfw, False, do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
 
                         print(f'[Enhance] {dino_detection_count} boxes detected')
                         print(f'[Enhance] {sam_detection_count} segments detected in boxes')
@@ -1126,10 +1130,9 @@ def worker():
                             if inpaint_patch_model_path_enhance not in base_model_additional_loras_enhance:
                                 base_model_additional_loras_enhance += [(inpaint_patch_model_path_enhance, 1.0)]
 
-                        if len(remove_empty_str([safe_str(p) for p in enhance_prompt.splitlines()], default='')) == 0:
-                            enhance_prompt = async_task.prompt
-                        if len(remove_empty_str([safe_str(p) for p in enhance_negative_prompt.splitlines()], default='')) == 0:
-                            enhance_negative_prompt = async_task.negative_prompt
+                        progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
+                        enhance_prompt = prepare_enhance_prompt(enhance_prompt, async_task.prompt, async_task.translate_prompts, 'prompt')
+                        enhance_negative_prompt = prepare_enhance_prompt(enhance_negative_prompt, async_task.negative_prompt, async_task.translate_prompts, 'negative prompt')
 
                         tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
                                                                              enhance_negative_prompt,

From 33a9c156ad02b045b5595c771e597cef1778ada8 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 21:40:39 +0200
Subject: [PATCH 046/101] fix: do not apply loras more than one time when not
 using lora in prompt

---
 modules/util.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/util.py b/modules/util.py
index 4506b392..cd18081b 100644
--- a/modules/util.py
+++ b/modules/util.py
@@ -390,6 +390,9 @@ def get_enabled_loras(loras: list, remove_none=True) -> list:
 def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5,
                                       skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True,
                                       lora_filenames=None) -> tuple[List[Tuple[AnyStr, float]], str]:
+    # prevent unintended side effects when returning without detection
+    loras = loras.copy()
+
     if lora_filenames is None:
         lora_filenames = []
 

From e300930298f5f294de472104ade46250241108ba Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 22:18:11 +0200
Subject: [PATCH 047/101] feat: improve performance, do not reload the pipeline
 when prompts are the same as in the original task

---
 modules/async_worker.py | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 45db9be5..d6bfa299 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -897,12 +897,12 @@ def worker():
                     'face')
         return base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner
 
-    def prepare_enhance_prompt(prompt: str, fallback_prompt: str, translate: bool, type: str):
-        if len(remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')) == 0:
+    def prepare_enhance_prompt(prompt: str, fallback_prompt: str, translate: bool, prompt_type: str):
+        if safe_str(prompt) == '' or len(remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')) == 0:
             prompt = fallback_prompt
         else:
             if translate:
-                prompt = translate2en(prompt, type)
+                prompt = translate2en(prompt, prompt_type)
             prompt = prompt + '\n' + fallback_prompt
         return prompt
 
@@ -1134,20 +1134,24 @@ def worker():
                         enhance_prompt = prepare_enhance_prompt(enhance_prompt, async_task.prompt, async_task.translate_prompts, 'prompt')
                         enhance_negative_prompt = prepare_enhance_prompt(enhance_negative_prompt, async_task.negative_prompt, async_task.translate_prompts, 'negative prompt')
 
-                        tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
-                                                                             enhance_negative_prompt,
-                                                                             base_model_additional_loras_enhance,
-                                                                             1, True,
-                                                                             use_expansion, use_style,
-                                                                             use_synthetic_refiner)
-                        task_enhance = tasks_enhance[0]
+                        if not inpaint_parameterized_enhance and enhance_prompt == async_task.prompt and enhance_negative_prompt == async_task.negative_prompt:
+                            task_enhance = task.copy()
+                            tasks_enhance = tasks.copy()
+                        else:
+                            tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
+                                                                                 enhance_negative_prompt,
+                                                                                 base_model_additional_loras_enhance,
+                                                                                 1, True,
+                                                                                 use_expansion, use_style,
+                                                                                 use_synthetic_refiner)
+                            task_enhance = tasks_enhance[0]
 
-                        # TODO could support vary, upscale and CN in the future
-                        # if 'cn' in goals:
-                        #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
-                        if async_task.freeu_enabled:
-                            apply_freeu(async_task)
-                        patch_samplers(async_task)
+                            # TODO could support vary, upscale and CN in the future
+                            # if 'cn' in goals:
+                            #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+                            if async_task.freeu_enabled:
+                                apply_freeu(async_task)
+                            patch_samplers(async_task)
 
                         goals_enhance = ['inpaint']
                         enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(

From 65cc25e5bfd55584fea68d516f31370f885041e7 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 22:54:44 +0200
Subject: [PATCH 048/101] feat: add debug setting for enhance mask output

---
 modules/async_worker.py | 7 ++++---
 webui.py                | 7 +++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index d6bfa299..fdb7a3c4 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -110,6 +110,7 @@ class AsyncTask:
 
         self.debugging_dino = args.pop()
         self.dino_erode_or_dilate = args.pop()
+        self.debugging_enhance_masks_checkbox = args.pop()
 
         self.enhance_checkbox = args.pop()
         self.enhance_ctrls = []
@@ -1107,9 +1108,9 @@ def worker():
                         if int(async_task.inpaint_erode_or_dilate) != 0:
                             mask = erode_or_dilate(mask, async_task.inpaint_erode_or_dilate)
 
-                        async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
-                        # TODO also show do_not_show_finished_images=len(tasks) == 1
-                        yield_result(async_task, mask, async_task.black_out_nsfw, False, do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+                        if async_task.debugging_enhance_masks_checkbox:
+                            async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
+                            yield_result(async_task, mask, async_task.black_out_nsfw, False, async_task.disable_intermediate_results)
 
                         print(f'[Enhance] {dino_detection_count} boxes detected')
                         print(f'[Enhance] {sam_detection_count} segments detected in boxes')
diff --git a/webui.py b/webui.py
index 2e650aa4..9185dd10 100644
--- a/webui.py
+++ b/webui.py
@@ -712,8 +712,10 @@ with shared.gradio_root:
 
                     with gr.Tab(label='Inpaint'):
                         debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False)
+                        debugging_enhance_masks_checkbox = gr.Checkbox(label='Debug Enhance Masks', value=False,
+                                                                       info='Show enhance masks in preview and final results')
                         debugging_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
-                                                     info='Used for SAM object detection and box generation')
+                                                     info='Use GroundingDINO boxes instead of more detailed SAM masks')
                         inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
                         inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                      value=modules.config.default_inpaint_engine_version,
@@ -894,7 +896,8 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
-        ctrls += [debugging_dino, dino_erode_or_dilate, enhance_checkbox] + enhance_ctrls
+        ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox, enhance_checkbox]
+        ctrls += enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):
             loaded_json = None

From 7f3fd8f3186931fb03d5c8700180c1e82fd3da09 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 23:22:10 +0200
Subject: [PATCH 049/101] feat: update GroundingDINO from SwinT (tiny) to SwinB
 (base)

---
 .../config/GroundingDINO_SwinB_cfg.py         | 43 +++++++++++++++++++
 extras/GroundingDINO/util/inference.py        |  6 +--
 2 files changed, 46 insertions(+), 3 deletions(-)
 create mode 100644 extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py

diff --git a/extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py b/extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py
new file mode 100644
index 00000000..bc2788a8
--- /dev/null
+++ b/extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py
@@ -0,0 +1,43 @@
+batch_size = 1
+modelname = "groundingdino"
+backbone = "swin_B_384_22k"
+position_embedding = "sine"
+pe_temperatureH = 20
+pe_temperatureW = 20
+return_interm_indices = [1, 2, 3]
+backbone_freeze_keywords = None
+enc_layers = 6
+dec_layers = 6
+pre_norm = False
+dim_feedforward = 2048
+hidden_dim = 256
+dropout = 0.0
+nheads = 8
+num_queries = 900
+query_dim = 4
+num_patterns = 0
+num_feature_levels = 4
+enc_n_points = 4
+dec_n_points = 4
+two_stage_type = "standard"
+two_stage_bbox_embed_share = False
+two_stage_class_embed_share = False
+transformer_activation = "relu"
+dec_pred_bbox_embed_share = True
+dn_box_noise_scale = 1.0
+dn_label_noise_ratio = 0.5
+dn_label_coef = 1.0
+dn_bbox_coef = 1.0
+embed_init_tgt = True
+dn_labelbook_size = 2000
+max_text_len = 256
+text_encoder_type = "bert-base-uncased"
+use_text_enhancer = True
+use_fusion_layer = True
+use_checkpoint = True
+use_transformer_ckpt = True
+use_text_cross_attention = True
+text_dropout = 0.0
+fusion_dropout = 0.0
+fusion_droppath = 0.1
+sub_sentence_present = True
\ No newline at end of file
diff --git a/extras/GroundingDINO/util/inference.py b/extras/GroundingDINO/util/inference.py
index bc8b6429..11e1a77c 100644
--- a/extras/GroundingDINO/util/inference.py
+++ b/extras/GroundingDINO/util/inference.py
@@ -14,7 +14,7 @@ from groundingdino.util.inference import load_model, preprocess_caption, get_phr
 
 class GroundingDinoModel(Model):
     def __init__(self):
-        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py'
+        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py'
         self.model = None
         self.load_device = torch.device('cpu')
         self.offload_device = torch.device('cpu')
@@ -28,8 +28,8 @@ class GroundingDinoModel(Model):
     ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
         if self.model is None:
             filename = load_file_from_url(
-                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
-                file_name='groundingdino_swint_ogc.pth',
+                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha2/groundingdino_swinb_cogcoor.pth",
+                file_name='groundingdino_swinb_cogcoor.pth',
                 model_dir=path_inpaint)
             model = load_model(model_config_path=self.config_file, model_checkpoint_path=filename)
 

From 24d66f6f779f659dbbac2a1ecece0f4ad7fa3417 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 16 Jun 2024 23:22:21 +0200
Subject: [PATCH 050/101] Revert "feat: update GroundingDINO from SwinT (tiny)
 to SwinB (base)"

This reverts commit 7f3fd8f3186931fb03d5c8700180c1e82fd3da09.
---
 .../config/GroundingDINO_SwinB_cfg.py         | 43 -------------------
 extras/GroundingDINO/util/inference.py        |  6 +--
 2 files changed, 3 insertions(+), 46 deletions(-)
 delete mode 100644 extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py

diff --git a/extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py b/extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py
deleted file mode 100644
index bc2788a8..00000000
--- a/extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py
+++ /dev/null
@@ -1,43 +0,0 @@
-batch_size = 1
-modelname = "groundingdino"
-backbone = "swin_B_384_22k"
-position_embedding = "sine"
-pe_temperatureH = 20
-pe_temperatureW = 20
-return_interm_indices = [1, 2, 3]
-backbone_freeze_keywords = None
-enc_layers = 6
-dec_layers = 6
-pre_norm = False
-dim_feedforward = 2048
-hidden_dim = 256
-dropout = 0.0
-nheads = 8
-num_queries = 900
-query_dim = 4
-num_patterns = 0
-num_feature_levels = 4
-enc_n_points = 4
-dec_n_points = 4
-two_stage_type = "standard"
-two_stage_bbox_embed_share = False
-two_stage_class_embed_share = False
-transformer_activation = "relu"
-dec_pred_bbox_embed_share = True
-dn_box_noise_scale = 1.0
-dn_label_noise_ratio = 0.5
-dn_label_coef = 1.0
-dn_bbox_coef = 1.0
-embed_init_tgt = True
-dn_labelbook_size = 2000
-max_text_len = 256
-text_encoder_type = "bert-base-uncased"
-use_text_enhancer = True
-use_fusion_layer = True
-use_checkpoint = True
-use_transformer_ckpt = True
-use_text_cross_attention = True
-text_dropout = 0.0
-fusion_dropout = 0.0
-fusion_droppath = 0.1
-sub_sentence_present = True
\ No newline at end of file
diff --git a/extras/GroundingDINO/util/inference.py b/extras/GroundingDINO/util/inference.py
index 11e1a77c..bc8b6429 100644
--- a/extras/GroundingDINO/util/inference.py
+++ b/extras/GroundingDINO/util/inference.py
@@ -14,7 +14,7 @@ from groundingdino.util.inference import load_model, preprocess_caption, get_phr
 
 class GroundingDinoModel(Model):
     def __init__(self):
-        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinB_cfg.py'
+        self.config_file = 'extras/GroundingDINO/config/GroundingDINO_SwinT_OGC.py'
         self.model = None
         self.load_device = torch.device('cpu')
         self.offload_device = torch.device('cpu')
@@ -28,8 +28,8 @@ class GroundingDinoModel(Model):
     ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
         if self.model is None:
             filename = load_file_from_url(
-                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha2/groundingdino_swinb_cogcoor.pth",
-                file_name='groundingdino_swinb_cogcoor.pth',
+                url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
+                file_name='groundingdino_swint_ogc.pth',
                 model_dir=path_inpaint)
             model = load_model(model_config_path=self.config_file, model_checkpoint_path=filename)
 

From 3567c04918bdf8697a52fc204c4307e1d766c1f6 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 17 Jun 2024 21:40:59 +0200
Subject: [PATCH 051/101] wip: refactor code to make it more efficient

now first processes all tasks and then does enhancements
---
 modules/async_worker.py | 243 +++++++++++++++++++++++-----------------
 1 file changed, 143 insertions(+), 100 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index fdb7a3c4..41a769e0 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -268,7 +268,7 @@ def worker():
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, switch, positive_cond,
-                     negative_cond, task, tasks, tiled, use_expansion, width, height):
+                     negative_cond, task, tasks, tiled, use_expansion, width, height, base_progress, total_count):
         if async_task.last_stop is not False:
             ldm_patched.modules.model_management.interrupt_current_processing()
         if 'cn' in goals:
@@ -301,13 +301,13 @@ def worker():
         del positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-        current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
+        current_progress = int(base_progress + (100 - base_progress) * float(
             (current_task_id + 1) * async_task.steps) / float(all_steps))
         if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
             progressbar(async_task, current_progress, 'Checking for NSFW content ...')
             imgs = default_censor(imgs)
         progressbar(async_task, current_progress,
-                    f'Saving image {current_task_id + 1}/{async_task.image_number} to system ...')
+                    f'Saving image {current_task_id + 1}/{total_count} to system ...')
         img_paths = save_and_log(async_task, height, imgs, task, use_expansion, width)
         yield_result(async_task, img_paths, async_task.black_out_nsfw, False,
                      do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
@@ -907,6 +907,10 @@ def worker():
             prompt = prompt + '\n' + fallback_prompt
         return prompt
 
+    def stop_processing(async_task, processing_start_time):
+        async_task.processing = False
+        processing_time = time.perf_counter() - processing_start_time
+        print(f'Processing time (total): {processing_time:.2f} seconds')
 
     @torch.no_grad()
     @torch.inference_mode()
@@ -1041,6 +1045,9 @@ def worker():
 
         all_steps = async_task.steps * async_task.image_number
 
+        if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0:
+            all_steps += async_task.image_number * len(async_task.enhance_ctrls) * async_task.steps
+
         print(f'[Parameters] Denoising Strength = {denoising_strength}')
 
         if isinstance(initial_latent, dict) and 'samples' in initial_latent:
@@ -1060,11 +1067,17 @@ def worker():
 
         processing_start_time = time.perf_counter()
 
+        base_progress = int(flags.preparation_step_count)
+        current_progress = base_progress
+        total_count = async_task.image_number
+
         def callback(step, x0, x, total_steps, y):
             done_steps = current_task_id * async_task.steps + step
             async_task.yields.append(['preview', (
-                int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
-                f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{async_task.image_number} ...', y)])
+                int(base_progress + (100 - base_progress) * float(done_steps) / float(all_steps)),
+                f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
+
+        generated_imgs = {}
 
         for current_task_id, task in enumerate(tasks):
             current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
@@ -1079,99 +1092,10 @@ def worker():
                                                                  current_task_id, denoising_strength,
                                                                  final_scheduler_name, goals, initial_latent,
                                                                  switch, task['c'], task['uc'], task,
-                                                                 tasks, tiled, use_expansion, width, height)
+                                                                 tasks, tiled, use_expansion, width, height,
+                                                                 flags.preparation_step_count, async_task.image_number)
 
-                if not async_task.enhance_checkbox or len(async_task.enhance_ctrls) == 0:
-                    print(f'[Enhance] Skipping, preconditions aren\'t met')
-                    continue
-
-                # enhance
-                progressbar(async_task, current_progress, 'Processing enhance ...')
-
-                for img in imgs:
-                    for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
-                        if enhance_mask_model == 'sam':
-                            print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
-
-                        mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(img, mask_model=enhance_mask_model, sam_options=SAMOptions(
-                            dino_prompt=enhance_mask_dino_prompt_text,
-                            dino_box_threshold=enhance_mask_box_threshold,
-                            dino_text_threshold=enhance_mask_text_threshold,
-                            dino_erode_or_dilate=async_task.dino_erode_or_dilate,
-                            dino_debug=async_task.debugging_dino,
-                            max_num_boxes=enhance_mask_sam_max_num_boxes,
-                            model_type=enhance_mask_sam_model
-                        ))
-                        if len(mask.shape) == 3:
-                            mask = mask[:, :, 0]
-
-                        if int(async_task.inpaint_erode_or_dilate) != 0:
-                            mask = erode_or_dilate(mask, async_task.inpaint_erode_or_dilate)
-
-                        if async_task.debugging_enhance_masks_checkbox:
-                            async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
-                            yield_result(async_task, mask, async_task.black_out_nsfw, False, async_task.disable_intermediate_results)
-
-                        print(f'[Enhance] {dino_detection_count} boxes detected')
-                        print(f'[Enhance] {sam_detection_count} segments detected in boxes')
-                        print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
-
-                        if enhance_mask_model == 'sam' and (dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0):
-                            print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
-                            continue
-
-                        base_model_additional_loras_enhance = []
-                        inpaint_head_model_path_enhance = None
-                        inpaint_parameterized_enhance = enhance_inpaint_engine != 'None'  # inpaint_engine = None, improve detail
-
-                        if inpaint_parameterized_enhance:
-                            progressbar(async_task, current_progress, 'Downloading inpainter ...')
-                            inpaint_head_model_path_enhance, inpaint_patch_model_path_enhance = modules.config.downloading_inpaint_models(
-                                async_task.inpaint_engine)
-                            if inpaint_patch_model_path_enhance not in base_model_additional_loras_enhance:
-                                base_model_additional_loras_enhance += [(inpaint_patch_model_path_enhance, 1.0)]
-
-                        progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
-                        enhance_prompt = prepare_enhance_prompt(enhance_prompt, async_task.prompt, async_task.translate_prompts, 'prompt')
-                        enhance_negative_prompt = prepare_enhance_prompt(enhance_negative_prompt, async_task.negative_prompt, async_task.translate_prompts, 'negative prompt')
-
-                        if not inpaint_parameterized_enhance and enhance_prompt == async_task.prompt and enhance_negative_prompt == async_task.negative_prompt:
-                            task_enhance = task.copy()
-                            tasks_enhance = tasks.copy()
-                        else:
-                            tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
-                                                                                 enhance_negative_prompt,
-                                                                                 base_model_additional_loras_enhance,
-                                                                                 1, True,
-                                                                                 use_expansion, use_style,
-                                                                                 use_synthetic_refiner)
-                            task_enhance = tasks_enhance[0]
-
-                            # TODO could support vary, upscale and CN in the future
-                            # if 'cn' in goals:
-                            #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
-                            if async_task.freeu_enabled:
-                                apply_freeu(async_task)
-                            patch_samplers(async_task)
-
-                        goals_enhance = ['inpaint']
-                        enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
-                            async_task, None, inpaint_head_model_path_enhance, img, mask,
-                            inpaint_parameterized_enhance, enhance_inpaint_strength,
-                            enhance_inpaint_respective_field, switch, enhance_inpaint_disable_initial_latent,
-                            current_progress, True)
-
-                        imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback,
-                                                                          controlnet_canny_path, controlnet_cpds_path,
-                                                                          current_task_id, enhance_inpaint_strength,
-                                                                          final_scheduler_name, goals_enhance,
-                                                                          initial_latent_enhance, switch,
-                                                                          task_enhance['c'], task_enhance['uc'],
-                                                                          task_enhance, tasks_enhance, tiled,
-                                                                          use_expansion, width_enhance, height_enhance)
-
-                        # reset and prepare next iteration
-                        img = imgs2[0]
+                generated_imgs[current_task_id] = imgs
 
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
@@ -1186,10 +1110,129 @@ def worker():
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 
-        async_task.processing = False
+        if not async_task.enhance_checkbox or len(async_task.enhance_ctrls) == 0:
+            print(f'[Enhance] Skipping, preconditions aren\'t met')
+            stop_processing(async_task, processing_start_time)
+            return
 
-        processing_time = time.perf_counter() - processing_start_time
-        print(f'Processing time (total): {processing_time:.2f} seconds')
+        # enhance
+        progressbar(async_task, current_progress, 'Processing enhance ...')
+        total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * len(async_task.enhance_ctrls)
+        base_progress = current_progress
+        for generated_imgs_idx, (current_task_id, imgs) in enumerate(generated_imgs.items()):
+            for imgs_idx, img in enumerate(imgs):
+                for enhance_ctrls_idx, (enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field) in enumerate(async_task.enhance_ctrls):
+                    current_task_id = generated_imgs_idx + imgs_idx + enhance_ctrls_idx
+                    current_progress = int(base_progress + (100 - base_progress) * float(
+                        current_task_id * async_task.steps) / float(all_steps))
+                    progressbar(async_task, current_progress,
+                                f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
+                    enhancement_task_start_time = time.perf_counter()
+
+                    if enhance_mask_model == 'sam':
+                        print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
+
+                    mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(
+                        img, mask_model=enhance_mask_model, sam_options=SAMOptions(
+                            dino_prompt=enhance_mask_dino_prompt_text,
+                            dino_box_threshold=enhance_mask_box_threshold,
+                            dino_text_threshold=enhance_mask_text_threshold,
+                            dino_erode_or_dilate=async_task.dino_erode_or_dilate,
+                            dino_debug=async_task.debugging_dino,
+                            max_num_boxes=enhance_mask_sam_max_num_boxes,
+                            model_type=enhance_mask_sam_model
+                        ))
+                    if len(mask.shape) == 3:
+                        mask = mask[:, :, 0]
+
+                    if int(async_task.inpaint_erode_or_dilate) != 0:
+                        mask = erode_or_dilate(mask, async_task.inpaint_erode_or_dilate)
+
+                    if async_task.debugging_enhance_masks_checkbox:
+                        async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
+                        yield_result(async_task, mask, async_task.black_out_nsfw, False,
+                                     async_task.disable_intermediate_results)
+
+                    print(f'[Enhance] {dino_detection_count} boxes detected')
+                    print(f'[Enhance] {sam_detection_count} segments detected in boxes')
+                    print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
+
+                    if enhance_mask_model == 'sam' and (
+                            dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0):
+                        print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
+                        continue
+
+                    base_model_additional_loras_enhance = []
+                    inpaint_head_model_path_enhance = None
+                    inpaint_parameterized_enhance = enhance_inpaint_engine != 'None'  # inpaint_engine = None, improve detail
+
+                    if inpaint_parameterized_enhance:
+                        progressbar(async_task, current_progress, 'Downloading inpainter ...')
+                        inpaint_head_model_path_enhance, inpaint_patch_model_path_enhance = modules.config.downloading_inpaint_models(
+                            async_task.inpaint_engine)
+                        if inpaint_patch_model_path_enhance not in base_model_additional_loras_enhance:
+                            base_model_additional_loras_enhance += [(inpaint_patch_model_path_enhance, 1.0)]
+
+                    progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
+                    enhance_prompt = prepare_enhance_prompt(enhance_prompt, async_task.prompt, async_task.translate_prompts,
+                                                            'prompt')
+                    enhance_negative_prompt = prepare_enhance_prompt(enhance_negative_prompt, async_task.negative_prompt,
+                                                                     async_task.translate_prompts, 'negative prompt')
+
+                    # positive and negative conditioning aren't available here anymore
+                    # if not inpaint_parameterized_enhance and enhance_prompt == async_task.prompt and enhance_negative_prompt == async_task.negative_prompt:
+                    #     task_enhance = task.copy()
+                    #     tasks_enhance = tasks.copy()
+                    # else:
+                    tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
+                                                                         enhance_negative_prompt,
+                                                                         base_model_additional_loras_enhance,
+                                                                         1, True,
+                                                                         use_expansion, use_style,
+                                                                         use_synthetic_refiner)
+                    task_enhance = tasks_enhance[0]
+
+                    # TODO could support vary, upscale and CN in the future
+                    # if 'cn' in goals:
+                    #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+                    if async_task.freeu_enabled:
+                        apply_freeu(async_task)
+                    patch_samplers(async_task)
+
+                    goals_enhance = ['inpaint']
+                    enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
+                        async_task, None, inpaint_head_model_path_enhance, img, mask,
+                        inpaint_parameterized_enhance, enhance_inpaint_strength,
+                        enhance_inpaint_respective_field, switch, enhance_inpaint_disable_initial_latent,
+                        current_progress, True)
+
+                    try:
+                        imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback,
+                                                                          controlnet_canny_path, controlnet_cpds_path,
+                                                                          current_task_id, enhance_inpaint_strength,
+                                                                          final_scheduler_name, goals_enhance,
+                                                                          initial_latent_enhance, switch,
+                                                                          task_enhance['c'], task_enhance['uc'],
+                                                                          task_enhance, tasks_enhance, tiled,
+                                                                          use_expansion, width_enhance, height_enhance,
+                                                                          current_progress, total_count)
+                        img = imgs2[0]
+
+                    except ldm_patched.modules.model_management.InterruptProcessingException:
+                        if async_task.last_stop == 'skip':
+                            print('User skipped')
+                            async_task.last_stop = False
+                            continue
+                        else:
+                            print('User stopped')
+                            break
+
+                    del task_enhance['c'], task_enhance['uc']  # Save memory
+                    enhancement_task_time = time.perf_counter() - enhancement_task_start_time
+                    print(f'Enhancement time: {enhancement_task_time:.2f} seconds')
+
+        stop_processing(async_task, processing_start_time)
+        return
 
     while True:
         time.sleep(0.01)

From 1175df843b06766898ee6394b1b0fc6f63122f8b Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 11:48:42 +0200
Subject: [PATCH 052/101] fix: correctly advance progress bar within tasks

Does not yet prevent the progress bar from jumping back during preparation steps.
---
 modules/async_worker.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 41a769e0..8f5529f5 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -301,8 +301,7 @@ def worker():
         del positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-        current_progress = int(base_progress + (100 - base_progress) * float(
-            (current_task_id + 1) * async_task.steps) / float(all_steps))
+        current_progress = int(base_progress + (100 - flags.preparation_step_count) * float((current_task_id + 1) * async_task.steps) / float(all_steps))
         if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
             progressbar(async_task, current_progress, 'Checking for NSFW content ...')
             imgs = default_censor(imgs)
@@ -1072,15 +1071,15 @@ def worker():
         total_count = async_task.image_number
 
         def callback(step, x0, x, total_steps, y):
-            done_steps = current_task_id * async_task.steps + step
+            done_steps = current_task_id * async_task.steps + step + 1
             async_task.yields.append(['preview', (
-                int(base_progress + (100 - base_progress) * float(done_steps) / float(all_steps)),
+                int(base_progress + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
                 f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
 
         generated_imgs = {}
 
         for current_task_id, task in enumerate(tasks):
-            current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(
+            current_progress = int(base_progress + (100 - flags.preparation_step_count) * float(
                 current_task_id * async_task.steps) / float(all_steps))
             progressbar(async_task, current_progress,
                         f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
@@ -1119,14 +1118,13 @@ def worker():
         progressbar(async_task, current_progress, 'Processing enhance ...')
         total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * len(async_task.enhance_ctrls)
         base_progress = current_progress
-        for generated_imgs_idx, (current_task_id, imgs) in enumerate(generated_imgs.items()):
-            for imgs_idx, img in enumerate(imgs):
-                for enhance_ctrls_idx, (enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field) in enumerate(async_task.enhance_ctrls):
-                    current_task_id = generated_imgs_idx + imgs_idx + enhance_ctrls_idx
-                    current_progress = int(base_progress + (100 - base_progress) * float(
-                        current_task_id * async_task.steps) / float(all_steps))
-                    progressbar(async_task, current_progress,
-                                f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
+        current_task_id = -1
+        for imgs in generated_imgs.values():
+            for img in imgs:
+                for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
+                    current_task_id += 1
+                    current_progress = int(base_progress + (100 - flags.preparation_step_count) * float(current_task_id * async_task.steps) / float(all_steps))
+                    progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
                     enhancement_task_start_time = time.perf_counter()
 
                     if enhance_mask_model == 'sam':
@@ -1215,7 +1213,7 @@ def worker():
                                                                           task_enhance['c'], task_enhance['uc'],
                                                                           task_enhance, tasks_enhance, tiled,
                                                                           use_expansion, width_enhance, height_enhance,
-                                                                          current_progress, total_count)
+                                                                          base_progress, total_count)
                         img = imgs2[0]
 
                     except ldm_patched.modules.model_management.InterruptProcessingException:

From b7fb42436cd86ef09a0f222c88e189fb57f5b7ac Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 12:35:41 +0200
Subject: [PATCH 053/101] fix: correctly calculate current progress, display
 progressbar without any jumping

---
 modules/async_worker.py | 178 ++++++++++++++++++++++++----------------
 1 file changed, 109 insertions(+), 69 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 8f5529f5..e100dc9f 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -205,8 +205,7 @@ def worker():
         print(f'[Fooocus] {text}')
         async_task.yields.append(['preview', (number, text, None)])
 
-    def yield_result(async_task, imgs, black_out_nsfw, censor=True, do_not_show_finished_images=False,
-                     progressbar_index=flags.preparation_step_count):
+    def yield_result(async_task, imgs, progressbar_index, black_out_nsfw, censor=True, do_not_show_finished_images=False):
         if not isinstance(imgs, list):
             imgs = [imgs]
 
@@ -268,7 +267,8 @@ def worker():
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, switch, positive_cond,
-                     negative_cond, task, tasks, tiled, use_expansion, width, height, base_progress, total_count):
+                     negative_cond, task, tasks, tiled, use_expansion, width, height, base_progress, preparation_steps,
+                     total_count):
         if async_task.last_stop is not False:
             ldm_patched.modules.model_management.interrupt_current_processing()
         if 'cn' in goals:
@@ -301,14 +301,14 @@ def worker():
         del positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-        current_progress = int(base_progress + (100 - flags.preparation_step_count) * float((current_task_id + 1) * async_task.steps) / float(all_steps))
+        current_progress = int(base_progress + (100 - preparation_steps) * float((current_task_id + 1) * async_task.steps) / float(all_steps))
         if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
             progressbar(async_task, current_progress, 'Checking for NSFW content ...')
             imgs = default_censor(imgs)
         progressbar(async_task, current_progress,
                     f'Saving image {current_task_id + 1}/{total_count} to system ...')
         img_paths = save_and_log(async_task, height, imgs, task, use_expansion, width)
-        yield_result(async_task, img_paths, async_task.black_out_nsfw, False,
+        yield_result(async_task, img_paths, current_progress, async_task.black_out_nsfw, False,
                      do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
 
         return imgs, img_paths, current_progress
@@ -385,7 +385,7 @@ def worker():
 
         return img_paths
 
-    def apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width):
+    def apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width, current_progress):
         for task in async_task.cn_tasks[flags.cn_canny]:
             cn_img, cn_stop, cn_weight = task
             cn_img = resize_image(HWC3(cn_img), width=width, height=height)
@@ -397,7 +397,7 @@ def worker():
             cn_img = HWC3(cn_img)
             task[0] = core.numpy_to_pytorch(cn_img)
             if async_task.debugging_cn_preprocessor:
-                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+                yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
         for task in async_task.cn_tasks[flags.cn_cpds]:
             cn_img, cn_stop, cn_weight = task
             cn_img = resize_image(HWC3(cn_img), width=width, height=height)
@@ -408,7 +408,7 @@ def worker():
             cn_img = HWC3(cn_img)
             task[0] = core.numpy_to_pytorch(cn_img)
             if async_task.debugging_cn_preprocessor:
-                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+                yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
         for task in async_task.cn_tasks[flags.cn_ip]:
             cn_img, cn_stop, cn_weight = task
             cn_img = HWC3(cn_img)
@@ -418,7 +418,7 @@ def worker():
 
             task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
             if async_task.debugging_cn_preprocessor:
-                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+                yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
         for task in async_task.cn_tasks[flags.cn_ip_face]:
             cn_img, cn_stop, cn_weight = task
             cn_img = HWC3(cn_img)
@@ -431,12 +431,12 @@ def worker():
 
             task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
             if async_task.debugging_cn_preprocessor:
-                yield_result(async_task, cn_img, async_task.black_out_nsfw, do_not_show_finished_images=True)
+                yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
         all_ip_tasks = async_task.cn_tasks[flags.cn_ip] + async_task.cn_tasks[flags.cn_ip_face]
         if len(all_ip_tasks) > 0:
             pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
 
-    def apply_vary(async_task, denoising_strength, switch):
+    def apply_vary(async_task, denoising_strength, switch, current_progress, advance_progress=False):
         if 'subtle' in async_task.uov_method:
             denoising_strength = 0.5
         if 'strong' in async_task.uov_method:
@@ -452,7 +452,9 @@ def worker():
             shape_ceil = 2048
         async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, shape_ceil)
         initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
-        progressbar(async_task, 8, 'VAE encoding ...')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'VAE encoding ...')
         candidate_vae, _ = pipeline.get_candidate_vae(
             steps=async_task.steps,
             switch=switch,
@@ -464,11 +466,12 @@ def worker():
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((width, height))}.')
-        return denoising_strength, initial_latent, width, height
+        return denoising_strength, initial_latent, width, height, current_progress
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
                       inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch,
-                      inpaint_disable_initial_latent, current_progress, skip_apply_outpaint=False):
+                      inpaint_disable_initial_latent, current_progress, skip_apply_outpaint=False,
+                      advance_progress=False):
         if not skip_apply_outpaint:
             inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask)
 
@@ -479,10 +482,12 @@ def worker():
             k=inpaint_respective_field
         )
         if async_task.debugging_inpaint_preprocessor:
-            yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), async_task.black_out_nsfw,
-                         do_not_show_finished_images=True)
+            yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), 100,
+                         async_task.black_out_nsfw, do_not_show_finished_images=True)
             raise EarlyReturnException
 
+        if advance_progress:
+            current_progress += 1
         progressbar(async_task, current_progress, 'VAE Inpaint encoding ...')
         inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
         inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
@@ -499,10 +504,14 @@ def worker():
             pixels=inpaint_pixel_image)
         latent_swap = None
         if candidate_vae_swap is not None:
+            if advance_progress:
+                current_progress += 1
             progressbar(async_task, current_progress, 'VAE SD15 encoding ...')
             latent_swap = core.encode_vae(
                 vae=candidate_vae_swap,
                 pixels=inpaint_pixel_fill)['samples']
+        if advance_progress:
+            current_progress += 1
         progressbar(async_task, current_progress, 'VAE encoding ...')
         latent_fill = core.encode_vae(
             vae=candidate_vae,
@@ -523,7 +532,7 @@ def worker():
         final_height, final_width = inpaint_worker.current_task.image.shape[:2]
         print(f'Final resolution is {str((final_width, final_height))}, latent is {str((width, height))}.')
 
-        return denoising_strength, initial_latent, width, height
+        return denoising_strength, initial_latent, width, height, current_progress
 
     def apply_outpaint(async_task, inpaint_image, inpaint_mask):
         if len(async_task.outpaint_selections) > 0:
@@ -553,9 +562,11 @@ def worker():
             async_task.inpaint_respective_field = 1.0
         return inpaint_image, inpaint_mask
 
-    def apply_upscale(async_task, switch):
+    def apply_upscale(async_task, switch, current_progress, advance_progress=False):
         H, W, C = async_task.uov_input_image.shape
-        progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, f'Upscaling image from {str((H, W))} ...')
         async_task.uov_input_image = perform_upscale(async_task.uov_input_image)
         print(f'Image upscaled.')
         if '1.5x' in async_task.uov_method:
@@ -588,7 +599,7 @@ def worker():
                 async_task.uov_input_image = default_censor(async_task.uov_input_image)
             progressbar(async_task, 100, 'Saving image to system ...')
             uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
-            yield_result(async_task, uov_input_image_path, async_task.black_out_nsfw, False,
+            yield_result(async_task, uov_input_image_path, 100, async_task.black_out_nsfw, False,
                          do_not_show_finished_images=True)
             raise EarlyReturnException
 
@@ -597,7 +608,9 @@ def worker():
         if async_task.overwrite_upscale_strength > 0:
             denoising_strength = async_task.overwrite_upscale_strength
         initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
-        progressbar(async_task, 10, 'VAE encoding ...')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'VAE encoding ...')
         candidate_vae, _ = pipeline.get_candidate_vae(
             steps=async_task.steps,
             switch=switch,
@@ -611,7 +624,7 @@ def worker():
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((width, height))}.')
-        return denoising_strength, initial_latent, tiled, width, height
+        return denoising_strength, initial_latent, tiled, width, height, current_progress
 
     def apply_overrides(async_task, height, width):
         if async_task.overwrite_step > 0:
@@ -626,7 +639,7 @@ def worker():
         return height, switch, width
 
     def process_prompt(async_task, prompt, negative_prompt, base_model_additional_loras, image_number, disable_seed_increment, use_expansion, use_style,
-                       use_synthetic_refiner):
+                       use_synthetic_refiner, current_progress, advance_progress=False):
         prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')
         negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='')
         prompt = prompts[0]
@@ -636,7 +649,9 @@ def worker():
             use_expansion = False
         extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
         extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
-        progressbar(async_task, 2, 'Loading models ...')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'Loading models ...')
         lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames,
                                                               async_task.performance_selection)
         loras, prompt = parse_lora_references_from_prompt(prompt, async_task.loras,
@@ -648,7 +663,9 @@ def worker():
                                     loras=loras, base_model_additional_loras=base_model_additional_loras,
                                     use_synthetic_refiner=use_synthetic_refiner, vae_name=async_task.vae_name)
         pipeline.set_clip_skip(async_task.clip_skip)
-        progressbar(async_task, 3, 'Processing prompts ...')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'Processing prompts ...')
         tasks = []
         for i in range(image_number):
             if disable_seed_increment:
@@ -706,22 +723,29 @@ def worker():
                 styles=task_styles
             ))
         if use_expansion:
+            if advance_progress:
+                current_progress += 1
             for i, t in enumerate(tasks):
-                progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...')
+
+                progressbar(async_task, current_progress, f'Preparing Fooocus text #{i + 1} ...')
                 expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed'])
                 print(f'[Prompt Expansion] {expansion}')
                 t['expansion'] = expansion
                 t['positive'] = copy.deepcopy(t['positive']) + [expansion]  # Deep copy.
+        if advance_progress:
+            current_progress += 1
         for i, t in enumerate(tasks):
-            progressbar(async_task, 5, f'Encoding positive #{i + 1} ...')
+            progressbar(async_task, current_progress, f'Encoding positive #{i + 1} ...')
             t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k'])
+        if advance_progress:
+            current_progress += 1
         for i, t in enumerate(tasks):
             if abs(float(async_task.cfg_scale) - 1.0) < 1e-4:
                 t['uc'] = pipeline.clone_cond(t['c'])
             else:
-                progressbar(async_task, 6, f'Encoding negative #{i + 1} ...')
+                progressbar(async_task, current_progress, f'Encoding negative #{i + 1} ...')
                 t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
-        return tasks, use_expansion, loras
+        return tasks, use_expansion, loras, current_progress
 
     def apply_freeu(async_task):
         print(f'FreeU is enabled!')
@@ -758,9 +782,11 @@ def worker():
 
         return final_scheduler_name
 
-    def set_hyper_sd_defaults(async_task):
+    def set_hyper_sd_defaults(async_task, current_progress, advance_progress=False):
         print('Enter Hyper-SD mode.')
-        progressbar(async_task, 1, 'Downloading Hyper-SD components ...')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'Downloading Hyper-SD components ...')
         async_task.performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)]
         if async_task.refiner_model_name != 'None':
             print(f'Refiner disabled in Hyper-SD mode.')
@@ -774,9 +800,12 @@ def worker():
         async_task.adm_scaler_positive = 1.0
         async_task.adm_scaler_negative = 1.0
         async_task.adm_scaler_end = 0.0
+        return current_progress
 
-    def set_lightning_defaults(async_task):
+    def set_lightning_defaults(async_task, current_progress, advance_progress=False):
         print('Enter Lightning mode.')
+        if advance_progress:
+            current_progress += 1
         progressbar(async_task, 1, 'Downloading Lightning components ...')
         async_task.performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)]
         if async_task.refiner_model_name != 'None':
@@ -791,9 +820,12 @@ def worker():
         async_task.adm_scaler_positive = 1.0
         async_task.adm_scaler_negative = 1.0
         async_task.adm_scaler_end = 0.0
+        return current_progress
 
-    def set_lcm_defaults(async_task):
+    def set_lcm_defaults(async_task, current_progress, advance_progress=False):
         print('Enter LCM mode.')
+        if advance_progress:
+            current_progress += 1
         progressbar(async_task, 1, 'Downloading LCM components ...')
         async_task.performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)]
         if async_task.refiner_model_name != 'None':
@@ -808,6 +840,7 @@ def worker():
         async_task.adm_scaler_positive = 1.0
         async_task.adm_scaler_negative = 1.0
         async_task.adm_scaler_end = 0.0
+        return current_progress
 
     def apply_image_input(async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path,
                           controlnet_cpds_path, goals, inpaint_head_model_path, inpaint_mask, inpaint_parameterized,
@@ -933,12 +966,13 @@ def worker():
             print(f'Refiner disabled because base model and refiner are same.')
             async_task.refiner_model_name = 'None'
 
+        current_progress = 0
         if async_task.performance_selection == Performance.EXTREME_SPEED:
-            set_lcm_defaults(async_task)
+            set_lcm_defaults(async_task, current_progress, advance_progress=True)
         elif async_task.performance_selection == Performance.LIGHTNING:
-            set_lightning_defaults(async_task)
+            set_lightning_defaults(async_task, current_progress, advance_progress=True)
         elif async_task.performance_selection == Performance.HYPER_SD:
-            set_hyper_sd_defaults(async_task)
+            set_hyper_sd_defaults(async_task, current_progress, advance_progress=True)
 
         if async_task.translate_prompts:
             async_task.prompt = translate2en(async_task.prompt, 'prompt')
@@ -981,6 +1015,7 @@ def worker():
 
         goals = []
         tasks = []
+        current_progress = 1
 
         if async_task.input_image_checkbox:
             base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner = apply_image_input(
@@ -990,7 +1025,7 @@ def worker():
 
 
         # Load or unload CNs
-        progressbar(async_task, 1, 'Loading control models ...')
+        progressbar(async_task, current_progress, 'Loading control models ...')
         pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
         ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
         ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
@@ -1000,42 +1035,46 @@ def worker():
         print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}')
         print(f'[Parameters] Steps = {async_task.steps} - {switch}')
 
-        progressbar(async_task, 1, 'Initializing ...')
+        progressbar(async_task, current_progress, 'Initializing ...')
 
         if not skip_prompt_processing:
-            tasks, use_expansion, loras = process_prompt(async_task, async_task.prompt, async_task.negative_prompt,
+            tasks, use_expansion, loras, current_progress = process_prompt(async_task, async_task.prompt, async_task.negative_prompt,
                                                          base_model_additional_loras, async_task.image_number,
                                                          async_task.disable_seed_increment, use_expansion, use_style,
-                                                         use_synthetic_refiner)
+                                                         use_synthetic_refiner, current_progress, advance_progress=True)
 
         if len(goals) > 0:
-            progressbar(async_task, 7, 'Image processing ...')
+            current_progress += 1
+            progressbar(async_task, current_progress, 'Image processing ...')
 
         if 'vary' in goals:
-            denoising_strength, initial_latent, width, height = apply_vary(async_task, denoising_strength, switch)
+            denoising_strength, initial_latent, width, height, current_progress = apply_vary(async_task, denoising_strength, switch, current_progress)
 
         if 'upscale' in goals:
             try:
-                denoising_strength, initial_latent, tiled, width, height = apply_upscale(async_task, switch)
+                denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(async_task, switch, current_progress, advance_progress=True)
             except EarlyReturnException:
                 return
 
         if 'inpaint' in goals:
             try:
-                denoising_strength, initial_latent, width, height = apply_inpaint(async_task, initial_latent,
-                                                                                  inpaint_head_model_path,
-                                                                                  inpaint_image,
-                                                                                  inpaint_mask, inpaint_parameterized,
-                                                                                  async_task.inpaint_strength,
-                                                                                  async_task.inpaint_respective_field,
-                                                                                  switch,
-                                                                                  async_task.inpaint_disable_initial_latent,
-                                                                                  11)
+                denoising_strength, initial_latent, width, height, current_progress = apply_inpaint(async_task,
+                                                                                                    initial_latent,
+                                                                                                    inpaint_head_model_path,
+                                                                                                    inpaint_image,
+                                                                                                    inpaint_mask,
+                                                                                                    inpaint_parameterized,
+                                                                                                    async_task.inpaint_strength,
+                                                                                                    async_task.inpaint_respective_field,
+                                                                                                    switch,
+                                                                                                    async_task.inpaint_disable_initial_latent,
+                                                                                                    current_progress,
+                                                                                                    advance_progress=True)
             except EarlyReturnException:
                 return
 
         if 'cn' in goals:
-            apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+            apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width, current_progress)
             if async_task.debugging_cn_preprocessor:
                 return
 
@@ -1062,24 +1101,23 @@ def worker():
         final_scheduler_name = patch_samplers(async_task)
         print(f'Using {final_scheduler_name} scheduler.')
 
-        async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)])
+        async_task.yields.append(['preview', (current_progress, 'Moving model to GPU ...', None)])
 
         processing_start_time = time.perf_counter()
 
-        base_progress = int(flags.preparation_step_count)
-        current_progress = base_progress
+        preparation_steps = base_progress = current_progress
         total_count = async_task.image_number
 
         def callback(step, x0, x, total_steps, y):
             done_steps = current_task_id * async_task.steps + step + 1
             async_task.yields.append(['preview', (
-                int(base_progress + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
+                int(base_progress + (100 - preparation_steps) * float(done_steps) / float(all_steps)),
                 f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
 
         generated_imgs = {}
 
         for current_task_id, task in enumerate(tasks):
-            current_progress = int(base_progress + (100 - flags.preparation_step_count) * float(
+            current_progress = int(base_progress + (100 - preparation_steps) * float(
                 current_task_id * async_task.steps) / float(all_steps))
             progressbar(async_task, current_progress,
                         f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
@@ -1092,7 +1130,8 @@ def worker():
                                                                  final_scheduler_name, goals, initial_latent,
                                                                  switch, task['c'], task['uc'], task,
                                                                  tasks, tiled, use_expansion, width, height,
-                                                                 flags.preparation_step_count, async_task.image_number)
+                                                                 preparation_steps, preparation_steps,
+                                                                 async_task.image_number)
 
                 generated_imgs[current_task_id] = imgs
 
@@ -1123,7 +1162,7 @@ def worker():
             for img in imgs:
                 for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
                     current_task_id += 1
-                    current_progress = int(base_progress + (100 - flags.preparation_step_count) * float(current_task_id * async_task.steps) / float(all_steps))
+                    current_progress = int(base_progress + (100 - preparation_steps) * float(current_task_id * async_task.steps) / float(all_steps))
                     progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
                     enhancement_task_start_time = time.perf_counter()
 
@@ -1148,7 +1187,7 @@ def worker():
 
                     if async_task.debugging_enhance_masks_checkbox:
                         async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
-                        yield_result(async_task, mask, async_task.black_out_nsfw, False,
+                        yield_result(async_task, mask, current_progress, async_task.black_out_nsfw, False,
                                      async_task.disable_intermediate_results)
 
                     print(f'[Enhance] {dino_detection_count} boxes detected')
@@ -1182,12 +1221,13 @@ def worker():
                     #     task_enhance = task.copy()
                     #     tasks_enhance = tasks.copy()
                     # else:
-                    tasks_enhance, use_expansion, loras = process_prompt(async_task, enhance_prompt,
-                                                                         enhance_negative_prompt,
-                                                                         base_model_additional_loras_enhance,
-                                                                         1, True,
-                                                                         use_expansion, use_style,
-                                                                         use_synthetic_refiner)
+                    tasks_enhance, use_expansion, loras, current_progress = process_prompt(async_task, enhance_prompt,
+                                                                                           enhance_negative_prompt,
+                                                                                           base_model_additional_loras_enhance,
+                                                                                           1, True,
+                                                                                           use_expansion, use_style,
+                                                                                           use_synthetic_refiner,
+                                                                                           current_progress)
                     task_enhance = tasks_enhance[0]
 
                     # TODO could support vary, upscale and CN in the future
@@ -1198,7 +1238,7 @@ def worker():
                     patch_samplers(async_task)
 
                     goals_enhance = ['inpaint']
-                    enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance = apply_inpaint(
+                    enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance, current_progress = apply_inpaint(
                         async_task, None, inpaint_head_model_path_enhance, img, mask,
                         inpaint_parameterized_enhance, enhance_inpaint_strength,
                         enhance_inpaint_respective_field, switch, enhance_inpaint_disable_initial_latent,
@@ -1213,7 +1253,7 @@ def worker():
                                                                           task_enhance['c'], task_enhance['uc'],
                                                                           task_enhance, tasks_enhance, tiled,
                                                                           use_expansion, width_enhance, height_enhance,
-                                                                          base_progress, total_count)
+                                                                          base_progress, preparation_steps, total_count)
                         img = imgs2[0]
 
                     except ldm_patched.modules.model_management.InterruptProcessingException:

From 229ff8173856d036270c5d64a0d9a4a6e7a69c40 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 21:07:27 +0200
Subject: [PATCH 054/101] refactor: rename max_num_boxes to max_detections

---
 experiments_mask_generation.py |  2 +-
 extras/inpaint_mask.py         | 10 +++++++---
 modules/async_worker.py        |  8 ++++----
 modules/config.py              |  8 ++++----
 webui.py                       | 19 ++++++++++---------
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
index 0f6b960d..a27eb39c 100644
--- a/experiments_mask_generation.py
+++ b/experiments_mask_generation.py
@@ -14,7 +14,7 @@ sam_options = SAMOptions(
     dino_text_threshold=0.25,
     dino_erode_or_dilate=0,
     dino_debug=False,
-    max_num_boxes=2,
+    max_detections=2,
     model_type='vit_b'
 )
 
diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 1d04d86c..086b7da6 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -1,3 +1,5 @@
+import sys
+
 import modules.config
 import numpy as np
 import torch
@@ -18,7 +20,7 @@ class SAMOptions:
                  dino_debug=False,
 
                  # SAM
-                 max_num_boxes=2,
+                 max_detections=2,
                  model_type='vit_b'
                  ):
         self.dino_prompt = dino_prompt
@@ -26,7 +28,7 @@ class SAMOptions:
         self.dino_text_threshold = dino_text_threshold
         self.dino_erode_or_dilate = dino_erode_or_dilate
         self.dino_debug = dino_debug
-        self.max_num_boxes = max_num_boxes
+        self.max_detections = max_detections
         self.model_type = model_type
 
 
@@ -114,7 +116,9 @@ def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=
 
         masks = optimize_masks(masks)
         sam_detection_count = len(masks)
-        sam_objects = min(len(logits), sam_options.max_num_boxes)
+        if sam_options.max_detections == 0:
+            sam_options.max_detections = sys.maxsize
+        sam_objects = min(len(logits), sam_options.max_detections)
         for obj_ind in range(sam_objects):
             mask_tensor = masks[obj_ind][0]
             final_mask_tensor += mask_tensor
diff --git a/modules/async_worker.py b/modules/async_worker.py
index e100dc9f..8864582b 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -123,7 +123,7 @@ class AsyncTask:
             enhance_mask_sam_model = args.pop()
             enhance_mask_text_threshold = args.pop()
             enhance_mask_box_threshold = args.pop()
-            enhance_mask_sam_max_num_boxes = args.pop()
+            enhance_mask_sam_max_detections = args.pop()
             enhance_inpaint_disable_initial_latent = args.pop()
             enhance_inpaint_engine = args.pop()
             enhance_inpaint_strength = args.pop()
@@ -137,7 +137,7 @@ class AsyncTask:
                     enhance_mask_sam_model,
                     enhance_mask_text_threshold,
                     enhance_mask_box_threshold,
-                    enhance_mask_sam_max_num_boxes,
+                    enhance_mask_sam_max_detections,
                     enhance_inpaint_disable_initial_latent,
                     enhance_inpaint_engine,
                     enhance_inpaint_strength,
@@ -1160,7 +1160,7 @@ def worker():
         current_task_id = -1
         for imgs in generated_imgs.values():
             for img in imgs:
-                for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_num_boxes, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
+                for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
                     current_task_id += 1
                     current_progress = int(base_progress + (100 - preparation_steps) * float(current_task_id * async_task.steps) / float(all_steps))
                     progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
@@ -1176,7 +1176,7 @@ def worker():
                             dino_text_threshold=enhance_mask_text_threshold,
                             dino_erode_or_dilate=async_task.dino_erode_or_dilate,
                             dino_debug=async_task.debugging_dino,
-                            max_num_boxes=enhance_mask_sam_max_num_boxes,
+                            max_detections=enhance_mask_sam_max_detections,
                             model_type=enhance_mask_sam_model
                         ))
                     if len(mask.shape) == 3:
diff --git a/modules/config.py b/modules/config.py
index 929dd9ce..d3240888 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -516,10 +516,10 @@ default_enhance_tabs = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
     expected_type=int
 )
-default_sam_max_num_boxes = get_config_item_or_set_default(
-    key='default_sam_max_num_boxes',
-    default_value=2,
-    validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
+default_sam_max_detections = get_config_item_or_set_default(
+    key='default_sam_max_detections',
+    default_value=0,
+    validator=lambda x: isinstance(x, int) and 0 <= x <= 10,
     expected_type=int
 )
 default_black_out_nsfw = get_config_item_or_set_default(
diff --git a/webui.py b/webui.py
index 9185dd10..021ed55b 100644
--- a/webui.py
+++ b/webui.py
@@ -258,10 +258,10 @@ with shared.gradio_root:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
-                                    inpaint_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections", minimum=1, maximum=5, value=modules.config.default_sam_max_num_boxes, step=1, interactive=True)
+                                    inpaint_mask_sam_num_boxes = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
-                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_num_boxes, dino_erode_or_dilate, dino_debug):
+                                def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_detections, dino_erode_or_dilate, dino_debug):
                                     from extras.inpaint_mask import generate_mask_from_image
 
                                     extras = {}
@@ -275,7 +275,7 @@ with shared.gradio_root:
                                             dino_text_threshold=text_threshold,
                                             dino_erode_or_dilate=dino_erode_or_dilate,
                                             dino_debug=dino_debug,
-                                            max_num_boxes=sam_max_num_boxes,
+                                            max_detections=sam_max_detections,
                                             model_type=sam_model
                                         )
 
@@ -380,10 +380,11 @@ with shared.gradio_root:
                                     enhance_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
                                                                             maximum=1.0, value=0.25, step=0.05,
                                                                             interactive=True)
-                                    enhance_mask_sam_max_num_boxes = gr.Slider(label="Maximum number of box detections",
-                                                                               minimum=1, maximum=5,
-                                                                               value=modules.config.default_sam_max_num_boxes,
-                                                                               step=1, interactive=True)
+                                    enhance_mask_sam_max_detections = gr.Slider(label="Maximum number of detections",
+                                                                                info="Set to 0 to detect all",
+                                                                                minimum=0, maximum=10,
+                                                                                value=modules.config.default_sam_max_detections,
+                                                                                step=1, interactive=True)
 
                             with gr.Accordion("Inpaint", visible=True, open=False):
                                 enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
@@ -420,7 +421,7 @@ with shared.gradio_root:
                             enhance_mask_sam_model,
                             enhance_mask_text_threshold,
                             enhance_mask_box_threshold,
-                            enhance_mask_sam_max_num_boxes,
+                            enhance_mask_sam_max_detections,
                             enhance_inpaint_disable_initial_latent,
                             enhance_inpaint_engine,
                             enhance_inpaint_strength,
@@ -868,7 +869,7 @@ with shared.gradio_root:
                                    inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
                                            inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
                                            inpaint_mask_box_threshold, inpaint_mask_text_threshold,
-                                           inpaint_mask_sam_max_num_boxes, dino_erode_or_dilate, debugging_dino],
+                                           inpaint_mask_sam_num_boxes, dino_erode_or_dilate, debugging_dino],
                                    outputs=inpaint_mask_image, show_progress=True, queue=True)
 
         ctrls = [currentTask, generate_image_grid]

From c0f7c3a8ee6d389428bfdf7559d893ca250f4449 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 21:39:27 +0200
Subject: [PATCH 055/101] feat: add erode or dilate and mask invert to enhance
 mask inpaint settings

---
 modules/async_worker.py | 17 ++++++++++++-----
 webui.py                | 22 +++++++++++++++-------
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 8864582b..dfdcd1d1 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -128,6 +128,8 @@ class AsyncTask:
             enhance_inpaint_engine = args.pop()
             enhance_inpaint_strength = args.pop()
             enhance_inpaint_respective_field = args.pop()
+            enhance_inpaint_erode_or_dilate = args.pop()
+            enhance_mask_invert = args.pop()
             if enhance_enabled:
                 self.enhance_ctrls.append([
                     enhance_mask_dino_prompt_text,
@@ -141,7 +143,9 @@ class AsyncTask:
                     enhance_inpaint_disable_initial_latent,
                     enhance_inpaint_engine,
                     enhance_inpaint_strength,
-                    enhance_inpaint_respective_field
+                    enhance_inpaint_respective_field,
+                    enhance_inpaint_erode_or_dilate,
+                    enhance_mask_invert
                 ])
 
 
@@ -1160,7 +1164,7 @@ def worker():
         current_task_id = -1
         for imgs in generated_imgs.values():
             for img in imgs:
-                for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field in async_task.enhance_ctrls:
+                for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls:
                     current_task_id += 1
                     current_progress = int(base_progress + (100 - preparation_steps) * float(current_task_id * async_task.steps) / float(all_steps))
                     progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
@@ -1177,13 +1181,16 @@ def worker():
                             dino_erode_or_dilate=async_task.dino_erode_or_dilate,
                             dino_debug=async_task.debugging_dino,
                             max_detections=enhance_mask_sam_max_detections,
-                            model_type=enhance_mask_sam_model
+                            model_type=enhance_mask_sam_model,
                         ))
                     if len(mask.shape) == 3:
                         mask = mask[:, :, 0]
 
-                    if int(async_task.inpaint_erode_or_dilate) != 0:
-                        mask = erode_or_dilate(mask, async_task.inpaint_erode_or_dilate)
+                    if int(enhance_inpaint_erode_or_dilate) != 0:
+                        mask = erode_or_dilate(mask, enhance_inpaint_erode_or_dilate)
+
+                    if enhance_mask_invert:
+                        mask = 255 - mask
 
                     if async_task.debugging_enhance_masks_checkbox:
                         async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
diff --git a/webui.py b/webui.py
index 021ed55b..e7519b6c 100644
--- a/webui.py
+++ b/webui.py
@@ -258,7 +258,7 @@ with shared.gradio_root:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
                                     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
-                                    inpaint_mask_sam_num_boxes = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True)
+                                    inpaint_mask_sam_max_detections = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True)
                                 generate_mask_button = gr.Button(value='Generate mask from image')
 
                                 def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_detections, dino_erode_or_dilate, dino_debug):
@@ -389,7 +389,6 @@ with shared.gradio_root:
                             with gr.Accordion("Inpaint", visible=True, open=False):
                                 enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
                                                                    value=modules.flags.inpaint_option_default,
-                                                                   # TODO test
                                                                    label='Method', interactive=True)
                                 enhance_inpaint_disable_initial_latent = gr.Checkbox(
                                     label='Disable initial latent in inpaint', value=False)
@@ -411,6 +410,12 @@ with shared.gradio_root:
                                                                                   'Value 1 is same as "Whole Image" in A1111. '
                                                                                   'Only used in inpaint, not used in outpaint. '
                                                                                   '(Outpaint always use 1.0)')
+                                enhance_inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
+                                                                            minimum=-64, maximum=64, step=1, value=0,
+                                                                            info='Positive value will make white area in the mask larger, '
+                                                                                 'negative value will make white area smaller. '
+                                                                                 '(default is 0, always processed before any mask invert)')
+                                enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
 
                         enhance_ctrls += [
                             enhance_enabled,
@@ -425,7 +430,9 @@ with shared.gradio_root:
                             enhance_inpaint_disable_initial_latent,
                             enhance_inpaint_engine,
                             enhance_inpaint_strength,
-                            enhance_inpaint_respective_field
+                            enhance_inpaint_respective_field,
+                            enhance_inpaint_erode_or_dilate,
+                            enhance_mask_invert
                         ]
 
                         enhance_inpaint_mode.input(inpaint_mode_change, inputs=enhance_inpaint_mode, outputs=[
@@ -737,12 +744,13 @@ with shared.gradio_root:
                         inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
                                                             minimum=-64, maximum=64, step=1, value=0,
                                                             info='Positive value will make white area in the mask larger, '
-                                                                 'negative value will make white area smaller.'
-                                                                 '(default is 0, always process before any mask invert)')
+                                                                 'negative value will make white area smaller. '
+                                                                 '(default is 0, always processed before any mask invert)')
                         dino_erode_or_dilate = gr.Slider(label='GroundingDINO Box Erode or Dilate',
                                                          minimum=-64, maximum=64, step=1, value=0,
                                                          info='Positive value will make white area in the mask larger, '
-                                                              'negative value will make white area smaller.')
+                                                              'negative value will make white area smaller. '
+                                                              '(default is 0, processed before SAM)')
                         inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False)
                         invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False)
 
@@ -869,7 +877,7 @@ with shared.gradio_root:
                                    inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
                                            inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
                                            inpaint_mask_box_threshold, inpaint_mask_text_threshold,
-                                           inpaint_mask_sam_num_boxes, dino_erode_or_dilate, debugging_dino],
+                                           inpaint_mask_sam_max_detections, dino_erode_or_dilate, debugging_dino],
                                    outputs=inpaint_mask_image, show_progress=True, queue=True)
 
         ctrls = [currentTask, generate_image_grid]

From dd866616bca194efae20872ac2ce5ad34aa4b0a3 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 22:16:48 +0200
Subject: [PATCH 056/101] refactor: code cleanup

---
 modules/async_worker.py | 7 +------
 modules/flags.py        | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index dfdcd1d1..85116a85 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1157,7 +1157,6 @@ def worker():
             stop_processing(async_task, processing_start_time)
             return
 
-        # enhance
         progressbar(async_task, current_progress, 'Processing enhance ...')
         total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * len(async_task.enhance_ctrls)
         base_progress = current_progress
@@ -1223,11 +1222,7 @@ def worker():
                     enhance_negative_prompt = prepare_enhance_prompt(enhance_negative_prompt, async_task.negative_prompt,
                                                                      async_task.translate_prompts, 'negative prompt')
 
-                    # positive and negative conditioning aren't available here anymore
-                    # if not inpaint_parameterized_enhance and enhance_prompt == async_task.prompt and enhance_negative_prompt == async_task.negative_prompt:
-                    #     task_enhance = task.copy()
-                    #     tasks_enhance = tasks.copy()
-                    # else:
+                    # positive and negative conditioning aren't available here anymore, process prompt again
                     tasks_enhance, use_expansion, loras, current_progress = process_prompt(async_task, enhance_prompt,
                                                                                            enhance_negative_prompt,
                                                                                            base_model_additional_loras_enhance,
diff --git a/modules/flags.py b/modules/flags.py
index f15222c6..1ee39465 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -107,7 +107,6 @@ metadata_scheme = [
 ]
 
 controlnet_image_count = 4
-preparation_step_count = 13
 
 
 class OutputFormat(Enum):

From 6cb0b2143b13712e2dbe4f71e7a6cf96f340f5a5 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 22:38:36 +0200
Subject: [PATCH 057/101] fix: correctly initialize inpaint_mask var, rename
 task styles index

---
 modules/async_worker.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 85116a85..f7e65478 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -693,10 +693,10 @@ def worker():
 
             task_styles = async_task.style_selections.copy()
             if use_style:
-                for i, s in enumerate(task_styles):
+                for j, s in enumerate(task_styles):
                     if s == random_style_name:
                         s = get_random_style(task_rng)
-                        task_styles[i] = s
+                        task_styles[j] = s
                     p, n = apply_style(s, positive=task_prompt)
                     positive_basic_workloads = positive_basic_workloads + p
                     negative_basic_workloads = negative_basic_workloads + n
@@ -847,9 +847,9 @@ def worker():
         return current_progress
 
     def apply_image_input(async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path,
-                          controlnet_cpds_path, goals, inpaint_head_model_path, inpaint_mask, inpaint_parameterized,
-                          ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing,
-                          use_synthetic_refiner):
+                          controlnet_cpds_path, goals, inpaint_head_model_path, inpaint_image, inpaint_mask,
+                          inpaint_parameterized,  ip_adapter_face_path, ip_adapter_path, ip_negative_path,
+                          skip_prompt_processing, use_synthetic_refiner):
         if (async_task.current_tab == 'uov' or (
                 async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
                 and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
@@ -1024,9 +1024,8 @@ def worker():
         if async_task.input_image_checkbox:
             base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner = apply_image_input(
                 async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path,
-                goals, inpaint_head_model_path, inpaint_mask, inpaint_parameterized, ip_adapter_face_path, ip_adapter_path,
-                ip_negative_path, skip_prompt_processing, use_synthetic_refiner)
-
+                goals, inpaint_head_model_path, inpaint_image, inpaint_mask, inpaint_parameterized, ip_adapter_face_path,
+                ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner)
 
         # Load or unload CNs
         progressbar(async_task, current_progress, 'Loading control models ...')

From c0397bddbdb6ea5427fc4f0362f1488de6fd65aa Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 22:39:09 +0200
Subject: [PATCH 058/101] feat: enable edit attention with ctrl + arrow keys
 for enhance prompts

---
 webui.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/webui.py b/webui.py
index e7519b6c..d7b89c72 100644
--- a/webui.py
+++ b/webui.py
@@ -352,9 +352,11 @@ with shared.gradio_root:
                                                                         show_progress=False, queue=False)
 
                             enhance_prompt = gr.Textbox(label="Enhancement positive prompt",
-                                                        placeholder="Uses original prompt instead if empty.")
+                                                        placeholder="Uses original prompt instead if empty.",
+                                                        elem_id='enhance_prompt')
                             enhance_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
-                                                                 placeholder="Uses original negative prompt instead if empty.")
+                                                                 placeholder="Uses original negative prompt instead if empty.",
+                                                                 elem_id='enhance_negative_prompt')
 
                             with gr.Accordion("Detection", open=False):
                                 # TODO check if limiting to SAM is better

From ed6c5269860f7cc93ad2f9426973dae5f1dbaf1d Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 23:51:36 +0200
Subject: [PATCH 059/101] feat: add hint for performance when enhance inpaint
 engine is not none

---
 webui.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/webui.py b/webui.py
index d7b89c72..3f229a4d 100644
--- a/webui.py
+++ b/webui.py
@@ -387,6 +387,7 @@ with shared.gradio_root:
                                                                                 minimum=0, maximum=10,
                                                                                 value=modules.config.default_sam_max_detections,
                                                                                 step=1, interactive=True)
+                            enhance_inpaint_hint = gr.Markdown()
 
                             with gr.Accordion("Inpaint", visible=True, open=False):
                                 enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
@@ -419,6 +420,15 @@ with shared.gradio_root:
                                                                                  '(default is 0, always processed before any mask invert)')
                                 enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
 
+
+                            def update_inpaint_hint(mode):
+                                if mode == modules.flags.inpaint_option_detail:
+                                    return gr.update(value='')
+
+                                return gr.update(value="Hint: Use performance Quality or Speed (no performance LoRAs) for best results.")
+
+                            shared.gradio_root.load(update_inpaint_hint, inputs=enhance_inpaint_mode, outputs=enhance_inpaint_hint, queue=False, show_progress=False)
+
                         enhance_ctrls += [
                             enhance_enabled,
                             enhance_mask_dino_prompt_text,
@@ -441,7 +451,8 @@ with shared.gradio_root:
                             inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
                             enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
                             enhance_inpaint_strength, enhance_inpaint_respective_field
-                        ], show_progress=False, queue=False)
+                        ], show_progress=False, queue=False) \
+                            .then(update_inpaint_hint, inputs=enhance_inpaint_mode, outputs=enhance_inpaint_hint, show_progress=False, queue=False)
 
                         enhance_mask_model.change(
                             lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +

From ff3dff8323761cb2e43bf150de07cea96b726a32 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 23:52:13 +0200
Subject: [PATCH 060/101] Revert "feat: add hint for performance when enhance
 inpaint engine is not none"

This reverts commit ed6c5269860f7cc93ad2f9426973dae5f1dbaf1d.
---
 webui.py | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/webui.py b/webui.py
index 3f229a4d..d7b89c72 100644
--- a/webui.py
+++ b/webui.py
@@ -387,7 +387,6 @@ with shared.gradio_root:
                                                                                 minimum=0, maximum=10,
                                                                                 value=modules.config.default_sam_max_detections,
                                                                                 step=1, interactive=True)
-                            enhance_inpaint_hint = gr.Markdown()
 
                             with gr.Accordion("Inpaint", visible=True, open=False):
                                 enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
@@ -420,15 +419,6 @@ with shared.gradio_root:
                                                                                  '(default is 0, always processed before any mask invert)')
                                 enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
 
-
-                            def update_inpaint_hint(mode):
-                                if mode == modules.flags.inpaint_option_detail:
-                                    return gr.update(value='')
-
-                                return gr.update(value="Hint: Use performance Quality or Speed (no performance LoRAs) for best results.")
-
-                            shared.gradio_root.load(update_inpaint_hint, inputs=enhance_inpaint_mode, outputs=enhance_inpaint_hint, queue=False, show_progress=False)
-
                         enhance_ctrls += [
                             enhance_enabled,
                             enhance_mask_dino_prompt_text,
@@ -451,8 +441,7 @@ with shared.gradio_root:
                             inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
                             enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
                             enhance_inpaint_strength, enhance_inpaint_respective_field
-                        ], show_progress=False, queue=False) \
-                            .then(update_inpaint_hint, inputs=enhance_inpaint_mode, outputs=enhance_inpaint_hint, show_progress=False, queue=False)
+                        ], show_progress=False, queue=False)
 
                         enhance_mask_model.change(
                             lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +

From 2c721e06ff1ad5bc66136cb3a920fd4c3f6d6670 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 18 Jun 2024 23:57:29 +0200
Subject: [PATCH 061/101] feat: update performance hint for inpaint engine

The inpaint engine model has only been trained for SDXL without any performance LoRA, so combining it with a performance LoRA will produce bad results when inpainting larger areas.
---
 language/en.json | 2 +-
 webui.py         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/language/en.json b/language/en.json
index f80ee4b2..ec7ef09d 100644
--- a/language/en.json
+++ b/language/en.json
@@ -383,8 +383,8 @@
     "Used for SAM object detection and box generation": "Used for SAM object detection and box generation",
     "GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate",
     "Inpaint Engine": "Inpaint Engine",
+    "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.",
     "v1": "v1",
-    "Version of Fooocus inpaint model": "Version of Fooocus inpaint model",
     "v2.5": "v2.5",
     "v2.6": "v2.6",
     "Control Debug": "Control Debug",
diff --git a/webui.py b/webui.py
index d7b89c72..c7573998 100644
--- a/webui.py
+++ b/webui.py
@@ -397,7 +397,7 @@ with shared.gradio_root:
                                 enhance_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                                      value=modules.config.default_inpaint_engine_version,
                                                                      choices=flags.inpaint_engine_versions,
-                                                                     info='Version of Fooocus inpaint model')
+                                                                     info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
                                 enhance_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
                                                                      minimum=0.0, maximum=1.0, step=0.001,
                                                                      value=1.0,
@@ -730,7 +730,7 @@ with shared.gradio_root:
                         inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                      value=modules.config.default_inpaint_engine_version,
                                                      choices=flags.inpaint_engine_versions,
-                                                     info='Version of Fooocus inpaint model')
+                                                     info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
                         inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
                                                      minimum=0.0, maximum=1.0, step=0.001, value=1.0,
                                                      info='Same as the denoising strength in A1111 inpaint. '

From 53b27e4b748df54407a1debe3d742a9c9608d61e Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 00:21:57 +0200
Subject: [PATCH 062/101] release: bump version to 2.5.0-rc1

---
 fooocus_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fooocus_version.py b/fooocus_version.py
index 106c67f2..27c52170 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.4.3 (mashb1t)'
+version = '2.5.0-rc1 (mashb1t)'

From 3b55e64990acb7cab088a843fb4a0fcd40626ff0 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 00:45:50 +0200
Subject: [PATCH 063/101] fix: remove unused path_adetailer from config

---
 modules/config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/config.py b/modules/config.py
index d3240888..02f56d7d 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -191,7 +191,6 @@ def get_dir_or_set_default(key, default_value, as_array=False, make_directory=Fa
 paths_checkpoints = get_dir_or_set_default('path_checkpoints', ['../models/checkpoints/'], True)
 paths_loras = get_dir_or_set_default('path_loras', ['../models/loras/'], True)
 path_embeddings = get_dir_or_set_default('path_embeddings', '../models/embeddings/')
-path_adetailer = get_dir_or_set_default('path_adetailer', '../models/adetailer/')
 path_vae_approx = get_dir_or_set_default('path_vae_approx', '../models/vae_approx/')
 path_vae = get_dir_or_set_default('path_vae', '../models/vae/')
 path_upscale_models = get_dir_or_set_default('path_upscale_models', '../models/upscale_models/')

From 22e1b08e4f8f869dbbda3c215fa634fd5c569df1 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 00:58:55 +0200
Subject: [PATCH 064/101] feat: add detection prompt dataset for quick
 selection

---
 modules/config.py |  6 +++---
 webui.py          | 25 +++++++++++++++++++++----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/modules/config.py b/modules/config.py
index 02f56d7d..0099870e 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -501,8 +501,8 @@ example_inpaint_prompts = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
-example_enhance_prompts = get_config_item_or_set_default(
-    key='example_enhance_prompts',
+example_enhance_detection_prompts = get_config_item_or_set_default(
+    key='example_enhance_detection_prompts',
     default_value=[
         'face', 'eye', 'mouth', 'hair', 'hand', 'body'
     ],
@@ -547,7 +547,7 @@ metadata_created_by = get_config_item_or_set_default(
 )
 
 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
-example_enhance_prompts = [[x] for x in example_enhance_prompts]
+example_enhance_detection_prompts = [[x] for x in example_enhance_detection_prompts]
 
 default_inpaint_mask_model = get_config_item_or_set_default(
     key='default_inpaint_mask_model',
diff --git a/webui.py b/webui.py
index c7573998..f4124e48 100644
--- a/webui.py
+++ b/webui.py
@@ -254,6 +254,16 @@ with shared.gradio_root:
                                                              value=modules.config.default_inpaint_mask_cloth_category,
                                                              visible=False)
                                 inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible')
+                                example_inpaint_mask_dino_prompt_text = gr.Dataset(
+                                    samples=modules.config.example_enhance_detection_prompts,
+                                    label='Detection Prompt Quick List',
+                                    components=[inpaint_mask_dino_prompt_text],
+                                    visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+                                example_inpaint_mask_dino_prompt_text.click(lambda x: x[0],
+                                                                            inputs=example_inpaint_mask_dino_prompt_text,
+                                                                            outputs=inpaint_mask_dino_prompt_text,
+                                                                            show_progress=False, queue=False)
+
                                 with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
                                     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
                                     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
@@ -283,9 +293,16 @@ with shared.gradio_root:
 
                                     return mask
 
-                                inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
+
+                                inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+                                                                    [gr.update(visible=x == 'sam')] * 2 +
+                                                                    [gr.Dataset.update(visible=x == 'sam',
+                                                                                       samples=modules.config.example_enhance_detection_prompts)],
                                                           inputs=inpaint_mask_model,
-                                                          outputs=[inpaint_mask_cloth_category, inpaint_mask_dino_prompt_text, inpaint_mask_advanced_options],
+                                                          outputs=[inpaint_mask_cloth_category,
+                                                                   inpaint_mask_dino_prompt_text,
+                                                                   inpaint_mask_advanced_options,
+                                                                   example_inpaint_mask_dino_prompt_text],
                                                           queue=False, show_progress=False)
 
                     with gr.TabItem(label='Describe') as desc_tab:
@@ -342,8 +359,8 @@ with shared.gradio_root:
                                                                        interactive=True,
                                                                        visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
                             example_enhance_mask_dino_prompt_text = gr.Dataset(
-                                samples=modules.config.example_enhance_prompts,
-                                label='Additional Prompt Quick List',
+                                samples=modules.config.example_enhance_detection_prompts,
+                                label='Detection Prompt Quick List',
                                 components=[enhance_mask_dino_prompt_text],
                                 visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
                             example_enhance_mask_dino_prompt_text.click(lambda x: x[0],

From 0d817cc0d0ce42770242fd5c2164501be5fffb57 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 00:59:51 +0200
Subject: [PATCH 065/101] feat: add placeholder for detection prompt text input
 elements

---
 webui.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/webui.py b/webui.py
index f4124e48..dd55a86c 100644
--- a/webui.py
+++ b/webui.py
@@ -253,7 +253,7 @@ with shared.gradio_root:
                                                              choices=flags.inpaint_mask_cloth_category,
                                                              value=modules.config.default_inpaint_mask_cloth_category,
                                                              visible=False)
-                                inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible')
+                                inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible', placeholder='Describe what you want to detect.')
                                 example_inpaint_mask_dino_prompt_text = gr.Dataset(
                                     samples=modules.config.example_enhance_detection_prompts,
                                     label='Detection Prompt Quick List',
@@ -356,6 +356,7 @@ with shared.gradio_root:
 
                             enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
                                                                        info='Use singular whenever possible',
+                                                                       placeholder='Describe what you want to detect.',
                                                                        interactive=True,
                                                                        visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
                             example_enhance_mask_dino_prompt_text = gr.Dataset(
@@ -464,7 +465,7 @@ with shared.gradio_root:
                             lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
                                       [gr.update(visible=x == 'sam')] * 2 +
                                       [gr.Dataset.update(visible=x == 'sam',
-                                                         samples=modules.config.example_enhance_prompts)],
+                                                         samples=modules.config.example_enhance_detection_prompts)],
                             inputs=enhance_mask_model,
                             outputs=[enhance_mask_cloth_category, enhance_mask_dino_prompt_text, sam_options,
                                      example_enhance_mask_dino_prompt_text],

From 6e3919f854b76e09641398d02e9b640b71aa0c04 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 01:01:46 +0200
Subject: [PATCH 066/101] fix: adjust default visibility for detection prompt
 dataset

---
 webui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/webui.py b/webui.py
index dd55a86c..cad6d157 100644
--- a/webui.py
+++ b/webui.py
@@ -258,7 +258,7 @@ with shared.gradio_root:
                                     samples=modules.config.example_enhance_detection_prompts,
                                     label='Detection Prompt Quick List',
                                     components=[inpaint_mask_dino_prompt_text],
-                                    visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+                                    visible=modules.config.default_inpaint_mask_model == 'sam')
                                 example_inpaint_mask_dino_prompt_text.click(lambda x: x[0],
                                                                             inputs=example_inpaint_mask_dino_prompt_text,
                                                                             outputs=inpaint_mask_dino_prompt_text,

From 87b3cec7d4c293eaf472b91a358d42191437e8a1 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 21:30:36 +0200
Subject: [PATCH 067/101] fix: download correct inpaint engine in enhance

---
 modules/async_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index f7e65478..84b7c5fe 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1211,7 +1211,7 @@ def worker():
                     if inpaint_parameterized_enhance:
                         progressbar(async_task, current_progress, 'Downloading inpainter ...')
                         inpaint_head_model_path_enhance, inpaint_patch_model_path_enhance = modules.config.downloading_inpaint_models(
-                            async_task.inpaint_engine)
+                            enhance_inpaint_engine)
                         if inpaint_patch_model_path_enhance not in base_model_additional_loras_enhance:
                             base_model_additional_loras_enhance += [(inpaint_patch_model_path_enhance, 1.0)]
 

From 4e575b9eb1272cee3cc91a80bf5a5d5b85dbf5e5 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Wed, 19 Jun 2024 23:53:15 +0200
Subject: [PATCH 068/101] wip: add upscale or variation to enhance

---
 modules/async_worker.py | 256 +++++++++++++++++++++++++---------------
 webui.py                |   7 +-
 2 files changed, 165 insertions(+), 98 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 84b7c5fe..166c5ed5 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -113,6 +113,7 @@ class AsyncTask:
         self.debugging_enhance_masks_checkbox = args.pop()
 
         self.enhance_checkbox = args.pop()
+        self.enhance_uov_method = args.pop()
         self.enhance_ctrls = []
         for _ in range(modules.config.default_enhance_tabs):
             enhance_enabled = args.pop()
@@ -270,7 +271,7 @@ def worker():
         return
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
-                     denoising_strength, final_scheduler_name, goals, initial_latent, switch, positive_cond,
+                     denoising_strength, final_scheduler_name, goals, initial_latent, steps, switch, positive_cond,
                      negative_cond, task, tasks, tiled, use_expansion, width, height, base_progress, preparation_steps,
                      total_count):
         if async_task.last_stop is not False:
@@ -287,7 +288,7 @@ def worker():
         imgs = pipeline.process_diffusion(
             positive_cond=positive_cond,
             negative_cond=negative_cond,
-            steps=async_task.steps,
+            steps=steps,
             switch=switch,
             width=width,
             height=height,
@@ -305,7 +306,7 @@ def worker():
         del positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-        current_progress = int(base_progress + (100 - preparation_steps) * float((current_task_id + 1) * async_task.steps) / float(all_steps))
+        current_progress = int(base_progress + (100 - preparation_steps) * float((current_task_id + 1) * steps) / float(all_steps))
         if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
             progressbar(async_task, current_progress, 'Checking for NSFW content ...')
             imgs = default_censor(imgs)
@@ -440,22 +441,22 @@ def worker():
         if len(all_ip_tasks) > 0:
             pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
 
-    def apply_vary(async_task, denoising_strength, switch, current_progress, advance_progress=False):
-        if 'subtle' in async_task.uov_method:
+    def apply_vary(async_task, uov_method, denoising_strength, uov_input_image, switch, current_progress, advance_progress=False):
+        if 'subtle' in uov_method:
             denoising_strength = 0.5
-        if 'strong' in async_task.uov_method:
+        if 'strong' in uov_method:
             denoising_strength = 0.85
         if async_task.overwrite_vary_strength > 0:
             denoising_strength = async_task.overwrite_vary_strength
-        shape_ceil = get_image_shape_ceil(async_task.uov_input_image)
+        shape_ceil = get_image_shape_ceil(uov_input_image)
         if shape_ceil < 1024:
             print(f'[Vary] Image is resized because it is too small.')
             shape_ceil = 1024
         elif shape_ceil > 2048:
             print(f'[Vary] Image is resized because it is too big.')
             shape_ceil = 2048
-        async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, shape_ceil)
-        initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
+        uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
+        initial_pixels = core.numpy_to_pytorch(uov_input_image)
         if advance_progress:
             current_progress += 1
         progressbar(async_task, current_progress, 'VAE encoding ...')
@@ -470,7 +471,7 @@ def worker():
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((width, height))}.')
-        return denoising_strength, initial_latent, width, height, current_progress
+        return uov_input_image, denoising_strength, initial_latent, width, height, current_progress
 
     def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
                       inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch,
@@ -566,28 +567,28 @@ def worker():
             async_task.inpaint_respective_field = 1.0
         return inpaint_image, inpaint_mask
 
-    def apply_upscale(async_task, switch, current_progress, advance_progress=False):
-        H, W, C = async_task.uov_input_image.shape
+    def apply_upscale(async_task, uov_input_image, uov_method, switch, current_progress, advance_progress=False):
+        H, W, C = uov_input_image.shape
         if advance_progress:
             current_progress += 1
         progressbar(async_task, current_progress, f'Upscaling image from {str((H, W))} ...')
-        async_task.uov_input_image = perform_upscale(async_task.uov_input_image)
+        uov_input_image = perform_upscale(uov_input_image)
         print(f'Image upscaled.')
-        if '1.5x' in async_task.uov_method:
+        if '1.5x' in uov_method:
             f = 1.5
-        elif '2x' in async_task.uov_method:
+        elif '2x' in uov_method:
             f = 2.0
         else:
             f = 1.0
         shape_ceil = get_shape_ceil(H * f, W * f)
         if shape_ceil < 1024:
             print(f'[Upscale] Image is resized because it is too small.')
-            async_task.uov_input_image = set_image_shape_ceil(async_task.uov_input_image, 1024)
+            uov_input_image = set_image_shape_ceil(uov_input_image, 1024)
             shape_ceil = 1024
         else:
-            async_task.uov_input_image = resample_image(async_task.uov_input_image, width=W * f, height=H * f)
+            uov_input_image = resample_image(uov_input_image, width=W * f, height=H * f)
         image_is_super_large = shape_ceil > 2800
-        if 'fast' in async_task.uov_method:
+        if 'fast' in uov_method:
             direct_return = True
         elif image_is_super_large:
             print('Image is too large. Directly returned the SR image. '
@@ -597,21 +598,13 @@ def worker():
         else:
             direct_return = False
         if direct_return:
-            d = [('Upscale (Fast)', 'upscale_fast', '2x')]
-            if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
-                progressbar(async_task, 100, 'Checking for NSFW content ...')
-                async_task.uov_input_image = default_censor(async_task.uov_input_image)
-            progressbar(async_task, 100, 'Saving image to system ...')
-            uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
-            yield_result(async_task, uov_input_image_path, 100, async_task.black_out_nsfw, False,
-                         do_not_show_finished_images=True)
-            raise EarlyReturnException
+            return direct_return, uov_input_image, None, None, None, None, None, current_progress
 
         tiled = True
         denoising_strength = 0.382
         if async_task.overwrite_upscale_strength > 0:
             denoising_strength = async_task.overwrite_upscale_strength
-        initial_pixels = core.numpy_to_pytorch(async_task.uov_input_image)
+        initial_pixels = core.numpy_to_pytorch(uov_input_image)
         if advance_progress:
             current_progress += 1
         progressbar(async_task, current_progress, 'VAE encoding ...')
@@ -628,7 +621,7 @@ def worker():
         width = W * 8
         height = H * 8
         print(f'Final resolution is {str((width, height))}.')
-        return denoising_strength, initial_latent, tiled, width, height, current_progress
+        return direct_return, uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress
 
     def apply_overrides(async_task, height, width):
         if async_task.overwrite_step > 0:
@@ -853,18 +846,9 @@ def worker():
         if (async_task.current_tab == 'uov' or (
                 async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
                 and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
-            async_task.uov_input_image = HWC3(async_task.uov_input_image)
-            if 'vary' in async_task.uov_method:
-                goals.append('vary')
-            elif 'upscale' in async_task.uov_method:
-                goals.append('upscale')
-                if 'fast' in async_task.uov_method:
-                    skip_prompt_processing = True
-                else:
-                    async_task.steps = async_task.performance_selection.steps_uov()
-
-                progressbar(async_task, 1, 'Downloading upscale models ...')
-                modules.config.downloading_upscale_model()
+            async_task.uov_input_image, skip_prompt_processing, async_task.steps = prepare_upscale(
+                async_task, goals, async_task.uov_input_image, async_task.uov_method, async_task.performance_selection,
+                async_task.steps, 1, skip_prompt_processing=skip_prompt_processing)
         if (async_task.current_tab == 'inpaint' or (
                 async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
                 and isinstance(async_task.inpaint_input_image, dict):
@@ -934,6 +918,24 @@ def worker():
                     'face')
         return base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner
 
+    def prepare_upscale(async_task, goals, uov_input_image, uov_method, performance, steps, current_progress,
+                        advance_progress=False, skip_prompt_processing=False):
+        uov_input_image = HWC3(uov_input_image)
+        if 'vary' in uov_method:
+            goals.append('vary')
+        elif 'upscale' in uov_method:
+            goals.append('upscale')
+            if 'fast' in uov_method:
+                skip_prompt_processing = True
+            else:
+                steps = performance.steps_uov()
+
+            if advance_progress:
+                current_progress += 1
+            progressbar(async_task, current_progress, 'Downloading upscale models ...')
+            modules.config.downloading_upscale_model()
+        return uov_input_image, skip_prompt_processing, steps
+
     def prepare_enhance_prompt(prompt: str, fallback_prompt: str, translate: bool, prompt_type: str):
         if safe_str(prompt) == '' or len(remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')) == 0:
             prompt = fallback_prompt
@@ -948,6 +950,64 @@ def worker():
         processing_time = time.perf_counter() - processing_start_time
         print(f'Processing time (total): {processing_time:.2f} seconds')
 
+    def process_enhance(all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+                        current_progress, current_task_id, denoising_strength, inpaint_disable_initial_latent,
+                        inpaint_engine, inpaint_respective_field, inpaint_strength,
+                        negative_prompt, prompt, final_scheduler_name, goals, height, img, mask,
+                        preparation_steps, steps, switch, tiled, total_count, use_expansion, use_style,
+                        use_synthetic_refiner, width):
+        base_model_additional_loras = []
+        inpaint_head_model_path = None
+        inpaint_parameterized = inpaint_engine != 'None'  # inpaint_engine = None, improve detail
+        initial_latent = None
+
+        if 'vary' in goals:
+            img, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
+                async_task, async_task.enhance_uov_method, img, denoising_strength, switch, current_progress)
+        if 'upscale' in goals:
+            direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
+                async_task, img, async_task.enhance_uov_method, switch, current_progress,
+                advance_progress=True)
+
+            if direct_return:
+                return current_progress, img
+        if 'inpaint' in goals and inpaint_parameterized:
+            progressbar(async_task, current_progress, 'Downloading inpainter ...')
+            inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
+                inpaint_engine)
+            if inpaint_patch_model_path not in base_model_additional_loras:
+                base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+        progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
+        prompt = prepare_enhance_prompt(prompt, async_task.prompt, async_task.translate_prompts, 'prompt')
+        negative_prompt = prepare_enhance_prompt(negative_prompt, async_task.negative_prompt,
+                                                 async_task.translate_prompts, 'negative prompt')
+        # positive and negative conditioning aren't available here anymore, process prompt again
+        tasks_enhance, use_expansion, loras, current_progress = process_prompt(
+            async_task, prompt, negative_prompt, base_model_additional_loras, 1, True,
+            use_expansion, use_style, use_synthetic_refiner, current_progress)
+        task_enhance = tasks_enhance[0]
+        # TODO could support vary, upscale and CN in the future
+        # if 'cn' in goals:
+        #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+        if async_task.freeu_enabled:
+            apply_freeu(async_task)
+        patch_samplers(async_task)
+        if 'inpaint' in goals:
+            denoising_strength, initial_latent, width, height, current_progress = apply_inpaint(
+                async_task, None, inpaint_head_model_path, img, mask,
+                inpaint_parameterized, inpaint_strength,
+                inpaint_respective_field, switch, inpaint_disable_initial_latent,
+                current_progress, True)
+        imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
+                                                          controlnet_cpds_path, current_task_id, denoising_strength,
+                                                          final_scheduler_name, goals, initial_latent, steps, switch,
+                                                          task_enhance['c'], task_enhance['uc'], task_enhance,
+                                                          tasks_enhance, tiled, use_expansion, width, height,
+                                                          base_progress, preparation_steps, total_count)
+
+        del task_enhance['c'], task_enhance['uc']  # Save memory
+        return current_progress, imgs[0]
+
     @torch.no_grad()
     @torch.inference_mode()
     def handler(async_task: AsyncTask):
@@ -957,6 +1017,7 @@ def worker():
         async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections]
         base_model_additional_loras = []
         async_task.uov_method = async_task.uov_method.lower()
+        async_task.enhance_uov_method = async_task.enhance_uov_method.lower()
 
         if fooocus_expansion in async_task.style_selections:
             use_expansion = True
@@ -1051,12 +1112,23 @@ def worker():
             progressbar(async_task, current_progress, 'Image processing ...')
 
         if 'vary' in goals:
-            denoising_strength, initial_latent, width, height, current_progress = apply_vary(async_task, denoising_strength, switch, current_progress)
+            async_task.uov_input_image, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
+                async_task, async_task.uov_method, async_task.uov_input_image, denoising_strength, switch,
+                current_progress)
 
         if 'upscale' in goals:
-            try:
-                denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(async_task, switch, current_progress, advance_progress=True)
-            except EarlyReturnException:
+            direct_return, async_task.uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
+                async_task, async_task.uov_input_image, async_task.uov_method, switch, current_progress,
+                advance_progress=True)
+            if direct_return:
+                d = [('Upscale (Fast)', 'upscale_fast', '2x')]
+                if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
+                    progressbar(async_task, 100, 'Checking for NSFW content ...')
+                    async_task.uov_input_image = default_censor(async_task.uov_input_image)
+                progressbar(async_task, 100, 'Saving image to system ...')
+                uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
+                yield_result(async_task, uov_input_image_path, 100, async_task.black_out_nsfw, False,
+                             do_not_show_finished_images=True)
                 return
 
         if 'inpaint' in goals:
@@ -1131,7 +1203,7 @@ def worker():
                                                                  controlnet_cpds_path,
                                                                  current_task_id, denoising_strength,
                                                                  final_scheduler_name, goals, initial_latent,
-                                                                 switch, task['c'], task['uc'], task,
+                                                                 async_task.steps, switch, task['c'], task['uc'], task,
                                                                  tasks, tiled, use_expansion, width, height,
                                                                  preparation_steps, preparation_steps,
                                                                  async_task.image_number)
@@ -1151,7 +1223,7 @@ def worker():
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 
-        if not async_task.enhance_checkbox or len(async_task.enhance_ctrls) == 0:
+        if not async_task.enhance_checkbox or ('upscale' in goals and async_task.enhance_uov_method != flags.disabled) and len(async_task.enhance_ctrls) == 0:
             print(f'[Enhance] Skipping, preconditions aren\'t met')
             stop_processing(async_task, processing_start_time)
             return
@@ -1162,6 +1234,36 @@ def worker():
         current_task_id = -1
         for imgs in generated_imgs.values():
             for img in imgs:
+                enhancement_image_start_time = time.perf_counter()
+                # upscale if not disabled or already in goals
+                if 'upscale' not in goals and async_task.enhance_uov_method != flags.disabled:
+                    current_task_id += 1
+                    goals_enhance = []
+                    img, skip_prompt_processing, steps = prepare_upscale(async_task, goals_enhance, img,
+                                                                         async_task.enhance_uov_method,
+                                                                         async_task.performance_selection,
+                                                                         async_task.steps, current_progress)
+
+                    if len(goals_enhance) > 0:
+                        try:
+                            current_progress, img = process_enhance(
+                                all_steps, async_task, base_progress, callback, controlnet_canny_path,
+                                controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
+                                'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
+                                goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
+                                use_expansion, use_style, use_synthetic_refiner, width)
+
+                            # TODO check steps in progress bar, 100% wrong
+                        except ldm_patched.modules.model_management.InterruptProcessingException:
+                            if async_task.last_stop == 'skip':
+                                print('User skipped')
+                                async_task.last_stop = False
+                                continue
+                            else:
+                                print('User stopped')
+                                break
+
+                # inpaint for all other tabs
                 for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls:
                     current_task_id += 1
                     current_progress = int(base_progress + (100 - preparation_steps) * float(current_task_id * async_task.steps) / float(all_steps))
@@ -1204,58 +1306,16 @@ def worker():
                         print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
                         continue
 
-                    base_model_additional_loras_enhance = []
-                    inpaint_head_model_path_enhance = None
-                    inpaint_parameterized_enhance = enhance_inpaint_engine != 'None'  # inpaint_engine = None, improve detail
-
-                    if inpaint_parameterized_enhance:
-                        progressbar(async_task, current_progress, 'Downloading inpainter ...')
-                        inpaint_head_model_path_enhance, inpaint_patch_model_path_enhance = modules.config.downloading_inpaint_models(
-                            enhance_inpaint_engine)
-                        if inpaint_patch_model_path_enhance not in base_model_additional_loras_enhance:
-                            base_model_additional_loras_enhance += [(inpaint_patch_model_path_enhance, 1.0)]
-
-                    progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
-                    enhance_prompt = prepare_enhance_prompt(enhance_prompt, async_task.prompt, async_task.translate_prompts,
-                                                            'prompt')
-                    enhance_negative_prompt = prepare_enhance_prompt(enhance_negative_prompt, async_task.negative_prompt,
-                                                                     async_task.translate_prompts, 'negative prompt')
-
-                    # positive and negative conditioning aren't available here anymore, process prompt again
-                    tasks_enhance, use_expansion, loras, current_progress = process_prompt(async_task, enhance_prompt,
-                                                                                           enhance_negative_prompt,
-                                                                                           base_model_additional_loras_enhance,
-                                                                                           1, True,
-                                                                                           use_expansion, use_style,
-                                                                                           use_synthetic_refiner,
-                                                                                           current_progress)
-                    task_enhance = tasks_enhance[0]
-
-                    # TODO could support vary, upscale and CN in the future
-                    # if 'cn' in goals:
-                    #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
-                    if async_task.freeu_enabled:
-                        apply_freeu(async_task)
-                    patch_samplers(async_task)
-
                     goals_enhance = ['inpaint']
-                    enhance_inpaint_strength, initial_latent_enhance, width_enhance, height_enhance, current_progress = apply_inpaint(
-                        async_task, None, inpaint_head_model_path_enhance, img, mask,
-                        inpaint_parameterized_enhance, enhance_inpaint_strength,
-                        enhance_inpaint_respective_field, switch, enhance_inpaint_disable_initial_latent,
-                        current_progress, True)
 
                     try:
-                        imgs2, img_paths, current_progress = process_task(all_steps, async_task, callback,
-                                                                          controlnet_canny_path, controlnet_cpds_path,
-                                                                          current_task_id, enhance_inpaint_strength,
-                                                                          final_scheduler_name, goals_enhance,
-                                                                          initial_latent_enhance, switch,
-                                                                          task_enhance['c'], task_enhance['uc'],
-                                                                          task_enhance, tasks_enhance, tiled,
-                                                                          use_expansion, width_enhance, height_enhance,
-                                                                          base_progress, preparation_steps, total_count)
-                        img = imgs2[0]
+                        current_progress, img = process_enhance(
+                            all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+                            current_progress, current_task_id, denoising_strength,
+                            enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
+                            enhance_inpaint_respective_field, enhance_inpaint_strength, enhance_negative_prompt,
+                            enhance_prompt, final_scheduler_name, goals_enhance, height, img, mask, preparation_steps,
+                            async_task.steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
 
                     except ldm_patched.modules.model_management.InterruptProcessingException:
                         if async_task.last_stop == 'skip':
@@ -1266,10 +1326,12 @@ def worker():
                             print('User stopped')
                             break
 
-                    del task_enhance['c'], task_enhance['uc']  # Save memory
                     enhancement_task_time = time.perf_counter() - enhancement_task_start_time
                     print(f'Enhancement time: {enhancement_task_time:.2f} seconds')
 
+                enhancement_image_time = time.perf_counter() - enhancement_image_start_time
+                print(f'Enhancement image time: {enhancement_image_time:.2f} seconds')
+
         stop_processing(async_task, processing_start_time)
         return
 
diff --git a/webui.py b/webui.py
index cad6d157..b3b7025a 100644
--- a/webui.py
+++ b/webui.py
@@ -348,6 +348,11 @@ with shared.gradio_root:
 
             with gr.Row(visible=False) as enhance_input_panel:
                 with gr.Tabs():
+                    with gr.TabItem(label='Upscale or Variation'):
+                        with gr.Row():
+                            with gr.Column():
+                                enhance_uov_method = gr.Radio(label='', show_label=False, choices=flags.uov_list, value=flags.disabled)
+                                gr.HTML('<a href="https://github.com/lllyasviel/Fooocus/discussions/390" target="_blank">\U0001F4D4 Document</a>')
                     enhance_ctrls = []
                     for index in range(modules.config.default_enhance_tabs):
                         with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
@@ -925,7 +930,7 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
-        ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox, enhance_checkbox]
+        ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox, enhance_checkbox, enhance_uov_method]
         ctrls += enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):

From 5012fb70673bbb593c28c796dab14331b4f9f687 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Thu, 20 Jun 2024 02:20:49 +0200
Subject: [PATCH 069/101] feat: display first enhance tab as #1

---
 webui.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/webui.py b/webui.py
index b3b7025a..d1e3bf1f 100644
--- a/webui.py
+++ b/webui.py
@@ -348,14 +348,14 @@ with shared.gradio_root:
 
             with gr.Row(visible=False) as enhance_input_panel:
                 with gr.Tabs():
-                    with gr.TabItem(label='Upscale or Variation'):
+                    with gr.TabItem(label='#1'):
                         with gr.Row():
                             with gr.Column():
-                                enhance_uov_method = gr.Radio(label='', show_label=False, choices=flags.uov_list, value=flags.disabled)
+                                enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list, value=flags.disabled)
                                 gr.HTML('<a href="https://github.com/lllyasviel/Fooocus/discussions/390" target="_blank">\U0001F4D4 Document</a>')
                     enhance_ctrls = []
                     for index in range(modules.config.default_enhance_tabs):
-                        with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
+                        with gr.TabItem(label=f'#{index + 2}') as enhance_tab_item:
                             enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
                                                           container=False)
 

From eada51dee4fc3d1d5ad6fba96ca00b3b3d2c3926 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Thu, 20 Jun 2024 02:43:14 +0200
Subject: [PATCH 070/101] feat: optimize progress bar, now correctly uses uov
 steps and overrides

---
 modules/async_worker.py | 56 ++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 17 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 166c5ed5..55081cb4 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -306,7 +306,7 @@ def worker():
         del positive_cond, negative_cond  # Save memory
         if inpaint_worker.current_task is not None:
             imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-        current_progress = int(base_progress + (100 - preparation_steps) * float((current_task_id + 1) * steps) / float(all_steps))
+        current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * steps)
         if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
             progressbar(async_task, current_progress, 'Checking for NSFW content ...')
             imgs = default_censor(imgs)
@@ -950,7 +950,7 @@ def worker():
         processing_time = time.perf_counter() - processing_start_time
         print(f'Processing time (total): {processing_time:.2f} seconds')
 
-    def process_enhance(all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+    def process_enhance(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                         current_progress, current_task_id, denoising_strength, inpaint_disable_initial_latent,
                         inpaint_engine, inpaint_respective_field, inpaint_strength,
                         negative_prompt, prompt, final_scheduler_name, goals, height, img, mask,
@@ -966,11 +966,10 @@ def worker():
                 async_task, async_task.enhance_uov_method, img, denoising_strength, switch, current_progress)
         if 'upscale' in goals:
             direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
-                async_task, img, async_task.enhance_uov_method, switch, current_progress,
-                advance_progress=True)
-
+                async_task, img, async_task.enhance_uov_method, switch, current_progress)
             if direct_return:
                 return current_progress, img
+
         if 'inpaint' in goals and inpaint_parameterized:
             progressbar(async_task, current_progress, 'Downloading inpainter ...')
             inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
@@ -1003,7 +1002,7 @@ def worker():
                                                           final_scheduler_name, goals, initial_latent, steps, switch,
                                                           task_enhance['c'], task_enhance['uc'], task_enhance,
                                                           tasks_enhance, tiled, use_expansion, width, height,
-                                                          base_progress, preparation_steps, total_count)
+                                                          current_progress, preparation_steps, total_count)
 
         del task_enhance['c'], task_enhance['uc']  # Save memory
         return current_progress, imgs[0]
@@ -1161,6 +1160,14 @@ def worker():
         if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0:
             all_steps += async_task.image_number * len(async_task.enhance_ctrls) * async_task.steps
 
+        enhance_upscale_steps = 0
+        enhance_upscale_steps_total = 0
+        if 'upscale' not in goals and async_task.enhance_uov_method != flags.disabled:
+            enhance_upscale_steps = async_task.overwrite_step if async_task.overwrite_step > 0 else async_task.performance_selection.steps_uov()
+            enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps
+            all_steps += enhance_upscale_steps_total
+
+
         print(f'[Parameters] Denoising Strength = {denoising_strength}')
 
         if isinstance(initial_latent, dict) and 'samples' in initial_latent:
@@ -1180,20 +1187,20 @@ def worker():
 
         processing_start_time = time.perf_counter()
 
-        preparation_steps = base_progress = current_progress
+        preparation_steps = current_progress
         total_count = async_task.image_number
 
         def callback(step, x0, x, total_steps, y):
-            done_steps = current_task_id * async_task.steps + step + 1
+            if step == 0:
+                async_task.callback_steps = 0
+            async_task.callback_steps += (100 - preparation_steps) / float(all_steps)
             async_task.yields.append(['preview', (
-                int(base_progress + (100 - preparation_steps) * float(done_steps) / float(all_steps)),
+                int(current_progress + async_task.callback_steps),
                 f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
 
         generated_imgs = {}
 
         for current_task_id, task in enumerate(tasks):
-            current_progress = int(base_progress + (100 - preparation_steps) * float(
-                current_task_id * async_task.steps) / float(all_steps))
             progressbar(async_task, current_progress,
                         f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
             execution_start_time = time.perf_counter()
@@ -1229,15 +1236,25 @@ def worker():
             return
 
         progressbar(async_task, current_progress, 'Processing enhance ...')
-        total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * len(async_task.enhance_ctrls)
+
+        active_enhance_tabs = len(async_task.enhance_ctrls)
+        should_process_uov = 'upscale' not in goals and async_task.enhance_uov_method != flags.disabled
+        if should_process_uov:
+            active_enhance_tabs += 1
+        total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * active_enhance_tabs
+
         base_progress = current_progress
         current_task_id = -1
+        done_steps_upscaling = 0
+        done_steps_inpainting = 0
         for imgs in generated_imgs.values():
             for img in imgs:
                 enhancement_image_start_time = time.perf_counter()
+
                 # upscale if not disabled or already in goals
-                if 'upscale' not in goals and async_task.enhance_uov_method != flags.disabled:
+                if should_process_uov:
                     current_task_id += 1
+                    current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
                     goals_enhance = []
                     img, skip_prompt_processing, steps = prepare_upscale(async_task, goals_enhance, img,
                                                                          async_task.enhance_uov_method,
@@ -1247,26 +1264,29 @@ def worker():
                     if len(goals_enhance) > 0:
                         try:
                             current_progress, img = process_enhance(
-                                all_steps, async_task, base_progress, callback, controlnet_canny_path,
+                                all_steps, async_task, callback, controlnet_canny_path,
                                 controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
                                 'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
                                 goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
                                 use_expansion, use_style, use_synthetic_refiner, width)
 
-                            # TODO check steps in progress bar, 100% wrong
                         except ldm_patched.modules.model_management.InterruptProcessingException:
                             if async_task.last_stop == 'skip':
                                 print('User skipped')
                                 async_task.last_stop = False
+                                # also skip all enhance steps for this image, but add the steps to the progress bar
+                                done_steps_inpainting += len(async_task.enhance_ctrls) * async_task.steps
                                 continue
                             else:
                                 print('User stopped')
                                 break
+                        finally:
+                            done_steps_upscaling += steps
 
                 # inpaint for all other tabs
                 for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls:
                     current_task_id += 1
-                    current_progress = int(base_progress + (100 - preparation_steps) * float(current_task_id * async_task.steps) / float(all_steps))
+                    current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
                     progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
                     enhancement_task_start_time = time.perf_counter()
 
@@ -1310,7 +1330,7 @@ def worker():
 
                     try:
                         current_progress, img = process_enhance(
-                            all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+                            all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                             current_progress, current_task_id, denoising_strength,
                             enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
                             enhance_inpaint_respective_field, enhance_inpaint_strength, enhance_negative_prompt,
@@ -1325,6 +1345,8 @@ def worker():
                         else:
                             print('User stopped')
                             break
+                    finally:
+                        done_steps_inpainting += async_task.steps
 
                     enhancement_task_time = time.perf_counter() - enhancement_task_start_time
                     print(f'Enhancement time: {enhancement_task_time:.2f} seconds')

From 51dc53b5c810b86ce46fb79977f7a8c997b5a93d Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Thu, 20 Jun 2024 02:49:22 +0200
Subject: [PATCH 071/101] feat: save and output upscaled image with method fast
 in enhance

---
 modules/async_worker.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 55081cb4..6558aa95 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -968,6 +968,14 @@ def worker():
             direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
                 async_task, img, async_task.enhance_uov_method, switch, current_progress)
             if direct_return:
+                d = [('Upscale (Fast)', 'upscale_fast', '2x')]
+                if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
+                    progressbar(async_task, current_progress, 'Checking for NSFW content ...')
+                    async_task.uov_input_image = default_censor(async_task.uov_input_image)
+                progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{total_count} to system ...')
+                uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
+                yield_result(async_task, uov_input_image_path, current_progress, async_task.black_out_nsfw, False,
+                             do_not_show_finished_images=True)
                 return current_progress, img
 
         if 'inpaint' in goals and inpaint_parameterized:

From 5bc6a0c650b7cb87e4c6e32f1150b74d5c565d86 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Fri, 21 Jun 2024 19:56:35 +0200
Subject: [PATCH 072/101] fix: use correct parameter for upscale (fast)

---
 modules/async_worker.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 6558aa95..53091765 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -971,10 +971,10 @@ def worker():
                 d = [('Upscale (Fast)', 'upscale_fast', '2x')]
                 if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
                     progressbar(async_task, current_progress, 'Checking for NSFW content ...')
-                    async_task.uov_input_image = default_censor(async_task.uov_input_image)
+                    img = default_censor(img)
                 progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{total_count} to system ...')
-                uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
-                yield_result(async_task, uov_input_image_path, current_progress, async_task.black_out_nsfw, False,
+                uov_image_path = log(img, d, output_format=async_task.output_format)
+                yield_result(async_task, uov_image_path, current_progress, async_task.black_out_nsfw, False,
                              do_not_show_finished_images=True)
                 return current_progress, img
 

From ebd5e24531e3ef77920d6b6eeb972166dcebc528 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Fri, 21 Jun 2024 22:42:43 +0200
Subject: [PATCH 073/101] feat: allow upscaled input for enhance upscaling,
 correctly calculate steps

---
 modules/async_worker.py | 68 +++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 53091765..91cc674b 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -34,6 +34,7 @@ class AsyncTask:
 
         self.performance_selection = Performance(args.pop())
         self.steps = self.performance_selection.steps()
+        self.original_steps = self.steps
 
         self.aspect_ratios_selection = args.pop()
         self.image_number = args.pop()
@@ -336,21 +337,18 @@ def worker():
                  ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']),
                  ('Styles', 'styles',
                   str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])),
-                 ('Performance', 'performance', async_task.performance_selection.value)]
-
-            if async_task.performance_selection.steps() != async_task.steps:
-                d.append(('Steps', 'steps', async_task.steps))
-
-            d += [('Resolution', 'resolution', str((width, height))),
-                  ('Guidance Scale', 'guidance_scale', async_task.cfg_scale),
-                  ('Sharpness', 'sharpness', async_task.sharpness),
-                  ('ADM Guidance', 'adm_guidance', str((
-                      modules.patch.patch_settings[pid].positive_adm_scale,
-                      modules.patch.patch_settings[pid].negative_adm_scale,
-                      modules.patch.patch_settings[pid].adm_scaler_end))),
-                  ('Base Model', 'base_model', async_task.base_model_name),
-                  ('Refiner Model', 'refiner_model', async_task.refiner_model_name),
-                  ('Refiner Switch', 'refiner_switch', async_task.refiner_switch)]
+                 ('Performance', 'performance', async_task.performance_selection.value),
+                 ('Steps', 'steps', async_task.steps),
+                 ('Resolution', 'resolution', str((width, height))),
+                 ('Guidance Scale', 'guidance_scale', async_task.cfg_scale),
+                 ('Sharpness', 'sharpness', async_task.sharpness),
+                 ('ADM Guidance', 'adm_guidance', str((
+                     modules.patch.patch_settings[pid].positive_adm_scale,
+                     modules.patch.patch_settings[pid].negative_adm_scale,
+                     modules.patch.patch_settings[pid].adm_scaler_end))),
+                 ('Base Model', 'base_model', async_task.base_model_name),
+                 ('Refiner Model', 'refiner_model', async_task.refiner_model_name),
+                 ('Refiner Switch', 'refiner_switch', async_task.refiner_switch)]
 
             if async_task.refiner_model_name != 'None':
                 if async_task.overwrite_switch > 0:
@@ -623,9 +621,9 @@ def worker():
         print(f'Final resolution is {str((width, height))}.')
         return direct_return, uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress
 
-    def apply_overrides(async_task, height, width):
+    def apply_overrides(async_task, steps, height, width):
         if async_task.overwrite_step > 0:
-            async_task.steps = async_task.overwrite_step
+            steps = async_task.overwrite_step
         switch = int(round(async_task.steps * async_task.refiner_switch))
         if async_task.overwrite_switch > 0:
             switch = async_task.overwrite_switch
@@ -633,7 +631,7 @@ def worker():
             width = async_task.overwrite_width
         if async_task.overwrite_height > 0:
             height = async_task.overwrite_height
-        return height, switch, width
+        return steps, switch, width, height
 
     def process_prompt(async_task, prompt, negative_prompt, base_model_additional_loras, image_number, disable_seed_increment, use_expansion, use_style,
                        use_synthetic_refiner, current_progress, advance_progress=False):
@@ -1101,7 +1099,7 @@ def worker():
         ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
         ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
 
-        height, switch, width = apply_overrides(async_task, height, width)
+        async_task.steps, switch, width, height = apply_overrides(async_task, async_task.steps, height, width)
 
         print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}')
         print(f'[Parameters] Steps = {async_task.steps} - {switch}')
@@ -1163,18 +1161,19 @@ def worker():
         if async_task.freeu_enabled:
             apply_freeu(async_task)
 
-        all_steps = async_task.steps * async_task.image_number
+        steps, _, _, _ = apply_overrides(async_task, async_task.steps, height, width)
+        all_steps = steps * async_task.image_number
 
-        if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0:
-            all_steps += async_task.image_number * len(async_task.enhance_ctrls) * async_task.steps
-
-        enhance_upscale_steps = 0
-        enhance_upscale_steps_total = 0
-        if 'upscale' not in goals and async_task.enhance_uov_method != flags.disabled:
-            enhance_upscale_steps = async_task.overwrite_step if async_task.overwrite_step > 0 else async_task.performance_selection.steps_uov()
+        # enhance_upscale_steps = 0
+        # enhance_upscale_steps_total = 0
+        if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled:
+            enhance_upscale_steps, _, _, _ = apply_overrides(async_task, async_task.performance_selection.steps_uov(), height, width)
             enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps
             all_steps += enhance_upscale_steps_total
 
+        if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0:
+            all_steps += async_task.image_number * len(async_task.enhance_ctrls) * steps
+
 
         print(f'[Parameters] Denoising Strength = {denoising_strength}')
 
@@ -1238,7 +1237,7 @@ def worker():
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 
-        if not async_task.enhance_checkbox or ('upscale' in goals and async_task.enhance_uov_method != flags.disabled) and len(async_task.enhance_ctrls) == 0:
+        if not async_task.enhance_checkbox or (async_task.enhance_uov_method == flags.disabled and len(async_task.enhance_ctrls) == 0):
             print(f'[Enhance] Skipping, preconditions aren\'t met')
             stop_processing(async_task, processing_start_time)
             return
@@ -1246,7 +1245,7 @@ def worker():
         progressbar(async_task, current_progress, 'Processing enhance ...')
 
         active_enhance_tabs = len(async_task.enhance_ctrls)
-        should_process_uov = 'upscale' not in goals and async_task.enhance_uov_method != flags.disabled
+        should_process_uov = async_task.enhance_uov_method != flags.disabled
         if should_process_uov:
             active_enhance_tabs += 1
         total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * active_enhance_tabs
@@ -1255,6 +1254,7 @@ def worker():
         current_task_id = -1
         done_steps_upscaling = 0
         done_steps_inpainting = 0
+        enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
         for imgs in generated_imgs.values():
             for img in imgs:
                 enhancement_image_start_time = time.perf_counter()
@@ -1267,7 +1267,9 @@ def worker():
                     img, skip_prompt_processing, steps = prepare_upscale(async_task, goals_enhance, img,
                                                                          async_task.enhance_uov_method,
                                                                          async_task.performance_selection,
-                                                                         async_task.steps, current_progress)
+                                                                         enhance_steps, current_progress)
+
+                    steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
 
                     if len(goals_enhance) > 0:
                         try:
@@ -1283,7 +1285,7 @@ def worker():
                                 print('User skipped')
                                 async_task.last_stop = False
                                 # also skip all enhance steps for this image, but add the steps to the progress bar
-                                done_steps_inpainting += len(async_task.enhance_ctrls) * async_task.steps
+                                done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
                                 continue
                             else:
                                 print('User stopped')
@@ -1343,7 +1345,7 @@ def worker():
                             enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
                             enhance_inpaint_respective_field, enhance_inpaint_strength, enhance_negative_prompt,
                             enhance_prompt, final_scheduler_name, goals_enhance, height, img, mask, preparation_steps,
-                            async_task.steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
+                            enhance_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
 
                     except ldm_patched.modules.model_management.InterruptProcessingException:
                         if async_task.last_stop == 'skip':
@@ -1354,7 +1356,7 @@ def worker():
                             print('User stopped')
                             break
                     finally:
-                        done_steps_inpainting += async_task.steps
+                        done_steps_inpainting += enhance_steps
 
                     enhancement_task_time = time.perf_counter() - enhancement_task_start_time
                     print(f'Enhancement time: {enhancement_task_time:.2f} seconds')

From c7a411a8c79f0ff738ac08464edc6d936aaae8b3 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Fri, 21 Jun 2024 22:43:03 +0200
Subject: [PATCH 074/101] refactor: code cleanup

---
 modules/flags.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/modules/flags.py b/modules/flags.py
index 16f4c19c..a75eed98 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -162,14 +162,6 @@ class Performance(Enum):
     def values(cls) -> list:
         return list(map(lambda c: c.value, cls))
 
-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
     @classmethod
     def by_steps(cls, steps: int | str):
         return cls[Steps(int(steps)).name]

From 40e1c82b742b91e743fe984dbdbce09b4eab202e Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Fri, 21 Jun 2024 23:51:00 +0200
Subject: [PATCH 075/101] feat: add enhance image input

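Use this input to enhance an existing image directly: prompt processing and the regular generation steps are skipped, so the image no longer has to be run through another generation pass before enhancement.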
---
 modules/async_worker.py | 218 +++++++++++++++++++++-------------------
 webui.py                |   9 +-
 2 files changed, 122 insertions(+), 105 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 91cc674b..d5b3dedc 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -113,6 +113,7 @@ class AsyncTask:
         self.dino_erode_or_dilate = args.pop()
         self.debugging_enhance_masks_checkbox = args.pop()
 
+        self.enhance_input_image = args.pop()
         self.enhance_checkbox = args.pop()
         self.enhance_uov_method = args.pop()
         self.enhance_ctrls = []
@@ -569,7 +570,7 @@ def worker():
         H, W, C = uov_input_image.shape
         if advance_progress:
             current_progress += 1
-        progressbar(async_task, current_progress, f'Upscaling image from {str((H, W))} ...')
+        progressbar(async_task, current_progress, f'Upscaling image from {str((W, H))} ...')
         uov_input_image = perform_upscale(uov_input_image)
         print(f'Image upscaled.')
         if '1.5x' in uov_method:
@@ -843,7 +844,7 @@ def worker():
                           skip_prompt_processing, use_synthetic_refiner):
         if (async_task.current_tab == 'uov' or (
                 async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
-                and async_task.uov_method != flags.disabled and async_task.uov_input_image is not None:
+                and async_task.uov_method != flags.disabled.lower() and async_task.uov_input_image is not None:
             async_task.uov_input_image, skip_prompt_processing, async_task.steps = prepare_upscale(
                 async_task, goals, async_task.uov_input_image, async_task.uov_method, async_task.performance_selection,
                 async_task.steps, 1, skip_prompt_processing=skip_prompt_processing)
@@ -914,6 +915,10 @@ def worker():
             if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
                 clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
                     'face')
+        if async_task.current_tab == 'enhance' and async_task.enhance_input_image is not None:
+            goals.append('enhance')
+            skip_prompt_processing = True
+            async_task.enhance_input_image = HWC3(async_task.enhance_input_image)
         return base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner
 
     def prepare_upscale(async_task, goals, uov_input_image, uov_method, performance, steps, current_progress,
@@ -1161,18 +1166,26 @@ def worker():
         if async_task.freeu_enabled:
             apply_freeu(async_task)
 
+        # async_task.steps can have value of uov steps here when upscale has been applied
         steps, _, _, _ = apply_overrides(async_task, async_task.steps, height, width)
+
+        images_to_enhance = []
+        if 'enhance' in goals:
+            images_to_enhance += [async_task.enhance_input_image]
+            height, width, _ = async_task.enhance_input_image.shape
+            # input image already provided, processing is skipped
+            steps = 0
+
         all_steps = steps * async_task.image_number
 
-        # enhance_upscale_steps = 0
-        # enhance_upscale_steps_total = 0
-        if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled:
+        if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled.lower():
             enhance_upscale_steps, _, _, _ = apply_overrides(async_task, async_task.performance_selection.steps_uov(), height, width)
             enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps
             all_steps += enhance_upscale_steps_total
 
         if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0:
-            all_steps += async_task.image_number * len(async_task.enhance_ctrls) * steps
+            enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
+            all_steps += async_task.image_number * len(async_task.enhance_ctrls) * enhance_steps
 
 
         print(f'[Parameters] Denoising Strength = {denoising_strength}')
@@ -1205,8 +1218,6 @@ def worker():
                 int(current_progress + async_task.callback_steps),
                 f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
 
-        generated_imgs = {}
-
         for current_task_id, task in enumerate(tasks):
             progressbar(async_task, current_progress,
                         f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
@@ -1222,7 +1233,7 @@ def worker():
                                                                  preparation_steps, preparation_steps,
                                                                  async_task.image_number)
 
-                generated_imgs[current_task_id] = imgs
+                images_to_enhance += imgs
 
             except ldm_patched.modules.model_management.InterruptProcessingException:
                 if async_task.last_stop == 'skip':
@@ -1237,7 +1248,7 @@ def worker():
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 
-        if not async_task.enhance_checkbox or (async_task.enhance_uov_method == flags.disabled and len(async_task.enhance_ctrls) == 0):
+        if not async_task.enhance_checkbox or (async_task.enhance_uov_method == flags.disabled.lower() and len(async_task.enhance_ctrls) == 0):
             print(f'[Enhance] Skipping, preconditions aren\'t met')
             stop_processing(async_task, processing_start_time)
             return
@@ -1245,124 +1256,123 @@ def worker():
         progressbar(async_task, current_progress, 'Processing enhance ...')
 
         active_enhance_tabs = len(async_task.enhance_ctrls)
-        should_process_uov = async_task.enhance_uov_method != flags.disabled
-        if should_process_uov:
+        should_process_enhance_uov = async_task.enhance_uov_method != flags.disabled.lower()
+        if should_process_enhance_uov:
             active_enhance_tabs += 1
-        total_count = sum([len(imgs) for _, imgs in generated_imgs.items()]) * active_enhance_tabs
+        total_count = len(images_to_enhance) * active_enhance_tabs
 
         base_progress = current_progress
         current_task_id = -1
         done_steps_upscaling = 0
         done_steps_inpainting = 0
         enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
-        for imgs in generated_imgs.values():
-            for img in imgs:
-                enhancement_image_start_time = time.perf_counter()
+        for img in images_to_enhance:
+            enhancement_image_start_time = time.perf_counter()
 
-                # upscale if not disabled or already in goals
-                if should_process_uov:
-                    current_task_id += 1
-                    current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
-                    goals_enhance = []
-                    img, skip_prompt_processing, steps = prepare_upscale(async_task, goals_enhance, img,
-                                                                         async_task.enhance_uov_method,
-                                                                         async_task.performance_selection,
-                                                                         enhance_steps, current_progress)
+            # upscale if not disabled or already in goals
+            if should_process_enhance_uov:
+                current_task_id += 1
+                current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
+                goals_enhance = []
+                img, skip_prompt_processing, steps = prepare_upscale(async_task, goals_enhance, img,
+                                                                     async_task.enhance_uov_method,
+                                                                     async_task.performance_selection,
+                                                                     enhance_steps, current_progress)
 
-                    steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
-
-                    if len(goals_enhance) > 0:
-                        try:
-                            current_progress, img = process_enhance(
-                                all_steps, async_task, callback, controlnet_canny_path,
-                                controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
-                                'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
-                                goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
-                                use_expansion, use_style, use_synthetic_refiner, width)
-
-                        except ldm_patched.modules.model_management.InterruptProcessingException:
-                            if async_task.last_stop == 'skip':
-                                print('User skipped')
-                                async_task.last_stop = False
-                                # also skip all enhance steps for this image, but add the steps to the progress bar
-                                done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
-                                continue
-                            else:
-                                print('User stopped')
-                                break
-                        finally:
-                            done_steps_upscaling += steps
-
-                # inpaint for all other tabs
-                for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls:
-                    current_task_id += 1
-                    current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
-                    progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
-                    enhancement_task_start_time = time.perf_counter()
-
-                    if enhance_mask_model == 'sam':
-                        print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
-
-                    mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(
-                        img, mask_model=enhance_mask_model, sam_options=SAMOptions(
-                            dino_prompt=enhance_mask_dino_prompt_text,
-                            dino_box_threshold=enhance_mask_box_threshold,
-                            dino_text_threshold=enhance_mask_text_threshold,
-                            dino_erode_or_dilate=async_task.dino_erode_or_dilate,
-                            dino_debug=async_task.debugging_dino,
-                            max_detections=enhance_mask_sam_max_detections,
-                            model_type=enhance_mask_sam_model,
-                        ))
-                    if len(mask.shape) == 3:
-                        mask = mask[:, :, 0]
-
-                    if int(enhance_inpaint_erode_or_dilate) != 0:
-                        mask = erode_or_dilate(mask, enhance_inpaint_erode_or_dilate)
-
-                    if enhance_mask_invert:
-                        mask = 255 - mask
-
-                    if async_task.debugging_enhance_masks_checkbox:
-                        async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
-                        yield_result(async_task, mask, current_progress, async_task.black_out_nsfw, False,
-                                     async_task.disable_intermediate_results)
-
-                    print(f'[Enhance] {dino_detection_count} boxes detected')
-                    print(f'[Enhance] {sam_detection_count} segments detected in boxes')
-                    print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
-
-                    if enhance_mask_model == 'sam' and (
-                            dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0):
-                        print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
-                        continue
-
-                    goals_enhance = ['inpaint']
+                steps, _, _, _ = apply_overrides(async_task, steps, height, width)
 
+                if len(goals_enhance) > 0:
                     try:
                         current_progress, img = process_enhance(
-                            all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                            current_progress, current_task_id, denoising_strength,
-                            enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
-                            enhance_inpaint_respective_field, enhance_inpaint_strength, enhance_negative_prompt,
-                            enhance_prompt, final_scheduler_name, goals_enhance, height, img, mask, preparation_steps,
-                            enhance_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
+                            all_steps, async_task, callback, controlnet_canny_path,
+                            controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
+                            'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
+                            goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
+                            use_expansion, use_style, use_synthetic_refiner, width)
 
                     except ldm_patched.modules.model_management.InterruptProcessingException:
                         if async_task.last_stop == 'skip':
                             print('User skipped')
                             async_task.last_stop = False
+                            # also skip all enhance steps for this image, but add the steps to the progress bar
+                            done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
                             continue
                         else:
                             print('User stopped')
                             break
                     finally:
-                        done_steps_inpainting += enhance_steps
+                        done_steps_upscaling += steps
 
-                    enhancement_task_time = time.perf_counter() - enhancement_task_start_time
-                    print(f'Enhancement time: {enhancement_task_time:.2f} seconds')
+            # inpaint for all other tabs
+            for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls:
+                current_task_id += 1
+                current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
+                progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
+                enhancement_task_start_time = time.perf_counter()
 
-                enhancement_image_time = time.perf_counter() - enhancement_image_start_time
-                print(f'Enhancement image time: {enhancement_image_time:.2f} seconds')
+                if enhance_mask_model == 'sam':
+                    print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
+
+                mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(
+                    img, mask_model=enhance_mask_model, sam_options=SAMOptions(
+                        dino_prompt=enhance_mask_dino_prompt_text,
+                        dino_box_threshold=enhance_mask_box_threshold,
+                        dino_text_threshold=enhance_mask_text_threshold,
+                        dino_erode_or_dilate=async_task.dino_erode_or_dilate,
+                        dino_debug=async_task.debugging_dino,
+                        max_detections=enhance_mask_sam_max_detections,
+                        model_type=enhance_mask_sam_model,
+                    ))
+                if len(mask.shape) == 3:
+                    mask = mask[:, :, 0]
+
+                if int(enhance_inpaint_erode_or_dilate) != 0:
+                    mask = erode_or_dilate(mask, enhance_inpaint_erode_or_dilate)
+
+                if enhance_mask_invert:
+                    mask = 255 - mask
+
+                if async_task.debugging_enhance_masks_checkbox:
+                    async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
+                    yield_result(async_task, mask, current_progress, async_task.black_out_nsfw, False,
+                                 async_task.disable_intermediate_results)
+
+                print(f'[Enhance] {dino_detection_count} boxes detected')
+                print(f'[Enhance] {sam_detection_count} segments detected in boxes')
+                print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
+
+                if enhance_mask_model == 'sam' and (
+                        dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0):
+                    print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
+                    continue
+
+                goals_enhance = ['inpaint']
+
+                try:
+                    current_progress, img = process_enhance(
+                        all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                        current_progress, current_task_id, denoising_strength,
+                        enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
+                        enhance_inpaint_respective_field, enhance_inpaint_strength, enhance_negative_prompt,
+                        enhance_prompt, final_scheduler_name, goals_enhance, height, img, mask, preparation_steps,
+                        enhance_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
+
+                except ldm_patched.modules.model_management.InterruptProcessingException:
+                    if async_task.last_stop == 'skip':
+                        print('User skipped')
+                        async_task.last_stop = False
+                        continue
+                    else:
+                        print('User stopped')
+                        break
+                finally:
+                    done_steps_inpainting += enhance_steps
+
+                enhancement_task_time = time.perf_counter() - enhancement_task_start_time
+                print(f'Enhancement time: {enhancement_task_time:.2f} seconds')
+
+            enhancement_image_time = time.perf_counter() - enhancement_image_start_time
+            print(f'Enhancement image time: {enhancement_image_time:.2f} seconds')
 
         stop_processing(async_task, processing_start_time)
         return
diff --git a/webui.py b/webui.py
index d1e3bf1f..ca436057 100644
--- a/webui.py
+++ b/webui.py
@@ -325,6 +325,11 @@ with shared.gradio_root:
                                 desc_input_image.upload(trigger_show_image_properties, inputs=desc_input_image,
                                                         outputs=desc_image_size, show_progress=False, queue=False)
 
+                    with gr.TabItem(label='Enhance') as enhance_tab:
+                        with gr.Row():
+                            with gr.Column():
+                                enhance_input_image = grh.Image(label='Image to enhance', source='upload', type='numpy')
+
                     with gr.TabItem(label='Metadata') as metadata_tab:
                         with gr.Column():
                             metadata_input_image = grh.Image(label='For images created by Fooocus', source='upload', type='filepath')
@@ -488,6 +493,7 @@ with shared.gradio_root:
             inpaint_tab.select(lambda: 'inpaint', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             ip_tab.select(lambda: 'ip', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
+            enhance_tab.select(lambda: 'enhance', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
 
             enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox,
@@ -930,7 +936,8 @@ with shared.gradio_root:
             ctrls += [save_metadata_to_images, metadata_scheme]
 
         ctrls += ip_ctrls
-        ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox, enhance_checkbox, enhance_uov_method]
+        ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox,
+                  enhance_input_image, enhance_checkbox, enhance_uov_method]
         ctrls += enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):

From 6f7443276a7fef4cdafc9c13a37480d017d74606 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Fri, 21 Jun 2024 23:52:11 +0200
Subject: [PATCH 076/101] feat: use casefold for comparing uov_method to
 flags.disabled

The label "Disabled" is capitalized, so casefold both sides before comparison
---
 modules/async_worker.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index d5b3dedc..19c919f4 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -844,7 +844,7 @@ def worker():
                           skip_prompt_processing, use_synthetic_refiner):
         if (async_task.current_tab == 'uov' or (
                 async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
-                and async_task.uov_method != flags.disabled.lower() and async_task.uov_input_image is not None:
+                and async_task.uov_method != flags.disabled.casefold() and async_task.uov_input_image is not None:
             async_task.uov_input_image, skip_prompt_processing, async_task.steps = prepare_upscale(
                 async_task, goals, async_task.uov_input_image, async_task.uov_method, async_task.performance_selection,
                 async_task.steps, 1, skip_prompt_processing=skip_prompt_processing)
@@ -1026,8 +1026,8 @@ def worker():
 
         async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections]
         base_model_additional_loras = []
-        async_task.uov_method = async_task.uov_method.lower()
-        async_task.enhance_uov_method = async_task.enhance_uov_method.lower()
+        async_task.uov_method = async_task.uov_method.casefold()
+        async_task.enhance_uov_method = async_task.enhance_uov_method.casefold()
 
         if fooocus_expansion in async_task.style_selections:
             use_expansion = True
@@ -1178,7 +1178,7 @@ def worker():
 
         all_steps = steps * async_task.image_number
 
-        if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled.lower():
+        if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled.casefold():
             enhance_upscale_steps, _, _, _ = apply_overrides(async_task, async_task.performance_selection.steps_uov(), height, width)
             enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps
             all_steps += enhance_upscale_steps_total
@@ -1248,7 +1248,7 @@ def worker():
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 
-        if not async_task.enhance_checkbox or (async_task.enhance_uov_method == flags.disabled.lower() and len(async_task.enhance_ctrls) == 0):
+        if not async_task.enhance_checkbox or (async_task.enhance_uov_method == flags.disabled.casefold() and len(async_task.enhance_ctrls) == 0):
             print(f'[Enhance] Skipping, preconditions aren\'t met')
             stop_processing(async_task, processing_start_time)
             return
@@ -1256,7 +1256,7 @@ def worker():
         progressbar(async_task, current_progress, 'Processing enhance ...')
 
         active_enhance_tabs = len(async_task.enhance_ctrls)
-        should_process_enhance_uov = async_task.enhance_uov_method != flags.disabled.lower()
+        should_process_enhance_uov = async_task.enhance_uov_method != flags.disabled.casefold()
         if should_process_enhance_uov:
             active_enhance_tabs += 1
         total_count = len(images_to_enhance) * active_enhance_tabs

From ec533f0c427afa89adcb1843065f6aefba72bf3d Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 22 Jun 2024 00:07:32 +0200
Subject: [PATCH 077/101] feat: rename arg --enable-describe-uov-image to
 --enable-auto-describe-image, add handling for enhance input image

---
 args_manager.py |  4 ++--
 webui.py        | 11 +++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/args_manager.py b/args_manager.py
index 5a2b37c9..08e4dc6e 100644
--- a/args_manager.py
+++ b/args_manager.py
@@ -28,8 +28,8 @@ args_parser.parser.add_argument("--disable-metadata", action='store_true',
 args_parser.parser.add_argument("--disable-preset-download", action='store_true',
                                 help="Disables downloading models for presets", default=False)
 
-args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true',
-                                help="Disables automatic description of uov images when prompt is empty", default=False)
+args_parser.parser.add_argument("--enable-auto-describe-image", action='store_true',
+                                help="Enables automatic description of uov and enhance image when prompt is empty", default=False)
 
 args_parser.parser.add_argument("--always-download-new-model", action='store_true',
                                 help="Always download newer models ", default=False)
diff --git a/webui.py b/webui.py
index ca436057..02bb93b5 100644
--- a/webui.py
+++ b/webui.py
@@ -1001,15 +1001,18 @@ with shared.gradio_root:
         desc_btn.click(trigger_describe, inputs=[desc_method, desc_input_image],
                        outputs=[prompt, style_selections], show_progress=True, queue=True)
 
-        if args_manager.args.enable_describe_uov_image:
-            def trigger_uov_describe(mode, img, prompt):
+        if args_manager.args.enable_auto_describe_image:
+            def trigger_auto_describe(mode, img, prompt):
                 # keep prompt if not empty
                 if prompt == '':
                     return trigger_describe(mode, img)
                 return gr.update(), gr.update()
 
-            uov_input_image.upload(trigger_uov_describe, inputs=[desc_method, uov_input_image, prompt],
-                           outputs=[prompt, style_selections], show_progress=True, queue=True)
+            uov_input_image.upload(trigger_auto_describe, inputs=[desc_method, uov_input_image, prompt],
+                                   outputs=[prompt, style_selections], show_progress=True, queue=True)
+
+            enhance_input_image.upload(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt],
+                                   outputs=[prompt, style_selections], show_progress=True, queue=True)
 
 def dump_default_english_config():
     from modules.localization import dump_english_config

From 18d294de63ff6051d3c4f4ea769f2a05e6e35dee Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 22 Jun 2024 00:14:32 +0200
Subject: [PATCH 078/101] feat: do not extend main prompt when enhance prompt
 is provided

reduces prompt consistency with input but greatly improves prompt accuracy
---
 modules/async_worker.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 19c919f4..3c2b02c8 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -945,7 +945,6 @@ def worker():
         else:
             if translate:
                 prompt = translate2en(prompt, prompt_type)
-            prompt = prompt + '\n' + fallback_prompt
         return prompt
 
     def stop_processing(async_task, processing_start_time):

From bef53fcf6e41be91c3bcbe1511a3d8bd16158d05 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 22 Jun 2024 00:30:47 +0200
Subject: [PATCH 079/101] feat: automatically open tab enhance on enhance image
 upload

---
 webui.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/webui.py b/webui.py
index 02bb93b5..c26666e4 100644
--- a/webui.py
+++ b/webui.py
@@ -1011,8 +1011,8 @@ with shared.gradio_root:
             uov_input_image.upload(trigger_auto_describe, inputs=[desc_method, uov_input_image, prompt],
                                    outputs=[prompt, style_selections], show_progress=True, queue=True)
 
-            enhance_input_image.upload(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt],
-                                   outputs=[prompt, style_selections], show_progress=True, queue=True)
+            enhance_input_image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox, queue=False, show_progress=False) \
+                .then(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt], outputs=[prompt, style_selections], show_progress=True, queue=True)
 
 def dump_default_english_config():
     from modules.localization import dump_english_config

From e8220bae4903f4f2ff0a34989b17e68d4e79cbed Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 22 Jun 2024 01:06:22 +0200
Subject: [PATCH 080/101] feat: display intermediate results when enhance is
 true

---
 modules/async_worker.py | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 3c2b02c8..7b691d22 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -274,8 +274,8 @@ def worker():
 
     def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
                      denoising_strength, final_scheduler_name, goals, initial_latent, steps, switch, positive_cond,
-                     negative_cond, task, tasks, tiled, use_expansion, width, height, base_progress, preparation_steps,
-                     total_count):
+                     negative_cond, task, tiled, use_expansion, width, height, base_progress, preparation_steps,
+                     total_count, show_intermediate_results):
         if async_task.last_stop is not False:
             ldm_patched.modules.model_management.interrupt_current_processing()
         if 'cn' in goals:
@@ -316,7 +316,7 @@ def worker():
                     f'Saving image {current_task_id + 1}/{total_count} to system ...')
         img_paths = save_and_log(async_task, height, imgs, task, use_expansion, width)
         yield_result(async_task, img_paths, current_progress, async_task.black_out_nsfw, False,
-                     do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
+                     do_not_show_finished_images=not show_intermediate_results or async_task.disable_intermediate_results)
 
         return imgs, img_paths, current_progress
 
@@ -957,7 +957,7 @@ def worker():
                         inpaint_engine, inpaint_respective_field, inpaint_strength,
                         negative_prompt, prompt, final_scheduler_name, goals, height, img, mask,
                         preparation_steps, steps, switch, tiled, total_count, use_expansion, use_style,
-                        use_synthetic_refiner, width):
+                        use_synthetic_refiner, width, show_intermediate_results=True):
         base_model_additional_loras = []
         inpaint_head_model_path = None
         inpaint_parameterized = inpaint_engine != 'None'  # inpaint_engine = None, improve detail
@@ -1008,11 +1008,11 @@ def worker():
                 inpaint_respective_field, switch, inpaint_disable_initial_latent,
                 current_progress, True)
         imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
-                                                          controlnet_cpds_path, current_task_id, denoising_strength,
-                                                          final_scheduler_name, goals, initial_latent, steps, switch,
-                                                          task_enhance['c'], task_enhance['uc'], task_enhance,
-                                                          tasks_enhance, tiled, use_expansion, width, height,
-                                                          current_progress, preparation_steps, total_count)
+                                                         controlnet_cpds_path, current_task_id, denoising_strength,
+                                                         final_scheduler_name, goals, initial_latent, steps, switch,
+                                                         task_enhance['c'], task_enhance['uc'], task_enhance, tiled,
+                                                         use_expansion, width, height, current_progress,
+                                                         preparation_steps, total_count, show_intermediate_results)
 
         del task_enhance['c'], task_enhance['uc']  # Save memory
         return current_progress, imgs[0]
@@ -1217,20 +1217,21 @@ def worker():
                 int(current_progress + async_task.callback_steps),
                 f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
 
+        should_enhance = async_task.enhance_checkbox and (async_task.enhance_uov_method != flags.disabled.casefold() or len(async_task.enhance_ctrls) > 0)
+        show_intermediate_results = len(tasks) > 1 or should_enhance
+
         for current_task_id, task in enumerate(tasks):
-            progressbar(async_task, current_progress,
-                        f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
+            progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
             execution_start_time = time.perf_counter()
 
             try:
                 imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
-                                                                 controlnet_cpds_path,
-                                                                 current_task_id, denoising_strength,
-                                                                 final_scheduler_name, goals, initial_latent,
-                                                                 async_task.steps, switch, task['c'], task['uc'], task,
-                                                                 tasks, tiled, use_expansion, width, height,
+                                                                 controlnet_cpds_path, current_task_id,
+                                                                 denoising_strength, final_scheduler_name, goals,
+                                                                 initial_latent, async_task.steps, switch, task['c'],
+                                                                 task['uc'], task, tiled, use_expansion, width, height,
                                                                  preparation_steps, preparation_steps,
-                                                                 async_task.image_number)
+                                                                 async_task.image_number, show_intermediate_results)
 
                 images_to_enhance += imgs
 
@@ -1247,7 +1248,7 @@ def worker():
             execution_time = time.perf_counter() - execution_start_time
             print(f'Generating and saving time: {execution_time:.2f} seconds')
 
-        if not async_task.enhance_checkbox or (async_task.enhance_uov_method == flags.disabled.casefold() and len(async_task.enhance_ctrls) == 0):
+        if not should_enhance:
             print(f'[Enhance] Skipping, preconditions aren\'t met')
             stop_processing(async_task, processing_start_time)
             return

From 7ed98521ecdd9f91f8a516a79af0f29a96e5d5dc Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 22 Jun 2024 01:06:40 +0200
Subject: [PATCH 081/101] fix: correctly call upscale vary

---
 modules/async_worker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 7b691d22..4d8b8d1d 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -965,7 +965,7 @@ def worker():
 
         if 'vary' in goals:
             img, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
-                async_task, async_task.enhance_uov_method, img, denoising_strength, switch, current_progress)
+                async_task, async_task.enhance_uov_method, denoising_strength, img, switch, current_progress)
         if 'upscale' in goals:
             direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
                 async_task, img, async_task.enhance_uov_method, switch, current_progress)
@@ -1122,7 +1122,7 @@ def worker():
 
         if 'vary' in goals:
             async_task.uov_input_image, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
-                async_task, async_task.uov_method, async_task.uov_input_image, denoising_strength, switch,
+                async_task, async_task.uov_method, denoising_strength, switch, async_task.uov_input_image,
                 current_progress)
 
         if 'upscale' in goals:

From bd3a29ba68e26c955853585c908b1b92e0ec0a45 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sat, 22 Jun 2024 01:45:53 +0200
Subject: [PATCH 082/101] fix: use 0 steps when upscaling with
 skip_prompt_processing

---
 modules/async_worker.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 4d8b8d1d..677a3cef 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -930,6 +930,7 @@ def worker():
             goals.append('upscale')
             if 'fast' in uov_method:
                 skip_prompt_processing = True
+                steps = 0
             else:
                 steps = performance.steps_uov()
 
@@ -977,7 +978,7 @@ def worker():
                 progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{total_count} to system ...')
                 uov_image_path = log(img, d, output_format=async_task.output_format)
                 yield_result(async_task, uov_image_path, current_progress, async_task.black_out_nsfw, False,
-                             do_not_show_finished_images=True)
+                             do_not_show_finished_images=not show_intermediate_results or async_task.disable_intermediate_results)
                 return current_progress, img
 
         if 'inpaint' in goals and inpaint_parameterized:
@@ -1122,7 +1123,7 @@ def worker():
 
         if 'vary' in goals:
             async_task.uov_input_image, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
-                async_task, async_task.uov_method, denoising_strength, switch, async_task.uov_input_image,
+                async_task, async_task.uov_method, denoising_strength, async_task.uov_input_image, switch,
                 current_progress)
 
         if 'upscale' in goals:
@@ -1178,7 +1179,13 @@ def worker():
         all_steps = steps * async_task.image_number
 
         if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled.casefold():
-            enhance_upscale_steps, _, _, _ = apply_overrides(async_task, async_task.performance_selection.steps_uov(), height, width)
+            enhance_upscale_steps = async_task.performance_selection.steps()
+            if 'upscale' in async_task.enhance_uov_method:
+                if 'fast' in async_task.enhance_uov_method:
+                    enhance_upscale_steps = 0
+                else:
+                    enhance_upscale_steps = async_task.performance_selection.steps_uov()
+            enhance_upscale_steps, _, _, _ = apply_overrides(async_task, enhance_upscale_steps, height, width)
             enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps
             all_steps += enhance_upscale_steps_total
 

From b094ac820bcbbccd0dd43f437fc087899b205829 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 23 Jun 2024 16:21:10 +0200
Subject: [PATCH 083/101] fix: only allow one image number if enhance image has
 been uploaded

---
 modules/async_worker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 677a3cef..ef84523a 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1171,6 +1171,7 @@ def worker():
 
         images_to_enhance = []
         if 'enhance' in goals:
+            async_task.image_number = 1
             images_to_enhance += [async_task.enhance_input_image]
             height, width, _ = async_task.enhance_input_image.shape
             # input image already provided, processing is skipped

From 54f25cb1ae7b2b0d5b3f14f6291fb66a8e223380 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 23 Jun 2024 16:22:08 +0200
Subject: [PATCH 084/101] feat: cleanup python package versions, remove
 diffusers

diffusers is no longer needed because the safety checker has been implemented natively
---
 requirements_versions.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/requirements_versions.txt b/requirements_versions.txt
index bc86caac..9196f1db 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -13,11 +13,10 @@ omegaconf==2.2.3
 gradio==3.41.2
 pygit2==1.12.2
 opencv-contrib-python==4.8.0.74
-diffusers==0.25.1
 httpx==0.24.1
 onnxruntime==1.16.3
 timm==0.9.2
-translators==5.8.9
-rembg==2.0.53
+translators==5.9.2
+rembg==2.0.57
 groundingdino-py==0.4.0
 segment_anything==1.0
\ No newline at end of file

From 7ecaee82851f4cdc76ab8f94e2ce9fb6e6eab33a Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 23 Jun 2024 17:40:20 +0200
Subject: [PATCH 085/101] fix: correctly track progress with image_number > 1
 and not enhance

---
 modules/async_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index ef84523a..6ab89eb3 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1238,7 +1238,7 @@ def worker():
                                                                  denoising_strength, final_scheduler_name, goals,
                                                                  initial_latent, async_task.steps, switch, task['c'],
                                                                  task['uc'], task, tiled, use_expansion, width, height,
-                                                                 preparation_steps, preparation_steps,
+                                                                 current_progress, preparation_steps,
                                                                  async_task.image_number, show_intermediate_results)
 
                 images_to_enhance += imgs

From 1ef6367635b1b921a3a1b70525be645a9359dd9d Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 23 Jun 2024 17:41:21 +0200
Subject: [PATCH 086/101] feat: add option to process uov either before or
 after enhance

---
 language/en.json        |  1 +
 modules/async_worker.py | 94 ++++++++++++++++++++++++++---------------
 modules/config.py       |  6 +++
 modules/flags.py        |  9 ++++
 webui.py                | 12 ++++--
 5 files changed, 85 insertions(+), 37 deletions(-)

diff --git a/language/en.json b/language/en.json
index ec7ef09d..17bffa17 100644
--- a/language/en.json
+++ b/language/en.json
@@ -18,6 +18,7 @@
     "Art/Anime": "Art/Anime",
     "Describe this Image into Prompt": "Describe this Image into Prompt",
     "Image Size and Recommended Size": "Image Size and Recommended Size",
+    "Upscale or Variation": "Upscale or Variation",
     "Upscale or Variation:": "Upscale or Variation:",
     "Disabled": "Disabled",
     "Vary (Subtle)": "Vary (Subtle)",
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 6ab89eb3..2d051210 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -116,6 +116,7 @@ class AsyncTask:
         self.enhance_input_image = args.pop()
         self.enhance_checkbox = args.pop()
         self.enhance_uov_method = args.pop()
+        self.enhance_uov_processing_order = args.pop()
         self.enhance_ctrls = []
         for _ in range(modules.config.default_enhance_tabs):
             enhance_enabled = args.pop()
@@ -1018,6 +1019,45 @@ def worker():
         del task_enhance['c'], task_enhance['uc']  # Save memory
         return current_progress, imgs[0]
 
+    def enhance_upscale(all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+                        current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
+                        final_scheduler_name, height, img, preparation_steps, switch, tiled, total_count, use_expansion,
+                        use_style, use_synthetic_refiner, width):
+        # reset inpaint worker to prevent tensor size issues and not mix upscale and inpainting
+        inpaint_worker.current_task = None
+
+        current_task_id += 1
+        current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
+        goals_enhance = []
+        img, skip_prompt_processing, steps = prepare_upscale(
+            async_task, goals_enhance, img, async_task.enhance_uov_method, async_task.performance_selection,
+            enhance_steps, current_progress)
+        steps, _, _, _ = apply_overrides(async_task, steps, height, width)
+        exception_result = ''
+        if len(goals_enhance) > 0:
+            try:
+                current_progress, img = process_enhance(
+                    all_steps, async_task, callback, controlnet_canny_path,
+                    controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
+                    'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
+                    goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
+                    use_expansion, use_style, use_synthetic_refiner, width)
+
+            except ldm_patched.modules.model_management.InterruptProcessingException:
+                if async_task.last_stop == 'skip':
+                    print('User skipped')
+                    async_task.last_stop = False
+                    # also skip all enhance steps for this image, but add the steps to the progress bar
+                    if async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
+                        done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
+                    exception_result = 'continue'
+                else:
+                    print('User stopped')
+                    exception_result = 'break'
+            finally:
+                done_steps_upscaling += steps
+        return current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result
+
     @torch.no_grad()
     @torch.inference_mode()
     def handler(async_task: AsyncTask):
@@ -1277,39 +1317,16 @@ def worker():
         for img in images_to_enhance:
             enhancement_image_start_time = time.perf_counter()
 
-            # upscale if not disabled or already in goals
-            if should_process_enhance_uov:
-                current_task_id += 1
-                current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
-                goals_enhance = []
-                img, skip_prompt_processing, steps = prepare_upscale(async_task, goals_enhance, img,
-                                                                     async_task.enhance_uov_method,
-                                                                     async_task.performance_selection,
-                                                                     enhance_steps, current_progress)
-
-                steps, _, _, _ = apply_overrides(async_task, steps, height, width)
-
-                if len(goals_enhance) > 0:
-                    try:
-                        current_progress, img = process_enhance(
-                            all_steps, async_task, callback, controlnet_canny_path,
-                            controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
-                            'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
-                            goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
-                            use_expansion, use_style, use_synthetic_refiner, width)
-
-                    except ldm_patched.modules.model_management.InterruptProcessingException:
-                        if async_task.last_stop == 'skip':
-                            print('User skipped')
-                            async_task.last_stop = False
-                            # also skip all enhance steps for this image, but add the steps to the progress bar
-                            done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
-                            continue
-                        else:
-                            print('User stopped')
-                            break
-                    finally:
-                        done_steps_upscaling += steps
+            if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
+                current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
+                    all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+                    current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
+                    final_scheduler_name, height, img, preparation_steps, switch, tiled, total_count, use_expansion,
+                    use_style, use_synthetic_refiner, width)
+                if exception_result == 'continue':
+                    continue
+                elif exception_result == 'break':
+                    break
 
             # inpaint for all other tabs
             for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls:
@@ -1379,6 +1396,17 @@ def worker():
                 enhancement_task_time = time.perf_counter() - enhancement_task_start_time
                 print(f'Enhancement time: {enhancement_task_time:.2f} seconds')
 
+            if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after:
+                current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
+                    all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+                    current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
+                    final_scheduler_name, height, img, preparation_steps, switch, tiled, total_count, use_expansion,
+                    use_style, use_synthetic_refiner, width)
+                if exception_result == 'continue':
+                    continue
+                elif exception_result == 'break':
+                    break
+
             enhancement_image_time = time.perf_counter() - enhancement_image_start_time
             print(f'Enhancement image time: {enhancement_image_time:.2f} seconds')
 
diff --git a/modules/config.py b/modules/config.py
index b0dd7d43..d985f118 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -515,6 +515,12 @@ default_enhance_tabs = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
     expected_type=int
 )
+enhance_uov_processing_order = get_config_item_or_set_default(
+    key='default_enhance_uov_processing_order',
+    default_value=modules.flags.enhancement_uov_before,
+    validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
+    expected_type=int
+)
 default_sam_max_detections = get_config_item_or_set_default(
     key='default_sam_max_detections',
     default_value=0,
diff --git a/modules/flags.py b/modules/flags.py
index a75eed98..4050b21b 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -12,6 +12,15 @@ uov_list = [
     disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
 ]
 
+enhancement_uov_before = "before"
+enhancement_uov_before_label = "Before First Enhancement"
+enhancement_uov_after = "after"
+enhancement_uov_after_label = "After Last Enhancement"
+enhancement_uov_processing_order = [
+    (enhancement_uov_before_label, enhancement_uov_before),
+    (enhancement_uov_after_label, enhancement_uov_after)
+]
+
 CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]
 
 # fooocus: a1111 (Civitai)
diff --git a/webui.py b/webui.py
index c26666e4..8c7a4fca 100644
--- a/webui.py
+++ b/webui.py
@@ -353,14 +353,18 @@ with shared.gradio_root:
 
             with gr.Row(visible=False) as enhance_input_panel:
                 with gr.Tabs():
-                    with gr.TabItem(label='#1'):
+                    with gr.TabItem(label='Upscale or Variation'):
                         with gr.Row():
                             with gr.Column():
                                 enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list, value=flags.disabled)
-                                gr.HTML('<a href="https://github.com/lllyasviel/Fooocus/discussions/390" target="_blank">\U0001F4D4 Document</a>')
+                                enhance_uov_processing_order = gr.Radio(label='Order of Processing',
+                                                                        info='Before is slower (larger area to enhance), but might enhance the overall image quality, whereas after is faster but may apply changes to the image which were already fixed by enhance. Use after when enhancing large areas.',
+                                                                        choices=flags.enhancement_uov_processing_order,
+                                                                        value=modules.config.enhance_uov_processing_order)
+                                # gr.HTML('<a href="https://github.com/lllyasviel/Fooocus/discussions/390" target="_blank">\U0001F4D4 Document</a>')
                     enhance_ctrls = []
                     for index in range(modules.config.default_enhance_tabs):
-                        with gr.TabItem(label=f'#{index + 2}') as enhance_tab_item:
+                        with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
                             enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
                                                           container=False)
 
@@ -937,7 +941,7 @@ with shared.gradio_root:
 
         ctrls += ip_ctrls
         ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox,
-                  enhance_input_image, enhance_checkbox, enhance_uov_method]
+                  enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order]
         ctrls += enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):

From 25f4484c79c8fb62dc7fbd0a88e62266e5ff3b90 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 23 Jun 2024 18:17:44 +0200
Subject: [PATCH 087/101] fix: correctly display current progress on
 non-enhance steps after skipping

---
 modules/async_worker.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 2d051210..bcb6e82a 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1281,6 +1281,7 @@ def worker():
                                                                  current_progress, preparation_steps,
                                                                  async_task.image_number, show_intermediate_results)
 
+                current_progress = int(preparation_steps + (100 - preparation_steps) / float(all_steps) * async_task.steps * (current_task_id + 1))
                 images_to_enhance += imgs
 
             except ldm_patched.modules.model_management.InterruptProcessingException:

From fe1e4ce4e50eec1b3409720c7c92cdd0f150fa10 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Sun, 23 Jun 2024 18:48:21 +0200
Subject: [PATCH 088/101] feat: bump version, add links to documentation

---
 fooocus_version.py | 2 +-
 webui.py           | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fooocus_version.py b/fooocus_version.py
index 27c52170..7e080368 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.5.0-rc1 (mashb1t)'
+version = '2.5.0-rc4 (mashb1t)'
diff --git a/webui.py b/webui.py
index 8c7a4fca..3e0cedfa 100644
--- a/webui.py
+++ b/webui.py
@@ -361,7 +361,7 @@ with shared.gradio_root:
                                                                         info='Before is slower (larger area to enhance), but might enhance the overall image quality, whereas after is faster but may apply changes to the image which were already fixed by enhance. Use after when enhancing large areas.',
                                                                         choices=flags.enhancement_uov_processing_order,
                                                                         value=modules.config.enhance_uov_processing_order)
-                                # gr.HTML('<a href="https://github.com/lllyasviel/Fooocus/discussions/390" target="_blank">\U0001F4D4 Document</a>')
+                                gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
                     enhance_ctrls = []
                     for index in range(modules.config.default_enhance_tabs):
                         with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
@@ -451,6 +451,8 @@ with shared.gradio_root:
                                                                                  '(default is 0, always processed before any mask invert)')
                                 enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
 
+                            gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
+
                         enhance_ctrls += [
                             enhance_enabled,
                             enhance_mask_dino_prompt_text,

From 1d0965f8c99350beb94c6183886352dde8be50ac Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 20:43:28 +0200
Subject: [PATCH 089/101] feat: rename enhance image input label, add link to
 documentation

---
 language/en.json | 1 -
 webui.py         | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/language/en.json b/language/en.json
index 17bffa17..ec7ef09d 100644
--- a/language/en.json
+++ b/language/en.json
@@ -18,7 +18,6 @@
     "Art/Anime": "Art/Anime",
     "Describe this Image into Prompt": "Describe this Image into Prompt",
     "Image Size and Recommended Size": "Image Size and Recommended Size",
-    "Upscale or Variation": "Upscale or Variation",
     "Upscale or Variation:": "Upscale or Variation:",
     "Disabled": "Disabled",
     "Vary (Subtle)": "Vary (Subtle)",
diff --git a/webui.py b/webui.py
index 3e0cedfa..abaf1994 100644
--- a/webui.py
+++ b/webui.py
@@ -328,7 +328,8 @@ with shared.gradio_root:
                     with gr.TabItem(label='Enhance') as enhance_tab:
                         with gr.Row():
                             with gr.Column():
-                                enhance_input_image = grh.Image(label='Image to enhance', source='upload', type='numpy')
+                                enhance_input_image = grh.Image(label='Base image for enhance', source='upload', type='numpy')
+                                gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
 
                     with gr.TabItem(label='Metadata') as metadata_tab:
                         with gr.Column():
@@ -501,7 +502,6 @@ with shared.gradio_root:
             desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             enhance_tab.select(lambda: 'enhance', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
-
             enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox,
                                         outputs=enhance_input_panel, queue=False, show_progress=False, _js=switch_js)
 

From acb76adf67a71d62ec181ccfeb19c0577ca1ed8e Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 20:45:05 +0200
Subject: [PATCH 090/101] feat: use last enhanced prompt for upscaling if
 provided

Upscaling with the original prompt could turn subjects that enhance had replaced back into the original subjects; using the last enhance prompt prevents this.
---
 modules/async_worker.py | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index bcb6e82a..45a33c8e 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -957,7 +957,7 @@ def worker():
     def process_enhance(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
                         current_progress, current_task_id, denoising_strength, inpaint_disable_initial_latent,
                         inpaint_engine, inpaint_respective_field, inpaint_strength,
-                        negative_prompt, prompt, final_scheduler_name, goals, height, img, mask,
+                        prompt, negative_prompt, final_scheduler_name, goals, height, img, mask,
                         preparation_steps, steps, switch, tiled, total_count, use_expansion, use_style,
                         use_synthetic_refiner, width, show_intermediate_results=True):
         base_model_additional_loras = []
@@ -1017,12 +1017,12 @@ def worker():
                                                          preparation_steps, total_count, show_intermediate_results)
 
         del task_enhance['c'], task_enhance['uc']  # Save memory
-        return current_progress, imgs[0]
+        return current_progress, imgs[0], prompt, negative_prompt
 
     def enhance_upscale(all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
                         current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
-                        final_scheduler_name, height, img, preparation_steps, switch, tiled, total_count, use_expansion,
-                        use_style, use_synthetic_refiner, width):
+                        prompt, negative_prompt, final_scheduler_name, height, img, preparation_steps, switch, tiled,
+                        total_count, use_expansion, use_style, use_synthetic_refiner, width):
         # reset inpaint worker to prevent tensor size issues and not mix upscale and inpainting
         inpaint_worker.current_task = None
 
@@ -1039,7 +1039,7 @@ def worker():
                 current_progress, img = process_enhance(
                     all_steps, async_task, callback, controlnet_canny_path,
                     controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
-                    'None', 0.0, 0.0, async_task.negative_prompt, async_task.prompt, final_scheduler_name,
+                    'None', 0.0, 0.0, prompt, negative_prompt, final_scheduler_name,
                     goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
                     use_expansion, use_style, use_synthetic_refiner, width)
 
@@ -1314,6 +1314,8 @@ def worker():
         current_task_id = -1
         done_steps_upscaling = 0
         done_steps_inpainting = 0
+        last_enhance_prompt = async_task.prompt
+        last_enhance_negative_prompt = async_task.negative_prompt
         enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
         for img in images_to_enhance:
             enhancement_image_start_time = time.perf_counter()
@@ -1322,8 +1324,8 @@ def worker():
                 current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
                     all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
                     current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
-                    final_scheduler_name, height, img, preparation_steps, switch, tiled, total_count, use_expansion,
-                    use_style, use_synthetic_refiner, width)
+                    async_task.prompt, async_task.negative_prompt, final_scheduler_name, height, img, preparation_steps,
+                    switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
                 if exception_result == 'continue':
                     continue
                 elif exception_result == 'break':
@@ -1375,13 +1377,18 @@ def worker():
                 goals_enhance = ['inpaint']
 
                 try:
-                    current_progress, img = process_enhance(
+                    current_progress, img, enhance_prompt_processed, enhance_negative_prompt_processed = process_enhance(
                         all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                        current_progress, current_task_id, denoising_strength,
-                        enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
-                        enhance_inpaint_respective_field, enhance_inpaint_strength, enhance_negative_prompt,
-                        enhance_prompt, final_scheduler_name, goals_enhance, height, img, mask, preparation_steps,
-                        enhance_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
+                        current_progress, current_task_id, denoising_strength, enhance_inpaint_disable_initial_latent,
+                        enhance_inpaint_engine, enhance_inpaint_respective_field, enhance_inpaint_strength,
+                        enhance_prompt, enhance_negative_prompt, final_scheduler_name, goals_enhance, height, img, mask,
+                        preparation_steps, enhance_steps, switch, tiled, total_count, use_expansion, use_style,
+                        use_synthetic_refiner, width)
+
+                    if enhance_prompt_processed != '':
+                        last_enhance_prompt = enhance_prompt_processed
+                    if enhance_negative_prompt_processed != '':
+                        last_enhance_negative_prompt = enhance_negative_prompt_processed
 
                 except ldm_patched.modules.model_management.InterruptProcessingException:
                     if async_task.last_stop == 'skip':
@@ -1401,8 +1408,9 @@ def worker():
                 current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
                     all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
                     current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
-                    final_scheduler_name, height, img, preparation_steps, switch, tiled, total_count, use_expansion,
-                    use_style, use_synthetic_refiner, width)
+                    last_enhance_prompt, last_enhance_negative_prompt, final_scheduler_name, height, img,
+                    preparation_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner,
+                    width)
                 if exception_result == 'continue':
                     continue
                 elif exception_result == 'break':

From 9f9fcfa7ad15f167299a19d6245dfc54fe378551 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 20:45:50 +0200
Subject: [PATCH 091/101] fix: start enhance from index 1, not 0

---
 modules/async_worker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 45a33c8e..ada4f284 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1311,7 +1311,7 @@ def worker():
         total_count = len(images_to_enhance) * active_enhance_tabs
 
         base_progress = current_progress
-        current_task_id = -1
+        current_task_id = 0
         done_steps_upscaling = 0
         done_steps_inpainting = 0
         last_enhance_prompt = async_task.prompt

From 00a27e74008500c4b1a72891307704f8c381c5bc Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:21:11 +0200
Subject: [PATCH 092/101] feat: add config and option for uov prompt type

---
 modules/async_worker.py | 12 +++++++-----
 modules/config.py       |  6 ++++++
 modules/flags.py        |  4 ++++
 webui.py                | 15 +++++++++++++--
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index ada4f284..5550eb70 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -117,6 +117,7 @@ class AsyncTask:
         self.enhance_checkbox = args.pop()
         self.enhance_uov_method = args.pop()
         self.enhance_uov_processing_order = args.pop()
+        self.enhance_uov_prompt_type = args.pop()
         self.enhance_ctrls = []
         for _ in range(modules.config.default_enhance_tabs):
             enhance_enabled = args.pop()
@@ -1036,7 +1037,7 @@ def worker():
         exception_result = ''
         if len(goals_enhance) > 0:
             try:
-                current_progress, img = process_enhance(
+                current_progress, img, prompt, negative_prompt = process_enhance(
                     all_steps, async_task, callback, controlnet_canny_path,
                     controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
                     'None', 0.0, 0.0, prompt, negative_prompt, final_scheduler_name,
@@ -1385,10 +1386,11 @@ def worker():
                         preparation_steps, enhance_steps, switch, tiled, total_count, use_expansion, use_style,
                         use_synthetic_refiner, width)
 
-                    if enhance_prompt_processed != '':
-                        last_enhance_prompt = enhance_prompt_processed
-                    if enhance_negative_prompt_processed != '':
-                        last_enhance_negative_prompt = enhance_negative_prompt_processed
+                    if async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last:
+                        if enhance_prompt_processed != '':
+                            last_enhance_prompt = enhance_prompt_processed
+                        if enhance_negative_prompt_processed != '':
+                            last_enhance_negative_prompt = enhance_negative_prompt_processed
 
                 except ldm_patched.modules.model_management.InterruptProcessingException:
                     if async_task.last_stop == 'skip':
diff --git a/modules/config.py b/modules/config.py
index d985f118..9b4f6877 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -521,6 +521,12 @@ enhance_uov_processing_order = get_config_item_or_set_default(
     validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
     expected_type=int
 )
+enhance_uov_prompt_type = get_config_item_or_set_default(
+    key='default_enhance_uov_prompt_type',
+    default_value=modules.flags.enhancement_uov_prompt_type_last,
+    validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
+    expected_type=int
+)
 default_sam_max_detections = get_config_item_or_set_default(
     key='default_sam_max_detections',
     default_value=0,
diff --git a/modules/flags.py b/modules/flags.py
index 4050b21b..7c77cafe 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -21,6 +21,10 @@ enhancement_uov_processing_order = [
     (enhancement_uov_after_label, enhancement_uov_after)
 ]
 
+enhancement_uov_prompt_type_original = 'Original Prompts'
+enhancement_uov_prompt_type_last = 'Last Filled Enhancement Prompts'
+enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last]
+
 CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]
 
 # fooocus: a1111 (Civitai)
diff --git a/webui.py b/webui.py
index abaf1994..5879c2dc 100644
--- a/webui.py
+++ b/webui.py
@@ -359,9 +359,19 @@ with shared.gradio_root:
                             with gr.Column():
                                 enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list, value=flags.disabled)
                                 enhance_uov_processing_order = gr.Radio(label='Order of Processing',
-                                                                        info='Before is slower (larger area to enhance), but might enhance the overall image quality, whereas after is faster but may apply changes to the image which were already fixed by enhance. Use after when enhancing large areas.',
+                                                                        info='Use before for enhancement of small details and after for large areas.',
                                                                         choices=flags.enhancement_uov_processing_order,
                                                                         value=modules.config.enhance_uov_processing_order)
+                                enhance_uov_prompt_type = gr.Radio(label='Prompt',
+                                                                   info='Choose which prompt to use for Upscale or Variation.',
+                                                                   choices=flags.enhancement_uov_prompt_types,
+                                                                   value=modules.config.enhance_uov_prompt_type,
+                                                                   visible=modules.config.enhance_uov_prompt_type == flags.enhancement_uov_after)
+
+                                enhance_uov_processing_order.change(lambda x: gr.update(visible=x == flags.enhancement_uov_after),
+                                                                    inputs=enhance_uov_processing_order,
+                                                                    outputs=enhance_uov_prompt_type,
+                                                                    queue=False, show_progress=False)
                                 gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
                     enhance_ctrls = []
                     for index in range(modules.config.default_enhance_tabs):
@@ -943,7 +953,8 @@ with shared.gradio_root:
 
         ctrls += ip_ctrls
         ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox,
-                  enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order]
+                  enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order,
+                  enhance_uov_prompt_type]
         ctrls += enhance_ctrls
 
         def parse_meta(raw_prompt_txt, is_generating):

From 85121a03cbc6e56b670900fbdb1656aba7b1b06a Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:26:14 +0200
Subject: [PATCH 093/101] feat: change default selection for enhancement uov
 prompt type

---
 modules/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/config.py b/modules/config.py
index 9b4f6877..c443a00e 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -523,7 +523,7 @@ enhance_uov_processing_order = get_config_item_or_set_default(
 )
 enhance_uov_prompt_type = get_config_item_or_set_default(
     key='default_enhance_uov_prompt_type',
-    default_value=modules.flags.enhancement_uov_prompt_type_last,
+    default_value=modules.flags.enhancement_uov_prompt_type_original,
     validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
     expected_type=int
 )

From 5c1191258ead911b8dde2c3b345cb8f38c27024a Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:27:47 +0200
Subject: [PATCH 094/101] refactor: use labels as enhancement uov values before
 and after

allows for usage of label as default value in config
---
 modules/flags.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/modules/flags.py b/modules/flags.py
index 7c77cafe..ad06dedd 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -12,14 +12,9 @@ uov_list = [
     disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
 ]
 
-enhancement_uov_before = "before"
-enhancement_uov_before_label = "Before First Enhancement"
-enhancement_uov_after = "after"
-enhancement_uov_after_label = "After Last Enhancement"
-enhancement_uov_processing_order = [
-    (enhancement_uov_before_label, enhancement_uov_before),
-    (enhancement_uov_after_label, enhancement_uov_after)
-]
+enhancement_uov_before = "Before First Enhancement"
+enhancement_uov_after = "After Last Enhancement"
+enhancement_uov_processing_order = [enhancement_uov_before, enhancement_uov_after]
 
 enhancement_uov_prompt_type_original = 'Original Prompts'
 enhancement_uov_prompt_type_last = 'Last Filled Enhancement Prompts'

From a245b67531f344980878627b49ac85840e73b461 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:43:38 +0200
Subject: [PATCH 095/101] feat: add config for enhance uov checkbox and method,
 prefix with default_

---
 modules/async_worker.py |  8 ++++----
 modules/config.py       | 16 ++++++++++++++--
 modules/flags.py        |  4 +---
 webui.py                | 13 +++++++------
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 5550eb70..954e46b9 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -968,10 +968,10 @@ def worker():
 
         if 'vary' in goals:
             img, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
-                async_task, async_task.enhance_uov_method, denoising_strength, img, switch, current_progress)
+                async_task, async_task.default_enhance_uov_method, denoising_strength, img, switch, current_progress)
         if 'upscale' in goals:
             direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
-                async_task, img, async_task.enhance_uov_method, switch, current_progress)
+                async_task, img, async_task.default_enhance_uov_method, switch, current_progress)
             if direct_return:
                 d = [('Upscale (Fast)', 'upscale_fast', '2x')]
                 if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
@@ -1031,7 +1031,7 @@ def worker():
         current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
         goals_enhance = []
         img, skip_prompt_processing, steps = prepare_upscale(
-            async_task, goals_enhance, img, async_task.enhance_uov_method, async_task.performance_selection,
+            async_task, goals_enhance, img, async_task.default_enhance_uov_method, async_task.performance_selection,
             enhance_steps, current_progress)
         steps, _, _, _ = apply_overrides(async_task, steps, height, width)
         exception_result = ''
@@ -1049,7 +1049,7 @@ def worker():
                     print('User skipped')
                     async_task.last_stop = False
                     # also skip all enhance steps for this image, but add the steps to the progress bar
-                    if async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
+                    if async_task.default_enhance_uov_processing_order == flags.enhancement_uov_before:
                         done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
                     exception_result = 'continue'
                 else:
diff --git a/modules/config.py b/modules/config.py
index c443a00e..f8af6599 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -515,13 +515,25 @@ default_enhance_tabs = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
     expected_type=int
 )
-enhance_uov_processing_order = get_config_item_or_set_default(
+default_enhance_checkbox = get_config_item_or_set_default(
+    key='default_enhance_checkbox',
+    default_value=False,
+    validator=lambda x: isinstance(x, bool),
+    expected_type=bool
+)
+default_enhance_uov_method = get_config_item_or_set_default(
+    key='default_enhance_uov_method',
+    default_value=modules.flags.disabled,
+    validator=lambda x: x in modules.flags.uov_list,
+    expected_type=int
+)
+default_enhance_uov_processing_order = get_config_item_or_set_default(
     key='default_enhance_uov_processing_order',
     default_value=modules.flags.enhancement_uov_before,
     validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
     expected_type=int
 )
-enhance_uov_prompt_type = get_config_item_or_set_default(
+default_enhance_uov_prompt_type = get_config_item_or_set_default(
     key='default_enhance_uov_prompt_type',
     default_value=modules.flags.enhancement_uov_prompt_type_original,
     validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
diff --git a/modules/flags.py b/modules/flags.py
index ad06dedd..f86fe078 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -8,9 +8,7 @@ upscale_15 = 'Upscale (1.5x)'
 upscale_2 = 'Upscale (2x)'
 upscale_fast = 'Upscale (Fast 2x)'
 
-uov_list = [
-    disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
-]
+uov_list = [disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast]
 
 enhancement_uov_before = "Before First Enhancement"
 enhancement_uov_after = "After Last Enhancement"
diff --git a/webui.py b/webui.py
index 5879c2dc..fa468352 100644
--- a/webui.py
+++ b/webui.py
@@ -175,7 +175,7 @@ with shared.gradio_root:
                     skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
             with gr.Row(elem_classes='advanced_check_row'):
                 input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
-                enhance_checkbox = gr.Checkbox(label='Enhance', value=False, container=False, elem_classes='min_check')
+                enhance_checkbox = gr.Checkbox(label='Enhance', value=modules.config.default_enhance_checkbox, container=False, elem_classes='min_check')
                 advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
             with gr.Row(visible=False) as image_input_panel:
                 with gr.Tabs():
@@ -352,21 +352,22 @@ with shared.gradio_root:
                         metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image,
                                                     outputs=metadata_json, queue=False, show_progress=True)
 
-            with gr.Row(visible=False) as enhance_input_panel:
+            with gr.Row(visible=modules.config.default_enhance_checkbox) as enhance_input_panel:
                 with gr.Tabs():
                     with gr.TabItem(label='Upscale or Variation'):
                         with gr.Row():
                             with gr.Column():
-                                enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list, value=flags.disabled)
+                                enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list,
+                                                              value=modules.config.default_enhance_uov_method)
                                 enhance_uov_processing_order = gr.Radio(label='Order of Processing',
                                                                         info='Use before for enhancement of small details and after for large areas.',
                                                                         choices=flags.enhancement_uov_processing_order,
-                                                                        value=modules.config.enhance_uov_processing_order)
+                                                                        value=modules.config.default_enhance_uov_processing_order)
                                 enhance_uov_prompt_type = gr.Radio(label='Prompt',
                                                                    info='Choose which prompt to use for Upscale or Variation.',
                                                                    choices=flags.enhancement_uov_prompt_types,
-                                                                   value=modules.config.enhance_uov_prompt_type,
-                                                                   visible=modules.config.enhance_uov_prompt_type == flags.enhancement_uov_after)
+                                                                   value=modules.config.default_enhance_uov_prompt_type,
+                                                                   visible=modules.config.default_enhance_uov_processing_order == flags.enhancement_uov_after)
 
                                 enhance_uov_processing_order.change(lambda x: gr.update(visible=x == flags.enhancement_uov_after),
                                                                     inputs=enhance_uov_processing_order,

From 1d3c0b9bca9c66fc19bed5e0ebd5281cad9345e8 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:49:04 +0200
Subject: [PATCH 096/101] fix: use non-prefixed uov attributes in async_worker

---
 modules/async_worker.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 954e46b9..5550eb70 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -968,10 +968,10 @@ def worker():
 
         if 'vary' in goals:
             img, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
-                async_task, async_task.default_enhance_uov_method, denoising_strength, img, switch, current_progress)
+                async_task, async_task.enhance_uov_method, denoising_strength, img, switch, current_progress)
         if 'upscale' in goals:
             direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
-                async_task, img, async_task.default_enhance_uov_method, switch, current_progress)
+                async_task, img, async_task.enhance_uov_method, switch, current_progress)
             if direct_return:
                 d = [('Upscale (Fast)', 'upscale_fast', '2x')]
                 if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
@@ -1031,7 +1031,7 @@ def worker():
         current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
         goals_enhance = []
         img, skip_prompt_processing, steps = prepare_upscale(
-            async_task, goals_enhance, img, async_task.default_enhance_uov_method, async_task.performance_selection,
+            async_task, goals_enhance, img, async_task.enhance_uov_method, async_task.performance_selection,
             enhance_steps, current_progress)
         steps, _, _, _ = apply_overrides(async_task, steps, height, width)
         exception_result = ''
@@ -1049,7 +1049,7 @@ def worker():
                     print('User skipped')
                     async_task.last_stop = False
                     # also skip all enhance steps for this image, but add the steps to the progress bar
-                    if async_task.default_enhance_uov_processing_order == flags.enhancement_uov_before:
+                    if async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
                         done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
                     exception_result = 'continue'
                 else:

From 3fae0cf15a96c2f0522eaac68f4e9b6e63553335 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:49:54 +0200
Subject: [PATCH 097/101] feat: bump version for 2.5.0-rc5

---
 fooocus_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fooocus_version.py b/fooocus_version.py
index 7e080368..42b7d3f3 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.5.0-rc4 (mashb1t)'
+version = '2.5.0-rc5 (mashb1t)'

From b2a23262fc272cdca704acbb5631fae72a2755b5 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:54:10 +0200
Subject: [PATCH 098/101] feat: only update uov last prompt when all
 preconditions are met

---
 modules/async_worker.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 5550eb70..2ab51bda 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1386,7 +1386,8 @@ def worker():
                         preparation_steps, enhance_steps, switch, tiled, total_count, use_expansion, use_style,
                         use_synthetic_refiner, width)
 
-                    if async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last:
+                    if (should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after
+                            and async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last):
                         if enhance_prompt_processed != '':
                             last_enhance_prompt = enhance_prompt_processed
                         if enhance_negative_prompt_processed != '':

From dfa586d483dce52973423eef384a71e1c14f52d6 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:55:43 +0200
Subject: [PATCH 099/101] refactor: rename enhancement uov flag prompt last to
 last filled

---
 modules/async_worker.py | 2 +-
 modules/flags.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/async_worker.py b/modules/async_worker.py
index 2ab51bda..287edbcf 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1387,7 +1387,7 @@ def worker():
                         use_synthetic_refiner, width)
 
                     if (should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after
-                            and async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last):
+                            and async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last_filled):
                         if enhance_prompt_processed != '':
                             last_enhance_prompt = enhance_prompt_processed
                         if enhance_negative_prompt_processed != '':
diff --git a/modules/flags.py b/modules/flags.py
index f86fe078..c8ea0a0c 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -15,8 +15,8 @@ enhancement_uov_after = "After Last Enhancement"
 enhancement_uov_processing_order = [enhancement_uov_before, enhancement_uov_after]
 
 enhancement_uov_prompt_type_original = 'Original Prompts'
-enhancement_uov_prompt_type_last = 'Last Filled Enhancement Prompts'
-enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last]
+enhancement_uov_prompt_type_last_filled = 'Last Filled Enhancement Prompts'
+enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last_filled]
 
 CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]
 

From 32d77909f86f5566802ad2355d6543990f439e77 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Mon, 24 Jun 2024 21:58:00 +0200
Subject: [PATCH 100/101] fix: reset last enhance prompts for each enhancement
 iteration, bump version

---
 fooocus_version.py      | 2 +-
 modules/async_worker.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fooocus_version.py b/fooocus_version.py
index 42b7d3f3..65912fab 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.5.0-rc5 (mashb1t)'
+version = '2.5.0-rc6 (mashb1t)'
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 287edbcf..a9d45086 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1315,12 +1315,13 @@ def worker():
         current_task_id = 0
         done_steps_upscaling = 0
         done_steps_inpainting = 0
-        last_enhance_prompt = async_task.prompt
-        last_enhance_negative_prompt = async_task.negative_prompt
         enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
         for img in images_to_enhance:
             enhancement_image_start_time = time.perf_counter()
 
+            last_enhance_prompt = async_task.prompt
+            last_enhance_negative_prompt = async_task.negative_prompt
+
             if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
                 current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
                     all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,

From bf953420579abd544b9c256ea474a3c5375228d9 Mon Sep 17 00:00:00 2001
From: Manuel Schmid <dev@mash1t.de>
Date: Tue, 25 Jun 2024 20:18:27 +0200
Subject: [PATCH 101/101] i18n: add translations for enhance gradio output
 elements

---
 language/en.json | 69 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 59 insertions(+), 10 deletions(-)

diff --git a/language/en.json b/language/en.json
index ec7ef09d..da8f2271 100644
--- a/language/en.json
+++ b/language/en.json
@@ -44,15 +44,7 @@
     "Top": "Top",
     "Bottom": "Bottom",
     "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)",
-    "Mask generation model": "Mask generation model",
-    "Cloth category": "Cloth category",
-    "Detection prompt": "Detection prompt",
-    "Use singular whenever possible": "Use singular whenever possible",
     "Advanced options": "Advanced options",
-    "SAM model": "SAM model",
-    "Quantization": "Quantization",
-    "Box Threshold": "Box Threshold",
-    "Text Threshold": "Text Threshold",
     "Generate mask from image": "Generate mask from image",
     "Setting": "Setting",
     "Style": "Style",
@@ -383,7 +375,6 @@
     "Used for SAM object detection and box generation": "Used for SAM object detection and box generation",
     "GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate",
     "Inpaint Engine": "Inpaint Engine",
-    "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.",
     "v1": "v1",
     "v2.5": "v2.5",
     "v2.6": "v2.6",
@@ -416,5 +407,63 @@
     "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.": "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.",
     "fooocus (json)": "fooocus (json)",
     "a1111 (plain text)": "a1111 (plain text)",
-    "Unsupported image type in input": "Unsupported image type in input"
+    "Unsupported image type in input": "Unsupported image type in input",
+    "Enhance": "Enhance",
+    "Detection prompt": "Detection prompt",
+    "Detection Prompt Quick List": "Detection Prompt Quick List",
+    "Maximum number of detections": "Maximum number of detections",
+    "Base image for enhance": "Base image for enhance",
+    "Order of Processing": "Order of Processing",
+    "Use before for enhancement of small details and after for large areas.": "Use before for enhancement of small details and after for large areas.",
+    "Before First Enhancement": "Before First Enhancement",
+    "After Last Enhancement": "After Last Enhancement",
+    "Prompt Type": "Prompt Type",
+    "Choose which prompt to use for Upscale or Variation.": "Choose which prompt to use for Upscale or Variation.",
+    "Original Prompts": "Original Prompts",
+    "Last Filled Enhancement Prompts": "Last Filled Enhancement Prompts",
+    "Enable": "Enable",
+    "Describe what you want to detect.": "Describe what you want to detect.",
+    "Enhancement positive prompt": "Enhancement positive prompt",
+    "Uses original prompt instead if empty.": "Uses original prompt instead if empty.",
+    "Enhancement negative prompt": "Enhancement negative prompt",
+    "Uses original negative prompt instead if empty.": "Uses original negative prompt instead if empty.",
+    "Detection": "Detection",
+    "u2net": "u2net",
+    "u2netp": "u2netp",
+    "u2net_human_seg": "u2net_human_seg",
+    "u2net_cloth_seg": "u2net_cloth_seg",
+    "silueta": "silueta",
+    "isnet-general-use": "isnet-general-use",
+    "isnet-anime": "isnet-anime",
+    "sam": "sam",
+    "Mask generation model": "Mask generation model",
+    "Cloth category": "Cloth category",
+    "Use singular whenever possible": "Use singular whenever possible",
+    "full": "full",
+    "upper": "upper",
+    "lower": "lower",
+    "SAM Options": "SAM Options",
+    "SAM model": "SAM model",
+    "vit_b": "vit_b",
+    "vit_l": "vit_l",
+    "vit_h": "vit_h",
+    "Box Threshold": "Box Threshold",
+    "Text Threshold": "Text Threshold",
+    "Set to 0 to detect all": "Set to 0 to detect all",
+    "Inpaint": "Inpaint",
+    "Inpaint or Outpaint (default)": "Inpaint or Outpaint (default)",
+    "Improve Detail (face, hand, eyes, etc.)": "Improve Detail (face, hand, eyes, etc.)",
+    "Modify Content (add objects, change background, etc.)": "Modify Content (add objects, change background, etc.)",
+    "Disable initial latent in inpaint": "Disable initial latent in inpaint",
+    "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.",
+    "Inpaint Denoising Strength": "Inpaint Denoising Strength",
+    "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
+    "Inpaint Respective Field": "Inpaint Respective Field",
+    "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
+    "Mask Erode or Dilate": "Mask Erode or Dilate",
+    "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)": "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)",
+    "Invert Mask": "Invert Mask",
+    "Debug Enhance Masks": "Debug Enhance Masks",
+    "Show enhance masks in preview and final results": "Show enhance masks in preview and final results",
+    "Use GroundingDINO boxes instead of more detailed SAM masks": "Use GroundingDINO boxes instead of more detailed SAM masks"
 }
\ No newline at end of file