diff --git a/args_manager.py b/args_manager.py index 5a2b37c9..08e4dc6e 100644 --- a/args_manager.py +++ b/args_manager.py @@ -28,8 +28,8 @@ args_parser.parser.add_argument("--disable-metadata", action='store_true', args_parser.parser.add_argument("--disable-preset-download", action='store_true', help="Disables downloading models for presets", default=False) -args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true', - help="Disables automatic description of uov images when prompt is empty", default=False) +args_parser.parser.add_argument("--enable-auto-describe-image", action='store_true', + help="Enables automatic description of uov and enhance image when prompt is empty", default=False) args_parser.parser.add_argument("--always-download-new-model", action='store_true', help="Always download newer models ", default=False) diff --git a/css/style.css b/css/style.css index 6ed0f628..ad9de6f2 100644 --- a/css/style.css +++ b/css/style.css @@ -99,7 +99,7 @@ div:has(> #positive_prompt) { } .advanced_check_row { - width: 250px !important; + width: 330px !important; } .min_check { diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py new file mode 100644 index 00000000..a27eb39c --- /dev/null +++ b/experiments_mask_generation.py @@ -0,0 +1,24 @@ +# https://github.com/sail-sg/EditAnything/blob/main/sam2groundingdino_edit.py + +import numpy as np +from PIL import Image + +from extras.inpaint_mask import SAMOptions, generate_mask_from_image + +original_image = Image.open('cat.webp') +image = np.array(original_image, dtype=np.uint8) + +sam_options = SAMOptions( + dino_prompt='eye', + dino_box_threshold=0.3, + dino_text_threshold=0.25, + dino_erode_or_dilate=0, + dino_debug=False, + max_detections=2, + model_type='vit_b' +) + +mask_image, _, _, _ = generate_mask_from_image(image, sam_options=sam_options) + +merged_masks_img = Image.fromarray(mask_image) +merged_masks_img.show() diff --git a/extras/GroundingDINO/util/inference.py b/extras/GroundingDINO/util/inference.py index 259094f2..bc8b6429 100644 --- a/extras/GroundingDINO/util/inference.py +++ b/extras/GroundingDINO/util/inference.py @@ -25,7 +25,7 @@ class GroundingDinoModel(Model): caption: str, box_threshold: float = 0.35, text_threshold: float = 0.25 - ) -> Tuple[sv.Detections, List[str]]: + ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]: if self.model is None: filename = load_file_from_url( url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth", @@ -56,7 +56,7 @@ class GroundingDinoModel(Model): source_w=source_w, boxes=boxes, logits=logits) - return detections, phrases + return detections, boxes, logits, phrases def predict( diff --git a/extras/censor.py b/extras/censor.py index 45617fd8..c5dea3fd 100644 --- a/extras/censor.py +++ b/extras/censor.py @@ -41,7 +41,7 @@ class Censor: model_management.load_model_gpu(self.safety_checker_model) single = False - if not isinstance(images, list) or isinstance(images, np.ndarray): + if not isinstance(images, (list, np.ndarray)): images = [images] single = True diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py index 4999f258..086b7da6 100644 --- a/extras/inpaint_mask.py +++ b/extras/inpaint_mask.py @@ -1,42 +1,130 @@ -from PIL import Image +import sys + +import modules.config import numpy as np import torch -from rembg import remove, new_session from extras.GroundingDINO.util.inference import default_groundingdino - -device = torch.device('cuda' if 
torch.cuda.is_available() else 'cpu') +from extras.sam.predictor import SamPredictor +from rembg import remove, new_session +from segment_anything import sam_model_registry +from segment_anything.utils.amg import remove_small_regions -def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold): +class SAMOptions: + def __init__(self, + # GroundingDINO + dino_prompt: str = '', + dino_box_threshold=0.3, + dino_text_threshold=0.25, + dino_erode_or_dilate=0, + dino_debug=False, - # run grounding dino model - boxes, _ = default_groundingdino( - image=np.array(input_image), - caption=text_prompt, - box_threshold=box_threshold, - text_threshold=text_threshold - ) - - return boxes.xyxy + # SAM + max_detections=2, + model_type='vit_b' + ): + self.dino_prompt = dino_prompt + self.dino_box_threshold = dino_box_threshold + self.dino_text_threshold = dino_text_threshold + self.dino_erode_or_dilate = dino_erode_or_dilate + self.dino_debug = dino_debug + self.max_detections = max_detections + self.model_type = model_type -def generate_mask_from_image(image, mask_model, extras): +def optimize_masks(masks: torch.Tensor) -> torch.Tensor: + """ + removes small disconnected regions and holes + """ + fine_masks = [] + for mask in masks.to('cpu').numpy(): # masks: [num_masks, 1, h, w] + fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0]) + masks = np.stack(fine_masks, axis=0)[:, np.newaxis] + return torch.from_numpy(masks) + + +def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=None, + sam_options: SAMOptions | None = None) -> tuple[np.ndarray | None, int | None, int | None, int | None]: + dino_detection_count = 0 + sam_detection_count = 0 + sam_detection_on_mask_count = 0 + if image is None: - return + return None, dino_detection_count, sam_detection_count, sam_detection_on_mask_count + + if extras is None: + extras = {} if 'image' in image: image = image['image'] - if mask_model == 'sam': - boxes = run_grounded_sam(Image.fromarray(image), extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold']) - boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes - extras['sam_prompt'] = [] - for idx, box in enumerate(boxes): - extras['sam_prompt'] += [{"type": "rectangle", "data": box.tolist()}] + if mask_model != 'sam' or sam_options is None: + result = remove( + image, + session=new_session(mask_model, **extras), + only_mask=True, + **extras + ) - return remove( - image, - session=new_session(mask_model, **extras), - only_mask=True, - **extras + return result, dino_detection_count, sam_detection_count, sam_detection_on_mask_count + + detections, boxes, logits, phrases = default_groundingdino( + image=image, + caption=sam_options.dino_prompt, + box_threshold=sam_options.dino_box_threshold, + text_threshold=sam_options.dino_text_threshold ) + + H, W = image.shape[0], image.shape[1] + boxes = boxes * torch.Tensor([W, H, W, H]) + boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2 + boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2] + + sam_checkpoint = modules.config.download_sam_model(sam_options.model_type) + sam = sam_model_registry[sam_options.model_type](checkpoint=sam_checkpoint) + + sam_predictor = SamPredictor(sam) + final_mask_tensor = torch.zeros((image.shape[0], image.shape[1])) + dino_detection_count = boxes.size(0) + + if dino_detection_count > 0: + sam_predictor.set_image(image) + + if sam_options.dino_erode_or_dilate != 0: + for index in range(boxes.size(0)): + 
assert boxes.size(1) == 4 + boxes[index][0] -= sam_options.dino_erode_or_dilate + boxes[index][1] -= sam_options.dino_erode_or_dilate + boxes[index][2] += sam_options.dino_erode_or_dilate + boxes[index][3] += sam_options.dino_erode_or_dilate + + if sam_options.dino_debug: + from PIL import ImageDraw, Image + debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black") + draw = ImageDraw.Draw(debug_dino_image) + for box in boxes.numpy(): + draw.rectangle(box.tolist(), fill="white") + return np.array(debug_dino_image), dino_detection_count, sam_detection_count, sam_detection_on_mask_count + + transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2]) + masks, _, _ = sam_predictor.predict_torch( + point_coords=None, + point_labels=None, + boxes=transformed_boxes, + multimask_output=False, + ) + + masks = optimize_masks(masks) + sam_detection_count = len(masks) + if sam_options.max_detections == 0: + sam_options.max_detections = sys.maxsize + sam_objects = min(len(logits), sam_options.max_detections) + for obj_ind in range(sam_objects): + mask_tensor = masks[obj_ind][0] + final_mask_tensor += mask_tensor + sam_detection_on_mask_count += 1 + + final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy() + mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255 + mask_image = np.array(mask_image, dtype=np.uint8) + return mask_image, dino_detection_count, sam_detection_count, sam_detection_on_mask_count diff --git a/extras/sam/predictor.py b/extras/sam/predictor.py new file mode 100644 index 00000000..337c549b --- /dev/null +++ b/extras/sam/predictor.py @@ -0,0 +1,288 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import numpy as np +import torch +from ldm_patched.modules import model_management +from ldm_patched.modules.model_patcher import ModelPatcher + +from segment_anything.modeling import Sam + +from typing import Optional, Tuple + +from segment_anything.utils.transforms import ResizeLongestSide + + +class SamPredictor: + def __init__( + self, + model: Sam, + load_device=model_management.text_encoder_device(), + offload_device=model_management.text_encoder_offload_device() + ) -> None: + """ + Uses SAM to calculate the image embedding for an image, and then + allow repeated, efficient mask prediction given prompts. + + Arguments: + model (Sam): The model to use for mask prediction. + """ + super().__init__() + + self.load_device = load_device + self.offload_device = offload_device + # can't use model.half() here as slow_conv2d_cpu is not implemented for half + model.to(self.offload_device) + + self.patcher = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device) + + self.transform = ResizeLongestSide(model.image_encoder.img_size) + self.reset_image() + + def set_image( + self, + image: np.ndarray, + image_format: str = "RGB", + ) -> None: + """ + Calculates the image embeddings for the provided image, allowing + masks to be predicted with the 'predict' method. + + Arguments: + image (np.ndarray): The image for calculating masks. Expects an + image in HWC uint8 format, with pixel values in [0, 255]. + image_format (str): The color format of the image, in ['RGB', 'BGR']. + """ + assert image_format in [ + "RGB", + "BGR", + ], f"image_format must be in ['RGB', 'BGR'], is {image_format}." 
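# Illustrative sketch (not part of the patch): how this predictor is driven
# end to end with a box prompt, mirroring the flow in extras/inpaint_mask.py
# above. The checkpoint path 'sam_vit_b.pth' and the all-zeros test image are
# assumptions for the example.
import numpy as np
import torch
from segment_anything import sam_model_registry
from extras.sam.predictor import SamPredictor

sam = sam_model_registry['vit_b'](checkpoint='sam_vit_b.pth')
predictor = SamPredictor(sam)
image = np.zeros((512, 512, 3), dtype=np.uint8)        # HWC uint8 RGB
predictor.set_image(image)                             # embed once, reuse for many prompts
boxes = torch.tensor([[100.0, 100.0, 300.0, 300.0]])   # one XYXY box in pixels
boxes = predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
masks, scores, _ = predictor.predict_torch(
    point_coords=None, point_labels=None,
    boxes=boxes, multimask_output=False)               # masks: Bx1xHxW bool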
+ if image_format != self.patcher.model.image_format: + image = image[..., ::-1] + + # Transform the image to the form expected by the model + input_image = self.transform.apply_image(image) + input_image_torch = torch.as_tensor(input_image, device=self.load_device) + input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :] + + self.set_torch_image(input_image_torch, image.shape[:2]) + + @torch.no_grad() + def set_torch_image( + self, + transformed_image: torch.Tensor, + original_image_size: Tuple[int, ...], + ) -> None: + """ + Calculates the image embeddings for the provided image, allowing + masks to be predicted with the 'predict' method. Expects the input + image to be already transformed to the format expected by the model. + + Arguments: + transformed_image (torch.Tensor): The input image, with shape + 1x3xHxW, which has been transformed with ResizeLongestSide. + original_image_size (tuple(int, int)): The size of the image + before transformation, in (H, W) format. + """ + assert ( + len(transformed_image.shape) == 4 + and transformed_image.shape[1] == 3 + and max(*transformed_image.shape[2:]) == self.patcher.model.image_encoder.img_size + ), f"set_torch_image input must be BCHW with long side {self.patcher.model.image_encoder.img_size}." + self.reset_image() + + self.original_size = original_image_size + self.input_size = tuple(transformed_image.shape[-2:]) + model_management.load_model_gpu(self.patcher) + input_image = self.patcher.model.preprocess(transformed_image.to(self.load_device)) + self.features = self.patcher.model.image_encoder(input_image) + self.is_image_set = True + + def predict( + self, + point_coords: Optional[np.ndarray] = None, + point_labels: Optional[np.ndarray] = None, + box: Optional[np.ndarray] = None, + mask_input: Optional[np.ndarray] = None, + multimask_output: bool = True, + return_logits: bool = False, + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """ + Predict masks for the given input prompts, using the currently set image. + + Arguments: + point_coords (np.ndarray or None): A Nx2 array of point prompts to the + model. Each point is in (X,Y) in pixels. + point_labels (np.ndarray or None): A length N array of labels for the + point prompts. 1 indicates a foreground point and 0 indicates a + background point. + box (np.ndarray or None): A length 4 array given a box prompt to the + model, in XYXY format. + mask_input (np.ndarray): A low resolution mask input to the model, typically + coming from a previous prediction iteration. Has form 1xHxW, where + for SAM, H=W=256. + multimask_output (bool): If true, the model will return three masks. + For ambiguous input prompts (such as a single click), this will often + produce better masks than a single prediction. If only a single + mask is needed, the model's predicted quality score can be used + to select the best mask. For non-ambiguous prompts, such as multiple + input prompts, multimask_output=False can give better results. + return_logits (bool): If true, returns un-thresholded masks logits + instead of a binary mask. + + Returns: + (np.ndarray): The output masks in CxHxW format, where C is the + number of masks, and (H, W) is the original image size. + (np.ndarray): An array of length C containing the model's + predictions for the quality of each mask. + (np.ndarray): An array of shape CxHxW, where C is the number + of masks and H=W=256. These low resolution logits can be passed to + a subsequent iteration as mask input. 
+ """ + if not self.is_image_set: + raise RuntimeError("An image must be set with .set_image(...) before mask prediction.") + + # Transform input prompts + coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None + if point_coords is not None: + assert ( + point_labels is not None + ), "point_labels must be supplied if point_coords is supplied." + point_coords = self.transform.apply_coords(point_coords, self.original_size) + coords_torch = torch.as_tensor(point_coords, dtype=torch.float, device=self.load_device) + labels_torch = torch.as_tensor(point_labels, dtype=torch.int, device=self.load_device) + coords_torch, labels_torch = coords_torch[None, :, :], labels_torch[None, :] + if box is not None: + box = self.transform.apply_boxes(box, self.original_size) + box_torch = torch.as_tensor(box, dtype=torch.float, device=self.load_device) + box_torch = box_torch[None, :] + if mask_input is not None: + mask_input_torch = torch.as_tensor(mask_input, dtype=torch.float, device=self.load_device) + mask_input_torch = mask_input_torch[None, :, :, :] + + masks, iou_predictions, low_res_masks = self.predict_torch( + coords_torch, + labels_torch, + box_torch, + mask_input_torch, + multimask_output, + return_logits=return_logits, + ) + + masks = masks[0].detach().cpu().numpy() + iou_predictions = iou_predictions[0].detach().cpu().numpy() + low_res_masks = low_res_masks[0].detach().cpu().numpy() + return masks, iou_predictions, low_res_masks + + @torch.no_grad() + def predict_torch( + self, + point_coords: Optional[torch.Tensor], + point_labels: Optional[torch.Tensor], + boxes: Optional[torch.Tensor] = None, + mask_input: Optional[torch.Tensor] = None, + multimask_output: bool = True, + return_logits: bool = False, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Predict masks for the given input prompts, using the currently set image. + Input prompts are batched torch tensors and are expected to already be + transformed to the input frame using ResizeLongestSide. + + Arguments: + point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the + model. Each point is in (X,Y) in pixels. + point_labels (torch.Tensor or None): A BxN array of labels for the + point prompts. 1 indicates a foreground point and 0 indicates a + background point. + box (np.ndarray or None): A Bx4 array given a box prompt to the + model, in XYXY format. + mask_input (np.ndarray): A low resolution mask input to the model, typically + coming from a previous prediction iteration. Has form Bx1xHxW, where + for SAM, H=W=256. Masks returned by a previous iteration of the + predict method do not need further transformation. + multimask_output (bool): If true, the model will return three masks. + For ambiguous input prompts (such as a single click), this will often + produce better masks than a single prediction. If only a single + mask is needed, the model's predicted quality score can be used + to select the best mask. For non-ambiguous prompts, such as multiple + input prompts, multimask_output=False can give better results. + return_logits (bool): If true, returns un-thresholded masks logits + instead of a binary mask. + + Returns: + (torch.Tensor): The output masks in BxCxHxW format, where C is the + number of masks, and (H, W) is the original image size. + (torch.Tensor): An array of shape BxC containing the model's + predictions for the quality of each mask. + (torch.Tensor): An array of shape BxCxHxW, where C is the number + of masks and H=W=256. 
These low res logits can be passed to + a subsequent iteration as mask input. + """ + if not self.is_image_set: + raise RuntimeError("An image must be set with .set_image(...) before mask prediction.") + + if point_coords is not None: + points = (point_coords.to(self.load_device), point_labels.to(self.load_device)) + else: + points = None + + # load + if boxes is not None: + boxes = boxes.to(self.load_device) + if mask_input is not None: + mask_input = mask_input.to(self.load_device) + model_management.load_model_gpu(self.patcher) + + # Embed prompts + sparse_embeddings, dense_embeddings = self.patcher.model.prompt_encoder( + points=points, + boxes=boxes, + masks=mask_input, + ) + + # Predict masks + low_res_masks, iou_predictions = self.patcher.model.mask_decoder( + image_embeddings=self.features, + image_pe=self.patcher.model.prompt_encoder.get_dense_pe(), + sparse_prompt_embeddings=sparse_embeddings, + dense_prompt_embeddings=dense_embeddings, + multimask_output=multimask_output, + ) + + # Upscale the masks to the original image resolution + masks = self.patcher.model.postprocess_masks(low_res_masks, self.input_size, self.original_size) + + if not return_logits: + masks = masks > self.patcher.model.mask_threshold + + return masks, iou_predictions, low_res_masks + + def get_image_embedding(self) -> torch.Tensor: + """ + Returns the image embeddings for the currently set image, with + shape 1xCxHxW, where C is the embedding dimension and (H,W) are + the embedding spatial dimension of SAM (typically C=256, H=W=64). + """ + if not self.is_image_set: + raise RuntimeError( + "An image must be set with .set_image(...) to generate an embedding." + ) + assert self.features is not None, "Features must exist if an image has been set." + return self.features + + @property + def device(self) -> torch.device: + return self.patcher.model.device + + def reset_image(self) -> None: + """Resets the currently set image.""" + self.is_image_set = False + self.features = None + self.orig_h = None + self.orig_w = None + self.input_h = None + self.input_w = None diff --git a/fooocus_version.py b/fooocus_version.py index 106c67f2..65912fab 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.4.3 (mashb1t)' +version = '2.5.0-rc6 (mashb1t)' diff --git a/language/en.json b/language/en.json index a5f1e271..da8f2271 100644 --- a/language/en.json +++ b/language/en.json @@ -44,14 +44,7 @@ "Top": "Top", "Bottom": "Bottom", "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)", - "Mask generation model": "Mask generation model", - "Cloth category": "Cloth category", - "Segmentation prompt": "Segmentation prompt", "Advanced options": "Advanced options", - "SAM model": "SAM model", - "Quantization": "Quantization", - "Box Threshold": "Box Threshold", - "Text Threshold": "Text Threshold", "Generate mask from image": "Generate mask from image", "Setting": "Setting", "Style": "Style", @@ -377,10 +370,14 @@ "Disable preview during generation.": "Disable preview during generation.", "Disable Intermediate Results": "Disable Intermediate Results", "Disable intermediate results during generation, only show final gallery.": "Disable intermediate results during generation, only show final gallery.", + "Debug Inpaint Preprocessing": "Debug Inpaint Preprocessing", + "Debug GroundingDINO": "Debug GroundingDINO", + "Used for 
SAM object detection and box generation": "Used for SAM object detection and box generation", + "GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate", "Inpaint Engine": "Inpaint Engine", "v1": "v1", - "Version of Fooocus inpaint model": "Version of Fooocus inpaint model", "v2.5": "v2.5", + "v2.6": "v2.6", "Control Debug": "Control Debug", "Debug Preprocessors": "Debug Preprocessors", "Mixing Image Prompt and Vary/Upscale": "Mixing Image Prompt and Vary/Upscale", @@ -410,5 +407,63 @@ "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.": "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.", "fooocus (json)": "fooocus (json)", "a1111 (plain text)": "a1111 (plain text)", - "Unsupported image type in input": "Unsupported image type in input" + "Unsupported image type in input": "Unsupported image type in input", + "Enhance": "Enhance", + "Detection prompt": "Detection prompt", + "Detection Prompt Quick List": "Detection Prompt Quick List", + "Maximum number of detections": "Maximum number of detections", + "Base image for enhance": "Base image for enhance", + "Order of Processing": "Order of Processing", + "Use before for enhancement of small details and after for large areas.": "Use before for enhancement of small details and after for large areas.", + "Before First Enhancement": "Before First Enhancement", + "After Last Enhancement": "After Last Enhancement", + "Prompt Type": "Prompt Type", + "Choose which prompt to use for Upscale or Variation.": "Choose which prompt to use for Upscale or Variation.", + "Original Prompts": "Original Prompts", + "Last Filled Enhancement Prompts": "Last Filled Enhancement Prompts", + "Enable": "Enable", + "Describe what you want to detect.": "Describe what you want to detect.", + "Enhancement positive prompt": "Enhancement positive prompt", + "Uses original prompt instead if empty.": "Uses original prompt instead if empty.", + "Enhancement negative prompt": "Enhancement negative prompt", + "Uses original negative prompt instead if empty.": "Uses original negative prompt instead if empty.", + "Detection": "Detection", + "u2net": "u2net", + "u2netp": "u2netp", + "u2net_human_seg": "u2net_human_seg", + "u2net_cloth_seg": "u2net_cloth_seg", + "silueta": "silueta", + "isnet-general-use": "isnet-general-use", + "isnet-anime": "isnet-anime", + "sam": "sam", + "Mask generation model": "Mask generation model", + "Cloth category": "Cloth category", + "Use singular whenever possible": "Use singular whenever possible", + "full": "full", + "upper": "upper", + "lower": "lower", + "SAM Options": "SAM Options", + "SAM model": "SAM model", + "vit_b": "vit_b", + "vit_l": "vit_l", + "vit_h": "vit_h", + "Box Threshold": "Box Threshold", + "Text Threshold": "Text Threshold", + "Set to 0 to detect all": "Set to 0 to detect all", + "Inpaint": "Inpaint", + "Inpaint or Outpaint (default)": "Inpaint or Outpaint (default)", + "Improve Detail (face, hand, eyes, etc.)": "Improve Detail (face, hand, eyes, etc.)", + "Modify Content (add objects, change background, etc.)": "Modify Content (add objects, change background, etc.)", + "Disable initial latent in inpaint": "Disable initial latent in inpaint", + "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. 
If set, use performance Quality or Speed (no performance LoRAs) for best results.", + "Inpaint Denoising Strength": "Inpaint Denoising Strength", + "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)", + "Inpaint Respective Field": "Inpaint Respective Field", + "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)", + "Mask Erode or Dilate": "Mask Erode or Dilate", + "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)": "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)", + "Invert Mask": "Invert Mask", + "Debug Enhance Masks": "Debug Enhance Masks", + "Show enhance masks in preview and final results": "Show enhance masks in preview and final results", + "Use GroundingDINO boxes instead of more detailed SAM masks": "Use GroundingDINO boxes instead of more detailed SAM masks" } \ No newline at end of file diff --git a/modules/async_worker.py b/modules/async_worker.py index 92831427..a9d45086 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -1,23 +1,166 @@ import threading -import os -import re + +from extras.inpaint_mask import generate_mask_from_image, SAMOptions from modules.patch import PatchSettings, patch_settings, patch_all +import modules.config patch_all() class AsyncTask: def __init__(self, args): - self.args = args + from modules.flags import Performance, MetadataScheme, ip_list, controlnet_image_count + from modules.util import get_enabled_loras + from modules.config import default_max_lora_number + import args_manager + + self.args = args.copy() self.yields = [] self.results = [] self.last_stop = False self.processing = False + self.performance_loras = [] + + if len(args) == 0: + return + + args.reverse() + self.generate_image_grid = args.pop() + self.prompt = args.pop() + self.negative_prompt = args.pop() + self.translate_prompts = args.pop() + self.style_selections = args.pop() + + self.performance_selection = Performance(args.pop()) + self.steps = self.performance_selection.steps() + self.original_steps = self.steps + + self.aspect_ratios_selection = args.pop() + self.image_number = args.pop() + self.output_format = args.pop() + self.seed = int(args.pop()) + self.read_wildcards_in_order = args.pop() + self.sharpness = args.pop() + self.cfg_scale = args.pop() + self.base_model_name = args.pop() + self.refiner_model_name = args.pop() + self.refiner_switch = args.pop() + self.loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in + range(default_max_lora_number)]) + self.input_image_checkbox = args.pop() + self.current_tab = args.pop() + self.uov_method = args.pop() + self.uov_input_image = args.pop() + self.outpaint_selections = args.pop() + self.inpaint_input_image = args.pop() + self.inpaint_additional_prompt = args.pop() + self.inpaint_mask_image_upload = args.pop() + + self.disable_preview = args.pop() + 
self.disable_intermediate_results = args.pop() + self.disable_seed_increment = args.pop() + self.black_out_nsfw = args.pop() + self.adm_scaler_positive = args.pop() + self.adm_scaler_negative = args.pop() + self.adm_scaler_end = args.pop() + self.adaptive_cfg = args.pop() + self.clip_skip = args.pop() + self.sampler_name = args.pop() + self.scheduler_name = args.pop() + self.vae_name = args.pop() + self.overwrite_step = args.pop() + self.overwrite_switch = args.pop() + self.overwrite_width = args.pop() + self.overwrite_height = args.pop() + self.overwrite_vary_strength = args.pop() + self.overwrite_upscale_strength = args.pop() + self.mixing_image_prompt_and_vary_upscale = args.pop() + self.mixing_image_prompt_and_inpaint = args.pop() + self.debugging_cn_preprocessor = args.pop() + self.skipping_cn_preprocessor = args.pop() + self.canny_low_threshold = args.pop() + self.canny_high_threshold = args.pop() + self.refiner_swap_method = args.pop() + self.controlnet_softness = args.pop() + self.freeu_enabled = args.pop() + self.freeu_b1 = args.pop() + self.freeu_b2 = args.pop() + self.freeu_s1 = args.pop() + self.freeu_s2 = args.pop() + self.debugging_inpaint_preprocessor = args.pop() + self.inpaint_disable_initial_latent = args.pop() + self.inpaint_engine = args.pop() + self.inpaint_strength = args.pop() + self.inpaint_respective_field = args.pop() + self.inpaint_mask_upload_checkbox = args.pop() + self.invert_mask_checkbox = args.pop() + self.inpaint_erode_or_dilate = args.pop() + self.save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False + self.metadata_scheme = MetadataScheme( + args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS + + self.cn_tasks = {x: [] for x in ip_list} + for _ in range(controlnet_image_count): + cn_img = args.pop() + cn_stop = args.pop() + cn_weight = args.pop() + cn_type = args.pop() + if cn_img is not None: + self.cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight]) + + self.debugging_dino = args.pop() + self.dino_erode_or_dilate = args.pop() + self.debugging_enhance_masks_checkbox = args.pop() + + self.enhance_input_image = args.pop() + self.enhance_checkbox = args.pop() + self.enhance_uov_method = args.pop() + self.enhance_uov_processing_order = args.pop() + self.enhance_uov_prompt_type = args.pop() + self.enhance_ctrls = [] + for _ in range(modules.config.default_enhance_tabs): + enhance_enabled = args.pop() + enhance_mask_dino_prompt_text = args.pop() + enhance_prompt = args.pop() + enhance_negative_prompt = args.pop() + enhance_mask_model = args.pop() + enhance_mask_sam_model = args.pop() + enhance_mask_text_threshold = args.pop() + enhance_mask_box_threshold = args.pop() + enhance_mask_sam_max_detections = args.pop() + enhance_inpaint_disable_initial_latent = args.pop() + enhance_inpaint_engine = args.pop() + enhance_inpaint_strength = args.pop() + enhance_inpaint_respective_field = args.pop() + enhance_inpaint_erode_or_dilate = args.pop() + enhance_mask_invert = args.pop() + if enhance_enabled: + self.enhance_ctrls.append([ + enhance_mask_dino_prompt_text, + enhance_prompt, + enhance_negative_prompt, + enhance_mask_model, + enhance_mask_sam_model, + enhance_mask_text_threshold, + enhance_mask_box_threshold, + enhance_mask_sam_max_detections, + enhance_inpaint_disable_initial_latent, + enhance_inpaint_engine, + enhance_inpaint_strength, + enhance_inpaint_respective_field, + enhance_inpaint_erode_or_dilate, + enhance_mask_invert + ]) + async_tasks = [] +class EarlyReturnException(BaseException): + pass + + + def 
worker(): global async_tasks @@ -25,7 +168,6 @@ def worker(): import traceback import math import numpy as np - import cv2 import torch import time import shared @@ -35,7 +177,6 @@ def worker(): import modules.default_pipeline as pipeline import modules.core as core import modules.flags as flags - import modules.config import modules.patch import ldm_patched.modules.model_management import extras.preprocessors as preprocessors @@ -44,18 +185,18 @@ def worker(): import extras.ip_adapter as ip_adapter import extras.face_crop import fooocus_version - import args_manager from extras.censor import default_censor from modules.sdxl_styles import apply_style, get_random_style, fooocus_expansion, apply_arrays, random_style_name from modules.private_logger import log from extras.expansion import safe_str from modules.util import (remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil, - get_shape_ceil, resample_image, erode_or_dilate, get_enabled_loras, - parse_lora_references_from_prompt, apply_wildcards) + get_shape_ceil, resample_image, erode_or_dilate, parse_lora_references_from_prompt, + apply_wildcards) from modules.upscaler import perform_upscale from modules.flags import Performance - from modules.meta_parser import get_metadata_parser, MetadataScheme + from modules.meta_parser import get_metadata_parser + from modules.translator import translate2en pid = os.getpid() print(f'Started worker with PID {pid}') @@ -73,8 +214,7 @@ def worker(): print(f'[Fooocus] {text}') async_task.yields.append(['preview', (number, text, None)]) - def yield_result(async_task, imgs, black_out_nsfw, censor=True, do_not_show_finished_images=False, - progressbar_index=flags.preparation_step_count): + def yield_result(async_task, imgs, progressbar_index, black_out_nsfw, censor=True, do_not_show_finished_images=False): if not isinstance(imgs, list): imgs = [imgs] @@ -134,209 +274,851 @@ def worker(): async_task.results = async_task.results + [wall] return + def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id, + denoising_strength, final_scheduler_name, goals, initial_latent, steps, switch, positive_cond, + negative_cond, task, tiled, use_expansion, width, height, base_progress, preparation_steps, + total_count, show_intermediate_results): + if async_task.last_stop is not False: + ldm_patched.modules.model_management.interrupt_current_processing() + if 'cn' in goals: + for cn_flag, cn_path in [ + (flags.cn_canny, controlnet_canny_path), + (flags.cn_cpds, controlnet_cpds_path) + ]: + for cn_img, cn_stop, cn_weight in async_task.cn_tasks[cn_flag]: + positive_cond, negative_cond = core.apply_controlnet( + positive_cond, negative_cond, + pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop) + imgs = pipeline.process_diffusion( + positive_cond=positive_cond, + negative_cond=negative_cond, + steps=steps, + switch=switch, + width=width, + height=height, + image_seed=task['task_seed'], + callback=callback, + sampler_name=async_task.sampler_name, + scheduler_name=final_scheduler_name, + latent=initial_latent, + denoise=denoising_strength, + tiled=tiled, + cfg_scale=async_task.cfg_scale, + refiner_swap_method=async_task.refiner_swap_method, + disable_preview=async_task.disable_preview + ) + del positive_cond, negative_cond # Save memory + if inpaint_worker.current_task is not None: + imgs = [inpaint_worker.current_task.post_process(x) for x in imgs] + current_progress = int(base_progress + (100 - preparation_steps) / 
float(all_steps) * steps) + if modules.config.default_black_out_nsfw or async_task.black_out_nsfw: + progressbar(async_task, current_progress, 'Checking for NSFW content ...') + imgs = default_censor(imgs) + progressbar(async_task, current_progress, + f'Saving image {current_task_id + 1}/{total_count} to system ...') + img_paths = save_and_log(async_task, height, imgs, task, use_expansion, width) + yield_result(async_task, img_paths, current_progress, async_task.black_out_nsfw, False, + do_not_show_finished_images=not show_intermediate_results or async_task.disable_intermediate_results) + + return imgs, img_paths, current_progress + + def apply_patch_settings(async_task): + patch_settings[pid] = PatchSettings( + async_task.sharpness, + async_task.adm_scaler_end, + async_task.adm_scaler_positive, + async_task.adm_scaler_negative, + async_task.controlnet_softness, + async_task.adaptive_cfg + ) + + def save_and_log(async_task, height, imgs, task, use_expansion, width) -> list: + img_paths = [] + for x in imgs: + d = [('Prompt', 'prompt', task['log_positive_prompt']), + ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']), + ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']), + ('Styles', 'styles', + str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])), + ('Performance', 'performance', async_task.performance_selection.value), + ('Steps', 'steps', async_task.steps), + ('Resolution', 'resolution', str((width, height))), + ('Guidance Scale', 'guidance_scale', async_task.cfg_scale), + ('Sharpness', 'sharpness', async_task.sharpness), + ('ADM Guidance', 'adm_guidance', str(( + modules.patch.patch_settings[pid].positive_adm_scale, + modules.patch.patch_settings[pid].negative_adm_scale, + modules.patch.patch_settings[pid].adm_scaler_end))), + ('Base Model', 'base_model', async_task.base_model_name), + ('Refiner Model', 'refiner_model', async_task.refiner_model_name), + ('Refiner Switch', 'refiner_switch', async_task.refiner_switch)] + + if async_task.refiner_model_name != 'None': + if async_task.overwrite_switch > 0: + d.append(('Overwrite Switch', 'overwrite_switch', async_task.overwrite_switch)) + if async_task.refiner_swap_method != flags.refiner_swap_method: + d.append(('Refiner Swap Method', 'refiner_swap_method', async_task.refiner_swap_method)) + if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr: + d.append( + ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg)) + + if async_task.clip_skip > 1: + d.append(('CLIP Skip', 'clip_skip', async_task.clip_skip)) + d.append(('Sampler', 'sampler', async_task.sampler_name)) + d.append(('Scheduler', 'scheduler', async_task.scheduler_name)) + d.append(('VAE', 'vae', async_task.vae_name)) + d.append(('Seed', 'seed', str(task['task_seed']))) + + if async_task.freeu_enabled: + d.append(('FreeU', 'freeu', + str((async_task.freeu_b1, async_task.freeu_b2, async_task.freeu_s1, async_task.freeu_s2)))) + + for li, (n, w) in enumerate(async_task.loras): + if n != 'None': + d.append((f'LoRA {li + 1}', f'lora_combined_{li + 1}', f'{n} : {w}')) + + metadata_parser = None + if async_task.save_metadata_to_images: + metadata_parser = modules.meta_parser.get_metadata_parser(async_task.metadata_scheme) + metadata_parser.set_data(task['log_positive_prompt'], task['positive'], + task['log_negative_prompt'], task['negative'], + async_task.steps, async_task.base_model_name, async_task.refiner_model_name, + async_task.loras, async_task.vae_name) + 
d.append(('Metadata Scheme', 'metadata_scheme', + async_task.metadata_scheme.value if async_task.save_metadata_to_images else async_task.save_metadata_to_images)) + d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version)) + img_paths.append(log(x, d, metadata_parser, async_task.output_format, task)) + + return img_paths + + def apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width, current_progress): + for task in async_task.cn_tasks[flags.cn_canny]: + cn_img, cn_stop, cn_weight = task + cn_img = resize_image(HWC3(cn_img), width=width, height=height) + + if not async_task.skipping_cn_preprocessor: + cn_img = preprocessors.canny_pyramid(cn_img, async_task.canny_low_threshold, + async_task.canny_high_threshold) + + cn_img = HWC3(cn_img) + task[0] = core.numpy_to_pytorch(cn_img) + if async_task.debugging_cn_preprocessor: + yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True) + for task in async_task.cn_tasks[flags.cn_cpds]: + cn_img, cn_stop, cn_weight = task + cn_img = resize_image(HWC3(cn_img), width=width, height=height) + + if not async_task.skipping_cn_preprocessor: + cn_img = preprocessors.cpds(cn_img) + + cn_img = HWC3(cn_img) + task[0] = core.numpy_to_pytorch(cn_img) + if async_task.debugging_cn_preprocessor: + yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True) + for task in async_task.cn_tasks[flags.cn_ip]: + cn_img, cn_stop, cn_weight = task + cn_img = HWC3(cn_img) + + # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75 + cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0) + + task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path) + if async_task.debugging_cn_preprocessor: + yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True) + for task in async_task.cn_tasks[flags.cn_ip_face]: + cn_img, cn_stop, cn_weight = task + cn_img = HWC3(cn_img) + + if not async_task.skipping_cn_preprocessor: + cn_img = extras.face_crop.crop_image(cn_img) + + # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75 + cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0) + + task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path) + if async_task.debugging_cn_preprocessor: + yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True) + all_ip_tasks = async_task.cn_tasks[flags.cn_ip] + async_task.cn_tasks[flags.cn_ip_face] + if len(all_ip_tasks) > 0: + pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks) + + def apply_vary(async_task, uov_method, denoising_strength, uov_input_image, switch, current_progress, advance_progress=False): + if 'subtle' in uov_method: + denoising_strength = 0.5 + if 'strong' in uov_method: + denoising_strength = 0.85 + if async_task.overwrite_vary_strength > 0: + denoising_strength = async_task.overwrite_vary_strength + shape_ceil = get_image_shape_ceil(uov_input_image) + if shape_ceil < 1024: + print(f'[Vary] Image is resized because it is too small.') + shape_ceil = 1024 + elif shape_ceil > 2048: + print(f'[Vary] Image is resized because it is too big.') + shape_ceil = 2048 + uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil) + initial_pixels = core.numpy_to_pytorch(uov_input_image) 
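# Side note (illustrative only, not patch code): the strength and sizing rules
# above collapse into two small pure functions; the names here are hypothetical.
def vary_denoise(uov_method: str, denoise: float, overwrite: float) -> float:
    if 'subtle' in uov_method:
        denoise = 0.5
    if 'strong' in uov_method:
        denoise = 0.85
    return overwrite if overwrite > 0 else denoise   # overwrite_vary_strength wins if set

def clamp_vary_shape_ceil(shape_ceil: float) -> float:
    return min(max(shape_ceil, 1024), 2048)          # Vary resizes into [1024, 2048]

assert vary_denoise('vary (subtle)', 0.0, -1) == 0.5
assert clamp_vary_shape_ceil(768) == 1024 and clamp_vary_shape_ceil(4096) == 2048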
+ if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'VAE encoding ...') + candidate_vae, _ = pipeline.get_candidate_vae( + steps=async_task.steps, + switch=switch, + denoise=denoising_strength, + refiner_swap_method=async_task.refiner_swap_method + ) + initial_latent = core.encode_vae(vae=candidate_vae, pixels=initial_pixels) + B, C, H, W = initial_latent['samples'].shape + width = W * 8 + height = H * 8 + print(f'Final resolution is {str((width, height))}.') + return uov_input_image, denoising_strength, initial_latent, width, height, current_progress + + def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image, + inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch, + inpaint_disable_initial_latent, current_progress, skip_apply_outpaint=False, + advance_progress=False): + if not skip_apply_outpaint: + inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask) + + inpaint_worker.current_task = inpaint_worker.InpaintWorker( + image=inpaint_image, + mask=inpaint_mask, + use_fill=denoising_strength > 0.99, + k=inpaint_respective_field + ) + if async_task.debugging_inpaint_preprocessor: + yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), 100, + async_task.black_out_nsfw, do_not_show_finished_images=True) + raise EarlyReturnException + + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'VAE Inpaint encoding ...') + inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill) + inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image) + inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask) + candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae( + steps=async_task.steps, + switch=switch, + denoise=denoising_strength, + refiner_swap_method=async_task.refiner_swap_method + ) + latent_inpaint, latent_mask = core.encode_vae_inpaint( + mask=inpaint_pixel_mask, + vae=candidate_vae, + pixels=inpaint_pixel_image) + latent_swap = None + if candidate_vae_swap is not None: + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'VAE SD15 encoding ...') + latent_swap = core.encode_vae( + vae=candidate_vae_swap, + pixels=inpaint_pixel_fill)['samples'] + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'VAE encoding ...') + latent_fill = core.encode_vae( + vae=candidate_vae, + pixels=inpaint_pixel_fill)['samples'] + inpaint_worker.current_task.load_latent( + latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap) + if inpaint_parameterized: + pipeline.final_unet = inpaint_worker.current_task.patch( + inpaint_head_model_path=inpaint_head_model_path, + inpaint_latent=latent_inpaint, + inpaint_latent_mask=latent_mask, + model=pipeline.final_unet + ) + if not inpaint_disable_initial_latent: + initial_latent = {'samples': latent_fill} + B, C, H, W = latent_fill.shape + height, width = H * 8, W * 8 + final_height, final_width = inpaint_worker.current_task.image.shape[:2] + print(f'Final resolution is {str((final_width, final_height))}, latent is {str((width, height))}.') + + return denoising_strength, initial_latent, width, height, current_progress + + def apply_outpaint(async_task, inpaint_image, inpaint_mask): + if len(async_task.outpaint_selections) > 0: + H, W, C = inpaint_image.shape + if 'top' in 
async_task.outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge') + inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant', + constant_values=255) + if 'bottom' in async_task.outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge') + inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant', + constant_values=255) + + H, W, C = inpaint_image.shape + if 'left' in async_task.outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[0, 0], [int(W * 0.3), 0], [0, 0]], mode='edge') + inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(W * 0.3), 0]], mode='constant', + constant_values=255) + if 'right' in async_task.outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(W * 0.3)], [0, 0]], mode='edge') + inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(W * 0.3)]], mode='constant', + constant_values=255) + + inpaint_image = np.ascontiguousarray(inpaint_image.copy()) + inpaint_mask = np.ascontiguousarray(inpaint_mask.copy()) + async_task.inpaint_strength = 1.0 + async_task.inpaint_respective_field = 1.0 + return inpaint_image, inpaint_mask + + def apply_upscale(async_task, uov_input_image, uov_method, switch, current_progress, advance_progress=False): + H, W, C = uov_input_image.shape + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, f'Upscaling image from {str((W, H))} ...') + uov_input_image = perform_upscale(uov_input_image) + print(f'Image upscaled.') + if '1.5x' in uov_method: + f = 1.5 + elif '2x' in uov_method: + f = 2.0 + else: + f = 1.0 + shape_ceil = get_shape_ceil(H * f, W * f) + if shape_ceil < 1024: + print(f'[Upscale] Image is resized because it is too small.') + uov_input_image = set_image_shape_ceil(uov_input_image, 1024) + shape_ceil = 1024 + else: + uov_input_image = resample_image(uov_input_image, width=W * f, height=H * f) + image_is_super_large = shape_ceil > 2800 + if 'fast' in uov_method: + direct_return = True + elif image_is_super_large: + print('Image is too large. Directly returned the SR image. 
' + 'Usually directly return SR image at 4K resolution ' + 'yields better results than SDXL diffusion.') + direct_return = True + else: + direct_return = False + if direct_return: + return direct_return, uov_input_image, None, None, None, None, None, current_progress + + tiled = True + denoising_strength = 0.382 + if async_task.overwrite_upscale_strength > 0: + denoising_strength = async_task.overwrite_upscale_strength + initial_pixels = core.numpy_to_pytorch(uov_input_image) + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'VAE encoding ...') + candidate_vae, _ = pipeline.get_candidate_vae( + steps=async_task.steps, + switch=switch, + denoise=denoising_strength, + refiner_swap_method=async_task.refiner_swap_method + ) + initial_latent = core.encode_vae( + vae=candidate_vae, + pixels=initial_pixels, tiled=True) + B, C, H, W = initial_latent['samples'].shape + width = W * 8 + height = H * 8 + print(f'Final resolution is {str((width, height))}.') + return direct_return, uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress + + def apply_overrides(async_task, steps, height, width): + if async_task.overwrite_step > 0: + steps = async_task.overwrite_step + switch = int(round(async_task.steps * async_task.refiner_switch)) + if async_task.overwrite_switch > 0: + switch = async_task.overwrite_switch + if async_task.overwrite_width > 0: + width = async_task.overwrite_width + if async_task.overwrite_height > 0: + height = async_task.overwrite_height + return steps, switch, width, height + + def process_prompt(async_task, prompt, negative_prompt, base_model_additional_loras, image_number, disable_seed_increment, use_expansion, use_style, + use_synthetic_refiner, current_progress, advance_progress=False): + prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='') + negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='') + prompt = prompts[0] + negative_prompt = negative_prompts[0] + if prompt == '': + # disable expansion when empty since it is not meaningful and influences image prompt + use_expansion = False + extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] + extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'Loading models ...') + lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames, + async_task.performance_selection) + loras, prompt = parse_lora_references_from_prompt(prompt, async_task.loras, + modules.config.default_max_lora_number, + lora_filenames=lora_filenames) + loras += async_task.performance_loras + pipeline.refresh_everything(refiner_model_name=async_task.refiner_model_name, + base_model_name=async_task.base_model_name, + loras=loras, base_model_additional_loras=base_model_additional_loras, + use_synthetic_refiner=use_synthetic_refiner, vae_name=async_task.vae_name) + pipeline.set_clip_skip(async_task.clip_skip) + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'Processing prompts ...') + tasks = [] + for i in range(image_number): + if disable_seed_increment: + task_seed = async_task.seed % (constants.MAX_SEED + 1) + else: + task_seed = (async_task.seed + i) % (constants.MAX_SEED + 1) # randint is inclusive, % is not + + task_rng = random.Random(task_seed) # may bind to inpaint noise in the future + task_prompt = 
apply_wildcards(prompt, task_rng, i, async_task.read_wildcards_in_order) + task_prompt = apply_arrays(task_prompt, i) + task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, async_task.read_wildcards_in_order) + task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, async_task.read_wildcards_in_order) for pmt + in + extra_positive_prompts] + task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, async_task.read_wildcards_in_order) for pmt + in + extra_negative_prompts] + + positive_basic_workloads = [] + negative_basic_workloads = [] + + task_styles = async_task.style_selections.copy() + if use_style: + for j, s in enumerate(task_styles): + if s == random_style_name: + s = get_random_style(task_rng) + task_styles[j] = s + p, n = apply_style(s, positive=task_prompt) + positive_basic_workloads = positive_basic_workloads + p + negative_basic_workloads = negative_basic_workloads + n + else: + positive_basic_workloads.append(task_prompt) + + negative_basic_workloads.append(task_negative_prompt) # Always use independent workload for negative. + + positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts + negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts + + positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt) + negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt) + + tasks.append(dict( + task_seed=task_seed, + task_prompt=task_prompt, + task_negative_prompt=task_negative_prompt, + positive=positive_basic_workloads, + negative=negative_basic_workloads, + expansion='', + c=None, + uc=None, + positive_top_k=len(positive_basic_workloads), + negative_top_k=len(negative_basic_workloads), + log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts), + log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts), + styles=task_styles + )) + if use_expansion: + if advance_progress: + current_progress += 1 + for i, t in enumerate(tasks): + + progressbar(async_task, current_progress, f'Preparing Fooocus text #{i + 1} ...') + expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed']) + print(f'[Prompt Expansion] {expansion}') + t['expansion'] = expansion + t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy. 
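# Illustrative sketch (not patch code) of the per-image seed rule used above:
# the seed is fixed or incremented per image, then wrapped because randint is
# inclusive while % is not. MAX_SEED = 2**63 - 1 is assumed here for the
# example; the real value lives in modules/constants.py.
def task_seed_for(seed: int, i: int, disable_seed_increment: bool,
                  max_seed: int = 2**63 - 1) -> int:
    base = seed if disable_seed_increment else seed + i
    return base % (max_seed + 1)

assert task_seed_for(7, 3, disable_seed_increment=True) == 7
assert task_seed_for(7, 3, disable_seed_increment=False) == 10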
+ if advance_progress: + current_progress += 1 + for i, t in enumerate(tasks): + progressbar(async_task, current_progress, f'Encoding positive #{i + 1} ...') + t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k']) + if advance_progress: + current_progress += 1 + for i, t in enumerate(tasks): + if abs(float(async_task.cfg_scale) - 1.0) < 1e-4: + t['uc'] = pipeline.clone_cond(t['c']) + else: + progressbar(async_task, current_progress, f'Encoding negative #{i + 1} ...') + t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k']) + return tasks, use_expansion, loras, current_progress + + def apply_freeu(async_task): + print(f'FreeU is enabled!') + pipeline.final_unet = core.apply_freeu( + pipeline.final_unet, + async_task.freeu_b1, + async_task.freeu_b2, + async_task.freeu_s1, + async_task.freeu_s2 + ) + + def patch_discrete(unet, scheduler_name): + return core.opModelSamplingDiscrete.patch(unet, scheduler_name, False)[0] + + def patch_edm(unet, scheduler_name): + return core.opModelSamplingContinuousEDM.patch(unet, scheduler_name, 120.0, 0.002)[0] + + def patch_samplers(async_task): + final_scheduler_name = async_task.scheduler_name + + if async_task.scheduler_name in ['lcm', 'tcd']: + final_scheduler_name = 'sgm_uniform' + if pipeline.final_unet is not None: + pipeline.final_unet = patch_discrete(pipeline.final_unet, async_task.scheduler_name) + if pipeline.final_refiner_unet is not None: + pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet, async_task.scheduler_name) + + elif async_task.scheduler_name == 'edm_playground_v2.5': + final_scheduler_name = 'karras' + if pipeline.final_unet is not None: + pipeline.final_unet = patch_edm(pipeline.final_unet, async_task.scheduler_name) + if pipeline.final_refiner_unet is not None: + pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet, async_task.scheduler_name) + + return final_scheduler_name + + def set_hyper_sd_defaults(async_task, current_progress, advance_progress=False): + print('Enter Hyper-SD mode.') + if advance_progress: + current_progress += 1 + progressbar(async_task, current_progress, 'Downloading Hyper-SD components ...') + async_task.performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)] + if async_task.refiner_model_name != 'None': + print(f'Refiner disabled in Hyper-SD mode.') + async_task.refiner_model_name = 'None' + async_task.sampler_name = 'dpmpp_sde_gpu' + async_task.scheduler_name = 'karras' + async_task.sharpness = 0.0 + async_task.cfg_scale = 1.0 + async_task.adaptive_cfg = 1.0 + async_task.refiner_switch = 1.0 + async_task.adm_scaler_positive = 1.0 + async_task.adm_scaler_negative = 1.0 + async_task.adm_scaler_end = 0.0 + return current_progress + + def set_lightning_defaults(async_task, current_progress, advance_progress=False): + print('Enter Lightning mode.') + if advance_progress: + current_progress += 1 + progressbar(async_task, 1, 'Downloading Lightning components ...') + async_task.performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)] + if async_task.refiner_model_name != 'None': + print(f'Refiner disabled in Lightning mode.') + async_task.refiner_model_name = 'None' + async_task.sampler_name = 'euler' + async_task.scheduler_name = 'sgm_uniform' + async_task.sharpness = 0.0 + async_task.cfg_scale = 1.0 + async_task.adaptive_cfg = 1.0 + async_task.refiner_switch = 1.0 + async_task.adm_scaler_positive = 1.0 + async_task.adm_scaler_negative = 1.0 + async_task.adm_scaler_end = 0.0 
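# Side-by-side of the performance presets (editorial summary drawn from
# set_hyper_sd_defaults above and set_lcm_defaults below; the dict is a
# hypothetical consolidation, not patch code). All three zero out sharpness
# and ADM guidance, force cfg_scale = 1.0, and disable the refiner:
PERFORMANCE_PRESETS = {
    # mode:      (sampler,         scheduler,     LoRA weight)
    'hyper_sd':  ('dpmpp_sde_gpu', 'karras',      0.8),
    'lightning': ('euler',         'sgm_uniform', 1.0),
    'lcm':       ('lcm',           'lcm',         1.0),
}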
+        return current_progress
+
+    def set_lcm_defaults(async_task, current_progress, advance_progress=False):
+        print('Enter LCM mode.')
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'Downloading LCM components ...')
+        async_task.performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)]
+        if async_task.refiner_model_name != 'None':
+            print('Refiner disabled in LCM mode.')
+            async_task.refiner_model_name = 'None'
+        async_task.sampler_name = 'lcm'
+        async_task.scheduler_name = 'lcm'
+        async_task.sharpness = 0.0
+        async_task.cfg_scale = 1.0
+        async_task.adaptive_cfg = 1.0
+        async_task.refiner_switch = 1.0
+        async_task.adm_scaler_positive = 1.0
+        async_task.adm_scaler_negative = 1.0
+        async_task.adm_scaler_end = 0.0
+        return current_progress
+
+    def apply_image_input(async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path,
+                          controlnet_cpds_path, goals, inpaint_head_model_path, inpaint_image, inpaint_mask,
+                          inpaint_parameterized, ip_adapter_face_path, ip_adapter_path, ip_negative_path,
+                          skip_prompt_processing, use_synthetic_refiner):
+        if (async_task.current_tab == 'uov' or (
+                async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
+                and async_task.uov_method != flags.disabled.casefold() and async_task.uov_input_image is not None:
+            async_task.uov_input_image, skip_prompt_processing, async_task.steps = prepare_upscale(
+                async_task, goals, async_task.uov_input_image, async_task.uov_method, async_task.performance_selection,
+                async_task.steps, 1, skip_prompt_processing=skip_prompt_processing)
+        if (async_task.current_tab == 'inpaint' or (
+                async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
+                and isinstance(async_task.inpaint_input_image, dict):
+            inpaint_image = async_task.inpaint_input_image['image']
+            inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
+
+            if async_task.inpaint_mask_upload_checkbox:
+                if isinstance(async_task.inpaint_mask_image_upload, dict):
+                    if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
+                            and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
+                            and async_task.inpaint_mask_image_upload['image'].ndim == 3):
+                        async_task.inpaint_mask_image_upload = np.maximum(
+                            async_task.inpaint_mask_image_upload['image'],
+                            async_task.inpaint_mask_image_upload['mask'])
+                if isinstance(async_task.inpaint_mask_image_upload,
+                              np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
+                    H, W, C = inpaint_image.shape
+                    async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload,
+                                                                          width=W, height=H)
+                    async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
+                    async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(
+                        np.uint8) * 255
+                    inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
+
+            if int(async_task.inpaint_erode_or_dilate) != 0:
+                inpaint_mask = erode_or_dilate(inpaint_mask, async_task.inpaint_erode_or_dilate)
+
+            if async_task.invert_mask_checkbox:
+                inpaint_mask = 255 - inpaint_mask
+
+            inpaint_image = HWC3(inpaint_image)
+            if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
+                    and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
+                progressbar(async_task, 1, 'Downloading upscale models ...')
+                modules.config.downloading_upscale_model()
+                if inpaint_parameterized:
+                    progressbar(async_task, 1, 'Downloading inpainter ...')
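+                    # the inpaint patch model downloaded here is attached below as an
+                    # additional LoRA (weight 1.0) on the base model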
+                    inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
+                        async_task.inpaint_engine)
+                    base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+                    print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
+                    if async_task.refiner_model_name == 'None':
+                        use_synthetic_refiner = True
+                        async_task.refiner_switch = 0.8
+                else:
+                    inpaint_head_model_path, inpaint_patch_model_path = None, None
+                    print('[Inpaint] Parameterized inpaint is disabled.')
+                if async_task.inpaint_additional_prompt != '':
+                    if async_task.prompt == '':
+                        async_task.prompt = async_task.inpaint_additional_prompt
+                    else:
+                        async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
+                goals.append('inpaint')
+        if async_task.current_tab == 'ip' or \
+                async_task.mixing_image_prompt_and_vary_upscale or \
+                async_task.mixing_image_prompt_and_inpaint:
+            goals.append('cn')
+            progressbar(async_task, 1, 'Downloading control models ...')
+            if len(async_task.cn_tasks[flags.cn_canny]) > 0:
+                controlnet_canny_path = modules.config.downloading_controlnet_canny()
+            if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
+                controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
+            if len(async_task.cn_tasks[flags.cn_ip]) > 0:
+                clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
+            if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
+                clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
+                    'face')
+        if async_task.current_tab == 'enhance' and async_task.enhance_input_image is not None:
+            goals.append('enhance')
+            skip_prompt_processing = True
+            async_task.enhance_input_image = HWC3(async_task.enhance_input_image)
+        return base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner
+
+    def prepare_upscale(async_task, goals, uov_input_image, uov_method, performance, steps, current_progress,
+                        advance_progress=False, skip_prompt_processing=False):
+        uov_input_image = HWC3(uov_input_image)
+        if 'vary' in uov_method:
+            goals.append('vary')
+        elif 'upscale' in uov_method:
+            goals.append('upscale')
+            if 'fast' in uov_method:
+                skip_prompt_processing = True
+                steps = 0
+            else:
+                steps = performance.steps_uov()
+
+        if advance_progress:
+            current_progress += 1
+        progressbar(async_task, current_progress, 'Downloading upscale models ...')
+        modules.config.downloading_upscale_model()
+        return uov_input_image, skip_prompt_processing, steps
+
+    def prepare_enhance_prompt(prompt: str, fallback_prompt: str, translate: bool, prompt_type: str):
+        if safe_str(prompt) == '' or len(remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')) == 0:
+            prompt = fallback_prompt
+        elif translate:
+            prompt = translate2en(prompt, prompt_type)
+        return prompt
+
+    def stop_processing(async_task, processing_start_time):
+        async_task.processing = False
+        processing_time = time.perf_counter() - processing_start_time
+        print(f'Processing time (total): {processing_time:.2f} seconds')
+
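+    # Rough flow of process_enhance below (orientation comment, not normative):
+    # optional vary/upscale of the source image, then prompt re-encoding (the
+    # conditioning of the main pass is gone at this point), then a masked
+    # inpaint pass via apply_inpaint/process_task.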
+    def process_enhance(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+                        current_progress, current_task_id, denoising_strength, inpaint_disable_initial_latent,
+                        inpaint_engine, inpaint_respective_field, inpaint_strength,
+                        prompt, negative_prompt, final_scheduler_name, goals, height, img, mask,
+                        preparation_steps, steps, switch, tiled, total_count, use_expansion, use_style,
+                        use_synthetic_refiner, width, show_intermediate_results=True):
+        base_model_additional_loras = []
+        inpaint_head_model_path = None
+        inpaint_parameterized = inpaint_engine != 'None'  # a 'None' engine means pure detail improvement, without the inpaint patch model
+        initial_latent = None
+
+        if 'vary' in goals:
+            img, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
+                async_task, async_task.enhance_uov_method, denoising_strength, img, switch, current_progress)
+        if 'upscale' in goals:
+            direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
+                async_task, img, async_task.enhance_uov_method, switch, current_progress)
+            if direct_return:
+                d = [('Upscale (Fast)', 'upscale_fast', '2x')]
+                if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
+                    progressbar(async_task, current_progress, 'Checking for NSFW content ...')
+                    img = default_censor(img)
+                progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{total_count} to system ...')
+                uov_image_path = log(img, d, output_format=async_task.output_format)
+                yield_result(async_task, uov_image_path, current_progress, async_task.black_out_nsfw, False,
+                             do_not_show_finished_images=not show_intermediate_results or async_task.disable_intermediate_results)
+                return current_progress, img
+
+        if 'inpaint' in goals and inpaint_parameterized:
+            progressbar(async_task, current_progress, 'Downloading inpainter ...')
+            inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
+                inpaint_engine)
+            if inpaint_patch_model_path not in base_model_additional_loras:
+                base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+        progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
+        prompt = prepare_enhance_prompt(prompt, async_task.prompt, async_task.translate_prompts, 'prompt')
+        negative_prompt = prepare_enhance_prompt(negative_prompt, async_task.negative_prompt,
+                                                 async_task.translate_prompts, 'negative prompt')
+        # the positive and negative conditioning from the main pass are no longer available here, so process the prompts again
+        tasks_enhance, use_expansion, loras, current_progress = process_prompt(
+            async_task, prompt, negative_prompt, base_model_additional_loras, 1, True,
+            use_expansion, use_style, use_synthetic_refiner, current_progress)
+        task_enhance = tasks_enhance[0]
+        # TODO: could support vary, upscale and CN in the future
+        # if 'cn' in goals:
+        #     apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+        if async_task.freeu_enabled:
+            apply_freeu(async_task)
+        patch_samplers(async_task)
+        if 'inpaint' in goals:
+            denoising_strength, initial_latent, width, height, current_progress = apply_inpaint(
+                async_task, None, inpaint_head_model_path, img, mask,
+                inpaint_parameterized, inpaint_strength,
+                inpaint_respective_field, switch, inpaint_disable_initial_latent,
+                current_progress, True)
+        imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
+                                                         controlnet_cpds_path, current_task_id, denoising_strength,
+                                                         final_scheduler_name, goals, initial_latent, steps, switch,
+                                                         task_enhance['c'], task_enhance['uc'], task_enhance, tiled,
+                                                         use_expansion, width, height, current_progress,
+                                                         preparation_steps, total_count, show_intermediate_results)
+
+        del task_enhance['c'], task_enhance['uc']  # Save memory
+        return current_progress, imgs[0], prompt, 
negative_prompt + + def enhance_upscale(all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path, + current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps, + prompt, negative_prompt, final_scheduler_name, height, img, preparation_steps, switch, tiled, + total_count, use_expansion, use_style, use_synthetic_refiner, width): + # reset inpaint worker to prevent tensor size issues and not mix upscale and inpainting + inpaint_worker.current_task = None + + current_task_id += 1 + current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting)) + goals_enhance = [] + img, skip_prompt_processing, steps = prepare_upscale( + async_task, goals_enhance, img, async_task.enhance_uov_method, async_task.performance_selection, + enhance_steps, current_progress) + steps, _, _, _ = apply_overrides(async_task, steps, height, width) + exception_result = '' + if len(goals_enhance) > 0: + try: + current_progress, img, prompt, negative_prompt = process_enhance( + all_steps, async_task, callback, controlnet_canny_path, + controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False, + 'None', 0.0, 0.0, prompt, negative_prompt, final_scheduler_name, + goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count, + use_expansion, use_style, use_synthetic_refiner, width) + + except ldm_patched.modules.model_management.InterruptProcessingException: + if async_task.last_stop == 'skip': + print('User skipped') + async_task.last_stop = False + # also skip all enhance steps for this image, but add the steps to the progress bar + if async_task.enhance_uov_processing_order == flags.enhancement_uov_before: + done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps + exception_result = 'continue' + else: + print('User stopped') + exception_result = 'break' + finally: + done_steps_upscaling += steps + return current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result + @torch.no_grad() @torch.inference_mode() - def handler(async_task): - execution_start_time = time.perf_counter() + def handler(async_task: AsyncTask): + preparation_start_time = time.perf_counter() async_task.processing = True - args = async_task.args - args.reverse() - - prompt = args.pop() - negative_prompt = args.pop() - translate_prompts = args.pop() - style_selections = args.pop() - performance_selection = Performance(args.pop()) - aspect_ratios_selection = args.pop() - image_number = args.pop() - output_format = args.pop() - image_seed = args.pop() - read_wildcards_in_order = args.pop() - sharpness = args.pop() - guidance_scale = args.pop() - base_model_name = args.pop() - refiner_model_name = args.pop() - refiner_switch = args.pop() - loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in - range(modules.config.default_max_lora_number)]) - input_image_checkbox = args.pop() - current_tab = args.pop() - uov_method = args.pop() - uov_input_image = args.pop() - outpaint_selections = args.pop() - inpaint_input_image = args.pop() - inpaint_additional_prompt = args.pop() - inpaint_mask_image_upload = args.pop() - - disable_preview = args.pop() - disable_intermediate_results = args.pop() - disable_seed_increment = args.pop() - black_out_nsfw = args.pop() - adm_scaler_positive = args.pop() - adm_scaler_negative = args.pop() - adm_scaler_end = args.pop() - adaptive_cfg = args.pop() - clip_skip = 
args.pop() - sampler_name = args.pop() - scheduler_name = args.pop() - vae_name = args.pop() - overwrite_step = args.pop() - overwrite_switch = args.pop() - overwrite_width = args.pop() - overwrite_height = args.pop() - overwrite_vary_strength = args.pop() - overwrite_upscale_strength = args.pop() - mixing_image_prompt_and_vary_upscale = args.pop() - mixing_image_prompt_and_inpaint = args.pop() - debugging_cn_preprocessor = args.pop() - skipping_cn_preprocessor = args.pop() - canny_low_threshold = args.pop() - canny_high_threshold = args.pop() - refiner_swap_method = args.pop() - controlnet_softness = args.pop() - freeu_enabled = args.pop() - freeu_b1 = args.pop() - freeu_b2 = args.pop() - freeu_s1 = args.pop() - freeu_s2 = args.pop() - debugging_inpaint_preprocessor = args.pop() - inpaint_disable_initial_latent = args.pop() - inpaint_engine = args.pop() - inpaint_strength = args.pop() - inpaint_respective_field = args.pop() - inpaint_mask_upload_checkbox = args.pop() - invert_mask_checkbox = args.pop() - inpaint_erode_or_dilate = args.pop() - - save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False - metadata_scheme = MetadataScheme( - args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS - - cn_tasks = {x: [] for x in flags.ip_list} - for _ in range(flags.controlnet_image_count): - cn_img = args.pop() - cn_stop = args.pop() - cn_weight = args.pop() - cn_type = args.pop() - if cn_img is not None: - cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight]) - - outpaint_selections = [o.lower() for o in outpaint_selections] + async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections] base_model_additional_loras = [] - raw_style_selections = copy.deepcopy(style_selections) - uov_method = uov_method.lower() + async_task.uov_method = async_task.uov_method.casefold() + async_task.enhance_uov_method = async_task.enhance_uov_method.casefold() - if fooocus_expansion in style_selections: + if fooocus_expansion in async_task.style_selections: use_expansion = True - style_selections.remove(fooocus_expansion) + async_task.style_selections.remove(fooocus_expansion) else: use_expansion = False - use_style = len(style_selections) > 0 + use_style = len(async_task.style_selections) > 0 - if base_model_name == refiner_model_name: + if async_task.base_model_name == async_task.refiner_model_name: print(f'Refiner disabled because base model and refiner are same.') - refiner_model_name = 'None' + async_task.refiner_model_name = 'None' - steps = performance_selection.steps() + current_progress = 0 + if async_task.performance_selection == Performance.EXTREME_SPEED: + set_lcm_defaults(async_task, current_progress, advance_progress=True) + elif async_task.performance_selection == Performance.LIGHTNING: + set_lightning_defaults(async_task, current_progress, advance_progress=True) + elif async_task.performance_selection == Performance.HYPER_SD: + set_hyper_sd_defaults(async_task, current_progress, advance_progress=True) - performance_loras = [] + if async_task.translate_prompts: + async_task.prompt = translate2en(async_task.prompt, 'prompt') + async_task.negative_prompt = translate2en(async_task.negative_prompt, 'negative prompt') - if performance_selection == Performance.EXTREME_SPEED: - print('Enter LCM mode.') - progressbar(async_task, 1, 'Downloading LCM components ...') - performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)] - - if refiner_model_name != 'None': - print(f'Refiner disabled in LCM mode.') - - 
refiner_model_name = 'None' - sampler_name = 'lcm' - scheduler_name = 'lcm' - sharpness = 0.0 - guidance_scale = 1.0 - adaptive_cfg = 1.0 - refiner_switch = 1.0 - adm_scaler_positive = 1.0 - adm_scaler_negative = 1.0 - adm_scaler_end = 0.0 - - elif performance_selection == Performance.LIGHTNING: - print('Enter Lightning mode.') - progressbar(async_task, 1, 'Downloading Lightning components ...') - performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)] - - if refiner_model_name != 'None': - print(f'Refiner disabled in Lightning mode.') - - refiner_model_name = 'None' - sampler_name = 'euler' - scheduler_name = 'sgm_uniform' - sharpness = 0.0 - guidance_scale = 1.0 - adaptive_cfg = 1.0 - refiner_switch = 1.0 - adm_scaler_positive = 1.0 - adm_scaler_negative = 1.0 - adm_scaler_end = 0.0 - - elif performance_selection == Performance.HYPER_SD: - print('Enter Hyper-SD mode.') - progressbar(async_task, 1, 'Downloading Hyper-SD components ...') - performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)] - - if refiner_model_name != 'None': - print(f'Refiner disabled in Hyper-SD mode.') - - refiner_model_name = 'None' - sampler_name = 'dpmpp_sde_gpu' - scheduler_name = 'karras' - sharpness = 0.0 - guidance_scale = 1.0 - adaptive_cfg = 1.0 - refiner_switch = 1.0 - adm_scaler_positive = 1.0 - adm_scaler_negative = 1.0 - adm_scaler_end = 0.0 - - if translate_prompts: - from modules.translator import translate2en - prompt = translate2en(prompt, 'prompt') - negative_prompt = translate2en(negative_prompt, 'negative prompt') - - print(f'[Parameters] Adaptive CFG = {adaptive_cfg}') - print(f'[Parameters] CLIP Skip = {clip_skip}') - print(f'[Parameters] Sharpness = {sharpness}') - print(f'[Parameters] ControlNet Softness = {controlnet_softness}') + print(f'[Parameters] Adaptive CFG = {async_task.adaptive_cfg}') + print(f'[Parameters] CLIP Skip = {async_task.clip_skip}') + print(f'[Parameters] Sharpness = {async_task.sharpness}') + print(f'[Parameters] ControlNet Softness = {async_task.controlnet_softness}') print(f'[Parameters] ADM Scale = ' - f'{adm_scaler_positive} : ' - f'{adm_scaler_negative} : ' - f'{adm_scaler_end}') + f'{async_task.adm_scaler_positive} : ' + f'{async_task.adm_scaler_negative} : ' + f'{async_task.adm_scaler_end}') + print(f'[Parameters] Seed = {async_task.seed}') - patch_settings[pid] = PatchSettings( - sharpness, - adm_scaler_end, - adm_scaler_positive, - adm_scaler_negative, - controlnet_softness, - adaptive_cfg - ) + apply_patch_settings(async_task) - cfg_scale = float(guidance_scale) - print(f'[Parameters] CFG = {cfg_scale}') + print(f'[Parameters] CFG = {async_task.cfg_scale}') initial_latent = None denoising_strength = 1.0 tiled = False - width, height = aspect_ratios_selection.replace('×', ' ').split(' ')[:2] + width, height = async_task.aspect_ratios_selection.replace('×', ' ').split(' ')[:2] width, height = int(width), int(height) skip_prompt_processing = False inpaint_worker.current_task = None - inpaint_parameterized = inpaint_engine != 'None' + inpaint_parameterized = async_task.inpaint_engine != 'None' inpaint_image = None inpaint_mask = None inpaint_head_model_path = None @@ -347,480 +1129,112 @@ def worker(): controlnet_cpds_path = None clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None - seed = int(image_seed) - print(f'[Parameters] Seed = {seed}') - goals = [] tasks = [] + current_progress = 1 - if input_image_checkbox: - if (current_tab == 'uov' or ( - current_tab == 'ip' and 
mixing_image_prompt_and_vary_upscale)) \ - and uov_method != flags.disabled and uov_input_image is not None: - uov_input_image = HWC3(uov_input_image) - if 'vary' in uov_method: - goals.append('vary') - elif 'upscale' in uov_method: - goals.append('upscale') - if 'fast' in uov_method: - skip_prompt_processing = True - else: - steps = performance_selection.steps_uov() - - progressbar(async_task, 1, 'Downloading upscale models ...') - modules.config.downloading_upscale_model() - if (current_tab == 'inpaint' or ( - current_tab == 'ip' and mixing_image_prompt_and_inpaint)) \ - and isinstance(inpaint_input_image, dict): - inpaint_image = inpaint_input_image['image'] - inpaint_mask = inpaint_input_image['mask'][:, :, 0] - - if inpaint_mask_upload_checkbox: - if isinstance(inpaint_mask_image_upload, dict): - if (isinstance(inpaint_mask_image_upload['image'], np.ndarray) - and isinstance(inpaint_mask_image_upload['mask'], np.ndarray) - and inpaint_mask_image_upload['image'].ndim == 3): - inpaint_mask_image_upload = np.maximum(inpaint_mask_image_upload['image'], inpaint_mask_image_upload['mask']) - if isinstance(inpaint_mask_image_upload, np.ndarray) and inpaint_mask_image_upload.ndim == 3: - H, W, C = inpaint_image.shape - inpaint_mask_image_upload = resample_image(inpaint_mask_image_upload, width=W, height=H) - inpaint_mask_image_upload = np.mean(inpaint_mask_image_upload, axis=2) - inpaint_mask_image_upload = (inpaint_mask_image_upload > 127).astype(np.uint8) * 255 - inpaint_mask = np.maximum(inpaint_mask, inpaint_mask_image_upload) - - if int(inpaint_erode_or_dilate) != 0: - inpaint_mask = erode_or_dilate(inpaint_mask, inpaint_erode_or_dilate) - - if invert_mask_checkbox: - inpaint_mask = 255 - inpaint_mask - - inpaint_image = HWC3(inpaint_image) - if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \ - and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0): - progressbar(async_task, 1, 'Downloading upscale models ...') - modules.config.downloading_upscale_model() - if inpaint_parameterized: - progressbar(async_task, 1, 'Downloading inpainter ...') - inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models( - inpaint_engine) - base_model_additional_loras += [(inpaint_patch_model_path, 1.0)] - print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}') - if refiner_model_name == 'None': - use_synthetic_refiner = True - refiner_switch = 0.8 - else: - inpaint_head_model_path, inpaint_patch_model_path = None, None - print(f'[Inpaint] Parameterized inpaint is disabled.') - if inpaint_additional_prompt != '': - if prompt == '': - prompt = inpaint_additional_prompt - else: - prompt = inpaint_additional_prompt + '\n' + prompt - goals.append('inpaint') - if current_tab == 'ip' or \ - mixing_image_prompt_and_vary_upscale or \ - mixing_image_prompt_and_inpaint: - goals.append('cn') - progressbar(async_task, 1, 'Downloading control models ...') - if len(cn_tasks[flags.cn_canny]) > 0: - controlnet_canny_path = modules.config.downloading_controlnet_canny() - if len(cn_tasks[flags.cn_cpds]) > 0: - controlnet_cpds_path = modules.config.downloading_controlnet_cpds() - if len(cn_tasks[flags.cn_ip]) > 0: - clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip') - if len(cn_tasks[flags.cn_ip_face]) > 0: - clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters( - 'face') - progressbar(async_task, 1, 'Loading control models ...') + if 
async_task.input_image_checkbox: + base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner = apply_image_input( + async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path, + goals, inpaint_head_model_path, inpaint_image, inpaint_mask, inpaint_parameterized, ip_adapter_face_path, + ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner) # Load or unload CNs + progressbar(async_task, current_progress, 'Loading control models ...') pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path]) ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path) ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path) - if overwrite_step > 0: - steps = overwrite_step + async_task.steps, switch, width, height = apply_overrides(async_task, async_task.steps, height, width) - switch = int(round(steps * refiner_switch)) + print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}') + print(f'[Parameters] Steps = {async_task.steps} - {switch}') - if overwrite_switch > 0: - switch = overwrite_switch - - if overwrite_width > 0: - width = overwrite_width - - if overwrite_height > 0: - height = overwrite_height - - print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}') - print(f'[Parameters] Steps = {steps} - {switch}') - - progressbar(async_task, 1, 'Initializing ...') + progressbar(async_task, current_progress, 'Initializing ...') if not skip_prompt_processing: - - prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='') - negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='') - - prompt = prompts[0] - negative_prompt = negative_prompts[0] - - if prompt == '': - # disable expansion when empty since it is not meaningful and influences image prompt - use_expansion = False - - extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] - extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] - - progressbar(async_task, 2, 'Loading models ...') - - lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames, performance_selection) - loras, prompt = parse_lora_references_from_prompt(prompt, loras, modules.config.default_max_lora_number, lora_filenames=lora_filenames) - loras += performance_loras - - pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name, - loras=loras, base_model_additional_loras=base_model_additional_loras, - use_synthetic_refiner=use_synthetic_refiner, vae_name=vae_name) - - pipeline.set_clip_skip(clip_skip) - - progressbar(async_task, 3, 'Processing prompts ...') - tasks = [] - - for i in range(image_number): - if disable_seed_increment: - task_seed = seed % (constants.MAX_SEED + 1) - else: - task_seed = (seed + i) % (constants.MAX_SEED + 1) # randint is inclusive, % is not - - task_rng = random.Random(task_seed) # may bind to inpaint noise in the future - task_prompt = apply_wildcards(prompt, task_rng, i, read_wildcards_in_order) - task_prompt = apply_arrays(task_prompt, i) - task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, read_wildcards_in_order) - task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt 
in - extra_positive_prompts] - task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in - extra_negative_prompts] - - positive_basic_workloads = [] - negative_basic_workloads = [] - - task_styles = style_selections.copy() - if use_style: - for i, s in enumerate(task_styles): - if s == random_style_name: - s = get_random_style(task_rng) - task_styles[i] = s - p, n = apply_style(s, positive=task_prompt) - positive_basic_workloads = positive_basic_workloads + p - negative_basic_workloads = negative_basic_workloads + n - else: - positive_basic_workloads.append(task_prompt) - - negative_basic_workloads.append(task_negative_prompt) # Always use independent workload for negative. - - positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts - negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts - - positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt) - negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt) - - tasks.append(dict( - task_seed=task_seed, - task_prompt=task_prompt, - task_negative_prompt=task_negative_prompt, - positive=positive_basic_workloads, - negative=negative_basic_workloads, - expansion='', - c=None, - uc=None, - positive_top_k=len(positive_basic_workloads), - negative_top_k=len(negative_basic_workloads), - log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts), - log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts), - styles=task_styles - )) - - if use_expansion: - for i, t in enumerate(tasks): - progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...') - expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed']) - print(f'[Prompt Expansion] {expansion}') - t['expansion'] = expansion - t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy. 
- - for i, t in enumerate(tasks): - progressbar(async_task, 5, f'Encoding positive #{i + 1} ...') - t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k']) - - for i, t in enumerate(tasks): - if abs(float(cfg_scale) - 1.0) < 1e-4: - t['uc'] = pipeline.clone_cond(t['c']) - else: - progressbar(async_task, 6, f'Encoding negative #{i + 1} ...') - t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k']) + tasks, use_expansion, loras, current_progress = process_prompt(async_task, async_task.prompt, async_task.negative_prompt, + base_model_additional_loras, async_task.image_number, + async_task.disable_seed_increment, use_expansion, use_style, + use_synthetic_refiner, current_progress, advance_progress=True) if len(goals) > 0: - progressbar(async_task, 7, 'Image processing ...') + current_progress += 1 + progressbar(async_task, current_progress, 'Image processing ...') if 'vary' in goals: - if 'subtle' in uov_method: - denoising_strength = 0.5 - if 'strong' in uov_method: - denoising_strength = 0.85 - if overwrite_vary_strength > 0: - denoising_strength = overwrite_vary_strength - - shape_ceil = get_image_shape_ceil(uov_input_image) - if shape_ceil < 1024: - print(f'[Vary] Image is resized because it is too small.') - shape_ceil = 1024 - elif shape_ceil > 2048: - print(f'[Vary] Image is resized because it is too big.') - shape_ceil = 2048 - - uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil) - - initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 8, 'VAE encoding ...') - - candidate_vae, _ = pipeline.get_candidate_vae( - steps=steps, - switch=switch, - denoise=denoising_strength, - refiner_swap_method=refiner_swap_method - ) - - initial_latent = core.encode_vae(vae=candidate_vae, pixels=initial_pixels) - B, C, H, W = initial_latent['samples'].shape - width = W * 8 - height = H * 8 - print(f'Final resolution is {str((height, width))}.') + async_task.uov_input_image, denoising_strength, initial_latent, width, height, current_progress = apply_vary( + async_task, async_task.uov_method, denoising_strength, async_task.uov_input_image, switch, + current_progress) if 'upscale' in goals: - H, W, C = uov_input_image.shape - progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...') - uov_input_image = perform_upscale(uov_input_image) - print(f'Image upscaled.') - - if '1.5x' in uov_method: - f = 1.5 - elif '2x' in uov_method: - f = 2.0 - else: - f = 1.0 - - shape_ceil = get_shape_ceil(H * f, W * f) - - if shape_ceil < 1024: - print(f'[Upscale] Image is resized because it is too small.') - uov_input_image = set_image_shape_ceil(uov_input_image, 1024) - shape_ceil = 1024 - else: - uov_input_image = resample_image(uov_input_image, width=W * f, height=H * f) - - image_is_super_large = shape_ceil > 2800 - - if 'fast' in uov_method: - direct_return = True - elif image_is_super_large: - print('Image is too large. Directly returned the SR image. 
' - 'Usually directly return SR image at 4K resolution ' - 'yields better results than SDXL diffusion.') - direct_return = True - else: - direct_return = False - + direct_return, async_task.uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale( + async_task, async_task.uov_input_image, async_task.uov_method, switch, current_progress, + advance_progress=True) if direct_return: d = [('Upscale (Fast)', 'upscale_fast', '2x')] - if modules.config.default_black_out_nsfw or black_out_nsfw: + if modules.config.default_black_out_nsfw or async_task.black_out_nsfw: progressbar(async_task, 100, 'Checking for NSFW content ...') - uov_input_image = default_censor(uov_input_image) + async_task.uov_input_image = default_censor(async_task.uov_input_image) progressbar(async_task, 100, 'Saving image to system ...') - uov_input_image_path = log(uov_input_image, d, output_format=output_format) - yield_result(async_task, uov_input_image_path, black_out_nsfw, False, do_not_show_finished_images=True) + uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format) + yield_result(async_task, uov_input_image_path, 100, async_task.black_out_nsfw, False, + do_not_show_finished_images=True) return - tiled = True - denoising_strength = 0.382 - - if overwrite_upscale_strength > 0: - denoising_strength = overwrite_upscale_strength - - initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 10, 'VAE encoding ...') - - candidate_vae, _ = pipeline.get_candidate_vae( - steps=steps, - switch=switch, - denoise=denoising_strength, - refiner_swap_method=refiner_swap_method - ) - - initial_latent = core.encode_vae( - vae=candidate_vae, - pixels=initial_pixels, tiled=True) - B, C, H, W = initial_latent['samples'].shape - width = W * 8 - height = H * 8 - print(f'Final resolution is {str((height, width))}.') - if 'inpaint' in goals: - if len(outpaint_selections) > 0: - H, W, C = inpaint_image.shape - if 'top' in outpaint_selections: - inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge') - inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant', - constant_values=255) - if 'bottom' in outpaint_selections: - inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge') - inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant', - constant_values=255) - - H, W, C = inpaint_image.shape - if 'left' in outpaint_selections: - inpaint_image = np.pad(inpaint_image, [[0, 0], [int(W * 0.3), 0], [0, 0]], mode='edge') - inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(W * 0.3), 0]], mode='constant', - constant_values=255) - if 'right' in outpaint_selections: - inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(W * 0.3)], [0, 0]], mode='edge') - inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(W * 0.3)]], mode='constant', - constant_values=255) - - inpaint_image = np.ascontiguousarray(inpaint_image.copy()) - inpaint_mask = np.ascontiguousarray(inpaint_mask.copy()) - inpaint_strength = 1.0 - inpaint_respective_field = 1.0 - - denoising_strength = inpaint_strength - - inpaint_worker.current_task = inpaint_worker.InpaintWorker( - image=inpaint_image, - mask=inpaint_mask, - use_fill=denoising_strength > 0.99, - k=inpaint_respective_field - ) - - if debugging_inpaint_preprocessor: - yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), black_out_nsfw, do_not_show_finished_images=True) + try: + 
denoising_strength, initial_latent, width, height, current_progress = apply_inpaint(async_task, + initial_latent, + inpaint_head_model_path, + inpaint_image, + inpaint_mask, + inpaint_parameterized, + async_task.inpaint_strength, + async_task.inpaint_respective_field, + switch, + async_task.inpaint_disable_initial_latent, + current_progress, + advance_progress=True) + except EarlyReturnException: return - progressbar(async_task, 11, 'VAE Inpaint encoding ...') - - inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill) - inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image) - inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask) - - candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae( - steps=steps, - switch=switch, - denoise=denoising_strength, - refiner_swap_method=refiner_swap_method - ) - - latent_inpaint, latent_mask = core.encode_vae_inpaint( - mask=inpaint_pixel_mask, - vae=candidate_vae, - pixels=inpaint_pixel_image) - - latent_swap = None - if candidate_vae_swap is not None: - progressbar(async_task, 12, 'VAE SD15 encoding ...') - latent_swap = core.encode_vae( - vae=candidate_vae_swap, - pixels=inpaint_pixel_fill)['samples'] - - progressbar(async_task, 13, 'VAE encoding ...') - latent_fill = core.encode_vae( - vae=candidate_vae, - pixels=inpaint_pixel_fill)['samples'] - - inpaint_worker.current_task.load_latent( - latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap) - - if inpaint_parameterized: - pipeline.final_unet = inpaint_worker.current_task.patch( - inpaint_head_model_path=inpaint_head_model_path, - inpaint_latent=latent_inpaint, - inpaint_latent_mask=latent_mask, - model=pipeline.final_unet - ) - - if not inpaint_disable_initial_latent: - initial_latent = {'samples': latent_fill} - - B, C, H, W = latent_fill.shape - height, width = H * 8, W * 8 - final_height, final_width = inpaint_worker.current_task.image.shape[:2] - print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.') - if 'cn' in goals: - for task in cn_tasks[flags.cn_canny]: - cn_img, cn_stop, cn_weight = task - cn_img = resize_image(HWC3(cn_img), width=width, height=height) + apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width, current_progress) + if async_task.debugging_cn_preprocessor: + return - if not skipping_cn_preprocessor: - cn_img = preprocessors.canny_pyramid(cn_img, canny_low_threshold, canny_high_threshold) + if async_task.freeu_enabled: + apply_freeu(async_task) - cn_img = HWC3(cn_img) - task[0] = core.numpy_to_pytorch(cn_img) - if debugging_cn_preprocessor: - yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) - return - for task in cn_tasks[flags.cn_cpds]: - cn_img, cn_stop, cn_weight = task - cn_img = resize_image(HWC3(cn_img), width=width, height=height) + # async_task.steps can have value of uov steps here when upscale has been applied + steps, _, _, _ = apply_overrides(async_task, async_task.steps, height, width) - if not skipping_cn_preprocessor: - cn_img = preprocessors.cpds(cn_img) + images_to_enhance = [] + if 'enhance' in goals: + async_task.image_number = 1 + images_to_enhance += [async_task.enhance_input_image] + height, width, _ = async_task.enhance_input_image.shape + # input image already provided, processing is skipped + steps = 0 - cn_img = HWC3(cn_img) - task[0] = core.numpy_to_pytorch(cn_img) - if debugging_cn_preprocessor: - 
yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) - return - for task in cn_tasks[flags.cn_ip]: - cn_img, cn_stop, cn_weight = task - cn_img = HWC3(cn_img) + all_steps = steps * async_task.image_number - # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75 - cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0) + if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled.casefold(): + enhance_upscale_steps = async_task.performance_selection.steps() + if 'upscale' in async_task.enhance_uov_method: + if 'fast' in async_task.enhance_uov_method: + enhance_upscale_steps = 0 + else: + enhance_upscale_steps = async_task.performance_selection.steps_uov() + enhance_upscale_steps, _, _, _ = apply_overrides(async_task, enhance_upscale_steps, height, width) + enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps + all_steps += enhance_upscale_steps_total - task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path) - if debugging_cn_preprocessor: - yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) - return - for task in cn_tasks[flags.cn_ip_face]: - cn_img, cn_stop, cn_weight = task - cn_img = HWC3(cn_img) + if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0: + enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width) + all_steps += async_task.image_number * len(async_task.enhance_ctrls) * enhance_steps - if not skipping_cn_preprocessor: - cn_img = extras.face_crop.crop_image(cn_img) - - # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75 - cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0) - - task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path) - if debugging_cn_preprocessor: - yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) - return - - all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face] - - if len(all_ip_tasks) > 0: - pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks) - - if freeu_enabled: - print(f'FreeU is enabled!') - pipeline.final_unet = core.apply_freeu( - pipeline.final_unet, - freeu_b1, - freeu_b2, - freeu_s1, - freeu_s2 - ) - - all_steps = steps * image_number print(f'[Parameters] Denoising Strength = {denoising_strength}') @@ -831,163 +1245,47 @@ def worker(): print(f'[Parameters] Initial Latent shape: {log_shape}') - preparation_time = time.perf_counter() - execution_start_time + preparation_time = time.perf_counter() - preparation_start_time print(f'Preparation time: {preparation_time:.2f} seconds') - final_sampler_name = sampler_name - final_scheduler_name = scheduler_name + final_scheduler_name = patch_samplers(async_task) + print(f'Using {final_scheduler_name} scheduler.') - if scheduler_name in ['lcm', 'tcd']: - final_scheduler_name = 'sgm_uniform' + async_task.yields.append(['preview', (current_progress, 'Moving model to GPU ...', None)]) - def patch_discrete(unet): - return core.opModelSamplingDiscrete.patch( - pipeline.final_unet, - sampling=scheduler_name, - zsnr=False)[0] + processing_start_time = time.perf_counter() - if pipeline.final_unet is not None: - pipeline.final_unet = patch_discrete(pipeline.final_unet) - if pipeline.final_refiner_unet is not None: - pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet) - 
print(f'Using {scheduler_name} scheduler.') - elif scheduler_name == 'edm_playground_v2.5': - final_scheduler_name = 'karras' - - def patch_edm(unet): - return core.opModelSamplingContinuousEDM.patch( - unet, - sampling=scheduler_name, - sigma_max=120.0, - sigma_min=0.002)[0] - - if pipeline.final_unet is not None: - pipeline.final_unet = patch_edm(pipeline.final_unet) - if pipeline.final_refiner_unet is not None: - pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet) - - print(f'Using {scheduler_name} scheduler.') - - async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)]) + preparation_steps = current_progress + total_count = async_task.image_number def callback(step, x0, x, total_steps, y): - done_steps = current_task_id * steps + step + if step == 0: + async_task.callback_steps = 0 + async_task.callback_steps += (100 - preparation_steps) / float(all_steps) async_task.yields.append(['preview', ( - int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)), - f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{image_number} ...', y)]) + int(current_progress + async_task.callback_steps), + f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)]) + + should_enhance = async_task.enhance_checkbox and (async_task.enhance_uov_method != flags.disabled.casefold() or len(async_task.enhance_ctrls) > 0) + show_intermediate_results = len(tasks) > 1 or should_enhance for current_task_id, task in enumerate(tasks): - current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(current_task_id * steps) / float(all_steps)) - progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{image_number} ...') + progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{async_task.image_number} ...') execution_start_time = time.perf_counter() try: - if async_task.last_stop is not False: - ldm_patched.modules.model_management.interrupt_current_processing() - positive_cond, negative_cond = task['c'], task['uc'] + imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path, + controlnet_cpds_path, current_task_id, + denoising_strength, final_scheduler_name, goals, + initial_latent, async_task.steps, switch, task['c'], + task['uc'], task, tiled, use_expansion, width, height, + current_progress, preparation_steps, + async_task.image_number, show_intermediate_results) - if 'cn' in goals: - for cn_flag, cn_path in [ - (flags.cn_canny, controlnet_canny_path), - (flags.cn_cpds, controlnet_cpds_path) - ]: - for cn_img, cn_stop, cn_weight in cn_tasks[cn_flag]: - positive_cond, negative_cond = core.apply_controlnet( - positive_cond, negative_cond, - pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop) + current_progress = int(preparation_steps + (100 - preparation_steps) / float(all_steps) * async_task.steps * (current_task_id + 1)) + images_to_enhance += imgs - imgs = pipeline.process_diffusion( - positive_cond=positive_cond, - negative_cond=negative_cond, - steps=steps, - switch=switch, - width=width, - height=height, - image_seed=task['task_seed'], - callback=callback, - sampler_name=final_sampler_name, - scheduler_name=final_scheduler_name, - latent=initial_latent, - denoise=denoising_strength, - tiled=tiled, - cfg_scale=cfg_scale, - refiner_swap_method=refiner_swap_method, - disable_preview=disable_preview - 
) - - del task['c'], task['uc'], positive_cond, negative_cond # Save memory - - if inpaint_worker.current_task is not None: - imgs = [inpaint_worker.current_task.post_process(x) for x in imgs] - - img_paths = [] - - current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float((current_task_id + 1) * steps) / float(all_steps)) - if modules.config.default_black_out_nsfw or black_out_nsfw: - progressbar(async_task, current_progress, 'Checking for NSFW content ...') - imgs = default_censor(imgs) - - progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{image_number} to system ...') - - for x in imgs: - d = [('Prompt', 'prompt', task['log_positive_prompt']), - ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']), - ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']), - ('Styles', 'styles', - str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])), - ('Performance', 'performance', performance_selection.value)] - - if performance_selection.steps() != steps: - d.append(('Steps', 'steps', steps)) - - d += [('Resolution', 'resolution', str((width, height))), - ('Guidance Scale', 'guidance_scale', guidance_scale), - ('Sharpness', 'sharpness', sharpness), - ('ADM Guidance', 'adm_guidance', str(( - modules.patch.patch_settings[pid].positive_adm_scale, - modules.patch.patch_settings[pid].negative_adm_scale, - modules.patch.patch_settings[pid].adm_scaler_end))), - ('Base Model', 'base_model', base_model_name), - ('Refiner Model', 'refiner_model', refiner_model_name), - ('Refiner Switch', 'refiner_switch', refiner_switch)] - - if refiner_model_name != 'None': - if overwrite_switch > 0: - d.append(('Overwrite Switch', 'overwrite_switch', overwrite_switch)) - if refiner_swap_method != flags.refiner_swap_method: - d.append(('Refiner Swap Method', 'refiner_swap_method', refiner_swap_method)) - if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr: - d.append( - ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg)) - - if clip_skip > 1: - d.append(('CLIP Skip', 'clip_skip', clip_skip)) - d.append(('Sampler', 'sampler', sampler_name)) - d.append(('Scheduler', 'scheduler', scheduler_name)) - d.append(('VAE', 'vae', vae_name)) - d.append(('Seed', 'seed', str(task['task_seed']))) - - if freeu_enabled: - d.append(('FreeU', 'freeu', str((freeu_b1, freeu_b2, freeu_s1, freeu_s2)))) - - for li, (n, w) in enumerate(loras): - if n != 'None': - d.append((f'LoRA {li + 1}', f'lora_combined_{li + 1}', f'{n} : {w}')) - - metadata_parser = None - if save_metadata_to_images: - metadata_parser = modules.meta_parser.get_metadata_parser(metadata_scheme) - metadata_parser.set_data(task['log_positive_prompt'], task['positive'], - task['log_negative_prompt'], task['negative'], - steps, base_model_name, refiner_model_name, loras, vae_name) - d.append(('Metadata Scheme', 'metadata_scheme', - metadata_scheme.value if save_metadata_to_images else save_metadata_to_images)) - d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version)) - img_paths.append(log(x, d, metadata_parser, output_format, task)) - - yield_result(async_task, img_paths, black_out_nsfw, False, - do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results) - except ldm_patched.modules.model_management.InterruptProcessingException as e: + except ldm_patched.modules.model_management.InterruptProcessingException: if async_task.last_stop == 'skip': print('User 
skipped') async_task.last_stop = False @@ -996,20 +1294,146 @@ def worker(): print('User stopped') break + del task['c'], task['uc'] # Save memory execution_time = time.perf_counter() - execution_start_time print(f'Generating and saving time: {execution_time:.2f} seconds') - async_task.processing = False + + if not should_enhance: + print(f'[Enhance] Skipping, preconditions aren\'t met') + stop_processing(async_task, processing_start_time) + return + + progressbar(async_task, current_progress, 'Processing enhance ...') + + active_enhance_tabs = len(async_task.enhance_ctrls) + should_process_enhance_uov = async_task.enhance_uov_method != flags.disabled.casefold() + if should_process_enhance_uov: + active_enhance_tabs += 1 + total_count = len(images_to_enhance) * active_enhance_tabs + + base_progress = current_progress + current_task_id = 0 + done_steps_upscaling = 0 + done_steps_inpainting = 0 + enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width) + for img in images_to_enhance: + enhancement_image_start_time = time.perf_counter() + + last_enhance_prompt = async_task.prompt + last_enhance_negative_prompt = async_task.negative_prompt + + if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_before: + current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale( + all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path, + current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps, + async_task.prompt, async_task.negative_prompt, final_scheduler_name, height, img, preparation_steps, + switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width) + if exception_result == 'continue': + continue + elif exception_result == 'break': + break + + # inpaint for all other tabs + for enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model, enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold, enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate, enhance_mask_invert in async_task.enhance_ctrls: + current_task_id += 1 + current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting)) + progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...') + enhancement_task_start_time = time.perf_counter() + + if enhance_mask_model == 'sam': + print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"') + + mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image( + img, mask_model=enhance_mask_model, sam_options=SAMOptions( + dino_prompt=enhance_mask_dino_prompt_text, + dino_box_threshold=enhance_mask_box_threshold, + dino_text_threshold=enhance_mask_text_threshold, + dino_erode_or_dilate=async_task.dino_erode_or_dilate, + dino_debug=async_task.debugging_dino, + max_detections=enhance_mask_sam_max_detections, + model_type=enhance_mask_sam_model, + )) + if len(mask.shape) == 3: + mask = mask[:, :, 0] + + if int(enhance_inpaint_erode_or_dilate) != 0: + mask = erode_or_dilate(mask, enhance_inpaint_erode_or_dilate) + + if enhance_mask_invert: + mask = 255 - mask + + if async_task.debugging_enhance_masks_checkbox: + 
async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)]) + yield_result(async_task, mask, current_progress, async_task.black_out_nsfw, False, + async_task.disable_intermediate_results) + + print(f'[Enhance] {dino_detection_count} boxes detected') + print(f'[Enhance] {sam_detection_count} segments detected in boxes') + print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask') + + if enhance_mask_model == 'sam' and ( + dino_detection_count == 0 or not async_task.debugging_dino and sam_detection_on_mask_count == 0): + print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping') + continue + + goals_enhance = ['inpaint'] + + try: + current_progress, img, enhance_prompt_processed, enhance_negative_prompt_processed = process_enhance( + all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, + current_progress, current_task_id, denoising_strength, enhance_inpaint_disable_initial_latent, + enhance_inpaint_engine, enhance_inpaint_respective_field, enhance_inpaint_strength, + enhance_prompt, enhance_negative_prompt, final_scheduler_name, goals_enhance, height, img, mask, + preparation_steps, enhance_steps, switch, tiled, total_count, use_expansion, use_style, + use_synthetic_refiner, width) + + if (should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after + and async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last_filled): + if enhance_prompt_processed != '': + last_enhance_prompt = enhance_prompt_processed + if enhance_negative_prompt_processed != '': + last_enhance_negative_prompt = enhance_negative_prompt_processed + + except ldm_patched.modules.model_management.InterruptProcessingException: + if async_task.last_stop == 'skip': + print('User skipped') + async_task.last_stop = False + continue + else: + print('User stopped') + break + finally: + done_steps_inpainting += enhance_steps + + enhancement_task_time = time.perf_counter() - enhancement_task_start_time + print(f'Enhancement time: {enhancement_task_time:.2f} seconds') + + if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after: + current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale( + all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path, + current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps, + last_enhance_prompt, last_enhance_negative_prompt, final_scheduler_name, height, img, + preparation_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, + width) + if exception_result == 'continue': + continue + elif exception_result == 'break': + break + + enhancement_image_time = time.perf_counter() - enhancement_image_start_time + print(f'Enhancement image time: {enhancement_image_time:.2f} seconds') + + stop_processing(async_task, processing_start_time) return while True: time.sleep(0.01) if len(async_tasks) > 0: task = async_tasks.pop(0) - generate_image_grid = task.args.pop(0) try: handler(task) - if generate_image_grid: + if task.generate_image_grid: build_image_wall(task) task.yields.append(['finish', task.results]) pipeline.prepare_text_encoder(async_call=True) diff --git a/modules/config.py b/modules/config.py index 049f6cbc..f8af6599 100644 --- a/modules/config.py +++ b/modules/config.py @@ -201,6 +201,7 @@ path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../mo 
diff --git a/modules/config.py b/modules/config.py
index 049f6cbc..f8af6599 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -201,6 +201,7 @@ path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../mo
 path_safety_checker_models = get_dir_or_set_default('path_safety_checker_models', '../models/safety_checker_models/')
 path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/')
 path_safety_checker = get_dir_or_set_default('path_safety_checker', '../models/safety_checker/')
+path_sam = get_dir_or_set_default('path_sam', '../models/sam/')

 path_outputs = get_path_output()

@@ -500,6 +501,50 @@ example_inpaint_prompts = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
+example_enhance_detection_prompts = get_config_item_or_set_default(
+    key='example_enhance_detection_prompts',
+    default_value=[
+        'face', 'eye', 'mouth', 'hair', 'hand', 'body'
+    ],
+    validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
+    expected_type=list
+)
+default_enhance_tabs = get_config_item_or_set_default(
+    key='default_enhance_tabs',
+    default_value=3,
+    validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
+    expected_type=int
+)
+default_enhance_checkbox = get_config_item_or_set_default(
+    key='default_enhance_checkbox',
+    default_value=False,
+    validator=lambda x: isinstance(x, bool),
+    expected_type=bool
+)
+default_enhance_uov_method = get_config_item_or_set_default(
+    key='default_enhance_uov_method',
+    default_value=modules.flags.disabled,
+    validator=lambda x: x in modules.flags.uov_list,
+    expected_type=str
+)
+default_enhance_uov_processing_order = get_config_item_or_set_default(
+    key='default_enhance_uov_processing_order',
+    default_value=modules.flags.enhancement_uov_before,
+    validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
+    expected_type=str
+)
+default_enhance_uov_prompt_type = get_config_item_or_set_default(
+    key='default_enhance_uov_prompt_type',
+    default_value=modules.flags.enhancement_uov_prompt_type_original,
+    validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
+    expected_type=str
+)
+default_sam_max_detections = get_config_item_or_set_default(
+    key='default_sam_max_detections',
+    default_value=0,
+    validator=lambda x: isinstance(x, int) and 0 <= x <= 10,
+    expected_type=int
+)
 default_black_out_nsfw = get_config_item_or_set_default(
     key='default_black_out_nsfw',
     default_value=False,
@@ -526,13 +571,8 @@ metadata_created_by = get_config_item_or_set_default(
 )

 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
+example_enhance_detection_prompts = [[x] for x in example_enhance_detection_prompts]

-default_black_out_nsfw = get_config_item_or_set_default(
-    key='default_black_out_nsfw',
-    default_value=False,
-    validator=lambda x: isinstance(x, bool),
-    expected_type=bool
-)
 default_inpaint_mask_model = get_config_item_or_set_default(
     key='default_inpaint_mask_model',
     default_value='isnet-general-use',
@@ -540,6 +580,13 @@ default_inpaint_mask_model = get_config_item_or_set_default(
     expected_type=str
 )

+default_enhance_inpaint_mask_model = get_config_item_or_set_default(
+    key='default_enhance_inpaint_mask_model',
+    default_value='sam',
+    validator=lambda x: x in modules.flags.inpaint_mask_models,
+    expected_type=str
+)
+
 default_inpaint_mask_cloth_category = get_config_item_or_set_default(
     key='default_inpaint_mask_cloth_category',
     default_value='full',
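# A sketch, not part of the diff: the get_config_item_or_set_default calls above
# all share one contract -- when the user-supplied entry fails the validator or
# has the wrong type, the default wins. Hypothetical simplified helper (the real
# function in modules/config.py also persists the chosen value back to the
# user's config file):

def config_item_or_default(user_config, key, default_value, validator, expected_type):
    value = user_config.get(key, default_value)
    if not isinstance(value, expected_type) or not validator(value):
        return default_value  # invalid or mistyped entries fall back silently
    return value

# Usage example: 'vit_x' is not a known SAM variant, so the membership
# validator rejects it and 'vit_b' is used instead.
model = config_item_or_default({'default_inpaint_mask_sam_model': 'vit_x'},
                               'default_inpaint_mask_sam_model', 'vit_b',
                               validator=lambda x: x in ['vit_b', 'vit_l', 'vit_h'],
                               expected_type=str)
assert model == 'vit_b'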
@@ -549,8 +596,8 @@ default_inpaint_mask_sam_model = get_config_item_or_set_default(
     key='default_inpaint_mask_sam_model',
-    default_value='sam_vit_b_01ec64',
-    validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
+    default_value='vit_b',
+    validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
     expected_type=str
 )

@@ -789,4 +836,43 @@ def downloading_safety_checker_model():
     return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin')


+def download_sam_model(sam_model: str) -> str:
+    match sam_model:
+        case 'vit_b':
+            return downloading_sam_vit_b()
+        case 'vit_l':
+            return downloading_sam_vit_l()
+        case 'vit_h':
+            return downloading_sam_vit_h()
+        case _:
+            raise ValueError(f"sam model {sam_model} does not exist.")
+
+
+def downloading_sam_vit_b():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_b_01ec64.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_b_01ec64.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_b_01ec64.pth')
+
+
+def downloading_sam_vit_l():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_l_0b3195.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_l_0b3195.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_l_0b3195.pth')
+
+
+def downloading_sam_vit_h():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_h_4b8939.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_h_4b8939.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_h_4b8939.pth')
+
+
 update_files()
diff --git a/modules/flags.py b/modules/flags.py
index 6473045d..c8ea0a0c 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -8,9 +8,15 @@ upscale_15 = 'Upscale (1.5x)'
 upscale_2 = 'Upscale (2x)'
 upscale_fast = 'Upscale (Fast 2x)'

-uov_list = [
-    disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
-]
+uov_list = [disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast]
+
+enhancement_uov_before = "Before First Enhancement"
+enhancement_uov_after = "After Last Enhancement"
+enhancement_uov_processing_order = [enhancement_uov_before, enhancement_uov_after]
+
+enhancement_uov_prompt_type_original = 'Original Prompts'
+enhancement_uov_prompt_type_last_filled = 'Last Filled Enhancement Prompts'
+enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last_filled]

 CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]

@@ -76,7 +82,7 @@ output_formats = ['png', 'jpeg', 'webp']

 inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
 inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_h_4b8939', 'sam_vit_l_0b3195']
+inpaint_mask_sam_model = ['vit_b', 'vit_l', 'vit_h']

 inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
 inpaint_option_default = 'Inpaint or Outpaint (default)'
@@ -107,7 +113,6 @@ metadata_scheme = [
 ]

 controlnet_image_count = 4
-preparation_step_count = 13


 class OutputFormat(Enum):
@@ -163,14 +168,6 @@ class Performance(Enum):
     def values(cls) -> list:
         return list(map(lambda c: c.value, cls))

-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
     @classmethod
     def by_steps(cls, steps: int | str):
         return cls[Steps(int(steps)).name]
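# A sketch, not part of the diff: exercising the download_sam_model dispatch
# added to modules/config.py above. Each known variant resolves to a checkpoint
# path under path_sam (downloading the file on first use); any other name
# raises, which is what the config validator above guards against.

import modules.config

for model_type in ('vit_b', 'vit_l', 'vit_h'):
    checkpoint = modules.config.download_sam_model(model_type)
    print(model_type, '->', checkpoint)  # e.g. vit_b -> .../models/sam/sam_vit_b_01ec64.pth

try:
    modules.config.download_sam_model('vit_x')
except ValueError as error:
    print(error)  # sam model vit_x does not exist.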
diff --git a/modules/util.py b/modules/util.py
index 4506b392..cd18081b 100644
--- a/modules/util.py
+++ b/modules/util.py
@@ -390,6 +390,9 @@ def get_enabled_loras(loras: list, remove_none=True) -> list:
 def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5,
                                       skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True,
                                       lora_filenames=None) -> tuple[List[Tuple[AnyStr, float]], str]:
+    # prevent unintended side effects when returning without detection
+    loras = loras.copy()
+
     if lora_filenames is None:
         lora_filenames = []
diff --git a/requirements_versions.txt b/requirements_versions.txt
index ebcd0297..9196f1db 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -13,10 +13,10 @@ omegaconf==2.2.3
 gradio==3.41.2
 pygit2==1.12.2
 opencv-contrib-python==4.8.0.74
-diffusers==0.25.1
 httpx==0.24.1
 onnxruntime==1.16.3
 timm==0.9.2
-translators==5.8.9
-rembg==2.0.53
-groundingdino-py==0.4.0
\ No newline at end of file
+translators==5.9.2
+rembg==2.0.57
+groundingdino-py==0.4.0
+segment_anything==1.0
\ No newline at end of file
diff --git a/webui.py b/webui.py
index db1c98d0..fa468352 100644
--- a/webui.py
+++ b/webui.py
@@ -16,6 +16,7 @@ import modules.meta_parser
 import args_manager
 import copy
 import launch
+from extras.inpaint_mask import SAMOptions

 from modules.sdxl_styles import legal_style_names
 from modules.private_logger import get_current_html_path
@@ -89,6 +90,34 @@ def generate_clicked(task: worker.AsyncTask):
     return


+def inpaint_mode_change(mode):
+    assert mode in modules.flags.inpaint_options
+
+    # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+    # inpaint_disable_initial_latent, inpaint_engine,
+    # inpaint_strength, inpaint_respective_field
+
+    if mode == modules.flags.inpaint_option_detail:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
+            False, 'None', 0.5, 0.0
+        ]
+
+    if mode == modules.flags.inpaint_option_modify:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+            True, modules.config.default_inpaint_engine_version, 1.0, 0.0
+        ]
+
+    return [
+        gr.update(visible=False, value=''), gr.update(visible=True),
+        gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+        False, modules.config.default_inpaint_engine_version, 1.0, 0.618
+    ]
+
+
 reload_javascript()

 title = f'Fooocus {fooocus_version.version}'
@@ -146,6 +175,7 @@ with shared.gradio_root:
                     skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
             with gr.Row(elem_classes='advanced_check_row'):
                 input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
+                enhance_checkbox = gr.Checkbox(label='Enhance', value=modules.config.default_enhance_checkbox, container=False, elem_classes='min_check')
                 advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
             with gr.Row(visible=False) as image_input_panel:
                 with gr.Tabs():
@@ -223,44 +253,56 @@ with shared.gradio_root:
                                         choices=flags.inpaint_mask_cloth_category,
                                         value=modules.config.default_inpaint_mask_cloth_category,
                                         visible=False)
-                                    inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False)
+                                    inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible', placeholder='Describe what you want to detect.')
+                                    example_inpaint_mask_dino_prompt_text = gr.Dataset(
samples=modules.config.example_enhance_detection_prompts, + label='Detection Prompt Quick List', + components=[inpaint_mask_dino_prompt_text], + visible=modules.config.default_inpaint_mask_model == 'sam') + example_inpaint_mask_dino_prompt_text.click(lambda x: x[0], + inputs=example_inpaint_mask_dino_prompt_text, + outputs=inpaint_mask_dino_prompt_text, + show_progress=False, queue=False) + with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options: inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model) - inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False) inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05) inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05) + inpaint_mask_sam_max_detections = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True) generate_mask_button = gr.Button(value='Generate mask from image') - def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold): + def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_detections, dino_erode_or_dilate, dino_debug): from extras.inpaint_mask import generate_mask_from_image extras = {} + sam_options = None if mask_model == 'u2net_cloth_seg': extras['cloth_category'] = cloth_category elif mask_model == 'sam': - extras['sam_prompt_text'] = sam_prompt_text - extras['sam_model'] = sam_model - extras['sam_quant'] = sam_quant - extras['box_threshold'] = box_threshold - extras['text_threshold'] = text_threshold + sam_options = SAMOptions( + dino_prompt=dino_prompt_text, + dino_box_threshold=box_threshold, + dino_text_threshold=text_threshold, + dino_erode_or_dilate=dino_erode_or_dilate, + dino_debug=dino_debug, + max_detections=sam_max_detections, + model_type=sam_model + ) - return generate_mask_from_image(image, mask_model, extras) + mask, _, _, _ = generate_mask_from_image(image, mask_model, extras, sam_options) - generate_mask_button.click(fn=generate_mask, - inputs=[ - inpaint_input_image, inpaint_mask_model, - inpaint_mask_cloth_category, - inpaint_mask_sam_prompt_text, - inpaint_mask_sam_model, - inpaint_mask_sam_quant, - inpaint_mask_box_threshold, - inpaint_mask_text_threshold - ], - outputs=inpaint_mask_image, show_progress=True, queue=True) + return mask - inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')], + + inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] + + [gr.update(visible=x == 'sam')] * 2 + + [gr.Dataset.update(visible=x == 'sam', + samples=modules.config.example_enhance_detection_prompts)], inputs=inpaint_mask_model, - outputs=[inpaint_mask_cloth_category, inpaint_mask_sam_prompt_text, inpaint_mask_advanced_options], + outputs=[inpaint_mask_cloth_category, + inpaint_mask_dino_prompt_text, + inpaint_mask_advanced_options, + example_inpaint_mask_dino_prompt_text], queue=False, show_progress=False) with gr.TabItem(label='Describe') as desc_tab: @@ -283,6 +325,12 @@ with shared.gradio_root: desc_input_image.upload(trigger_show_image_properties, inputs=desc_input_image, 
outputs=desc_image_size, show_progress=False, queue=False) + with gr.TabItem(label='Enhance') as enhance_tab: + with gr.Row(): + with gr.Column(): + enhance_input_image = grh.Image(label='Base image for enhance', source='upload', type='numpy') + gr.HTML('\U0001F4D4 Document') + with gr.TabItem(label='Metadata') as metadata_tab: with gr.Column(): metadata_input_image = grh.Image(label='For images created by Fooocus', source='upload', type='filepath') @@ -304,6 +352,153 @@ with shared.gradio_root: metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image, outputs=metadata_json, queue=False, show_progress=True) + with gr.Row(visible=modules.config.default_enhance_checkbox) as enhance_input_panel: + with gr.Tabs(): + with gr.TabItem(label='Upscale or Variation'): + with gr.Row(): + with gr.Column(): + enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list, + value=modules.config.default_enhance_uov_method) + enhance_uov_processing_order = gr.Radio(label='Order of Processing', + info='Use before for enhancement of small details and after for large areas.', + choices=flags.enhancement_uov_processing_order, + value=modules.config.default_enhance_uov_processing_order) + enhance_uov_prompt_type = gr.Radio(label='Prompt', + info='Choose which prompt to use for Upscale or Variation.', + choices=flags.enhancement_uov_prompt_types, + value=modules.config.default_enhance_uov_prompt_type, + visible=modules.config.default_enhance_uov_processing_order == flags.enhancement_uov_after) + + enhance_uov_processing_order.change(lambda x: gr.update(visible=x == flags.enhancement_uov_after), + inputs=enhance_uov_processing_order, + outputs=enhance_uov_prompt_type, + queue=False, show_progress=False) + gr.HTML('\U0001F4D4 Document') + enhance_ctrls = [] + for index in range(modules.config.default_enhance_tabs): + with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item: + enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check', + container=False) + + enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', + info='Use singular whenever possible', + placeholder='Describe what you want to detect.', + interactive=True, + visible=modules.config.default_enhance_inpaint_mask_model == 'sam') + example_enhance_mask_dino_prompt_text = gr.Dataset( + samples=modules.config.example_enhance_detection_prompts, + label='Detection Prompt Quick List', + components=[enhance_mask_dino_prompt_text], + visible=modules.config.default_enhance_inpaint_mask_model == 'sam') + example_enhance_mask_dino_prompt_text.click(lambda x: x[0], + inputs=example_enhance_mask_dino_prompt_text, + outputs=enhance_mask_dino_prompt_text, + show_progress=False, queue=False) + + enhance_prompt = gr.Textbox(label="Enhancement positive prompt", + placeholder="Uses original prompt instead if empty.", + elem_id='enhance_prompt') + enhance_negative_prompt = gr.Textbox(label="Enhancement negative prompt", + placeholder="Uses original negative prompt instead if empty.", + elem_id='enhance_negative_prompt') + + with gr.Accordion("Detection", open=False): + # TODO check if limiting to SAM is better + enhance_mask_model = gr.Dropdown(label='Mask generation model', + choices=flags.inpaint_mask_models, + value=modules.config.default_enhance_inpaint_mask_model) + enhance_mask_cloth_category = gr.Dropdown(label='Cloth category', + choices=flags.inpaint_mask_cloth_category, + value=modules.config.default_inpaint_mask_cloth_category, + 
visible=modules.config.default_enhance_inpaint_mask_model == 'u2net_cloth_seg', + interactive=True) + + with gr.Accordion("SAM Options", + visible=modules.config.default_enhance_inpaint_mask_model == 'sam', + open=False) as sam_options: + enhance_mask_sam_model = gr.Dropdown(label='SAM model', + choices=flags.inpaint_mask_sam_model, + value=modules.config.default_inpaint_mask_sam_model, + interactive=True) + enhance_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, + maximum=1.0, value=0.3, step=0.05, + interactive=True) + enhance_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, + maximum=1.0, value=0.25, step=0.05, + interactive=True) + enhance_mask_sam_max_detections = gr.Slider(label="Maximum number of detections", + info="Set to 0 to detect all", + minimum=0, maximum=10, + value=modules.config.default_sam_max_detections, + step=1, interactive=True) + + with gr.Accordion("Inpaint", visible=True, open=False): + enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options, + value=modules.flags.inpaint_option_default, + label='Method', interactive=True) + enhance_inpaint_disable_initial_latent = gr.Checkbox( + label='Disable initial latent in inpaint', value=False) + enhance_inpaint_engine = gr.Dropdown(label='Inpaint Engine', + value=modules.config.default_inpaint_engine_version, + choices=flags.inpaint_engine_versions, + info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.') + enhance_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength', + minimum=0.0, maximum=1.0, step=0.001, + value=1.0, + info='Same as the denoising strength in A1111 inpaint. ' + 'Only used in inpaint, not used in outpaint. ' + '(Outpaint always use 1.0)') + enhance_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field', + minimum=0.0, maximum=1.0, step=0.001, + value=0.618, + info='The area to inpaint. ' + 'Value 0 is same as "Only Masked" in A1111. ' + 'Value 1 is same as "Whole Image" in A1111. ' + 'Only used in inpaint, not used in outpaint. ' + '(Outpaint always use 1.0)') + enhance_inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate', + minimum=-64, maximum=64, step=1, value=0, + info='Positive value will make white area in the mask larger, ' + 'negative value will make white area smaller. 
' + '(default is 0, always processed before any mask invert)') + enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False) + + gr.HTML('\U0001F4D4 Document') + + enhance_ctrls += [ + enhance_enabled, + enhance_mask_dino_prompt_text, + enhance_prompt, + enhance_negative_prompt, + enhance_mask_model, + enhance_mask_sam_model, + enhance_mask_text_threshold, + enhance_mask_box_threshold, + enhance_mask_sam_max_detections, + enhance_inpaint_disable_initial_latent, + enhance_inpaint_engine, + enhance_inpaint_strength, + enhance_inpaint_respective_field, + enhance_inpaint_erode_or_dilate, + enhance_mask_invert + ] + + enhance_inpaint_mode.input(inpaint_mode_change, inputs=enhance_inpaint_mode, outputs=[ + inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts, + enhance_inpaint_disable_initial_latent, enhance_inpaint_engine, + enhance_inpaint_strength, enhance_inpaint_respective_field + ], show_progress=False, queue=False) + + enhance_mask_model.change( + lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] + + [gr.update(visible=x == 'sam')] * 2 + + [gr.Dataset.update(visible=x == 'sam', + samples=modules.config.example_enhance_detection_prompts)], + inputs=enhance_mask_model, + outputs=[enhance_mask_cloth_category, enhance_mask_dino_prompt_text, sam_options, + example_enhance_mask_dino_prompt_text], + queue=False, show_progress=False) + switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}" down_js = "() => {viewer_to_bottom();}" @@ -316,7 +511,10 @@ with shared.gradio_root: inpaint_tab.select(lambda: 'inpaint', outputs=current_tab, queue=False, _js=down_js, show_progress=False) ip_tab.select(lambda: 'ip', outputs=current_tab, queue=False, _js=down_js, show_progress=False) desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False) + enhance_tab.select(lambda: 'enhance', outputs=current_tab, queue=False, _js=down_js, show_progress=False) metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False) + enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox, + outputs=enhance_input_panel, queue=False, show_progress=False, _js=switch_js) with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column: with gr.Tab(label='Settings'): @@ -379,7 +577,7 @@ with shared.gradio_root: def update_history_link(): if args_manager.args.disable_image_log: return gr.update(value='') - + return gr.update(value=f'\U0001F4DA History Log') history_link = gr.HTML() @@ -544,7 +742,7 @@ with shared.gradio_root: info='Image Prompt parameters are not included. 
Use png and a1111 for compatibility with Civitai.', visible=modules.config.default_save_metadata_to_images) - save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme], + save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme], queue=False, show_progress=False) with gr.Tab(label='Control'): @@ -570,11 +768,15 @@ with shared.gradio_root: with gr.Tab(label='Inpaint'): debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False) + debugging_enhance_masks_checkbox = gr.Checkbox(label='Debug Enhance Masks', value=False, + info='Show enhance masks in preview and final results') + debugging_dino = gr.Checkbox(label='Debug GroundingDINO', value=False, + info='Use GroundingDINO boxes instead of more detailed SAM masks') inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False) inpaint_engine = gr.Dropdown(label='Inpaint Engine', value=modules.config.default_inpaint_engine_version, choices=flags.inpaint_engine_versions, - info='Version of Fooocus inpaint model') + info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.') inpaint_strength = gr.Slider(label='Inpaint Denoising Strength', minimum=0.0, maximum=1.0, step=0.001, value=1.0, info='Same as the denoising strength in A1111 inpaint. ' @@ -590,8 +792,13 @@ with shared.gradio_root: inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate', minimum=-64, maximum=64, step=1, value=0, info='Positive value will make white area in the mask larger, ' - 'negative value will make white area smaller.' - '(default is 0, always process before any mask invert)') + 'negative value will make white area smaller. ' + '(default is 0, always processed before any mask invert)') + dino_erode_or_dilate = gr.Slider(label='GroundingDINO Box Erode or Dilate', + minimum=-64, maximum=64, step=1, value=0, + info='Positive value will make white area in the mask larger, ' + 'negative value will make white area smaller. 
' + '(default is 0, processed before SAM)') inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False) invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False) @@ -701,46 +908,26 @@ with shared.gradio_root: adm_scaler_negative, refiner_switch, refiner_model, sampler_name, scheduler_name, adaptive_cfg, refiner_swap_method, negative_prompt, disable_intermediate_results ], queue=False, show_progress=False) - + output_format.input(lambda x: gr.update(output_format=x), inputs=output_format) - + advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column, queue=False, show_progress=False) \ .then(fn=lambda: None, _js='refresh_grid_delayed', queue=False, show_progress=False) - def inpaint_mode_change(mode): - assert mode in modules.flags.inpaint_options - - # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts, - # inpaint_disable_initial_latent, inpaint_engine, - # inpaint_strength, inpaint_respective_field - - if mode == modules.flags.inpaint_option_detail: - return [ - gr.update(visible=True), gr.update(visible=False, value=[]), - gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts), - False, 'None', 0.5, 0.0 - ] - - if mode == modules.flags.inpaint_option_modify: - return [ - gr.update(visible=True), gr.update(visible=False, value=[]), - gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts), - True, modules.config.default_inpaint_engine_version, 1.0, 0.0 - ] - - return [ - gr.update(visible=False, value=''), gr.update(visible=True), - gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts), - False, modules.config.default_inpaint_engine_version, 1.0, 0.618 - ] - inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[ inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field ], show_progress=False, queue=False) + generate_mask_button.click(fn=generate_mask, + inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category, + inpaint_mask_dino_prompt_text, inpaint_mask_sam_model, + inpaint_mask_box_threshold, inpaint_mask_text_threshold, + inpaint_mask_sam_max_detections, dino_erode_or_dilate, debugging_dino], + outputs=inpaint_mask_image, show_progress=True, queue=True) + ctrls = [currentTask, generate_image_grid] ctrls += [ prompt, negative_prompt, translate_prompts, style_selections, @@ -766,6 +953,10 @@ with shared.gradio_root: ctrls += [save_metadata_to_images, metadata_scheme] ctrls += ip_ctrls + ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox, + enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order, + enhance_uov_prompt_type] + ctrls += enhance_ctrls def parse_meta(raw_prompt_txt, is_generating): loaded_json = None @@ -828,15 +1019,18 @@ with shared.gradio_root: desc_btn.click(trigger_describe, inputs=[desc_method, desc_input_image], outputs=[prompt, style_selections], show_progress=True, queue=True) - if args_manager.args.enable_describe_uov_image: - def trigger_uov_describe(mode, img, prompt): + if args_manager.args.enable_auto_describe_image: + def trigger_auto_describe(mode, img, prompt): # keep prompt if not empty if prompt == '': return trigger_describe(mode, img) return gr.update(), gr.update() - uov_input_image.upload(trigger_uov_describe, inputs=[desc_method, uov_input_image, prompt], - outputs=[prompt, 
style_selections], show_progress=True, queue=True)
+
+        enhance_input_image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox, queue=False, show_progress=False) \
+            .then(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt],
+                  outputs=[prompt, style_selections], show_progress=True, queue=True)


 def dump_default_english_config():
     from modules.localization import dump_english_config
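# A sketch, not part of the diff: why parse_lora_references_from_prompt in
# modules/util.py now copies its `loras` argument before doing anything else.
# Without the copy, in-place mutation leaks back into the caller's list. The
# hypothetical append below stands in for the real parsing logic.

def parse_in_place(loras):
    loras.append(('detected_lora.safetensors', 0.8))
    return loras

def parse_with_copy(loras):
    loras = loras.copy()  # prevent unintended side effects, as in the diff
    loras.append(('detected_lora.safetensors', 0.8))
    return loras

caller_loras = [('style.safetensors', 1.0)]
parse_in_place(caller_loras)
print(len(caller_loras))  # 2 -- the caller's list was mutated

caller_loras = [('style.safetensors', 1.0)]
result = parse_with_copy(caller_loras)
print(len(caller_loras), len(result))  # 1 2 -- the caller's list is untouched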