diff --git a/args_manager.py b/args_manager.py
index 5a2b37c9..08e4dc6e 100644
--- a/args_manager.py
+++ b/args_manager.py
@@ -28,8 +28,8 @@ args_parser.parser.add_argument("--disable-metadata", action='store_true',
args_parser.parser.add_argument("--disable-preset-download", action='store_true',
help="Disables downloading models for presets", default=False)
-args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true',
- help="Disables automatic description of uov images when prompt is empty", default=False)
+args_parser.parser.add_argument("--enable-auto-describe-image", action='store_true',
+                                help="Enables automatic description of uov and enhance images when prompt is empty", default=False)
args_parser.parser.add_argument("--always-download-new-model", action='store_true',
help="Always download newer models ", default=False)
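The renamed flag keeps argparse's store_true semantics. A minimal, self-contained sketch of a hypothetical consumer (the gating condition and names below are illustrative only, not part of this diff):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--enable-auto-describe-image", action='store_true',
                    help="Enables automatic description of uov and enhance images when prompt is empty",
                    default=False)
args = parser.parse_args(["--enable-auto-describe-image"])

prompt = ''  # an empty prompt is the trigger condition named in the help text
if args.enable_auto_describe_image and prompt == '':
    print("Would auto-describe the uov/enhance input image.")
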
diff --git a/css/style.css b/css/style.css
index 6ed0f628..ad9de6f2 100644
--- a/css/style.css
+++ b/css/style.css
@@ -99,7 +99,7 @@ div:has(> #positive_prompt) {
}
.advanced_check_row {
- width: 250px !important;
+ width: 330px !important;
}
.min_check {
diff --git a/experiments_mask_generation.py b/experiments_mask_generation.py
new file mode 100644
index 00000000..a27eb39c
--- /dev/null
+++ b/experiments_mask_generation.py
@@ -0,0 +1,24 @@
+# https://github.com/sail-sg/EditAnything/blob/main/sam2groundingdino_edit.py
+
+import numpy as np
+from PIL import Image
+
+from extras.inpaint_mask import SAMOptions, generate_mask_from_image
+
+original_image = Image.open('cat.webp')
+image = np.array(original_image, dtype=np.uint8)
+
+sam_options = SAMOptions(
+ dino_prompt='eye',
+ dino_box_threshold=0.3,
+ dino_text_threshold=0.25,
+ dino_erode_or_dilate=0,
+ dino_debug=False,
+ max_detections=2,
+ model_type='vit_b'
+)
+
+mask_image, _, _, _ = generate_mask_from_image(image, sam_options=sam_options)
+
+merged_masks_img = Image.fromarray(mask_image)
+merged_masks_img.show()
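generate_mask_from_image also returns three detection counters next to the mask. A variant of the call above that surfaces them and saves the result instead of opening a viewer (same assumptions: a local cat.webp and downloaded model weights):

mask_image, dino_count, sam_count, on_mask_count = generate_mask_from_image(image, sam_options=sam_options)
print(f'DINO boxes: {dino_count}, SAM masks: {sam_count}, masks merged: {on_mask_count}')
Image.fromarray(mask_image).save('cat_mask.png')
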
diff --git a/extras/GroundingDINO/util/inference.py b/extras/GroundingDINO/util/inference.py
index 259094f2..bc8b6429 100644
--- a/extras/GroundingDINO/util/inference.py
+++ b/extras/GroundingDINO/util/inference.py
@@ -25,7 +25,7 @@ class GroundingDinoModel(Model):
caption: str,
box_threshold: float = 0.35,
text_threshold: float = 0.25
- ) -> Tuple[sv.Detections, List[str]]:
+ ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
if self.model is None:
filename = load_file_from_url(
url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
@@ -56,7 +56,7 @@ class GroundingDinoModel(Model):
source_w=source_w,
boxes=boxes,
logits=logits)
- return detections, phrases
+ return detections, boxes, logits, phrases
def predict(
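With the widened return type, every caller of default_groundingdino must now unpack four values instead of two. A sketch mirroring the updated call site in extras/inpaint_mask.py (the image and thresholds are placeholders):

import numpy as np
from extras.GroundingDINO.util.inference import default_groundingdino

image = np.zeros((512, 512, 3), dtype=np.uint8)  # placeholder RGB image
detections, boxes, logits, phrases = default_groundingdino(
    image=image,
    caption='eye',
    box_threshold=0.3,
    text_threshold=0.25
)
# boxes: normalized (cx, cy, w, h) tensors; logits: per-box confidence scores
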
diff --git a/extras/censor.py b/extras/censor.py
index 45617fd8..c5dea3fd 100644
--- a/extras/censor.py
+++ b/extras/censor.py
@@ -41,7 +41,7 @@ class Censor:
model_management.load_model_gpu(self.safety_checker_model)
single = False
- if not isinstance(images, list) or isinstance(images, np.ndarray):
+ if not isinstance(images, (list, np.ndarray)):
images = [images]
single = True
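The rewritten guard changes how np.ndarray inputs are treated: the old predicate wrapped every ndarray (even a batch) in a single-element list, while the new one lets ndarrays pass through like lists. A quick check of both predicates side by side:

import numpy as np

batch = np.zeros((2, 8, 8, 3), dtype=np.uint8)  # an ndarray batch of two images
old = not isinstance(batch, list) or isinstance(batch, np.ndarray)  # True -> was wrapped as [batch]
new = not isinstance(batch, (list, np.ndarray))                     # False -> kept as a batch
print(old, new)  # True False
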
diff --git a/extras/inpaint_mask.py b/extras/inpaint_mask.py
index 4999f258..086b7da6 100644
--- a/extras/inpaint_mask.py
+++ b/extras/inpaint_mask.py
@@ -1,42 +1,130 @@
-from PIL import Image
+import sys
+
+import modules.config
import numpy as np
import torch
-from rembg import remove, new_session
from extras.GroundingDINO.util.inference import default_groundingdino
-
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+from extras.sam.predictor import SamPredictor
+from rembg import remove, new_session
+from segment_anything import sam_model_registry
+from segment_anything.utils.amg import remove_small_regions
-def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold):
+class SAMOptions:
+ def __init__(self,
+ # GroundingDINO
+ dino_prompt: str = '',
+ dino_box_threshold=0.3,
+ dino_text_threshold=0.25,
+ dino_erode_or_dilate=0,
+ dino_debug=False,
- # run grounding dino model
- boxes, _ = default_groundingdino(
- image=np.array(input_image),
- caption=text_prompt,
- box_threshold=box_threshold,
- text_threshold=text_threshold
- )
-
- return boxes.xyxy
+ # SAM
+ max_detections=2,
+ model_type='vit_b'
+ ):
+ self.dino_prompt = dino_prompt
+ self.dino_box_threshold = dino_box_threshold
+ self.dino_text_threshold = dino_text_threshold
+ self.dino_erode_or_dilate = dino_erode_or_dilate
+ self.dino_debug = dino_debug
+ self.max_detections = max_detections
+ self.model_type = model_type
-def generate_mask_from_image(image, mask_model, extras):
+def optimize_masks(masks: torch.Tensor) -> torch.Tensor:
+ """
+ removes small disconnected regions and holes
+ """
+ fine_masks = []
+ for mask in masks.to('cpu').numpy(): # masks: [num_masks, 1, h, w]
+ fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0])
+ masks = np.stack(fine_masks, axis=0)[:, np.newaxis]
+ return torch.from_numpy(masks)
+
+
+def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=None,
+                             sam_options: SAMOptions | None = SAMOptions()) -> tuple[np.ndarray | None, int | None, int | None, int | None]:
+ dino_detection_count = 0
+ sam_detection_count = 0
+ sam_detection_on_mask_count = 0
+
if image is None:
- return
+ return None, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
+ if extras is None:
+ extras = {}
if 'image' in image:
image = image['image']
- if mask_model == 'sam':
- boxes = run_grounded_sam(Image.fromarray(image), extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
- boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes
- extras['sam_prompt'] = []
- for idx, box in enumerate(boxes):
- extras['sam_prompt'] += [{"type": "rectangle", "data": box.tolist()}]
+ if mask_model != 'sam' or sam_options is None:
+ result = remove(
+ image,
+ session=new_session(mask_model, **extras),
+ only_mask=True,
+ **extras
+ )
- return remove(
- image,
- session=new_session(mask_model, **extras),
- only_mask=True,
- **extras
+ return result, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
+ detections, boxes, logits, phrases = default_groundingdino(
+ image=image,
+ caption=sam_options.dino_prompt,
+ box_threshold=sam_options.dino_box_threshold,
+ text_threshold=sam_options.dino_text_threshold
)
+
+ H, W = image.shape[0], image.shape[1]
+ boxes = boxes * torch.Tensor([W, H, W, H])
+ boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
+ boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
+
+ sam_checkpoint = modules.config.download_sam_model(sam_options.model_type)
+ sam = sam_model_registry[sam_options.model_type](checkpoint=sam_checkpoint)
+
+ sam_predictor = SamPredictor(sam)
+ final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
+ dino_detection_count = boxes.size(0)
+
+ if dino_detection_count > 0:
+ sam_predictor.set_image(image)
+
+ if sam_options.dino_erode_or_dilate != 0:
+ for index in range(boxes.size(0)):
+ assert boxes.size(1) == 4
+ boxes[index][0] -= sam_options.dino_erode_or_dilate
+ boxes[index][1] -= sam_options.dino_erode_or_dilate
+ boxes[index][2] += sam_options.dino_erode_or_dilate
+ boxes[index][3] += sam_options.dino_erode_or_dilate
+
+ if sam_options.dino_debug:
+ from PIL import ImageDraw, Image
+ debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+ draw = ImageDraw.Draw(debug_dino_image)
+ for box in boxes.numpy():
+ draw.rectangle(box.tolist(), fill="white")
+ return np.array(debug_dino_image), dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
+ transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
+ masks, _, _ = sam_predictor.predict_torch(
+ point_coords=None,
+ point_labels=None,
+ boxes=transformed_boxes,
+ multimask_output=False,
+ )
+
+ masks = optimize_masks(masks)
+ sam_detection_count = len(masks)
+ if sam_options.max_detections == 0:
+ sam_options.max_detections = sys.maxsize
+ sam_objects = min(len(logits), sam_options.max_detections)
+ for obj_ind in range(sam_objects):
+ mask_tensor = masks[obj_ind][0]
+ final_mask_tensor += mask_tensor
+ sam_detection_on_mask_count += 1
+
+ final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy()
+ mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255
+ mask_image = np.array(mask_image, dtype=np.uint8)
+ return mask_image, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
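For reference, GroundingDINO emits boxes as normalized (cx, cy, w, h); the two in-place updates in generate_mask_from_image convert them to absolute (x1, y1, x2, y2) pixel coordinates before they are handed to SAM. The same arithmetic on one concrete box:

import torch

W, H = 640, 480
boxes = torch.tensor([[0.5, 0.5, 0.25, 0.5]])   # normalized (cx, cy, w, h)
boxes = boxes * torch.Tensor([W, H, W, H])      # absolute (cx, cy, w, h) = (320, 240, 160, 240)
boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2  # (x1, y1) = (cx - w/2, cy - h/2) = (240, 120)
boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]      # (x2, y2) = (x1 + w, y1 + h) = (400, 360)
print(boxes)  # tensor([[240., 120., 400., 360.]])
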
diff --git a/extras/sam/predictor.py b/extras/sam/predictor.py
new file mode 100644
index 00000000..337c549b
--- /dev/null
+++ b/extras/sam/predictor.py
@@ -0,0 +1,288 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+from ldm_patched.modules import model_management
+from ldm_patched.modules.model_patcher import ModelPatcher
+
+from segment_anything.modeling import Sam
+
+from typing import Optional, Tuple
+
+from segment_anything.utils.transforms import ResizeLongestSide
+
+
+class SamPredictor:
+ def __init__(
+ self,
+ model: Sam,
+ load_device=model_management.text_encoder_device(),
+ offload_device=model_management.text_encoder_offload_device()
+ ) -> None:
+ """
+ Uses SAM to calculate the image embedding for an image, and then
+ allow repeated, efficient mask prediction given prompts.
+
+ Arguments:
+ model (Sam): The model to use for mask prediction.
+ """
+ super().__init__()
+
+ self.load_device = load_device
+ self.offload_device = offload_device
+ # can't use model.half() here as slow_conv2d_cpu is not implemented for half
+ model.to(self.offload_device)
+
+ self.patcher = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)
+
+ self.transform = ResizeLongestSide(model.image_encoder.img_size)
+ self.reset_image()
+
+ def set_image(
+ self,
+ image: np.ndarray,
+ image_format: str = "RGB",
+ ) -> None:
+ """
+ Calculates the image embeddings for the provided image, allowing
+ masks to be predicted with the 'predict' method.
+
+ Arguments:
+ image (np.ndarray): The image for calculating masks. Expects an
+ image in HWC uint8 format, with pixel values in [0, 255].
+ image_format (str): The color format of the image, in ['RGB', 'BGR'].
+ """
+ assert image_format in [
+ "RGB",
+ "BGR",
+ ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
+ if image_format != self.patcher.model.image_format:
+ image = image[..., ::-1]
+
+ # Transform the image to the form expected by the model
+ input_image = self.transform.apply_image(image)
+ input_image_torch = torch.as_tensor(input_image, device=self.load_device)
+ input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+
+ self.set_torch_image(input_image_torch, image.shape[:2])
+
+ @torch.no_grad()
+ def set_torch_image(
+ self,
+ transformed_image: torch.Tensor,
+ original_image_size: Tuple[int, ...],
+ ) -> None:
+ """
+ Calculates the image embeddings for the provided image, allowing
+ masks to be predicted with the 'predict' method. Expects the input
+ image to be already transformed to the format expected by the model.
+
+ Arguments:
+ transformed_image (torch.Tensor): The input image, with shape
+ 1x3xHxW, which has been transformed with ResizeLongestSide.
+ original_image_size (tuple(int, int)): The size of the image
+ before transformation, in (H, W) format.
+ """
+ assert (
+ len(transformed_image.shape) == 4
+ and transformed_image.shape[1] == 3
+ and max(*transformed_image.shape[2:]) == self.patcher.model.image_encoder.img_size
+ ), f"set_torch_image input must be BCHW with long side {self.patcher.model.image_encoder.img_size}."
+ self.reset_image()
+
+ self.original_size = original_image_size
+ self.input_size = tuple(transformed_image.shape[-2:])
+ model_management.load_model_gpu(self.patcher)
+ input_image = self.patcher.model.preprocess(transformed_image.to(self.load_device))
+ self.features = self.patcher.model.image_encoder(input_image)
+ self.is_image_set = True
+
+ def predict(
+ self,
+ point_coords: Optional[np.ndarray] = None,
+ point_labels: Optional[np.ndarray] = None,
+ box: Optional[np.ndarray] = None,
+ mask_input: Optional[np.ndarray] = None,
+ multimask_output: bool = True,
+ return_logits: bool = False,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Predict masks for the given input prompts, using the currently set image.
+
+ Arguments:
+ point_coords (np.ndarray or None): A Nx2 array of point prompts to the
+ model. Each point is in (X,Y) in pixels.
+ point_labels (np.ndarray or None): A length N array of labels for the
+ point prompts. 1 indicates a foreground point and 0 indicates a
+ background point.
+ box (np.ndarray or None): A length 4 array given a box prompt to the
+ model, in XYXY format.
+ mask_input (np.ndarray): A low resolution mask input to the model, typically
+ coming from a previous prediction iteration. Has form 1xHxW, where
+ for SAM, H=W=256.
+ multimask_output (bool): If true, the model will return three masks.
+ For ambiguous input prompts (such as a single click), this will often
+ produce better masks than a single prediction. If only a single
+ mask is needed, the model's predicted quality score can be used
+ to select the best mask. For non-ambiguous prompts, such as multiple
+ input prompts, multimask_output=False can give better results.
+ return_logits (bool): If true, returns un-thresholded masks logits
+ instead of a binary mask.
+
+ Returns:
+ (np.ndarray): The output masks in CxHxW format, where C is the
+ number of masks, and (H, W) is the original image size.
+ (np.ndarray): An array of length C containing the model's
+ predictions for the quality of each mask.
+ (np.ndarray): An array of shape CxHxW, where C is the number
+ of masks and H=W=256. These low resolution logits can be passed to
+ a subsequent iteration as mask input.
+ """
+ if not self.is_image_set:
+ raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+
+ # Transform input prompts
+ coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
+ if point_coords is not None:
+ assert (
+ point_labels is not None
+ ), "point_labels must be supplied if point_coords is supplied."
+ point_coords = self.transform.apply_coords(point_coords, self.original_size)
+ coords_torch = torch.as_tensor(point_coords, dtype=torch.float, device=self.load_device)
+ labels_torch = torch.as_tensor(point_labels, dtype=torch.int, device=self.load_device)
+ coords_torch, labels_torch = coords_torch[None, :, :], labels_torch[None, :]
+ if box is not None:
+ box = self.transform.apply_boxes(box, self.original_size)
+ box_torch = torch.as_tensor(box, dtype=torch.float, device=self.load_device)
+ box_torch = box_torch[None, :]
+ if mask_input is not None:
+ mask_input_torch = torch.as_tensor(mask_input, dtype=torch.float, device=self.load_device)
+ mask_input_torch = mask_input_torch[None, :, :, :]
+
+ masks, iou_predictions, low_res_masks = self.predict_torch(
+ coords_torch,
+ labels_torch,
+ box_torch,
+ mask_input_torch,
+ multimask_output,
+ return_logits=return_logits,
+ )
+
+ masks = masks[0].detach().cpu().numpy()
+ iou_predictions = iou_predictions[0].detach().cpu().numpy()
+ low_res_masks = low_res_masks[0].detach().cpu().numpy()
+ return masks, iou_predictions, low_res_masks
+
+ @torch.no_grad()
+ def predict_torch(
+ self,
+ point_coords: Optional[torch.Tensor],
+ point_labels: Optional[torch.Tensor],
+ boxes: Optional[torch.Tensor] = None,
+ mask_input: Optional[torch.Tensor] = None,
+ multimask_output: bool = True,
+ return_logits: bool = False,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Predict masks for the given input prompts, using the currently set image.
+ Input prompts are batched torch tensors and are expected to already be
+ transformed to the input frame using ResizeLongestSide.
+
+ Arguments:
+ point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
+ model. Each point is in (X,Y) in pixels.
+ point_labels (torch.Tensor or None): A BxN array of labels for the
+ point prompts. 1 indicates a foreground point and 0 indicates a
+ background point.
+ box (np.ndarray or None): A Bx4 array given a box prompt to the
+ model, in XYXY format.
+ mask_input (np.ndarray): A low resolution mask input to the model, typically
+ coming from a previous prediction iteration. Has form Bx1xHxW, where
+ for SAM, H=W=256. Masks returned by a previous iteration of the
+ predict method do not need further transformation.
+ multimask_output (bool): If true, the model will return three masks.
+ For ambiguous input prompts (such as a single click), this will often
+ produce better masks than a single prediction. If only a single
+ mask is needed, the model's predicted quality score can be used
+ to select the best mask. For non-ambiguous prompts, such as multiple
+ input prompts, multimask_output=False can give better results.
+ return_logits (bool): If true, returns un-thresholded masks logits
+ instead of a binary mask.
+
+ Returns:
+ (torch.Tensor): The output masks in BxCxHxW format, where C is the
+ number of masks, and (H, W) is the original image size.
+ (torch.Tensor): An array of shape BxC containing the model's
+ predictions for the quality of each mask.
+ (torch.Tensor): An array of shape BxCxHxW, where C is the number
+ of masks and H=W=256. These low res logits can be passed to
+ a subsequent iteration as mask input.
+ """
+ if not self.is_image_set:
+ raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+
+ if point_coords is not None:
+ points = (point_coords.to(self.load_device), point_labels.to(self.load_device))
+ else:
+ points = None
+
+ # load
+ if boxes is not None:
+ boxes = boxes.to(self.load_device)
+ if mask_input is not None:
+ mask_input = mask_input.to(self.load_device)
+ model_management.load_model_gpu(self.patcher)
+
+ # Embed prompts
+ sparse_embeddings, dense_embeddings = self.patcher.model.prompt_encoder(
+ points=points,
+ boxes=boxes,
+ masks=mask_input,
+ )
+
+ # Predict masks
+ low_res_masks, iou_predictions = self.patcher.model.mask_decoder(
+ image_embeddings=self.features,
+ image_pe=self.patcher.model.prompt_encoder.get_dense_pe(),
+ sparse_prompt_embeddings=sparse_embeddings,
+ dense_prompt_embeddings=dense_embeddings,
+ multimask_output=multimask_output,
+ )
+
+ # Upscale the masks to the original image resolution
+ masks = self.patcher.model.postprocess_masks(low_res_masks, self.input_size, self.original_size)
+
+ if not return_logits:
+ masks = masks > self.patcher.model.mask_threshold
+
+ return masks, iou_predictions, low_res_masks
+
+ def get_image_embedding(self) -> torch.Tensor:
+ """
+ Returns the image embeddings for the currently set image, with
+ shape 1xCxHxW, where C is the embedding dimension and (H,W) are
+ the embedding spatial dimension of SAM (typically C=256, H=W=64).
+ """
+ if not self.is_image_set:
+ raise RuntimeError(
+ "An image must be set with .set_image(...) to generate an embedding."
+ )
+ assert self.features is not None, "Features must exist if an image has been set."
+ return self.features
+
+ @property
+ def device(self) -> torch.device:
+ return self.patcher.model.device
+
+ def reset_image(self) -> None:
+ """Resets the currently set image."""
+ self.is_image_set = False
+ self.features = None
+ self.orig_h = None
+ self.orig_w = None
+ self.input_h = None
+ self.input_w = None
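Usage of the patched predictor follows the upstream segment_anything API: embed an image once with set_image, then prompt repeatedly. A minimal sketch, assuming a vit_b checkpoint already downloaded to a placeholder path:

import numpy as np
from segment_anything import sam_model_registry
from extras.sam.predictor import SamPredictor

sam = sam_model_registry['vit_b'](checkpoint='sam_vit_b.pth')  # placeholder checkpoint path
predictor = SamPredictor(sam)
predictor.set_image(np.zeros((480, 640, 3), dtype=np.uint8))   # HWC uint8, RGB
masks, scores, low_res_logits = predictor.predict(
    box=np.array([240, 120, 400, 360]),  # XYXY box prompt
    multimask_output=False,
)
print(masks.shape, scores.shape)
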
diff --git a/fooocus_version.py b/fooocus_version.py
index 106c67f2..65912fab 100644
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '2.4.3 (mashb1t)'
+version = '2.5.0-rc6 (mashb1t)'
diff --git a/language/en.json b/language/en.json
index a5f1e271..da8f2271 100644
--- a/language/en.json
+++ b/language/en.json
@@ -44,14 +44,7 @@
"Top": "Top",
"Bottom": "Bottom",
"* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)",
- "Mask generation model": "Mask generation model",
- "Cloth category": "Cloth category",
- "Segmentation prompt": "Segmentation prompt",
"Advanced options": "Advanced options",
- "SAM model": "SAM model",
- "Quantization": "Quantization",
- "Box Threshold": "Box Threshold",
- "Text Threshold": "Text Threshold",
"Generate mask from image": "Generate mask from image",
"Setting": "Setting",
"Style": "Style",
@@ -377,10 +370,14 @@
"Disable preview during generation.": "Disable preview during generation.",
"Disable Intermediate Results": "Disable Intermediate Results",
"Disable intermediate results during generation, only show final gallery.": "Disable intermediate results during generation, only show final gallery.",
+ "Debug Inpaint Preprocessing": "Debug Inpaint Preprocessing",
+ "Debug GroundingDINO": "Debug GroundingDINO",
+ "Used for SAM object detection and box generation": "Used for SAM object detection and box generation",
+ "GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate",
"Inpaint Engine": "Inpaint Engine",
"v1": "v1",
- "Version of Fooocus inpaint model": "Version of Fooocus inpaint model",
"v2.5": "v2.5",
+ "v2.6": "v2.6",
"Control Debug": "Control Debug",
"Debug Preprocessors": "Debug Preprocessors",
"Mixing Image Prompt and Vary/Upscale": "Mixing Image Prompt and Vary/Upscale",
@@ -410,5 +407,63 @@
"Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.": "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.",
"fooocus (json)": "fooocus (json)",
"a1111 (plain text)": "a1111 (plain text)",
- "Unsupported image type in input": "Unsupported image type in input"
+ "Unsupported image type in input": "Unsupported image type in input",
+ "Enhance": "Enhance",
+ "Detection prompt": "Detection prompt",
+ "Detection Prompt Quick List": "Detection Prompt Quick List",
+ "Maximum number of detections": "Maximum number of detections",
+ "Base image for enhance": "Base image for enhance",
+ "Order of Processing": "Order of Processing",
+ "Use before for enhancement of small details and after for large areas.": "Use before for enhancement of small details and after for large areas.",
+ "Before First Enhancement": "Before First Enhancement",
+ "After Last Enhancement": "After Last Enhancement",
+ "Prompt Type": "Prompt Type",
+ "Choose which prompt to use for Upscale or Variation.": "Choose which prompt to use for Upscale or Variation.",
+ "Original Prompts": "Original Prompts",
+ "Last Filled Enhancement Prompts": "Last Filled Enhancement Prompts",
+ "Enable": "Enable",
+ "Describe what you want to detect.": "Describe what you want to detect.",
+ "Enhancement positive prompt": "Enhancement positive prompt",
+ "Uses original prompt instead if empty.": "Uses original prompt instead if empty.",
+ "Enhancement negative prompt": "Enhancement negative prompt",
+ "Uses original negative prompt instead if empty.": "Uses original negative prompt instead if empty.",
+ "Detection": "Detection",
+ "u2net": "u2net",
+ "u2netp": "u2netp",
+ "u2net_human_seg": "u2net_human_seg",
+ "u2net_cloth_seg": "u2net_cloth_seg",
+ "silueta": "silueta",
+ "isnet-general-use": "isnet-general-use",
+ "isnet-anime": "isnet-anime",
+ "sam": "sam",
+ "Mask generation model": "Mask generation model",
+ "Cloth category": "Cloth category",
+ "Use singular whenever possible": "Use singular whenever possible",
+ "full": "full",
+ "upper": "upper",
+ "lower": "lower",
+ "SAM Options": "SAM Options",
+ "SAM model": "SAM model",
+ "vit_b": "vit_b",
+ "vit_l": "vit_l",
+ "vit_h": "vit_h",
+ "Box Threshold": "Box Threshold",
+ "Text Threshold": "Text Threshold",
+ "Set to 0 to detect all": "Set to 0 to detect all",
+ "Inpaint": "Inpaint",
+ "Inpaint or Outpaint (default)": "Inpaint or Outpaint (default)",
+ "Improve Detail (face, hand, eyes, etc.)": "Improve Detail (face, hand, eyes, etc.)",
+ "Modify Content (add objects, change background, etc.)": "Modify Content (add objects, change background, etc.)",
+ "Disable initial latent in inpaint": "Disable initial latent in inpaint",
+ "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.",
+ "Inpaint Denoising Strength": "Inpaint Denoising Strength",
+ "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
+ "Inpaint Respective Field": "Inpaint Respective Field",
+ "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
+ "Mask Erode or Dilate": "Mask Erode or Dilate",
+ "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)": "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)",
+ "Invert Mask": "Invert Mask",
+ "Debug Enhance Masks": "Debug Enhance Masks",
+ "Show enhance masks in preview and final results": "Show enhance masks in preview and final results",
+ "Use GroundingDINO boxes instead of more detailed SAM masks": "Use GroundingDINO boxes instead of more detailed SAM masks"
}
\ No newline at end of file
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 92831427..a9d45086 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -1,23 +1,166 @@
import threading
-import os
-import re
+
+from extras.inpaint_mask import generate_mask_from_image, SAMOptions
from modules.patch import PatchSettings, patch_settings, patch_all
+import modules.config
patch_all()
class AsyncTask:
def __init__(self, args):
- self.args = args
+ from modules.flags import Performance, MetadataScheme, ip_list, controlnet_image_count
+ from modules.util import get_enabled_loras
+ from modules.config import default_max_lora_number
+ import args_manager
+
+ self.args = args.copy()
self.yields = []
self.results = []
self.last_stop = False
self.processing = False
+ self.performance_loras = []
+
+ if len(args) == 0:
+ return
+
+ args.reverse()
+ self.generate_image_grid = args.pop()
+ self.prompt = args.pop()
+ self.negative_prompt = args.pop()
+ self.translate_prompts = args.pop()
+ self.style_selections = args.pop()
+
+ self.performance_selection = Performance(args.pop())
+ self.steps = self.performance_selection.steps()
+ self.original_steps = self.steps
+
+ self.aspect_ratios_selection = args.pop()
+ self.image_number = args.pop()
+ self.output_format = args.pop()
+ self.seed = int(args.pop())
+ self.read_wildcards_in_order = args.pop()
+ self.sharpness = args.pop()
+ self.cfg_scale = args.pop()
+ self.base_model_name = args.pop()
+ self.refiner_model_name = args.pop()
+ self.refiner_switch = args.pop()
+ self.loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in
+ range(default_max_lora_number)])
+ self.input_image_checkbox = args.pop()
+ self.current_tab = args.pop()
+ self.uov_method = args.pop()
+ self.uov_input_image = args.pop()
+ self.outpaint_selections = args.pop()
+ self.inpaint_input_image = args.pop()
+ self.inpaint_additional_prompt = args.pop()
+ self.inpaint_mask_image_upload = args.pop()
+
+ self.disable_preview = args.pop()
+ self.disable_intermediate_results = args.pop()
+ self.disable_seed_increment = args.pop()
+ self.black_out_nsfw = args.pop()
+ self.adm_scaler_positive = args.pop()
+ self.adm_scaler_negative = args.pop()
+ self.adm_scaler_end = args.pop()
+ self.adaptive_cfg = args.pop()
+ self.clip_skip = args.pop()
+ self.sampler_name = args.pop()
+ self.scheduler_name = args.pop()
+ self.vae_name = args.pop()
+ self.overwrite_step = args.pop()
+ self.overwrite_switch = args.pop()
+ self.overwrite_width = args.pop()
+ self.overwrite_height = args.pop()
+ self.overwrite_vary_strength = args.pop()
+ self.overwrite_upscale_strength = args.pop()
+ self.mixing_image_prompt_and_vary_upscale = args.pop()
+ self.mixing_image_prompt_and_inpaint = args.pop()
+ self.debugging_cn_preprocessor = args.pop()
+ self.skipping_cn_preprocessor = args.pop()
+ self.canny_low_threshold = args.pop()
+ self.canny_high_threshold = args.pop()
+ self.refiner_swap_method = args.pop()
+ self.controlnet_softness = args.pop()
+ self.freeu_enabled = args.pop()
+ self.freeu_b1 = args.pop()
+ self.freeu_b2 = args.pop()
+ self.freeu_s1 = args.pop()
+ self.freeu_s2 = args.pop()
+ self.debugging_inpaint_preprocessor = args.pop()
+ self.inpaint_disable_initial_latent = args.pop()
+ self.inpaint_engine = args.pop()
+ self.inpaint_strength = args.pop()
+ self.inpaint_respective_field = args.pop()
+ self.inpaint_mask_upload_checkbox = args.pop()
+ self.invert_mask_checkbox = args.pop()
+ self.inpaint_erode_or_dilate = args.pop()
+ self.save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False
+ self.metadata_scheme = MetadataScheme(
+ args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS
+
+ self.cn_tasks = {x: [] for x in ip_list}
+ for _ in range(controlnet_image_count):
+ cn_img = args.pop()
+ cn_stop = args.pop()
+ cn_weight = args.pop()
+ cn_type = args.pop()
+ if cn_img is not None:
+ self.cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])
+
+ self.debugging_dino = args.pop()
+ self.dino_erode_or_dilate = args.pop()
+ self.debugging_enhance_masks_checkbox = args.pop()
+
+ self.enhance_input_image = args.pop()
+ self.enhance_checkbox = args.pop()
+ self.enhance_uov_method = args.pop()
+ self.enhance_uov_processing_order = args.pop()
+ self.enhance_uov_prompt_type = args.pop()
+ self.enhance_ctrls = []
+ for _ in range(modules.config.default_enhance_tabs):
+ enhance_enabled = args.pop()
+ enhance_mask_dino_prompt_text = args.pop()
+ enhance_prompt = args.pop()
+ enhance_negative_prompt = args.pop()
+ enhance_mask_model = args.pop()
+ enhance_mask_sam_model = args.pop()
+ enhance_mask_text_threshold = args.pop()
+ enhance_mask_box_threshold = args.pop()
+ enhance_mask_sam_max_detections = args.pop()
+ enhance_inpaint_disable_initial_latent = args.pop()
+ enhance_inpaint_engine = args.pop()
+ enhance_inpaint_strength = args.pop()
+ enhance_inpaint_respective_field = args.pop()
+ enhance_inpaint_erode_or_dilate = args.pop()
+ enhance_mask_invert = args.pop()
+ if enhance_enabled:
+ self.enhance_ctrls.append([
+ enhance_mask_dino_prompt_text,
+ enhance_prompt,
+ enhance_negative_prompt,
+ enhance_mask_model,
+ enhance_mask_sam_model,
+ enhance_mask_text_threshold,
+ enhance_mask_box_threshold,
+ enhance_mask_sam_max_detections,
+ enhance_inpaint_disable_initial_latent,
+ enhance_inpaint_engine,
+ enhance_inpaint_strength,
+ enhance_inpaint_respective_field,
+ enhance_inpaint_erode_or_dilate,
+ enhance_mask_invert
+ ])
+
async_tasks = []
+class EarlyReturnException(BaseException):
+ pass
+
+
def worker():
global async_tasks
@@ -25,7 +168,6 @@ def worker():
import traceback
import math
import numpy as np
- import cv2
import torch
import time
import shared
@@ -35,7 +177,6 @@ def worker():
import modules.default_pipeline as pipeline
import modules.core as core
import modules.flags as flags
- import modules.config
import modules.patch
import ldm_patched.modules.model_management
import extras.preprocessors as preprocessors
@@ -44,18 +185,18 @@ def worker():
import extras.ip_adapter as ip_adapter
import extras.face_crop
import fooocus_version
- import args_manager
from extras.censor import default_censor
from modules.sdxl_styles import apply_style, get_random_style, fooocus_expansion, apply_arrays, random_style_name
from modules.private_logger import log
from extras.expansion import safe_str
from modules.util import (remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil,
- get_shape_ceil, resample_image, erode_or_dilate, get_enabled_loras,
- parse_lora_references_from_prompt, apply_wildcards)
+ get_shape_ceil, resample_image, erode_or_dilate, parse_lora_references_from_prompt,
+ apply_wildcards)
from modules.upscaler import perform_upscale
from modules.flags import Performance
- from modules.meta_parser import get_metadata_parser, MetadataScheme
+ from modules.meta_parser import get_metadata_parser
+ from modules.translator import translate2en
pid = os.getpid()
print(f'Started worker with PID {pid}')
@@ -73,8 +214,7 @@ def worker():
print(f'[Fooocus] {text}')
async_task.yields.append(['preview', (number, text, None)])
- def yield_result(async_task, imgs, black_out_nsfw, censor=True, do_not_show_finished_images=False,
- progressbar_index=flags.preparation_step_count):
+ def yield_result(async_task, imgs, progressbar_index, black_out_nsfw, censor=True, do_not_show_finished_images=False):
if not isinstance(imgs, list):
imgs = [imgs]
@@ -134,209 +274,851 @@ def worker():
async_task.results = async_task.results + [wall]
return
+ def process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, current_task_id,
+ denoising_strength, final_scheduler_name, goals, initial_latent, steps, switch, positive_cond,
+ negative_cond, task, tiled, use_expansion, width, height, base_progress, preparation_steps,
+ total_count, show_intermediate_results):
+ if async_task.last_stop is not False:
+ ldm_patched.modules.model_management.interrupt_current_processing()
+ if 'cn' in goals:
+ for cn_flag, cn_path in [
+ (flags.cn_canny, controlnet_canny_path),
+ (flags.cn_cpds, controlnet_cpds_path)
+ ]:
+ for cn_img, cn_stop, cn_weight in async_task.cn_tasks[cn_flag]:
+ positive_cond, negative_cond = core.apply_controlnet(
+ positive_cond, negative_cond,
+ pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop)
+ imgs = pipeline.process_diffusion(
+ positive_cond=positive_cond,
+ negative_cond=negative_cond,
+ steps=steps,
+ switch=switch,
+ width=width,
+ height=height,
+ image_seed=task['task_seed'],
+ callback=callback,
+ sampler_name=async_task.sampler_name,
+ scheduler_name=final_scheduler_name,
+ latent=initial_latent,
+ denoise=denoising_strength,
+ tiled=tiled,
+ cfg_scale=async_task.cfg_scale,
+ refiner_swap_method=async_task.refiner_swap_method,
+ disable_preview=async_task.disable_preview
+ )
+ del positive_cond, negative_cond # Save memory
+ if inpaint_worker.current_task is not None:
+ imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
+ current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * steps)
+ if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
+ progressbar(async_task, current_progress, 'Checking for NSFW content ...')
+ imgs = default_censor(imgs)
+ progressbar(async_task, current_progress,
+ f'Saving image {current_task_id + 1}/{total_count} to system ...')
+ img_paths = save_and_log(async_task, height, imgs, task, use_expansion, width)
+ yield_result(async_task, img_paths, current_progress, async_task.black_out_nsfw, False,
+ do_not_show_finished_images=not show_intermediate_results or async_task.disable_intermediate_results)
+
+ return imgs, img_paths, current_progress
+
+ def apply_patch_settings(async_task):
+ patch_settings[pid] = PatchSettings(
+ async_task.sharpness,
+ async_task.adm_scaler_end,
+ async_task.adm_scaler_positive,
+ async_task.adm_scaler_negative,
+ async_task.controlnet_softness,
+ async_task.adaptive_cfg
+ )
+
+ def save_and_log(async_task, height, imgs, task, use_expansion, width) -> list:
+ img_paths = []
+ for x in imgs:
+ d = [('Prompt', 'prompt', task['log_positive_prompt']),
+ ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']),
+ ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']),
+ ('Styles', 'styles',
+ str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])),
+ ('Performance', 'performance', async_task.performance_selection.value),
+ ('Steps', 'steps', async_task.steps),
+ ('Resolution', 'resolution', str((width, height))),
+ ('Guidance Scale', 'guidance_scale', async_task.cfg_scale),
+ ('Sharpness', 'sharpness', async_task.sharpness),
+ ('ADM Guidance', 'adm_guidance', str((
+ modules.patch.patch_settings[pid].positive_adm_scale,
+ modules.patch.patch_settings[pid].negative_adm_scale,
+ modules.patch.patch_settings[pid].adm_scaler_end))),
+ ('Base Model', 'base_model', async_task.base_model_name),
+ ('Refiner Model', 'refiner_model', async_task.refiner_model_name),
+ ('Refiner Switch', 'refiner_switch', async_task.refiner_switch)]
+
+ if async_task.refiner_model_name != 'None':
+ if async_task.overwrite_switch > 0:
+ d.append(('Overwrite Switch', 'overwrite_switch', async_task.overwrite_switch))
+ if async_task.refiner_swap_method != flags.refiner_swap_method:
+ d.append(('Refiner Swap Method', 'refiner_swap_method', async_task.refiner_swap_method))
+ if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr:
+ d.append(
+ ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg))
+
+ if async_task.clip_skip > 1:
+ d.append(('CLIP Skip', 'clip_skip', async_task.clip_skip))
+ d.append(('Sampler', 'sampler', async_task.sampler_name))
+ d.append(('Scheduler', 'scheduler', async_task.scheduler_name))
+ d.append(('VAE', 'vae', async_task.vae_name))
+ d.append(('Seed', 'seed', str(task['task_seed'])))
+
+ if async_task.freeu_enabled:
+ d.append(('FreeU', 'freeu',
+ str((async_task.freeu_b1, async_task.freeu_b2, async_task.freeu_s1, async_task.freeu_s2))))
+
+ for li, (n, w) in enumerate(async_task.loras):
+ if n != 'None':
+ d.append((f'LoRA {li + 1}', f'lora_combined_{li + 1}', f'{n} : {w}'))
+
+ metadata_parser = None
+ if async_task.save_metadata_to_images:
+ metadata_parser = modules.meta_parser.get_metadata_parser(async_task.metadata_scheme)
+ metadata_parser.set_data(task['log_positive_prompt'], task['positive'],
+ task['log_negative_prompt'], task['negative'],
+ async_task.steps, async_task.base_model_name, async_task.refiner_model_name,
+ async_task.loras, async_task.vae_name)
+ d.append(('Metadata Scheme', 'metadata_scheme',
+ async_task.metadata_scheme.value if async_task.save_metadata_to_images else async_task.save_metadata_to_images))
+ d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version))
+ img_paths.append(log(x, d, metadata_parser, async_task.output_format, task))
+
+ return img_paths
+
+ def apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width, current_progress):
+ for task in async_task.cn_tasks[flags.cn_canny]:
+ cn_img, cn_stop, cn_weight = task
+ cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+
+ if not async_task.skipping_cn_preprocessor:
+ cn_img = preprocessors.canny_pyramid(cn_img, async_task.canny_low_threshold,
+ async_task.canny_high_threshold)
+
+ cn_img = HWC3(cn_img)
+ task[0] = core.numpy_to_pytorch(cn_img)
+ if async_task.debugging_cn_preprocessor:
+ yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
+ for task in async_task.cn_tasks[flags.cn_cpds]:
+ cn_img, cn_stop, cn_weight = task
+ cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+
+ if not async_task.skipping_cn_preprocessor:
+ cn_img = preprocessors.cpds(cn_img)
+
+ cn_img = HWC3(cn_img)
+ task[0] = core.numpy_to_pytorch(cn_img)
+ if async_task.debugging_cn_preprocessor:
+ yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
+ for task in async_task.cn_tasks[flags.cn_ip]:
+ cn_img, cn_stop, cn_weight = task
+ cn_img = HWC3(cn_img)
+
+ # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
+ cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
+
+ task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
+ if async_task.debugging_cn_preprocessor:
+ yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
+ for task in async_task.cn_tasks[flags.cn_ip_face]:
+ cn_img, cn_stop, cn_weight = task
+ cn_img = HWC3(cn_img)
+
+ if not async_task.skipping_cn_preprocessor:
+ cn_img = extras.face_crop.crop_image(cn_img)
+
+ # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
+ cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
+
+ task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
+ if async_task.debugging_cn_preprocessor:
+ yield_result(async_task, cn_img, current_progress, async_task.black_out_nsfw, do_not_show_finished_images=True)
+ all_ip_tasks = async_task.cn_tasks[flags.cn_ip] + async_task.cn_tasks[flags.cn_ip_face]
+ if len(all_ip_tasks) > 0:
+ pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
+
+ def apply_vary(async_task, uov_method, denoising_strength, uov_input_image, switch, current_progress, advance_progress=False):
+ if 'subtle' in uov_method:
+ denoising_strength = 0.5
+ if 'strong' in uov_method:
+ denoising_strength = 0.85
+ if async_task.overwrite_vary_strength > 0:
+ denoising_strength = async_task.overwrite_vary_strength
+ shape_ceil = get_image_shape_ceil(uov_input_image)
+ if shape_ceil < 1024:
+ print(f'[Vary] Image is resized because it is too small.')
+ shape_ceil = 1024
+ elif shape_ceil > 2048:
+ print(f'[Vary] Image is resized because it is too big.')
+ shape_ceil = 2048
+ uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
+ initial_pixels = core.numpy_to_pytorch(uov_input_image)
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'VAE encoding ...')
+ candidate_vae, _ = pipeline.get_candidate_vae(
+ steps=async_task.steps,
+ switch=switch,
+ denoise=denoising_strength,
+ refiner_swap_method=async_task.refiner_swap_method
+ )
+ initial_latent = core.encode_vae(vae=candidate_vae, pixels=initial_pixels)
+ B, C, H, W = initial_latent['samples'].shape
+ width = W * 8
+ height = H * 8
+ print(f'Final resolution is {str((width, height))}.')
+ return uov_input_image, denoising_strength, initial_latent, width, height, current_progress
+
+ def apply_inpaint(async_task, initial_latent, inpaint_head_model_path, inpaint_image,
+ inpaint_mask, inpaint_parameterized, denoising_strength, inpaint_respective_field, switch,
+ inpaint_disable_initial_latent, current_progress, skip_apply_outpaint=False,
+ advance_progress=False):
+ if not skip_apply_outpaint:
+ inpaint_image, inpaint_mask = apply_outpaint(async_task, inpaint_image, inpaint_mask)
+
+ inpaint_worker.current_task = inpaint_worker.InpaintWorker(
+ image=inpaint_image,
+ mask=inpaint_mask,
+ use_fill=denoising_strength > 0.99,
+ k=inpaint_respective_field
+ )
+ if async_task.debugging_inpaint_preprocessor:
+ yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), 100,
+ async_task.black_out_nsfw, do_not_show_finished_images=True)
+ raise EarlyReturnException
+
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'VAE Inpaint encoding ...')
+ inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
+ inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
+ inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
+ candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae(
+ steps=async_task.steps,
+ switch=switch,
+ denoise=denoising_strength,
+ refiner_swap_method=async_task.refiner_swap_method
+ )
+ latent_inpaint, latent_mask = core.encode_vae_inpaint(
+ mask=inpaint_pixel_mask,
+ vae=candidate_vae,
+ pixels=inpaint_pixel_image)
+ latent_swap = None
+ if candidate_vae_swap is not None:
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'VAE SD15 encoding ...')
+ latent_swap = core.encode_vae(
+ vae=candidate_vae_swap,
+ pixels=inpaint_pixel_fill)['samples']
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'VAE encoding ...')
+ latent_fill = core.encode_vae(
+ vae=candidate_vae,
+ pixels=inpaint_pixel_fill)['samples']
+ inpaint_worker.current_task.load_latent(
+ latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap)
+ if inpaint_parameterized:
+ pipeline.final_unet = inpaint_worker.current_task.patch(
+ inpaint_head_model_path=inpaint_head_model_path,
+ inpaint_latent=latent_inpaint,
+ inpaint_latent_mask=latent_mask,
+ model=pipeline.final_unet
+ )
+ if not inpaint_disable_initial_latent:
+ initial_latent = {'samples': latent_fill}
+ B, C, H, W = latent_fill.shape
+ height, width = H * 8, W * 8
+ final_height, final_width = inpaint_worker.current_task.image.shape[:2]
+ print(f'Final resolution is {str((final_width, final_height))}, latent is {str((width, height))}.')
+
+ return denoising_strength, initial_latent, width, height, current_progress
+
+ def apply_outpaint(async_task, inpaint_image, inpaint_mask):
+ if len(async_task.outpaint_selections) > 0:
+ H, W, C = inpaint_image.shape
+ if 'top' in async_task.outpaint_selections:
+ inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
+ inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
+ constant_values=255)
+ if 'bottom' in async_task.outpaint_selections:
+ inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
+ inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
+ constant_values=255)
+
+ H, W, C = inpaint_image.shape
+ if 'left' in async_task.outpaint_selections:
+ inpaint_image = np.pad(inpaint_image, [[0, 0], [int(W * 0.3), 0], [0, 0]], mode='edge')
+ inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(W * 0.3), 0]], mode='constant',
+ constant_values=255)
+ if 'right' in async_task.outpaint_selections:
+ inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(W * 0.3)], [0, 0]], mode='edge')
+ inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(W * 0.3)]], mode='constant',
+ constant_values=255)
+
+ inpaint_image = np.ascontiguousarray(inpaint_image.copy())
+ inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
+ async_task.inpaint_strength = 1.0
+ async_task.inpaint_respective_field = 1.0
+ return inpaint_image, inpaint_mask
+
+ def apply_upscale(async_task, uov_input_image, uov_method, switch, current_progress, advance_progress=False):
+ H, W, C = uov_input_image.shape
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, f'Upscaling image from {str((W, H))} ...')
+ uov_input_image = perform_upscale(uov_input_image)
+ print(f'Image upscaled.')
+ if '1.5x' in uov_method:
+ f = 1.5
+ elif '2x' in uov_method:
+ f = 2.0
+ else:
+ f = 1.0
+ shape_ceil = get_shape_ceil(H * f, W * f)
+ if shape_ceil < 1024:
+ print(f'[Upscale] Image is resized because it is too small.')
+ uov_input_image = set_image_shape_ceil(uov_input_image, 1024)
+ shape_ceil = 1024
+ else:
+ uov_input_image = resample_image(uov_input_image, width=W * f, height=H * f)
+ image_is_super_large = shape_ceil > 2800
+ if 'fast' in uov_method:
+ direct_return = True
+ elif image_is_super_large:
+            print('Image is too large. Directly returning the SR image. '
+                  'At 4K resolution, directly returning the SR image usually '
+                  'yields better results than SDXL diffusion.')
+ direct_return = True
+ else:
+ direct_return = False
+ if direct_return:
+ return direct_return, uov_input_image, None, None, None, None, None, current_progress
+
+ tiled = True
+ denoising_strength = 0.382
+ if async_task.overwrite_upscale_strength > 0:
+ denoising_strength = async_task.overwrite_upscale_strength
+ initial_pixels = core.numpy_to_pytorch(uov_input_image)
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'VAE encoding ...')
+ candidate_vae, _ = pipeline.get_candidate_vae(
+ steps=async_task.steps,
+ switch=switch,
+ denoise=denoising_strength,
+ refiner_swap_method=async_task.refiner_swap_method
+ )
+ initial_latent = core.encode_vae(
+ vae=candidate_vae,
+ pixels=initial_pixels, tiled=True)
+ B, C, H, W = initial_latent['samples'].shape
+ width = W * 8
+ height = H * 8
+ print(f'Final resolution is {str((width, height))}.')
+ return direct_return, uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress
+
+ def apply_overrides(async_task, steps, height, width):
+ if async_task.overwrite_step > 0:
+ steps = async_task.overwrite_step
+ switch = int(round(async_task.steps * async_task.refiner_switch))
+ if async_task.overwrite_switch > 0:
+ switch = async_task.overwrite_switch
+ if async_task.overwrite_width > 0:
+ width = async_task.overwrite_width
+ if async_task.overwrite_height > 0:
+ height = async_task.overwrite_height
+ return steps, switch, width, height
+
+ def process_prompt(async_task, prompt, negative_prompt, base_model_additional_loras, image_number, disable_seed_increment, use_expansion, use_style,
+ use_synthetic_refiner, current_progress, advance_progress=False):
+ prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')
+ negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='')
+ prompt = prompts[0]
+ negative_prompt = negative_prompts[0]
+ if prompt == '':
+ # disable expansion when empty since it is not meaningful and influences image prompt
+ use_expansion = False
+ extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
+ extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Loading models ...')
+ lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames,
+ async_task.performance_selection)
+ loras, prompt = parse_lora_references_from_prompt(prompt, async_task.loras,
+ modules.config.default_max_lora_number,
+ lora_filenames=lora_filenames)
+ loras += async_task.performance_loras
+ pipeline.refresh_everything(refiner_model_name=async_task.refiner_model_name,
+ base_model_name=async_task.base_model_name,
+ loras=loras, base_model_additional_loras=base_model_additional_loras,
+ use_synthetic_refiner=use_synthetic_refiner, vae_name=async_task.vae_name)
+ pipeline.set_clip_skip(async_task.clip_skip)
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Processing prompts ...')
+ tasks = []
+ for i in range(image_number):
+ if disable_seed_increment:
+ task_seed = async_task.seed % (constants.MAX_SEED + 1)
+ else:
+ task_seed = (async_task.seed + i) % (constants.MAX_SEED + 1) # randint is inclusive, % is not
+
+ task_rng = random.Random(task_seed) # may bind to inpaint noise in the future
+ task_prompt = apply_wildcards(prompt, task_rng, i, async_task.read_wildcards_in_order)
+ task_prompt = apply_arrays(task_prompt, i)
+ task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, async_task.read_wildcards_in_order)
+ task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, async_task.read_wildcards_in_order) for pmt
+ in
+ extra_positive_prompts]
+ task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, async_task.read_wildcards_in_order) for pmt
+ in
+ extra_negative_prompts]
+
+ positive_basic_workloads = []
+ negative_basic_workloads = []
+
+ task_styles = async_task.style_selections.copy()
+ if use_style:
+ for j, s in enumerate(task_styles):
+ if s == random_style_name:
+ s = get_random_style(task_rng)
+ task_styles[j] = s
+ p, n = apply_style(s, positive=task_prompt)
+ positive_basic_workloads = positive_basic_workloads + p
+ negative_basic_workloads = negative_basic_workloads + n
+ else:
+ positive_basic_workloads.append(task_prompt)
+
+ negative_basic_workloads.append(task_negative_prompt) # Always use independent workload for negative.
+
+ positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts
+ negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts
+
+ positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt)
+ negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt)
+
+ tasks.append(dict(
+ task_seed=task_seed,
+ task_prompt=task_prompt,
+ task_negative_prompt=task_negative_prompt,
+ positive=positive_basic_workloads,
+ negative=negative_basic_workloads,
+ expansion='',
+ c=None,
+ uc=None,
+ positive_top_k=len(positive_basic_workloads),
+ negative_top_k=len(negative_basic_workloads),
+ log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts),
+ log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts),
+ styles=task_styles
+ ))
+ if use_expansion:
+ if advance_progress:
+ current_progress += 1
+ for i, t in enumerate(tasks):
+
+ progressbar(async_task, current_progress, f'Preparing Fooocus text #{i + 1} ...')
+ expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed'])
+ print(f'[Prompt Expansion] {expansion}')
+ t['expansion'] = expansion
+ t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy.
+ if advance_progress:
+ current_progress += 1
+ for i, t in enumerate(tasks):
+ progressbar(async_task, current_progress, f'Encoding positive #{i + 1} ...')
+ t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k'])
+ if advance_progress:
+ current_progress += 1
+ for i, t in enumerate(tasks):
+ if abs(float(async_task.cfg_scale) - 1.0) < 1e-4:
+ t['uc'] = pipeline.clone_cond(t['c'])
+ else:
+ progressbar(async_task, current_progress, f'Encoding negative #{i + 1} ...')
+ t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
+ return tasks, use_expansion, loras, current_progress
+
+ def apply_freeu(async_task):
+ print(f'FreeU is enabled!')
+ pipeline.final_unet = core.apply_freeu(
+ pipeline.final_unet,
+ async_task.freeu_b1,
+ async_task.freeu_b2,
+ async_task.freeu_s1,
+ async_task.freeu_s2
+ )
+
+ def patch_discrete(unet, scheduler_name):
+ return core.opModelSamplingDiscrete.patch(unet, scheduler_name, False)[0]
+
+ def patch_edm(unet, scheduler_name):
+ return core.opModelSamplingContinuousEDM.patch(unet, scheduler_name, 120.0, 0.002)[0]
+
+ def patch_samplers(async_task):
+ final_scheduler_name = async_task.scheduler_name
+
+ if async_task.scheduler_name in ['lcm', 'tcd']:
+ final_scheduler_name = 'sgm_uniform'
+ if pipeline.final_unet is not None:
+ pipeline.final_unet = patch_discrete(pipeline.final_unet, async_task.scheduler_name)
+ if pipeline.final_refiner_unet is not None:
+ pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet, async_task.scheduler_name)
+
+ elif async_task.scheduler_name == 'edm_playground_v2.5':
+ final_scheduler_name = 'karras'
+ if pipeline.final_unet is not None:
+ pipeline.final_unet = patch_edm(pipeline.final_unet, async_task.scheduler_name)
+ if pipeline.final_refiner_unet is not None:
+ pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet, async_task.scheduler_name)
+
+ return final_scheduler_name
+
+ def set_hyper_sd_defaults(async_task, current_progress, advance_progress=False):
+ print('Enter Hyper-SD mode.')
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Downloading Hyper-SD components ...')
+ async_task.performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)]
+ if async_task.refiner_model_name != 'None':
+ print(f'Refiner disabled in Hyper-SD mode.')
+ async_task.refiner_model_name = 'None'
+ async_task.sampler_name = 'dpmpp_sde_gpu'
+ async_task.scheduler_name = 'karras'
+ async_task.sharpness = 0.0
+ async_task.cfg_scale = 1.0
+ async_task.adaptive_cfg = 1.0
+ async_task.refiner_switch = 1.0
+ async_task.adm_scaler_positive = 1.0
+ async_task.adm_scaler_negative = 1.0
+ async_task.adm_scaler_end = 0.0
+ return current_progress
+
+ def set_lightning_defaults(async_task, current_progress, advance_progress=False):
+ print('Enter Lightning mode.')
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Downloading Lightning components ...')
+ async_task.performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)]
+ if async_task.refiner_model_name != 'None':
+ print('Refiner disabled in Lightning mode.')
+ async_task.refiner_model_name = 'None'
+ async_task.sampler_name = 'euler'
+ async_task.scheduler_name = 'sgm_uniform'
+ async_task.sharpness = 0.0
+ async_task.cfg_scale = 1.0
+ async_task.adaptive_cfg = 1.0
+ async_task.refiner_switch = 1.0
+ async_task.adm_scaler_positive = 1.0
+ async_task.adm_scaler_negative = 1.0
+ async_task.adm_scaler_end = 0.0
+ return current_progress
+
+ def set_lcm_defaults(async_task, current_progress, advance_progress=False):
+ print('Enter LCM mode.')
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Downloading LCM components ...')
+ async_task.performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)]
+ if async_task.refiner_model_name != 'None':
+ print('Refiner disabled in LCM mode.')
+ async_task.refiner_model_name = 'None'
+ async_task.sampler_name = 'lcm'
+ async_task.scheduler_name = 'lcm'
+ async_task.sharpness = 0.0
+ async_task.cfg_scale = 1.0
+ async_task.adaptive_cfg = 1.0
+ async_task.refiner_switch = 1.0
+ async_task.adm_scaler_positive = 1.0
+ async_task.adm_scaler_negative = 1.0
+ async_task.adm_scaler_end = 0.0
+ return current_progress
+
+ def apply_image_input(async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path,
+ controlnet_cpds_path, goals, inpaint_head_model_path, inpaint_image, inpaint_mask,
+ inpaint_parameterized, ip_adapter_face_path, ip_adapter_path, ip_negative_path,
+ skip_prompt_processing, use_synthetic_refiner):
+ if (async_task.current_tab == 'uov' or (
+ async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_vary_upscale)) \
+ and async_task.uov_method != flags.disabled.casefold() and async_task.uov_input_image is not None:
+ async_task.uov_input_image, skip_prompt_processing, async_task.steps = prepare_upscale(
+ async_task, goals, async_task.uov_input_image, async_task.uov_method, async_task.performance_selection,
+ async_task.steps, 1, skip_prompt_processing=skip_prompt_processing)
+ if (async_task.current_tab == 'inpaint' or (
+ async_task.current_tab == 'ip' and async_task.mixing_image_prompt_and_inpaint)) \
+ and isinstance(async_task.inpaint_input_image, dict):
+ inpaint_image = async_task.inpaint_input_image['image']
+ inpaint_mask = async_task.inpaint_input_image['mask'][:, :, 0]
+
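+ # An uploaded mask is normalized before use: merged with its sketch layer, resized to the inpaint image, converted to grayscale and binarized at 127, then combined with the canvas mask.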
+ if async_task.inpaint_mask_upload_checkbox:
+ if isinstance(async_task.inpaint_mask_image_upload, dict):
+ if (isinstance(async_task.inpaint_mask_image_upload['image'], np.ndarray)
+ and isinstance(async_task.inpaint_mask_image_upload['mask'], np.ndarray)
+ and async_task.inpaint_mask_image_upload['image'].ndim == 3):
+ async_task.inpaint_mask_image_upload = np.maximum(
+ async_task.inpaint_mask_image_upload['image'],
+ async_task.inpaint_mask_image_upload['mask'])
+ if isinstance(async_task.inpaint_mask_image_upload,
+ np.ndarray) and async_task.inpaint_mask_image_upload.ndim == 3:
+ H, W, C = inpaint_image.shape
+ async_task.inpaint_mask_image_upload = resample_image(async_task.inpaint_mask_image_upload,
+ width=W, height=H)
+ async_task.inpaint_mask_image_upload = np.mean(async_task.inpaint_mask_image_upload, axis=2)
+ async_task.inpaint_mask_image_upload = (async_task.inpaint_mask_image_upload > 127).astype(
+ np.uint8) * 255
+ inpaint_mask = np.maximum(inpaint_mask, async_task.inpaint_mask_image_upload)
+
+ if int(async_task.inpaint_erode_or_dilate) != 0:
+ inpaint_mask = erode_or_dilate(inpaint_mask, async_task.inpaint_erode_or_dilate)
+
+ if async_task.invert_mask_checkbox:
+ inpaint_mask = 255 - inpaint_mask
+
+ inpaint_image = HWC3(inpaint_image)
+ if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
+ and (np.any(inpaint_mask > 127) or len(async_task.outpaint_selections) > 0):
+ progressbar(async_task, 1, 'Downloading upscale models ...')
+ modules.config.downloading_upscale_model()
+ if inpaint_parameterized:
+ progressbar(async_task, 1, 'Downloading inpainter ...')
+ inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
+ async_task.inpaint_engine)
+ base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+ print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
+ if async_task.refiner_model_name == 'None':
+ use_synthetic_refiner = True
+ async_task.refiner_switch = 0.8
+ else:
+ inpaint_head_model_path, inpaint_patch_model_path = None, None
+ print('[Inpaint] Parameterized inpaint is disabled.')
+ if async_task.inpaint_additional_prompt != '':
+ if async_task.prompt == '':
+ async_task.prompt = async_task.inpaint_additional_prompt
+ else:
+ async_task.prompt = async_task.inpaint_additional_prompt + '\n' + async_task.prompt
+ goals.append('inpaint')
+ if async_task.current_tab == 'ip' or \
+ async_task.mixing_image_prompt_and_vary_upscale or \
+ async_task.mixing_image_prompt_and_inpaint:
+ goals.append('cn')
+ progressbar(async_task, 1, 'Downloading control models ...')
+ if len(async_task.cn_tasks[flags.cn_canny]) > 0:
+ controlnet_canny_path = modules.config.downloading_controlnet_canny()
+ if len(async_task.cn_tasks[flags.cn_cpds]) > 0:
+ controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
+ if len(async_task.cn_tasks[flags.cn_ip]) > 0:
+ clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
+ if len(async_task.cn_tasks[flags.cn_ip_face]) > 0:
+ clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
+ 'face')
+ if async_task.current_tab == 'enhance' and async_task.enhance_input_image is not None:
+ goals.append('enhance')
+ skip_prompt_processing = True
+ async_task.enhance_input_image = HWC3(async_task.enhance_input_image)
+ return (base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path,
+ inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path,
+ ip_negative_path, skip_prompt_processing, use_synthetic_refiner)
+
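+ # prepare_upscale classifies the uov method into 'vary'/'upscale' goals, zeroes steps for the fast path, and pre-downloads the upscale model.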
+ def prepare_upscale(async_task, goals, uov_input_image, uov_method, performance, steps, current_progress,
+ advance_progress=False, skip_prompt_processing=False):
+ uov_input_image = HWC3(uov_input_image)
+ if 'vary' in uov_method:
+ goals.append('vary')
+ elif 'upscale' in uov_method:
+ goals.append('upscale')
+ if 'fast' in uov_method:
+ skip_prompt_processing = True
+ steps = 0
+ else:
+ steps = performance.steps_uov()
+
+ if advance_progress:
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Downloading upscale models ...')
+ modules.config.downloading_upscale_model()
+ return uov_input_image, skip_prompt_processing, steps
+
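+ # Fall back to the main prompt when the per-tab enhance prompt is empty; otherwise optionally translate it to English.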
+ def prepare_enhance_prompt(prompt: str, fallback_prompt: str, translate: bool, prompt_type: str):
+ if safe_str(prompt) == '' or len(remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')) == 0:
+ prompt = fallback_prompt
+ else:
+ if translate:
+ prompt = translate2en(prompt, prompt_type)
+ return prompt
+
+ def stop_processing(async_task, processing_start_time):
+ async_task.processing = False
+ processing_time = time.perf_counter() - processing_start_time
+ print(f'Processing time (total): {processing_time:.2f} seconds')
+
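+ # process_enhance runs a single enhancement pass (vary, upscale or inpaint) on one image and re-processes the prompt, since the original conditioning has already been freed.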
+ def process_enhance(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+ current_progress, current_task_id, denoising_strength, inpaint_disable_initial_latent,
+ inpaint_engine, inpaint_respective_field, inpaint_strength,
+ prompt, negative_prompt, final_scheduler_name, goals, height, img, mask,
+ preparation_steps, steps, switch, tiled, total_count, use_expansion, use_style,
+ use_synthetic_refiner, width, show_intermediate_results=True):
+ base_model_additional_loras = []
+ inpaint_head_model_path = None
+ inpaint_parameterized = inpaint_engine != 'None'  # when inpaint_engine is 'None', only the detail-improvement path is used
+ initial_latent = None
+
+ if 'vary' in goals:
+ img, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
+ async_task, async_task.enhance_uov_method, denoising_strength, img, switch, current_progress)
+ if 'upscale' in goals:
+ direct_return, img, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
+ async_task, img, async_task.enhance_uov_method, switch, current_progress)
+ if direct_return:
+ d = [('Upscale (Fast)', 'upscale_fast', '2x')]
+ if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
+ progressbar(async_task, current_progress, 'Checking for NSFW content ...')
+ img = default_censor(img)
+ progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{total_count} to system ...')
+ uov_image_path = log(img, d, output_format=async_task.output_format)
+ yield_result(async_task, uov_image_path, current_progress, async_task.black_out_nsfw, False,
+ do_not_show_finished_images=not show_intermediate_results or async_task.disable_intermediate_results)
+ return current_progress, img
+
+ if 'inpaint' in goals and inpaint_parameterized:
+ progressbar(async_task, current_progress, 'Downloading inpainter ...')
+ inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
+ inpaint_engine)
+ if inpaint_patch_model_path not in base_model_additional_loras:
+ base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
+ progressbar(async_task, current_progress, 'Preparing enhance prompts ...')
+ prompt = prepare_enhance_prompt(prompt, async_task.prompt, async_task.translate_prompts, 'prompt')
+ negative_prompt = prepare_enhance_prompt(negative_prompt, async_task.negative_prompt,
+ async_task.translate_prompts, 'negative prompt')
+ # the positive and negative conditioning are no longer available here, so process the prompt again
+ tasks_enhance, use_expansion, loras, current_progress = process_prompt(
+ async_task, prompt, negative_prompt, base_model_additional_loras, 1, True,
+ use_expansion, use_style, use_synthetic_refiner, current_progress)
+ task_enhance = tasks_enhance[0]
+ # TODO could support vary, upscale and CN in the future
+ # if 'cn' in goals:
+ # apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width)
+ if async_task.freeu_enabled:
+ apply_freeu(async_task)
+ patch_samplers(async_task)
+ if 'inpaint' in goals:
+ denoising_strength, initial_latent, width, height, current_progress = apply_inpaint(
+ async_task, None, inpaint_head_model_path, img, mask,
+ inpaint_parameterized, inpaint_strength,
+ inpaint_respective_field, switch, inpaint_disable_initial_latent,
+ current_progress, True)
+ imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
+ controlnet_cpds_path, current_task_id, denoising_strength,
+ final_scheduler_name, goals, initial_latent, steps, switch,
+ task_enhance['c'], task_enhance['uc'], task_enhance, tiled,
+ use_expansion, width, height, current_progress,
+ preparation_steps, total_count, show_intermediate_results)
+
+ del task_enhance['c'], task_enhance['uc'] # Save memory
+ return current_progress, imgs[0], prompt, negative_prompt
+
+ def enhance_upscale(all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+ current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
+ prompt, negative_prompt, final_scheduler_name, height, img, preparation_steps, switch, tiled,
+ total_count, use_expansion, use_style, use_synthetic_refiner, width):
+ # reset the inpaint worker to prevent tensor size issues and to avoid mixing upscaling with inpainting
+ inpaint_worker.current_task = None
+
+ current_task_id += 1
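+ # Map completed sampling steps onto the remaining progress range (100 - preparation_steps) shared across all_steps.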
+ current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
+ goals_enhance = []
+ img, skip_prompt_processing, steps = prepare_upscale(
+ async_task, goals_enhance, img, async_task.enhance_uov_method, async_task.performance_selection,
+ enhance_steps, current_progress)
+ steps, _, _, _ = apply_overrides(async_task, steps, height, width)
+ exception_result = ''
+ if len(goals_enhance) > 0:
+ try:
+ current_progress, img, prompt, negative_prompt = process_enhance(
+ all_steps, async_task, callback, controlnet_canny_path,
+ controlnet_cpds_path, current_progress, current_task_id, denoising_strength, False,
+ 'None', 0.0, 0.0, prompt, negative_prompt, final_scheduler_name,
+ goals_enhance, height, img, None, preparation_steps, steps, switch, tiled, total_count,
+ use_expansion, use_style, use_synthetic_refiner, width)
+
+ except ldm_patched.modules.model_management.InterruptProcessingException:
+ if async_task.last_stop == 'skip':
+ print('User skipped')
+ async_task.last_stop = False
+ # also skip all enhance steps for this image, but add the steps to the progress bar
+ if async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
+ done_steps_inpainting += len(async_task.enhance_ctrls) * enhance_steps
+ exception_result = 'continue'
+ else:
+ print('User stopped')
+ exception_result = 'break'
+ finally:
+ done_steps_upscaling += steps
+ return current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result
+
@torch.no_grad()
@torch.inference_mode()
- def handler(async_task):
- execution_start_time = time.perf_counter()
+ def handler(async_task: AsyncTask):
+ preparation_start_time = time.perf_counter()
async_task.processing = True
- args = async_task.args
- args.reverse()
-
- prompt = args.pop()
- negative_prompt = args.pop()
- translate_prompts = args.pop()
- style_selections = args.pop()
- performance_selection = Performance(args.pop())
- aspect_ratios_selection = args.pop()
- image_number = args.pop()
- output_format = args.pop()
- image_seed = args.pop()
- read_wildcards_in_order = args.pop()
- sharpness = args.pop()
- guidance_scale = args.pop()
- base_model_name = args.pop()
- refiner_model_name = args.pop()
- refiner_switch = args.pop()
- loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in
- range(modules.config.default_max_lora_number)])
- input_image_checkbox = args.pop()
- current_tab = args.pop()
- uov_method = args.pop()
- uov_input_image = args.pop()
- outpaint_selections = args.pop()
- inpaint_input_image = args.pop()
- inpaint_additional_prompt = args.pop()
- inpaint_mask_image_upload = args.pop()
-
- disable_preview = args.pop()
- disable_intermediate_results = args.pop()
- disable_seed_increment = args.pop()
- black_out_nsfw = args.pop()
- adm_scaler_positive = args.pop()
- adm_scaler_negative = args.pop()
- adm_scaler_end = args.pop()
- adaptive_cfg = args.pop()
- clip_skip = args.pop()
- sampler_name = args.pop()
- scheduler_name = args.pop()
- vae_name = args.pop()
- overwrite_step = args.pop()
- overwrite_switch = args.pop()
- overwrite_width = args.pop()
- overwrite_height = args.pop()
- overwrite_vary_strength = args.pop()
- overwrite_upscale_strength = args.pop()
- mixing_image_prompt_and_vary_upscale = args.pop()
- mixing_image_prompt_and_inpaint = args.pop()
- debugging_cn_preprocessor = args.pop()
- skipping_cn_preprocessor = args.pop()
- canny_low_threshold = args.pop()
- canny_high_threshold = args.pop()
- refiner_swap_method = args.pop()
- controlnet_softness = args.pop()
- freeu_enabled = args.pop()
- freeu_b1 = args.pop()
- freeu_b2 = args.pop()
- freeu_s1 = args.pop()
- freeu_s2 = args.pop()
- debugging_inpaint_preprocessor = args.pop()
- inpaint_disable_initial_latent = args.pop()
- inpaint_engine = args.pop()
- inpaint_strength = args.pop()
- inpaint_respective_field = args.pop()
- inpaint_mask_upload_checkbox = args.pop()
- invert_mask_checkbox = args.pop()
- inpaint_erode_or_dilate = args.pop()
-
- save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False
- metadata_scheme = MetadataScheme(
- args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS
-
- cn_tasks = {x: [] for x in flags.ip_list}
- for _ in range(flags.controlnet_image_count):
- cn_img = args.pop()
- cn_stop = args.pop()
- cn_weight = args.pop()
- cn_type = args.pop()
- if cn_img is not None:
- cn_tasks[cn_type].append([cn_img, cn_stop, cn_weight])
-
- outpaint_selections = [o.lower() for o in outpaint_selections]
+ async_task.outpaint_selections = [o.lower() for o in async_task.outpaint_selections]
base_model_additional_loras = []
- raw_style_selections = copy.deepcopy(style_selections)
- uov_method = uov_method.lower()
+ async_task.uov_method = async_task.uov_method.casefold()
+ async_task.enhance_uov_method = async_task.enhance_uov_method.casefold()
- if fooocus_expansion in style_selections:
+ if fooocus_expansion in async_task.style_selections:
use_expansion = True
- style_selections.remove(fooocus_expansion)
+ async_task.style_selections.remove(fooocus_expansion)
else:
use_expansion = False
- use_style = len(style_selections) > 0
+ use_style = len(async_task.style_selections) > 0
- if base_model_name == refiner_model_name:
+ if async_task.base_model_name == async_task.refiner_model_name:
print(f'Refiner disabled because base model and refiner are same.')
- refiner_model_name = 'None'
+ async_task.refiner_model_name = 'None'
- steps = performance_selection.steps()
+ current_progress = 0
+ if async_task.performance_selection == Performance.EXTREME_SPEED:
+ set_lcm_defaults(async_task, current_progress, advance_progress=True)
+ elif async_task.performance_selection == Performance.LIGHTNING:
+ set_lightning_defaults(async_task, current_progress, advance_progress=True)
+ elif async_task.performance_selection == Performance.HYPER_SD:
+ set_hyper_sd_defaults(async_task, current_progress, advance_progress=True)
- performance_loras = []
+ if async_task.translate_prompts:
+ async_task.prompt = translate2en(async_task.prompt, 'prompt')
+ async_task.negative_prompt = translate2en(async_task.negative_prompt, 'negative prompt')
- if performance_selection == Performance.EXTREME_SPEED:
- print('Enter LCM mode.')
- progressbar(async_task, 1, 'Downloading LCM components ...')
- performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)]
-
- if refiner_model_name != 'None':
- print(f'Refiner disabled in LCM mode.')
-
- refiner_model_name = 'None'
- sampler_name = 'lcm'
- scheduler_name = 'lcm'
- sharpness = 0.0
- guidance_scale = 1.0
- adaptive_cfg = 1.0
- refiner_switch = 1.0
- adm_scaler_positive = 1.0
- adm_scaler_negative = 1.0
- adm_scaler_end = 0.0
-
- elif performance_selection == Performance.LIGHTNING:
- print('Enter Lightning mode.')
- progressbar(async_task, 1, 'Downloading Lightning components ...')
- performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)]
-
- if refiner_model_name != 'None':
- print(f'Refiner disabled in Lightning mode.')
-
- refiner_model_name = 'None'
- sampler_name = 'euler'
- scheduler_name = 'sgm_uniform'
- sharpness = 0.0
- guidance_scale = 1.0
- adaptive_cfg = 1.0
- refiner_switch = 1.0
- adm_scaler_positive = 1.0
- adm_scaler_negative = 1.0
- adm_scaler_end = 0.0
-
- elif performance_selection == Performance.HYPER_SD:
- print('Enter Hyper-SD mode.')
- progressbar(async_task, 1, 'Downloading Hyper-SD components ...')
- performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)]
-
- if refiner_model_name != 'None':
- print(f'Refiner disabled in Hyper-SD mode.')
-
- refiner_model_name = 'None'
- sampler_name = 'dpmpp_sde_gpu'
- scheduler_name = 'karras'
- sharpness = 0.0
- guidance_scale = 1.0
- adaptive_cfg = 1.0
- refiner_switch = 1.0
- adm_scaler_positive = 1.0
- adm_scaler_negative = 1.0
- adm_scaler_end = 0.0
-
- if translate_prompts:
- from modules.translator import translate2en
- prompt = translate2en(prompt, 'prompt')
- negative_prompt = translate2en(negative_prompt, 'negative prompt')
-
- print(f'[Parameters] Adaptive CFG = {adaptive_cfg}')
- print(f'[Parameters] CLIP Skip = {clip_skip}')
- print(f'[Parameters] Sharpness = {sharpness}')
- print(f'[Parameters] ControlNet Softness = {controlnet_softness}')
+ print(f'[Parameters] Adaptive CFG = {async_task.adaptive_cfg}')
+ print(f'[Parameters] CLIP Skip = {async_task.clip_skip}')
+ print(f'[Parameters] Sharpness = {async_task.sharpness}')
+ print(f'[Parameters] ControlNet Softness = {async_task.controlnet_softness}')
print(f'[Parameters] ADM Scale = '
- f'{adm_scaler_positive} : '
- f'{adm_scaler_negative} : '
- f'{adm_scaler_end}')
+ f'{async_task.adm_scaler_positive} : '
+ f'{async_task.adm_scaler_negative} : '
+ f'{async_task.adm_scaler_end}')
+ print(f'[Parameters] Seed = {async_task.seed}')
- patch_settings[pid] = PatchSettings(
- sharpness,
- adm_scaler_end,
- adm_scaler_positive,
- adm_scaler_negative,
- controlnet_softness,
- adaptive_cfg
- )
+ apply_patch_settings(async_task)
- cfg_scale = float(guidance_scale)
- print(f'[Parameters] CFG = {cfg_scale}')
+ print(f'[Parameters] CFG = {async_task.cfg_scale}')
initial_latent = None
denoising_strength = 1.0
tiled = False
- width, height = aspect_ratios_selection.replace('×', ' ').split(' ')[:2]
+ width, height = async_task.aspect_ratios_selection.replace('×', ' ').split(' ')[:2]
width, height = int(width), int(height)
skip_prompt_processing = False
inpaint_worker.current_task = None
- inpaint_parameterized = inpaint_engine != 'None'
+ inpaint_parameterized = async_task.inpaint_engine != 'None'
inpaint_image = None
inpaint_mask = None
inpaint_head_model_path = None
@@ -347,480 +1129,112 @@ def worker():
controlnet_cpds_path = None
clip_vision_path, ip_negative_path, ip_adapter_path, ip_adapter_face_path = None, None, None, None
- seed = int(image_seed)
- print(f'[Parameters] Seed = {seed}')
-
goals = []
tasks = []
+ current_progress = 1
- if input_image_checkbox:
- if (current_tab == 'uov' or (
- current_tab == 'ip' and mixing_image_prompt_and_vary_upscale)) \
- and uov_method != flags.disabled and uov_input_image is not None:
- uov_input_image = HWC3(uov_input_image)
- if 'vary' in uov_method:
- goals.append('vary')
- elif 'upscale' in uov_method:
- goals.append('upscale')
- if 'fast' in uov_method:
- skip_prompt_processing = True
- else:
- steps = performance_selection.steps_uov()
-
- progressbar(async_task, 1, 'Downloading upscale models ...')
- modules.config.downloading_upscale_model()
- if (current_tab == 'inpaint' or (
- current_tab == 'ip' and mixing_image_prompt_and_inpaint)) \
- and isinstance(inpaint_input_image, dict):
- inpaint_image = inpaint_input_image['image']
- inpaint_mask = inpaint_input_image['mask'][:, :, 0]
-
- if inpaint_mask_upload_checkbox:
- if isinstance(inpaint_mask_image_upload, dict):
- if (isinstance(inpaint_mask_image_upload['image'], np.ndarray)
- and isinstance(inpaint_mask_image_upload['mask'], np.ndarray)
- and inpaint_mask_image_upload['image'].ndim == 3):
- inpaint_mask_image_upload = np.maximum(inpaint_mask_image_upload['image'], inpaint_mask_image_upload['mask'])
- if isinstance(inpaint_mask_image_upload, np.ndarray) and inpaint_mask_image_upload.ndim == 3:
- H, W, C = inpaint_image.shape
- inpaint_mask_image_upload = resample_image(inpaint_mask_image_upload, width=W, height=H)
- inpaint_mask_image_upload = np.mean(inpaint_mask_image_upload, axis=2)
- inpaint_mask_image_upload = (inpaint_mask_image_upload > 127).astype(np.uint8) * 255
- inpaint_mask = np.maximum(inpaint_mask, inpaint_mask_image_upload)
-
- if int(inpaint_erode_or_dilate) != 0:
- inpaint_mask = erode_or_dilate(inpaint_mask, inpaint_erode_or_dilate)
-
- if invert_mask_checkbox:
- inpaint_mask = 255 - inpaint_mask
-
- inpaint_image = HWC3(inpaint_image)
- if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \
- and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0):
- progressbar(async_task, 1, 'Downloading upscale models ...')
- modules.config.downloading_upscale_model()
- if inpaint_parameterized:
- progressbar(async_task, 1, 'Downloading inpainter ...')
- inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models(
- inpaint_engine)
- base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
- print(f'[Inpaint] Current inpaint model is {inpaint_patch_model_path}')
- if refiner_model_name == 'None':
- use_synthetic_refiner = True
- refiner_switch = 0.8
- else:
- inpaint_head_model_path, inpaint_patch_model_path = None, None
- print(f'[Inpaint] Parameterized inpaint is disabled.')
- if inpaint_additional_prompt != '':
- if prompt == '':
- prompt = inpaint_additional_prompt
- else:
- prompt = inpaint_additional_prompt + '\n' + prompt
- goals.append('inpaint')
- if current_tab == 'ip' or \
- mixing_image_prompt_and_vary_upscale or \
- mixing_image_prompt_and_inpaint:
- goals.append('cn')
- progressbar(async_task, 1, 'Downloading control models ...')
- if len(cn_tasks[flags.cn_canny]) > 0:
- controlnet_canny_path = modules.config.downloading_controlnet_canny()
- if len(cn_tasks[flags.cn_cpds]) > 0:
- controlnet_cpds_path = modules.config.downloading_controlnet_cpds()
- if len(cn_tasks[flags.cn_ip]) > 0:
- clip_vision_path, ip_negative_path, ip_adapter_path = modules.config.downloading_ip_adapters('ip')
- if len(cn_tasks[flags.cn_ip_face]) > 0:
- clip_vision_path, ip_negative_path, ip_adapter_face_path = modules.config.downloading_ip_adapters(
- 'face')
- progressbar(async_task, 1, 'Loading control models ...')
+ if async_task.input_image_checkbox:
+ (base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path,
+ inpaint_head_model_path, inpaint_image, inpaint_mask, ip_adapter_face_path, ip_adapter_path,
+ ip_negative_path, skip_prompt_processing, use_synthetic_refiner) = apply_image_input(
+ async_task, base_model_additional_loras, clip_vision_path, controlnet_canny_path, controlnet_cpds_path,
+ goals, inpaint_head_model_path, inpaint_image, inpaint_mask, inpaint_parameterized, ip_adapter_face_path,
+ ip_adapter_path, ip_negative_path, skip_prompt_processing, use_synthetic_refiner)
# Load or unload CNs
+ progressbar(async_task, current_progress, 'Loading control models ...')
pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path])
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path)
ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path)
- if overwrite_step > 0:
- steps = overwrite_step
+ async_task.steps, switch, width, height = apply_overrides(async_task, async_task.steps, height, width)
- switch = int(round(steps * refiner_switch))
+ print(f'[Parameters] Sampler = {async_task.sampler_name} - {async_task.scheduler_name}')
+ print(f'[Parameters] Steps = {async_task.steps} - {switch}')
- if overwrite_switch > 0:
- switch = overwrite_switch
-
- if overwrite_width > 0:
- width = overwrite_width
-
- if overwrite_height > 0:
- height = overwrite_height
-
- print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}')
- print(f'[Parameters] Steps = {steps} - {switch}')
-
- progressbar(async_task, 1, 'Initializing ...')
+ progressbar(async_task, current_progress, 'Initializing ...')
if not skip_prompt_processing:
-
- prompts = remove_empty_str([safe_str(p) for p in prompt.splitlines()], default='')
- negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.splitlines()], default='')
-
- prompt = prompts[0]
- negative_prompt = negative_prompts[0]
-
- if prompt == '':
- # disable expansion when empty since it is not meaningful and influences image prompt
- use_expansion = False
-
- extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
- extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
-
- progressbar(async_task, 2, 'Loading models ...')
-
- lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames, performance_selection)
- loras, prompt = parse_lora_references_from_prompt(prompt, loras, modules.config.default_max_lora_number, lora_filenames=lora_filenames)
- loras += performance_loras
-
- pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name,
- loras=loras, base_model_additional_loras=base_model_additional_loras,
- use_synthetic_refiner=use_synthetic_refiner, vae_name=vae_name)
-
- pipeline.set_clip_skip(clip_skip)
-
- progressbar(async_task, 3, 'Processing prompts ...')
- tasks = []
-
- for i in range(image_number):
- if disable_seed_increment:
- task_seed = seed % (constants.MAX_SEED + 1)
- else:
- task_seed = (seed + i) % (constants.MAX_SEED + 1) # randint is inclusive, % is not
-
- task_rng = random.Random(task_seed) # may bind to inpaint noise in the future
- task_prompt = apply_wildcards(prompt, task_rng, i, read_wildcards_in_order)
- task_prompt = apply_arrays(task_prompt, i)
- task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, read_wildcards_in_order)
- task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in
- extra_positive_prompts]
- task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in
- extra_negative_prompts]
-
- positive_basic_workloads = []
- negative_basic_workloads = []
-
- task_styles = style_selections.copy()
- if use_style:
- for i, s in enumerate(task_styles):
- if s == random_style_name:
- s = get_random_style(task_rng)
- task_styles[i] = s
- p, n = apply_style(s, positive=task_prompt)
- positive_basic_workloads = positive_basic_workloads + p
- negative_basic_workloads = negative_basic_workloads + n
- else:
- positive_basic_workloads.append(task_prompt)
-
- negative_basic_workloads.append(task_negative_prompt) # Always use independent workload for negative.
-
- positive_basic_workloads = positive_basic_workloads + task_extra_positive_prompts
- negative_basic_workloads = negative_basic_workloads + task_extra_negative_prompts
-
- positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=task_prompt)
- negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=task_negative_prompt)
-
- tasks.append(dict(
- task_seed=task_seed,
- task_prompt=task_prompt,
- task_negative_prompt=task_negative_prompt,
- positive=positive_basic_workloads,
- negative=negative_basic_workloads,
- expansion='',
- c=None,
- uc=None,
- positive_top_k=len(positive_basic_workloads),
- negative_top_k=len(negative_basic_workloads),
- log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts),
- log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts),
- styles=task_styles
- ))
-
- if use_expansion:
- for i, t in enumerate(tasks):
- progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...')
- expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed'])
- print(f'[Prompt Expansion] {expansion}')
- t['expansion'] = expansion
- t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy.
-
- for i, t in enumerate(tasks):
- progressbar(async_task, 5, f'Encoding positive #{i + 1} ...')
- t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k'])
-
- for i, t in enumerate(tasks):
- if abs(float(cfg_scale) - 1.0) < 1e-4:
- t['uc'] = pipeline.clone_cond(t['c'])
- else:
- progressbar(async_task, 6, f'Encoding negative #{i + 1} ...')
- t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k'])
+ tasks, use_expansion, loras, current_progress = process_prompt(async_task, async_task.prompt, async_task.negative_prompt,
+ base_model_additional_loras, async_task.image_number,
+ async_task.disable_seed_increment, use_expansion, use_style,
+ use_synthetic_refiner, current_progress, advance_progress=True)
if len(goals) > 0:
- progressbar(async_task, 7, 'Image processing ...')
+ current_progress += 1
+ progressbar(async_task, current_progress, 'Image processing ...')
if 'vary' in goals:
- if 'subtle' in uov_method:
- denoising_strength = 0.5
- if 'strong' in uov_method:
- denoising_strength = 0.85
- if overwrite_vary_strength > 0:
- denoising_strength = overwrite_vary_strength
-
- shape_ceil = get_image_shape_ceil(uov_input_image)
- if shape_ceil < 1024:
- print(f'[Vary] Image is resized because it is too small.')
- shape_ceil = 1024
- elif shape_ceil > 2048:
- print(f'[Vary] Image is resized because it is too big.')
- shape_ceil = 2048
-
- uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil)
-
- initial_pixels = core.numpy_to_pytorch(uov_input_image)
- progressbar(async_task, 8, 'VAE encoding ...')
-
- candidate_vae, _ = pipeline.get_candidate_vae(
- steps=steps,
- switch=switch,
- denoise=denoising_strength,
- refiner_swap_method=refiner_swap_method
- )
-
- initial_latent = core.encode_vae(vae=candidate_vae, pixels=initial_pixels)
- B, C, H, W = initial_latent['samples'].shape
- width = W * 8
- height = H * 8
- print(f'Final resolution is {str((height, width))}.')
+ async_task.uov_input_image, denoising_strength, initial_latent, width, height, current_progress = apply_vary(
+ async_task, async_task.uov_method, denoising_strength, async_task.uov_input_image, switch,
+ current_progress)
if 'upscale' in goals:
- H, W, C = uov_input_image.shape
- progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...')
- uov_input_image = perform_upscale(uov_input_image)
- print(f'Image upscaled.')
-
- if '1.5x' in uov_method:
- f = 1.5
- elif '2x' in uov_method:
- f = 2.0
- else:
- f = 1.0
-
- shape_ceil = get_shape_ceil(H * f, W * f)
-
- if shape_ceil < 1024:
- print(f'[Upscale] Image is resized because it is too small.')
- uov_input_image = set_image_shape_ceil(uov_input_image, 1024)
- shape_ceil = 1024
- else:
- uov_input_image = resample_image(uov_input_image, width=W * f, height=H * f)
-
- image_is_super_large = shape_ceil > 2800
-
- if 'fast' in uov_method:
- direct_return = True
- elif image_is_super_large:
- print('Image is too large. Directly returned the SR image. '
- 'Usually directly return SR image at 4K resolution '
- 'yields better results than SDXL diffusion.')
- direct_return = True
- else:
- direct_return = False
-
+ direct_return, async_task.uov_input_image, denoising_strength, initial_latent, tiled, width, height, current_progress = apply_upscale(
+ async_task, async_task.uov_input_image, async_task.uov_method, switch, current_progress,
+ advance_progress=True)
if direct_return:
d = [('Upscale (Fast)', 'upscale_fast', '2x')]
- if modules.config.default_black_out_nsfw or black_out_nsfw:
+ if modules.config.default_black_out_nsfw or async_task.black_out_nsfw:
progressbar(async_task, 100, 'Checking for NSFW content ...')
- uov_input_image = default_censor(uov_input_image)
+ async_task.uov_input_image = default_censor(async_task.uov_input_image)
progressbar(async_task, 100, 'Saving image to system ...')
- uov_input_image_path = log(uov_input_image, d, output_format=output_format)
- yield_result(async_task, uov_input_image_path, black_out_nsfw, False, do_not_show_finished_images=True)
+ uov_input_image_path = log(async_task.uov_input_image, d, output_format=async_task.output_format)
+ yield_result(async_task, uov_input_image_path, 100, async_task.black_out_nsfw, False,
+ do_not_show_finished_images=True)
return
- tiled = True
- denoising_strength = 0.382
-
- if overwrite_upscale_strength > 0:
- denoising_strength = overwrite_upscale_strength
-
- initial_pixels = core.numpy_to_pytorch(uov_input_image)
- progressbar(async_task, 10, 'VAE encoding ...')
-
- candidate_vae, _ = pipeline.get_candidate_vae(
- steps=steps,
- switch=switch,
- denoise=denoising_strength,
- refiner_swap_method=refiner_swap_method
- )
-
- initial_latent = core.encode_vae(
- vae=candidate_vae,
- pixels=initial_pixels, tiled=True)
- B, C, H, W = initial_latent['samples'].shape
- width = W * 8
- height = H * 8
- print(f'Final resolution is {str((height, width))}.')
-
if 'inpaint' in goals:
- if len(outpaint_selections) > 0:
- H, W, C = inpaint_image.shape
- if 'top' in outpaint_selections:
- inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge')
- inpaint_mask = np.pad(inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant',
- constant_values=255)
- if 'bottom' in outpaint_selections:
- inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge')
- inpaint_mask = np.pad(inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant',
- constant_values=255)
-
- H, W, C = inpaint_image.shape
- if 'left' in outpaint_selections:
- inpaint_image = np.pad(inpaint_image, [[0, 0], [int(W * 0.3), 0], [0, 0]], mode='edge')
- inpaint_mask = np.pad(inpaint_mask, [[0, 0], [int(W * 0.3), 0]], mode='constant',
- constant_values=255)
- if 'right' in outpaint_selections:
- inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(W * 0.3)], [0, 0]], mode='edge')
- inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int(W * 0.3)]], mode='constant',
- constant_values=255)
-
- inpaint_image = np.ascontiguousarray(inpaint_image.copy())
- inpaint_mask = np.ascontiguousarray(inpaint_mask.copy())
- inpaint_strength = 1.0
- inpaint_respective_field = 1.0
-
- denoising_strength = inpaint_strength
-
- inpaint_worker.current_task = inpaint_worker.InpaintWorker(
- image=inpaint_image,
- mask=inpaint_mask,
- use_fill=denoising_strength > 0.99,
- k=inpaint_respective_field
- )
-
- if debugging_inpaint_preprocessor:
- yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), black_out_nsfw, do_not_show_finished_images=True)
+ try:
+ denoising_strength, initial_latent, width, height, current_progress = apply_inpaint(async_task,
+ initial_latent,
+ inpaint_head_model_path,
+ inpaint_image,
+ inpaint_mask,
+ inpaint_parameterized,
+ async_task.inpaint_strength,
+ async_task.inpaint_respective_field,
+ switch,
+ async_task.inpaint_disable_initial_latent,
+ current_progress,
+ advance_progress=True)
+ except EarlyReturnException:
return
- progressbar(async_task, 11, 'VAE Inpaint encoding ...')
-
- inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill)
- inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image)
- inpaint_pixel_mask = core.numpy_to_pytorch(inpaint_worker.current_task.interested_mask)
-
- candidate_vae, candidate_vae_swap = pipeline.get_candidate_vae(
- steps=steps,
- switch=switch,
- denoise=denoising_strength,
- refiner_swap_method=refiner_swap_method
- )
-
- latent_inpaint, latent_mask = core.encode_vae_inpaint(
- mask=inpaint_pixel_mask,
- vae=candidate_vae,
- pixels=inpaint_pixel_image)
-
- latent_swap = None
- if candidate_vae_swap is not None:
- progressbar(async_task, 12, 'VAE SD15 encoding ...')
- latent_swap = core.encode_vae(
- vae=candidate_vae_swap,
- pixels=inpaint_pixel_fill)['samples']
-
- progressbar(async_task, 13, 'VAE encoding ...')
- latent_fill = core.encode_vae(
- vae=candidate_vae,
- pixels=inpaint_pixel_fill)['samples']
-
- inpaint_worker.current_task.load_latent(
- latent_fill=latent_fill, latent_mask=latent_mask, latent_swap=latent_swap)
-
- if inpaint_parameterized:
- pipeline.final_unet = inpaint_worker.current_task.patch(
- inpaint_head_model_path=inpaint_head_model_path,
- inpaint_latent=latent_inpaint,
- inpaint_latent_mask=latent_mask,
- model=pipeline.final_unet
- )
-
- if not inpaint_disable_initial_latent:
- initial_latent = {'samples': latent_fill}
-
- B, C, H, W = latent_fill.shape
- height, width = H * 8, W * 8
- final_height, final_width = inpaint_worker.current_task.image.shape[:2]
- print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.')
-
if 'cn' in goals:
- for task in cn_tasks[flags.cn_canny]:
- cn_img, cn_stop, cn_weight = task
- cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+ apply_control_nets(async_task, height, ip_adapter_face_path, ip_adapter_path, width, current_progress)
+ if async_task.debugging_cn_preprocessor:
+ return
- if not skipping_cn_preprocessor:
- cn_img = preprocessors.canny_pyramid(cn_img, canny_low_threshold, canny_high_threshold)
+ if async_task.freeu_enabled:
+ apply_freeu(async_task)
- cn_img = HWC3(cn_img)
- task[0] = core.numpy_to_pytorch(cn_img)
- if debugging_cn_preprocessor:
- yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
- return
- for task in cn_tasks[flags.cn_cpds]:
- cn_img, cn_stop, cn_weight = task
- cn_img = resize_image(HWC3(cn_img), width=width, height=height)
+ # async_task.steps may hold the uov step count at this point (when an upscale goal was applied), so re-apply overrides
+ steps, _, _, _ = apply_overrides(async_task, async_task.steps, height, width)
- if not skipping_cn_preprocessor:
- cn_img = preprocessors.cpds(cn_img)
+ images_to_enhance = []
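+ # The enhance tab operates on a user-supplied image, so only that single image enters the enhancement loop.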
+ if 'enhance' in goals:
+ async_task.image_number = 1
+ images_to_enhance += [async_task.enhance_input_image]
+ height, width, _ = async_task.enhance_input_image.shape
+ # input image already provided, processing is skipped
+ steps = 0
- cn_img = HWC3(cn_img)
- task[0] = core.numpy_to_pytorch(cn_img)
- if debugging_cn_preprocessor:
- yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
- return
- for task in cn_tasks[flags.cn_ip]:
- cn_img, cn_stop, cn_weight = task
- cn_img = HWC3(cn_img)
+ all_steps = steps * async_task.image_number
- # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
- cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
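+ # Budget extra steps for the enhance uov pass so the progress bar spans base generation plus enhancement.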
+ if async_task.enhance_checkbox and async_task.enhance_uov_method != flags.disabled.casefold():
+ enhance_upscale_steps = async_task.performance_selection.steps()
+ if 'upscale' in async_task.enhance_uov_method:
+ if 'fast' in async_task.enhance_uov_method:
+ enhance_upscale_steps = 0
+ else:
+ enhance_upscale_steps = async_task.performance_selection.steps_uov()
+ enhance_upscale_steps, _, _, _ = apply_overrides(async_task, enhance_upscale_steps, height, width)
+ enhance_upscale_steps_total = async_task.image_number * enhance_upscale_steps
+ all_steps += enhance_upscale_steps_total
- task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path)
- if debugging_cn_preprocessor:
- yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
- return
- for task in cn_tasks[flags.cn_ip_face]:
- cn_img, cn_stop, cn_weight = task
- cn_img = HWC3(cn_img)
+ if async_task.enhance_checkbox and len(async_task.enhance_ctrls) != 0:
+ enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
+ all_steps += async_task.image_number * len(async_task.enhance_ctrls) * enhance_steps
- if not skipping_cn_preprocessor:
- cn_img = extras.face_crop.crop_image(cn_img)
-
- # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75
- cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0)
-
- task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path)
- if debugging_cn_preprocessor:
- yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True)
- return
-
- all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face]
-
- if len(all_ip_tasks) > 0:
- pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, all_ip_tasks)
-
- if freeu_enabled:
- print(f'FreeU is enabled!')
- pipeline.final_unet = core.apply_freeu(
- pipeline.final_unet,
- freeu_b1,
- freeu_b2,
- freeu_s1,
- freeu_s2
- )
-
- all_steps = steps * image_number
print(f'[Parameters] Denoising Strength = {denoising_strength}')
@@ -831,163 +1245,47 @@ def worker():
print(f'[Parameters] Initial Latent shape: {log_shape}')
- preparation_time = time.perf_counter() - execution_start_time
+ preparation_time = time.perf_counter() - preparation_start_time
print(f'Preparation time: {preparation_time:.2f} seconds')
- final_sampler_name = sampler_name
- final_scheduler_name = scheduler_name
+ final_scheduler_name = patch_samplers(async_task)
+ print(f'Using {final_scheduler_name} scheduler.')
- if scheduler_name in ['lcm', 'tcd']:
- final_scheduler_name = 'sgm_uniform'
+ async_task.yields.append(['preview', (current_progress, 'Moving model to GPU ...', None)])
- def patch_discrete(unet):
- return core.opModelSamplingDiscrete.patch(
- pipeline.final_unet,
- sampling=scheduler_name,
- zsnr=False)[0]
+ processing_start_time = time.perf_counter()
- if pipeline.final_unet is not None:
- pipeline.final_unet = patch_discrete(pipeline.final_unet)
- if pipeline.final_refiner_unet is not None:
- pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet)
- print(f'Using {scheduler_name} scheduler.')
- elif scheduler_name == 'edm_playground_v2.5':
- final_scheduler_name = 'karras'
-
- def patch_edm(unet):
- return core.opModelSamplingContinuousEDM.patch(
- unet,
- sampling=scheduler_name,
- sigma_max=120.0,
- sigma_min=0.002)[0]
-
- if pipeline.final_unet is not None:
- pipeline.final_unet = patch_edm(pipeline.final_unet)
- if pipeline.final_refiner_unet is not None:
- pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet)
-
- print(f'Using {scheduler_name} scheduler.')
-
- async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)])
+ preparation_steps = current_progress
+ total_count = async_task.image_number
def callback(step, x0, x, total_steps, y):
- done_steps = current_task_id * steps + step
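+ # Reset the per-image accumulator at step 0, then advance by the remaining progress share on every sampling step.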
+ if step == 0:
+ async_task.callback_steps = 0
+ async_task.callback_steps += (100 - preparation_steps) / float(all_steps)
async_task.yields.append(['preview', (
- int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)),
- f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{image_number} ...', y)])
+ int(current_progress + async_task.callback_steps),
+ f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{total_count} ...', y)])
+
+ should_enhance = async_task.enhance_checkbox and (async_task.enhance_uov_method != flags.disabled.casefold() or len(async_task.enhance_ctrls) > 0)
+ show_intermediate_results = len(tasks) > 1 or should_enhance
for current_task_id, task in enumerate(tasks):
- current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(current_task_id * steps) / float(all_steps))
- progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{image_number} ...')
+ progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{async_task.image_number} ...')
execution_start_time = time.perf_counter()
try:
- if async_task.last_stop is not False:
- ldm_patched.modules.model_management.interrupt_current_processing()
- positive_cond, negative_cond = task['c'], task['uc']
+ imgs, img_paths, current_progress = process_task(all_steps, async_task, callback, controlnet_canny_path,
+ controlnet_cpds_path, current_task_id,
+ denoising_strength, final_scheduler_name, goals,
+ initial_latent, async_task.steps, switch, task['c'],
+ task['uc'], task, tiled, use_expansion, width, height,
+ current_progress, preparation_steps,
+ async_task.image_number, show_intermediate_results)
- if 'cn' in goals:
- for cn_flag, cn_path in [
- (flags.cn_canny, controlnet_canny_path),
- (flags.cn_cpds, controlnet_cpds_path)
- ]:
- for cn_img, cn_stop, cn_weight in cn_tasks[cn_flag]:
- positive_cond, negative_cond = core.apply_controlnet(
- positive_cond, negative_cond,
- pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop)
+ current_progress = int(preparation_steps + (100 - preparation_steps) / float(all_steps) * async_task.steps * (current_task_id + 1))
+ images_to_enhance += imgs
- imgs = pipeline.process_diffusion(
- positive_cond=positive_cond,
- negative_cond=negative_cond,
- steps=steps,
- switch=switch,
- width=width,
- height=height,
- image_seed=task['task_seed'],
- callback=callback,
- sampler_name=final_sampler_name,
- scheduler_name=final_scheduler_name,
- latent=initial_latent,
- denoise=denoising_strength,
- tiled=tiled,
- cfg_scale=cfg_scale,
- refiner_swap_method=refiner_swap_method,
- disable_preview=disable_preview
- )
-
- del task['c'], task['uc'], positive_cond, negative_cond # Save memory
-
- if inpaint_worker.current_task is not None:
- imgs = [inpaint_worker.current_task.post_process(x) for x in imgs]
-
- img_paths = []
-
- current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float((current_task_id + 1) * steps) / float(all_steps))
- if modules.config.default_black_out_nsfw or black_out_nsfw:
- progressbar(async_task, current_progress, 'Checking for NSFW content ...')
- imgs = default_censor(imgs)
-
- progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{image_number} to system ...')
-
- for x in imgs:
- d = [('Prompt', 'prompt', task['log_positive_prompt']),
- ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']),
- ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']),
- ('Styles', 'styles',
- str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])),
- ('Performance', 'performance', performance_selection.value)]
-
- if performance_selection.steps() != steps:
- d.append(('Steps', 'steps', steps))
-
- d += [('Resolution', 'resolution', str((width, height))),
- ('Guidance Scale', 'guidance_scale', guidance_scale),
- ('Sharpness', 'sharpness', sharpness),
- ('ADM Guidance', 'adm_guidance', str((
- modules.patch.patch_settings[pid].positive_adm_scale,
- modules.patch.patch_settings[pid].negative_adm_scale,
- modules.patch.patch_settings[pid].adm_scaler_end))),
- ('Base Model', 'base_model', base_model_name),
- ('Refiner Model', 'refiner_model', refiner_model_name),
- ('Refiner Switch', 'refiner_switch', refiner_switch)]
-
- if refiner_model_name != 'None':
- if overwrite_switch > 0:
- d.append(('Overwrite Switch', 'overwrite_switch', overwrite_switch))
- if refiner_swap_method != flags.refiner_swap_method:
- d.append(('Refiner Swap Method', 'refiner_swap_method', refiner_swap_method))
- if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr:
- d.append(
- ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg))
-
- if clip_skip > 1:
- d.append(('CLIP Skip', 'clip_skip', clip_skip))
- d.append(('Sampler', 'sampler', sampler_name))
- d.append(('Scheduler', 'scheduler', scheduler_name))
- d.append(('VAE', 'vae', vae_name))
- d.append(('Seed', 'seed', str(task['task_seed'])))
-
- if freeu_enabled:
- d.append(('FreeU', 'freeu', str((freeu_b1, freeu_b2, freeu_s1, freeu_s2))))
-
- for li, (n, w) in enumerate(loras):
- if n != 'None':
- d.append((f'LoRA {li + 1}', f'lora_combined_{li + 1}', f'{n} : {w}'))
-
- metadata_parser = None
- if save_metadata_to_images:
- metadata_parser = modules.meta_parser.get_metadata_parser(metadata_scheme)
- metadata_parser.set_data(task['log_positive_prompt'], task['positive'],
- task['log_negative_prompt'], task['negative'],
- steps, base_model_name, refiner_model_name, loras, vae_name)
- d.append(('Metadata Scheme', 'metadata_scheme',
- metadata_scheme.value if save_metadata_to_images else save_metadata_to_images))
- d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version))
- img_paths.append(log(x, d, metadata_parser, output_format, task))
-
- yield_result(async_task, img_paths, black_out_nsfw, False,
- do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results)
- except ldm_patched.modules.model_management.InterruptProcessingException as e:
+ except ldm_patched.modules.model_management.InterruptProcessingException:
if async_task.last_stop == 'skip':
print('User skipped')
async_task.last_stop = False
@@ -996,20 +1294,146 @@ def worker():
print('User stopped')
break
+ del task['c'], task['uc'] # Save memory
execution_time = time.perf_counter() - execution_start_time
print(f'Generating and saving time: {execution_time:.2f} seconds')
- async_task.processing = False
+
+ if not should_enhance:
+ print('[Enhance] Skipping, preconditions are not met')
+ stop_processing(async_task, processing_start_time)
+ return
+
+ progressbar(async_task, current_progress, 'Processing enhance ...')
+
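+ # Enhancement accounting: every image passes through each active enhance tab, plus one extra pass when an enhance uov method is set.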
+ active_enhance_tabs = len(async_task.enhance_ctrls)
+ should_process_enhance_uov = async_task.enhance_uov_method != flags.disabled.casefold()
+ if should_process_enhance_uov:
+ active_enhance_tabs += 1
+ total_count = len(images_to_enhance) * active_enhance_tabs
+
+ base_progress = current_progress
+ current_task_id = -1
+ done_steps_upscaling = 0
+ done_steps_inpainting = 0
+ enhance_steps, _, _, _ = apply_overrides(async_task, async_task.original_steps, height, width)
+ for img in images_to_enhance:
+ enhancement_image_start_time = time.perf_counter()
+
+ last_enhance_prompt = async_task.prompt
+ last_enhance_negative_prompt = async_task.negative_prompt
+
+ if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_before:
+ current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
+ all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+ current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
+ async_task.prompt, async_task.negative_prompt, final_scheduler_name, height, img, preparation_steps,
+ switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner, width)
+ if exception_result == 'continue':
+ continue
+ elif exception_result == 'break':
+ break
+
+ # inpaint for all other tabs
+ for (enhance_mask_dino_prompt_text, enhance_prompt, enhance_negative_prompt, enhance_mask_model,
+ enhance_mask_sam_model, enhance_mask_text_threshold, enhance_mask_box_threshold,
+ enhance_mask_sam_max_detections, enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
+ enhance_inpaint_strength, enhance_inpaint_respective_field, enhance_inpaint_erode_or_dilate,
+ enhance_mask_invert) in async_task.enhance_ctrls:
+ current_task_id += 1
+ current_progress = int(base_progress + (100 - preparation_steps) / float(all_steps) * (done_steps_upscaling + done_steps_inpainting))
+ progressbar(async_task, current_progress, f'Preparing enhancement {current_task_id + 1}/{total_count} ...')
+ enhancement_task_start_time = time.perf_counter()
+
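+ # 'sam' masking: GroundingDINO proposes boxes for the text prompt, SAM segments within them, and the merged segments form the inpaint mask.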
+ if enhance_mask_model == 'sam':
+ print(f'[Enhance] Searching for "{enhance_mask_dino_prompt_text}"')
+
+ mask, dino_detection_count, sam_detection_count, sam_detection_on_mask_count = generate_mask_from_image(
+ img, mask_model=enhance_mask_model, sam_options=SAMOptions(
+ dino_prompt=enhance_mask_dino_prompt_text,
+ dino_box_threshold=enhance_mask_box_threshold,
+ dino_text_threshold=enhance_mask_text_threshold,
+ dino_erode_or_dilate=async_task.dino_erode_or_dilate,
+ dino_debug=async_task.debugging_dino,
+ max_detections=enhance_mask_sam_max_detections,
+ model_type=enhance_mask_sam_model,
+ ))
+ if len(mask.shape) == 3:
+ mask = mask[:, :, 0]
+
+ if int(enhance_inpaint_erode_or_dilate) != 0:
+ mask = erode_or_dilate(mask, enhance_inpaint_erode_or_dilate)
+
+ if enhance_mask_invert:
+ mask = 255 - mask
+
+ if async_task.debugging_enhance_masks_checkbox:
+ async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
+ yield_result(async_task, mask, current_progress, async_task.black_out_nsfw, False,
+ async_task.disable_intermediate_results)
+
+ print(f'[Enhance] {dino_detection_count} boxes detected')
+ print(f'[Enhance] {sam_detection_count} segments detected in boxes')
+ print(f'[Enhance] {sam_detection_on_mask_count} segments applied to mask')
+
+ if enhance_mask_model == 'sam' and (
+ dino_detection_count == 0 or (not async_task.debugging_dino and sam_detection_on_mask_count == 0)):
+ print(f'[Enhance] No "{enhance_mask_dino_prompt_text}" detected, skipping')
+ continue
+
+ goals_enhance = ['inpaint']
+
+ try:
+ current_progress, img, enhance_prompt_processed, enhance_negative_prompt_processed = process_enhance(
+ all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
+ current_progress, current_task_id, denoising_strength, enhance_inpaint_disable_initial_latent,
+ enhance_inpaint_engine, enhance_inpaint_respective_field, enhance_inpaint_strength,
+ enhance_prompt, enhance_negative_prompt, final_scheduler_name, goals_enhance, height, img, mask,
+ preparation_steps, enhance_steps, switch, tiled, total_count, use_expansion, use_style,
+ use_synthetic_refiner, width)
+
+ if (should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after
+ and async_task.enhance_uov_prompt_type == flags.enhancement_uov_prompt_type_last_filled):
+ if enhance_prompt_processed != '':
+ last_enhance_prompt = enhance_prompt_processed
+ if enhance_negative_prompt_processed != '':
+ last_enhance_negative_prompt = enhance_negative_prompt_processed
+
+ except ldm_patched.modules.model_management.InterruptProcessingException:
+ if async_task.last_stop == 'skip':
+ print('User skipped')
+ async_task.last_stop = False
+ continue
+ else:
+ print('User stopped')
+ break
+ finally:
+ done_steps_inpainting += enhance_steps
+
+ enhancement_task_time = time.perf_counter() - enhancement_task_start_time
+ print(f'Enhancement time: {enhancement_task_time:.2f} seconds')
+
+ if should_process_enhance_uov and async_task.enhance_uov_processing_order == flags.enhancement_uov_after:
+ current_task_id, done_steps_inpainting, done_steps_upscaling, img, exception_result = enhance_upscale(
+ all_steps, async_task, base_progress, callback, controlnet_canny_path, controlnet_cpds_path,
+ current_task_id, denoising_strength, done_steps_inpainting, done_steps_upscaling, enhance_steps,
+ last_enhance_prompt, last_enhance_negative_prompt, final_scheduler_name, height, img,
+ preparation_steps, switch, tiled, total_count, use_expansion, use_style, use_synthetic_refiner,
+ width)
+ if exception_result == 'continue':
+ continue
+ elif exception_result == 'break':
+ break
+
+ enhancement_image_time = time.perf_counter() - enhancement_image_start_time
+ print(f'Enhancement image time: {enhancement_image_time:.2f} seconds')
+
+ stop_processing(async_task, processing_start_time)
return
while True:
time.sleep(0.01)
if len(async_tasks) > 0:
task = async_tasks.pop(0)
- generate_image_grid = task.args.pop(0)
try:
handler(task)
- if generate_image_grid:
+ if task.generate_image_grid:
build_image_wall(task)
task.yields.append(['finish', task.results])
pipeline.prepare_text_encoder(async_call=True)
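
Note on the arithmetic in the enhancement loop above: preparation occupies the first `preparation_steps` percent of the progress bar, and the remaining share is spread linearly across all diffusion steps. A worked sketch of that formula (the standalone helper is illustrative, not part of the patch):

```python
# Minimal sketch of the progress formula used in the enhancement loop.
# `enhancement_progress` is a hypothetical helper; the names mirror the diff.
def enhancement_progress(base_progress: int, preparation_steps: int,
                         all_steps: int, done_steps: int) -> int:
    # the non-preparation share (100 - preparation_steps) percent is spread
    # linearly over all diffusion steps; done_steps is the sum of
    # done_steps_upscaling and done_steps_inpainting in the worker
    return int(base_progress + (100 - preparation_steps) / float(all_steps) * done_steps)

# e.g. 13% spent on preparation, 90 total steps, 30 steps done:
assert enhancement_progress(13, 13, 90, 30) == 42  # 13 + 87/90 * 30
```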
diff --git a/modules/config.py b/modules/config.py
index 049f6cbc..f8af6599 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -201,6 +201,7 @@ path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../mo
path_safety_checker_models = get_dir_or_set_default('path_safety_checker_models', '../models/safety_checker_models/')
path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/')
path_safety_checker = get_dir_or_set_default('path_safety_checker', '../models/safety_checker/')
+path_sam = get_dir_or_set_default('path_sam', '../models/sam/')
path_outputs = get_path_output()
@@ -500,6 +501,50 @@ example_inpaint_prompts = get_config_item_or_set_default(
validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
expected_type=list
)
+example_enhance_detection_prompts = get_config_item_or_set_default(
+ key='example_enhance_detection_prompts',
+ default_value=[
+ 'face', 'eye', 'mouth', 'hair', 'hand', 'body'
+ ],
+ validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
+ expected_type=list
+)
+default_enhance_tabs = get_config_item_or_set_default(
+ key='default_enhance_tabs',
+ default_value=3,
+ validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
+ expected_type=int
+)
+default_enhance_checkbox = get_config_item_or_set_default(
+ key='default_enhance_checkbox',
+ default_value=False,
+ validator=lambda x: isinstance(x, bool),
+ expected_type=bool
+)
+default_enhance_uov_method = get_config_item_or_set_default(
+ key='default_enhance_uov_method',
+ default_value=modules.flags.disabled,
+ validator=lambda x: x in modules.flags.uov_list,
+    expected_type=str
+)
+default_enhance_uov_processing_order = get_config_item_or_set_default(
+ key='default_enhance_uov_processing_order',
+ default_value=modules.flags.enhancement_uov_before,
+ validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
+    expected_type=str
+)
+default_enhance_uov_prompt_type = get_config_item_or_set_default(
+ key='default_enhance_uov_prompt_type',
+ default_value=modules.flags.enhancement_uov_prompt_type_original,
+ validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
+    expected_type=str
+)
+default_sam_max_detections = get_config_item_or_set_default(
+ key='default_sam_max_detections',
+ default_value=0,
+ validator=lambda x: isinstance(x, int) and 0 <= x <= 10,
+ expected_type=int
+)
default_black_out_nsfw = get_config_item_or_set_default(
key='default_black_out_nsfw',
default_value=False,
@@ -526,13 +571,8 @@ metadata_created_by = get_config_item_or_set_default(
)
example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
+example_enhance_detection_prompts = [[x] for x in example_enhance_detection_prompts]
-default_black_out_nsfw = get_config_item_or_set_default(
- key='default_black_out_nsfw',
- default_value=False,
- validator=lambda x: isinstance(x, bool),
- expected_type=bool
-)
default_inpaint_mask_model = get_config_item_or_set_default(
key='default_inpaint_mask_model',
default_value='isnet-general-use',
@@ -540,6 +580,13 @@ default_inpaint_mask_model = get_config_item_or_set_default(
expected_type=str
)
+default_enhance_inpaint_mask_model = get_config_item_or_set_default(
+ key='default_enhance_inpaint_mask_model',
+ default_value='sam',
+ validator=lambda x: x in modules.flags.inpaint_mask_models,
+ expected_type=str
+)
+
default_inpaint_mask_cloth_category = get_config_item_or_set_default(
key='default_inpaint_mask_cloth_category',
default_value='full',
@@ -549,8 +596,8 @@ default_inpaint_mask_cloth_category = get_config_item_or_set_default(
default_inpaint_mask_sam_model = get_config_item_or_set_default(
key='default_inpaint_mask_sam_model',
- default_value='sam_vit_b_01ec64',
- validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
+ default_value='vit_b',
+    validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
expected_type=str
)
@@ -789,4 +836,43 @@ def downloading_safety_checker_model():
return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin')
+def download_sam_model(sam_model: str) -> str:
+ match sam_model:
+ case 'vit_b':
+ return downloading_sam_vit_b()
+ case 'vit_l':
+ return downloading_sam_vit_l()
+ case 'vit_h':
+ return downloading_sam_vit_h()
+ case _:
+ raise ValueError(f"sam model {sam_model} does not exist.")
+
+
+def downloading_sam_vit_b():
+ load_file_from_url(
+ url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_b_01ec64.pth',
+ model_dir=path_sam,
+ file_name='sam_vit_b_01ec64.pth'
+ )
+ return os.path.join(path_sam, 'sam_vit_b_01ec64.pth')
+
+
+def downloading_sam_vit_l():
+ load_file_from_url(
+ url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_l_0b3195.pth',
+ model_dir=path_sam,
+ file_name='sam_vit_l_0b3195.pth'
+ )
+ return os.path.join(path_sam, 'sam_vit_l_0b3195.pth')
+
+
+def downloading_sam_vit_h():
+ load_file_from_url(
+ url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_h_4b8939.pth',
+ model_dir=path_sam,
+ file_name='sam_vit_h_4b8939.pth'
+ )
+ return os.path.join(path_sam, 'sam_vit_h_4b8939.pth')
+
+
update_files()
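
A hedged usage sketch for the new downloader: `download_sam_model` maps a short model type to the matching checkpoint and returns its local path, with `load_file_from_url` expected to skip the download when the file is already present under `path_sam`:

```python
# Sketch: resolving SAM checkpoints through the helpers added above.
# Assumes load_file_from_url is a no-op when the file already exists on disk.
import modules.config

ckpt_path = modules.config.download_sam_model('vit_b')
print(ckpt_path)  # e.g. ../models/sam/sam_vit_b_01ec64.pth

try:
    modules.config.download_sam_model('vit_xl')  # not a valid model type
except ValueError as err:
    print(err)  # sam model vit_xl does not exist.
```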
diff --git a/modules/flags.py b/modules/flags.py
index 6473045d..c8ea0a0c 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -8,9 +8,15 @@ upscale_15 = 'Upscale (1.5x)'
upscale_2 = 'Upscale (2x)'
upscale_fast = 'Upscale (Fast 2x)'
-uov_list = [
- disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
-]
+uov_list = [disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast]
+
+enhancement_uov_before = "Before First Enhancement"
+enhancement_uov_after = "After Last Enhancement"
+enhancement_uov_processing_order = [enhancement_uov_before, enhancement_uov_after]
+
+enhancement_uov_prompt_type_original = 'Original Prompts'
+enhancement_uov_prompt_type_last_filled = 'Last Filled Enhancement Prompts'
+enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last_filled]
CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]
@@ -76,7 +82,7 @@ output_formats = ['png', 'jpeg', 'webp']
inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_h_4b8939', 'sam_vit_l_0b3195']
+inpaint_mask_sam_model = ['vit_b', 'vit_l', 'vit_h']
inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
inpaint_option_default = 'Inpaint or Outpaint (default)'
@@ -107,7 +113,6 @@ metadata_scheme = [
]
controlnet_image_count = 4
-preparation_step_count = 13
class OutputFormat(Enum):
@@ -163,14 +168,6 @@ class Performance(Enum):
def values(cls) -> list:
return list(map(lambda c: c.value, cls))
- @classmethod
- def values(cls) -> list:
- return list(map(lambda c: c.value, cls))
-
- @classmethod
- def values(cls) -> list:
- return list(map(lambda c: c.value, cls))
-
@classmethod
def by_steps(cls, steps: int | str):
return cls[Steps(int(steps)).name]
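
The removed `values` duplicates were dead code: in a Python class body, a later definition of the same name simply rebinds it, so only the last copy ever took effect. A minimal demonstration:

```python
# Redefining a method in a class body silently shadows the earlier definition,
# which is why the duplicated `values` classmethods removed above were no-ops.
class Demo:
    @classmethod
    def values(cls):
        return 'first'

    @classmethod
    def values(cls):  # rebinds the name; the first definition is unreachable
        return 'second'

assert Demo.values() == 'second'
```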
diff --git a/modules/util.py b/modules/util.py
index 4506b392..cd18081b 100644
--- a/modules/util.py
+++ b/modules/util.py
@@ -390,6 +390,9 @@ def get_enabled_loras(loras: list, remove_none=True) -> list:
def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5,
skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True,
lora_filenames=None) -> tuple[List[Tuple[AnyStr, float]], str]:
+    # work on a copy so the caller's list is not mutated when returning early (no LoRA references found)
+ loras = loras.copy()
+
if lora_filenames is None:
lora_filenames = []
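
The added `loras.copy()` guards against a classic aliasing pitfall: lists are passed by reference, so appending to `loras` before an early return would otherwise mutate the caller's list. An illustrative reduction (both helpers are hypothetical):

```python
# Illustration of the side effect that the copy() above prevents.
def add_lora_without_copy(loras):
    loras.append(('example_lora.safetensors', 0.5))  # mutates the caller's list
    return loras

def add_lora_with_copy(loras):
    loras = loras.copy()  # local copy; the caller's list stays untouched
    loras.append(('example_lora.safetensors', 0.5))
    return loras

user_loras = []
add_lora_without_copy(user_loras)
assert user_loras  # side effect leaked to the caller

user_loras = []
add_lora_with_copy(user_loras)
assert user_loras == []  # caller unaffected
```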
diff --git a/requirements_versions.txt b/requirements_versions.txt
index ebcd0297..9196f1db 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -13,10 +13,10 @@ omegaconf==2.2.3
gradio==3.41.2
pygit2==1.12.2
opencv-contrib-python==4.8.0.74
-diffusers==0.25.1
httpx==0.24.1
onnxruntime==1.16.3
timm==0.9.2
-translators==5.8.9
-rembg==2.0.53
-groundingdino-py==0.4.0
\ No newline at end of file
+translators==5.9.2
+rembg==2.0.57
+groundingdino-py==0.4.0
+segment_anything==1.0
\ No newline at end of file
diff --git a/webui.py b/webui.py
index db1c98d0..fa468352 100644
--- a/webui.py
+++ b/webui.py
@@ -16,6 +16,7 @@ import modules.meta_parser
import args_manager
import copy
import launch
+from extras.inpaint_mask import SAMOptions
from modules.sdxl_styles import legal_style_names
from modules.private_logger import get_current_html_path
@@ -89,6 +90,34 @@ def generate_clicked(task: worker.AsyncTask):
return
+def inpaint_mode_change(mode):
+ assert mode in modules.flags.inpaint_options
+
+ # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+ # inpaint_disable_initial_latent, inpaint_engine,
+ # inpaint_strength, inpaint_respective_field
+
+ if mode == modules.flags.inpaint_option_detail:
+ return [
+ gr.update(visible=True), gr.update(visible=False, value=[]),
+ gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
+ False, 'None', 0.5, 0.0
+ ]
+
+ if mode == modules.flags.inpaint_option_modify:
+ return [
+ gr.update(visible=True), gr.update(visible=False, value=[]),
+ gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+ True, modules.config.default_inpaint_engine_version, 1.0, 0.0
+ ]
+
+ return [
+ gr.update(visible=False, value=''), gr.update(visible=True),
+ gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+ False, modules.config.default_inpaint_engine_version, 1.0, 0.618
+ ]
+
+
reload_javascript()
title = f'Fooocus {fooocus_version.version}'
@@ -146,6 +175,7 @@ with shared.gradio_root:
skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
with gr.Row(elem_classes='advanced_check_row'):
input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
+ enhance_checkbox = gr.Checkbox(label='Enhance', value=modules.config.default_enhance_checkbox, container=False, elem_classes='min_check')
advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
with gr.Row(visible=False) as image_input_panel:
with gr.Tabs():
@@ -223,44 +253,56 @@ with shared.gradio_root:
choices=flags.inpaint_mask_cloth_category,
value=modules.config.default_inpaint_mask_cloth_category,
visible=False)
- inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False)
+ inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible', placeholder='Describe what you want to detect.')
+ example_inpaint_mask_dino_prompt_text = gr.Dataset(
+ samples=modules.config.example_enhance_detection_prompts,
+ label='Detection Prompt Quick List',
+ components=[inpaint_mask_dino_prompt_text],
+ visible=modules.config.default_inpaint_mask_model == 'sam')
+ example_inpaint_mask_dino_prompt_text.click(lambda x: x[0],
+ inputs=example_inpaint_mask_dino_prompt_text,
+ outputs=inpaint_mask_dino_prompt_text,
+ show_progress=False, queue=False)
+
with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
- inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)
inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
+ inpaint_mask_sam_max_detections = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True)
generate_mask_button = gr.Button(value='Generate mask from image')
- def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold):
+ def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_detections, dino_erode_or_dilate, dino_debug):
from extras.inpaint_mask import generate_mask_from_image
extras = {}
+ sam_options = None
if mask_model == 'u2net_cloth_seg':
extras['cloth_category'] = cloth_category
elif mask_model == 'sam':
- extras['sam_prompt_text'] = sam_prompt_text
- extras['sam_model'] = sam_model
- extras['sam_quant'] = sam_quant
- extras['box_threshold'] = box_threshold
- extras['text_threshold'] = text_threshold
+ sam_options = SAMOptions(
+ dino_prompt=dino_prompt_text,
+ dino_box_threshold=box_threshold,
+ dino_text_threshold=text_threshold,
+ dino_erode_or_dilate=dino_erode_or_dilate,
+ dino_debug=dino_debug,
+ max_detections=sam_max_detections,
+ model_type=sam_model
+ )
- return generate_mask_from_image(image, mask_model, extras)
+ mask, _, _, _ = generate_mask_from_image(image, mask_model, extras, sam_options)
- generate_mask_button.click(fn=generate_mask,
- inputs=[
- inpaint_input_image, inpaint_mask_model,
- inpaint_mask_cloth_category,
- inpaint_mask_sam_prompt_text,
- inpaint_mask_sam_model,
- inpaint_mask_sam_quant,
- inpaint_mask_box_threshold,
- inpaint_mask_text_threshold
- ],
- outputs=inpaint_mask_image, show_progress=True, queue=True)
+ return mask
- inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
+
+ inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+ [gr.update(visible=x == 'sam')] * 2 +
+ [gr.Dataset.update(visible=x == 'sam',
+ samples=modules.config.example_enhance_detection_prompts)],
inputs=inpaint_mask_model,
- outputs=[inpaint_mask_cloth_category, inpaint_mask_sam_prompt_text, inpaint_mask_advanced_options],
+ outputs=[inpaint_mask_cloth_category,
+ inpaint_mask_dino_prompt_text,
+ inpaint_mask_advanced_options,
+ example_inpaint_mask_dino_prompt_text],
queue=False, show_progress=False)
with gr.TabItem(label='Describe') as desc_tab:
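
The `inpaint_mask_model.change` handler above relies on Gradio mapping the returned list of updates positionally onto `outputs`. A reduced, self-contained sketch of the same visibility-toggling pattern (assuming Gradio 3.41 as pinned in requirements; component set simplified):

```python
# Reduced sketch of the mask-model visibility toggling wired above.
# Gradio maps the returned list of updates positionally onto `outputs`.
import gradio as gr

with gr.Blocks() as demo:
    model = gr.Dropdown(choices=['isnet-general-use', 'u2net_cloth_seg', 'sam'],
                        value='isnet-general-use', label='Mask generation model')
    cloth = gr.Dropdown(choices=['full', 'upper', 'lower'], visible=False,
                        label='Cloth category')
    prompt = gr.Textbox(visible=False, label='Detection prompt')
    threshold = gr.Slider(0.0, 1.0, value=0.3, visible=False, label='Box Threshold')

    model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
                           [gr.update(visible=x == 'sam')] * 2,
                 inputs=model, outputs=[cloth, prompt, threshold],
                 queue=False, show_progress=False)

demo.launch()  # illustrative only
```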
@@ -283,6 +325,12 @@ with shared.gradio_root:
desc_input_image.upload(trigger_show_image_properties, inputs=desc_input_image,
outputs=desc_image_size, show_progress=False, queue=False)
+ with gr.TabItem(label='Enhance') as enhance_tab:
+ with gr.Row():
+ with gr.Column():
+ enhance_input_image = grh.Image(label='Base image for enhance', source='upload', type='numpy')
+ gr.HTML('\U0001F4D4 Document')
+
with gr.TabItem(label='Metadata') as metadata_tab:
with gr.Column():
metadata_input_image = grh.Image(label='For images created by Fooocus', source='upload', type='filepath')
@@ -304,6 +352,153 @@ with shared.gradio_root:
metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image,
outputs=metadata_json, queue=False, show_progress=True)
+ with gr.Row(visible=modules.config.default_enhance_checkbox) as enhance_input_panel:
+ with gr.Tabs():
+ with gr.TabItem(label='Upscale or Variation'):
+ with gr.Row():
+ with gr.Column():
+ enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list,
+ value=modules.config.default_enhance_uov_method)
+ enhance_uov_processing_order = gr.Radio(label='Order of Processing',
+                                                         info='Use "Before" to enhance small details and "After" for large areas.',
+ choices=flags.enhancement_uov_processing_order,
+ value=modules.config.default_enhance_uov_processing_order)
+ enhance_uov_prompt_type = gr.Radio(label='Prompt',
+ info='Choose which prompt to use for Upscale or Variation.',
+ choices=flags.enhancement_uov_prompt_types,
+ value=modules.config.default_enhance_uov_prompt_type,
+ visible=modules.config.default_enhance_uov_processing_order == flags.enhancement_uov_after)
+
+ enhance_uov_processing_order.change(lambda x: gr.update(visible=x == flags.enhancement_uov_after),
+ inputs=enhance_uov_processing_order,
+ outputs=enhance_uov_prompt_type,
+ queue=False, show_progress=False)
+ gr.HTML('\U0001F4D4 Document')
+ enhance_ctrls = []
+ for index in range(modules.config.default_enhance_tabs):
+ with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
+ enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
+ container=False)
+
+ enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
+ info='Use singular whenever possible',
+ placeholder='Describe what you want to detect.',
+ interactive=True,
+ visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+ example_enhance_mask_dino_prompt_text = gr.Dataset(
+ samples=modules.config.example_enhance_detection_prompts,
+ label='Detection Prompt Quick List',
+ components=[enhance_mask_dino_prompt_text],
+ visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+ example_enhance_mask_dino_prompt_text.click(lambda x: x[0],
+ inputs=example_enhance_mask_dino_prompt_text,
+ outputs=enhance_mask_dino_prompt_text,
+ show_progress=False, queue=False)
+
+ enhance_prompt = gr.Textbox(label="Enhancement positive prompt",
+ placeholder="Uses original prompt instead if empty.",
+ elem_id='enhance_prompt')
+ enhance_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
+ placeholder="Uses original negative prompt instead if empty.",
+ elem_id='enhance_negative_prompt')
+
+ with gr.Accordion("Detection", open=False):
+ # TODO check if limiting to SAM is better
+ enhance_mask_model = gr.Dropdown(label='Mask generation model',
+ choices=flags.inpaint_mask_models,
+ value=modules.config.default_enhance_inpaint_mask_model)
+ enhance_mask_cloth_category = gr.Dropdown(label='Cloth category',
+ choices=flags.inpaint_mask_cloth_category,
+ value=modules.config.default_inpaint_mask_cloth_category,
+ visible=modules.config.default_enhance_inpaint_mask_model == 'u2net_cloth_seg',
+ interactive=True)
+
+ with gr.Accordion("SAM Options",
+ visible=modules.config.default_enhance_inpaint_mask_model == 'sam',
+ open=False) as sam_options:
+ enhance_mask_sam_model = gr.Dropdown(label='SAM model',
+ choices=flags.inpaint_mask_sam_model,
+ value=modules.config.default_inpaint_mask_sam_model,
+ interactive=True)
+ enhance_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0,
+ maximum=1.0, value=0.3, step=0.05,
+ interactive=True)
+ enhance_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
+ maximum=1.0, value=0.25, step=0.05,
+ interactive=True)
+ enhance_mask_sam_max_detections = gr.Slider(label="Maximum number of detections",
+ info="Set to 0 to detect all",
+ minimum=0, maximum=10,
+ value=modules.config.default_sam_max_detections,
+ step=1, interactive=True)
+
+ with gr.Accordion("Inpaint", visible=True, open=False):
+ enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
+ value=modules.flags.inpaint_option_default,
+ label='Method', interactive=True)
+ enhance_inpaint_disable_initial_latent = gr.Checkbox(
+ label='Disable initial latent in inpaint', value=False)
+ enhance_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
+ value=modules.config.default_inpaint_engine_version,
+ choices=flags.inpaint_engine_versions,
+ info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
+ enhance_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
+ minimum=0.0, maximum=1.0, step=0.001,
+ value=1.0,
+ info='Same as the denoising strength in A1111 inpaint. '
+ 'Only used in inpaint, not used in outpaint. '
+                                                                          '(Outpaint always uses 1.0)')
+ enhance_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
+ minimum=0.0, maximum=1.0, step=0.001,
+ value=0.618,
+ info='The area to inpaint. '
+ 'Value 0 is same as "Only Masked" in A1111. '
+ 'Value 1 is same as "Whole Image" in A1111. '
+ 'Only used in inpaint, not used in outpaint. '
+                                                                               '(Outpaint always uses 1.0)')
+ enhance_inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
+ minimum=-64, maximum=64, step=1, value=0,
+ info='Positive value will make white area in the mask larger, '
+ 'negative value will make white area smaller. '
+ '(default is 0, always processed before any mask invert)')
+ enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
+
+ gr.HTML('\U0001F4D4 Document')
+
+ enhance_ctrls += [
+ enhance_enabled,
+ enhance_mask_dino_prompt_text,
+ enhance_prompt,
+ enhance_negative_prompt,
+ enhance_mask_model,
+ enhance_mask_sam_model,
+ enhance_mask_text_threshold,
+ enhance_mask_box_threshold,
+ enhance_mask_sam_max_detections,
+ enhance_inpaint_disable_initial_latent,
+ enhance_inpaint_engine,
+ enhance_inpaint_strength,
+ enhance_inpaint_respective_field,
+ enhance_inpaint_erode_or_dilate,
+ enhance_mask_invert
+ ]
+
+ enhance_inpaint_mode.input(inpaint_mode_change, inputs=enhance_inpaint_mode, outputs=[
+ inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+ enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
+ enhance_inpaint_strength, enhance_inpaint_respective_field
+ ], show_progress=False, queue=False)
+
+ enhance_mask_model.change(
+ lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+ [gr.update(visible=x == 'sam')] * 2 +
+ [gr.Dataset.update(visible=x == 'sam',
+ samples=modules.config.example_enhance_detection_prompts)],
+ inputs=enhance_mask_model,
+ outputs=[enhance_mask_cloth_category, enhance_mask_dino_prompt_text, sam_options,
+ example_enhance_mask_dino_prompt_text],
+ queue=False, show_progress=False)
+
switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
down_js = "() => {viewer_to_bottom();}"
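
Note that `enhance_ctrls` is one flat list: Gradio passes component values positionally, so the append order per tab must stay in lockstep with the tuple unpacked in the worker's enhancement loop. A hedged sketch of the regrouping step (the chunking helper is hypothetical; the real AsyncTask may slice these values differently):

```python
# Hypothetical sketch: regrouping the flat enhance_ctrls values into per-tab
# tuples and dropping disabled tabs, mirroring the positional contract between
# the UI list above and the worker's for-loop destructuring.
CTRLS_PER_TAB = 15  # enhance_enabled + the 14 values unpacked in the worker

def group_enhance_ctrls(flat_values):
    tabs = [flat_values[i:i + CTRLS_PER_TAB]
            for i in range(0, len(flat_values), CTRLS_PER_TAB)]
    # keep only enabled tabs and strip the leading 'enabled' flag so the
    # remaining 14-tuple matches the worker's unpacking order
    return [tuple(tab[1:]) for tab in tabs if tab[0]]
```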
@@ -316,7 +511,10 @@ with shared.gradio_root:
inpaint_tab.select(lambda: 'inpaint', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
ip_tab.select(lambda: 'ip', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
+ enhance_tab.select(lambda: 'enhance', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
+ enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox,
+ outputs=enhance_input_panel, queue=False, show_progress=False, _js=switch_js)
with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column:
with gr.Tab(label='Settings'):
@@ -379,7 +577,7 @@ with shared.gradio_root:
def update_history_link():
if args_manager.args.disable_image_log:
return gr.update(value='')
-
+
return gr.update(value=f'\U0001F4DA History Log')
history_link = gr.HTML()
@@ -544,7 +742,7 @@ with shared.gradio_root:
info='Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.',
visible=modules.config.default_save_metadata_to_images)
- save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme],
+ save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme],
queue=False, show_progress=False)
with gr.Tab(label='Control'):
@@ -570,11 +768,15 @@ with shared.gradio_root:
with gr.Tab(label='Inpaint'):
debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False)
+ debugging_enhance_masks_checkbox = gr.Checkbox(label='Debug Enhance Masks', value=False,
+ info='Show enhance masks in preview and final results')
+ debugging_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
+ info='Use GroundingDINO boxes instead of more detailed SAM masks')
inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
inpaint_engine = gr.Dropdown(label='Inpaint Engine',
value=modules.config.default_inpaint_engine_version,
choices=flags.inpaint_engine_versions,
- info='Version of Fooocus inpaint model')
+ info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
minimum=0.0, maximum=1.0, step=0.001, value=1.0,
info='Same as the denoising strength in A1111 inpaint. '
@@ -590,8 +792,13 @@ with shared.gradio_root:
inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
minimum=-64, maximum=64, step=1, value=0,
info='Positive value will make white area in the mask larger, '
- 'negative value will make white area smaller.'
- '(default is 0, always process before any mask invert)')
+ 'negative value will make white area smaller. '
+ '(default is 0, always processed before any mask invert)')
+ dino_erode_or_dilate = gr.Slider(label='GroundingDINO Box Erode or Dilate',
+ minimum=-64, maximum=64, step=1, value=0,
+ info='Positive value will make white area in the mask larger, '
+ 'negative value will make white area smaller. '
+ '(default is 0, processed before SAM)')
inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False)
invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False)
@@ -701,46 +908,26 @@ with shared.gradio_root:
adm_scaler_negative, refiner_switch, refiner_model, sampler_name,
scheduler_name, adaptive_cfg, refiner_swap_method, negative_prompt, disable_intermediate_results
], queue=False, show_progress=False)
-
+
output_format.input(lambda x: gr.update(output_format=x), inputs=output_format)
-
+
advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column,
queue=False, show_progress=False) \
.then(fn=lambda: None, _js='refresh_grid_delayed', queue=False, show_progress=False)
- def inpaint_mode_change(mode):
- assert mode in modules.flags.inpaint_options
-
- # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
- # inpaint_disable_initial_latent, inpaint_engine,
- # inpaint_strength, inpaint_respective_field
-
- if mode == modules.flags.inpaint_option_detail:
- return [
- gr.update(visible=True), gr.update(visible=False, value=[]),
- gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
- False, 'None', 0.5, 0.0
- ]
-
- if mode == modules.flags.inpaint_option_modify:
- return [
- gr.update(visible=True), gr.update(visible=False, value=[]),
- gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
- True, modules.config.default_inpaint_engine_version, 1.0, 0.0
- ]
-
- return [
- gr.update(visible=False, value=''), gr.update(visible=True),
- gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
- False, modules.config.default_inpaint_engine_version, 1.0, 0.618
- ]
-
inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[
inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
inpaint_disable_initial_latent, inpaint_engine,
inpaint_strength, inpaint_respective_field
], show_progress=False, queue=False)
+ generate_mask_button.click(fn=generate_mask,
+ inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
+ inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
+ inpaint_mask_box_threshold, inpaint_mask_text_threshold,
+ inpaint_mask_sam_max_detections, dino_erode_or_dilate, debugging_dino],
+ outputs=inpaint_mask_image, show_progress=True, queue=True)
+
ctrls = [currentTask, generate_image_grid]
ctrls += [
prompt, negative_prompt, translate_prompts, style_selections,
@@ -766,6 +953,10 @@ with shared.gradio_root:
ctrls += [save_metadata_to_images, metadata_scheme]
ctrls += ip_ctrls
+ ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox,
+ enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order,
+ enhance_uov_prompt_type]
+ ctrls += enhance_ctrls
def parse_meta(raw_prompt_txt, is_generating):
loaded_json = None
@@ -828,15 +1019,18 @@ with shared.gradio_root:
desc_btn.click(trigger_describe, inputs=[desc_method, desc_input_image],
outputs=[prompt, style_selections], show_progress=True, queue=True)
- if args_manager.args.enable_describe_uov_image:
- def trigger_uov_describe(mode, img, prompt):
+ if args_manager.args.enable_auto_describe_image:
+ def trigger_auto_describe(mode, img, prompt):
# keep prompt if not empty
if prompt == '':
return trigger_describe(mode, img)
return gr.update(), gr.update()
- uov_input_image.upload(trigger_uov_describe, inputs=[desc_method, uov_input_image, prompt],
- outputs=[prompt, style_selections], show_progress=True, queue=True)
+ uov_input_image.upload(trigger_auto_describe, inputs=[desc_method, uov_input_image, prompt],
+ outputs=[prompt, style_selections], show_progress=True, queue=True)
+
+ enhance_input_image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox, queue=False, show_progress=False) \
+ .then(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt], outputs=[prompt, style_selections], show_progress=True, queue=True)
def dump_default_english_config():
from modules.localization import dump_english_config
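
Finally, a reduced sketch of the upload chaining used for `enhance_input_image` above: the first handler ticks the Enhance checkbox, and the chained `.then()` runs auto-describe only when the prompt is empty (Gradio 3.41 event chaining; the describe stub is illustrative):

```python
# Reduced sketch of the upload -> checkbox -> auto-describe chaining.
import gradio as gr

def describe_stub(img):
    return 'a photo of a cat'  # stand-in for the real Describe pipeline

with gr.Blocks() as demo:
    enhance_checkbox = gr.Checkbox(label='Enhance', value=False)
    prompt = gr.Textbox(label='Prompt')
    image = gr.Image(type='numpy')

    def auto_describe(img, current_prompt):
        # keep an existing prompt; only describe when the box is empty
        return describe_stub(img) if current_prompt == '' else gr.update()

    image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox,
                 queue=False, show_progress=False) \
        .then(auto_describe, inputs=[image, prompt], outputs=prompt,
              show_progress=True, queue=True)

demo.launch()  # illustrative only
```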