Merge branch 'feature/multi-step-rendering'

commit 89f8725228 by Manuel Schmid, 2024-06-25 21:10:58 +02:00
GPG Key ID: 32C4F7569B40B84B (no known key found for this signature in database)
15 changed files with 2056 additions and 897 deletions


@ -28,8 +28,8 @@ args_parser.parser.add_argument("--disable-metadata", action='store_true',
args_parser.parser.add_argument("--disable-preset-download", action='store_true',
help="Disables downloading models for presets", default=False)
args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true',
help="Disables automatic description of uov images when prompt is empty", default=False)
args_parser.parser.add_argument("--enable-auto-describe-image", action='store_true',
help="Enables automatic description of uov and enhance image when prompt is empty", default=False)
args_parser.parser.add_argument("--always-download-new-model", action='store_true',
help="Always download newer models ", default=False)


@ -99,7 +99,7 @@ div:has(> #positive_prompt) {
}
.advanced_check_row {
-    width: 250px !important;
+    width: 330px !important;
}
.min_check {


@ -0,0 +1,24 @@
# https://github.com/sail-sg/EditAnything/blob/main/sam2groundingdino_edit.py
import numpy as np
from PIL import Image

from extras.inpaint_mask import SAMOptions, generate_mask_from_image

original_image = Image.open('cat.webp')
image = np.array(original_image, dtype=np.uint8)

sam_options = SAMOptions(
    dino_prompt='eye',
    dino_box_threshold=0.3,
    dino_text_threshold=0.25,
    dino_erode_or_dilate=0,
    dino_debug=False,
    max_detections=2,
    model_type='vit_b'
)

mask_image, _, _, _ = generate_mask_from_image(image, sam_options=sam_options)

merged_masks_img = Image.fromarray(mask_image)
merged_masks_img.show()


@ -25,7 +25,7 @@ class GroundingDinoModel(Model):
caption: str,
box_threshold: float = 0.35,
text_threshold: float = 0.25
-) -> Tuple[sv.Detections, List[str]]:
+) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
if self.model is None:
filename = load_file_from_url(
url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
@ -56,7 +56,7 @@ class GroundingDinoModel(Model):
source_w=source_w,
boxes=boxes,
logits=logits)
-return detections, phrases
+return detections, boxes, logits, phrases
def predict(
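
The detector now also returns the raw boxes and logits so downstream code can post-process them. A consumption sketch (illustrative, not part of the diff); `image` is any HWC uint8 numpy array:

    detections, boxes, logits, phrases = default_groundingdino(
        image=image,
        caption='eye',
        box_threshold=0.3,
        text_threshold=0.25
    )
    # boxes are normalized cxcywh tensors; extras/inpaint_mask.py converts
    # them to absolute xyxy pixel coordinates before handing them to SAM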


@ -41,7 +41,7 @@ class Censor:
model_management.load_model_gpu(self.safety_checker_model)
single = False
-        if not isinstance(images, list) or isinstance(images, np.ndarray):
+        if not isinstance(images, (list, np.ndarray)):
images = [images]
single = True
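
The rewritten check changes behavior for numpy batches; a small sketch of the difference (illustrative values, not from the diff):

    import numpy as np

    batch = np.zeros((2, 64, 64, 3), dtype=np.uint8)  # two stacked images
    # old: `not isinstance(batch, list)` is True, so the whole batch was
    #      wrapped as [batch] and scanned as a single image
    # new: isinstance(batch, (list, np.ndarray)) is True, so the batch is
    #      left as-is and iterated image by image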


@ -1,42 +1,130 @@
-from PIL import Image
+import sys
+
+import modules.config
 import numpy as np
 import torch
-from rembg import remove, new_session
 from extras.GroundingDINO.util.inference import default_groundingdino
+from extras.sam.predictor import SamPredictor
+from rembg import remove, new_session
+from segment_anything import sam_model_registry
+from segment_anything.utils.amg import remove_small_regions

-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-
-def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold):
-    # run grounding dino model
-    boxes, _ = default_groundingdino(
-        image=np.array(input_image),
-        caption=text_prompt,
-        box_threshold=box_threshold,
-        text_threshold=text_threshold
-    )
-
-    return boxes.xyxy
-
-
-def generate_mask_from_image(image, mask_model, extras):
+
+class SAMOptions:
+    def __init__(self,
+                 # GroundingDINO
+                 dino_prompt: str = '',
+                 dino_box_threshold=0.3,
+                 dino_text_threshold=0.25,
+                 dino_erode_or_dilate=0,
+                 dino_debug=False,
+
+                 # SAM
+                 max_detections=2,
+                 model_type='vit_b'
+                 ):
+        self.dino_prompt = dino_prompt
+        self.dino_box_threshold = dino_box_threshold
+        self.dino_text_threshold = dino_text_threshold
+        self.dino_erode_or_dilate = dino_erode_or_dilate
+        self.dino_debug = dino_debug
+        self.max_detections = max_detections
+        self.model_type = model_type
+
+
+def optimize_masks(masks: torch.Tensor) -> torch.Tensor:
+    """
+    Removes small disconnected regions and holes from the predicted masks.
+    """
+    fine_masks = []
+    for mask in masks.to('cpu').numpy():  # masks: [num_masks, 1, h, w]
+        fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0])
+    masks = np.stack(fine_masks, axis=0)[:, np.newaxis]
+    return torch.from_numpy(masks)
+
+
+def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=None,
+                             sam_options: SAMOptions | None = SAMOptions()) -> tuple[np.ndarray | None, int | None, int | None, int | None]:
+    dino_detection_count = 0
+    sam_detection_count = 0
+    sam_detection_on_mask_count = 0
+
     if image is None:
-        return
+        return None, dino_detection_count, sam_detection_count, sam_detection_on_mask_count

     if extras is None:
         extras = {}

     if 'image' in image:
         image = image['image']

-    if mask_model == 'sam':
-        boxes = run_grounded_sam(Image.fromarray(image), extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
-        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes
-        extras['sam_prompt'] = []
-        for idx, box in enumerate(boxes):
-            extras['sam_prompt'] += [{"type": "rectangle", "data": box.tolist()}]
-
-    return remove(
-        image,
-        session=new_session(mask_model, **extras),
-        only_mask=True,
-        **extras
-    )
+    if mask_model != 'sam' or sam_options is None:
+        result = remove(
+            image,
+            session=new_session(mask_model, **extras),
+            only_mask=True,
+            **extras
+        )
+        return result, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
+    detections, boxes, logits, phrases = default_groundingdino(
+        image=image,
+        caption=sam_options.dino_prompt,
+        box_threshold=sam_options.dino_box_threshold,
+        text_threshold=sam_options.dino_text_threshold
+    )
+
+    H, W = image.shape[0], image.shape[1]
+    boxes = boxes * torch.Tensor([W, H, W, H])
+    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
+    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
+
+    sam_checkpoint = modules.config.download_sam_model(sam_options.model_type)
+    sam = sam_model_registry[sam_options.model_type](checkpoint=sam_checkpoint)
+    sam_predictor = SamPredictor(sam)
+    final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
+    dino_detection_count = boxes.size(0)
+
+    if dino_detection_count > 0:
+        sam_predictor.set_image(image)
+
+        if sam_options.dino_erode_or_dilate != 0:
+            for index in range(boxes.size(0)):
+                assert boxes.size(1) == 4
+                boxes[index][0] -= sam_options.dino_erode_or_dilate
+                boxes[index][1] -= sam_options.dino_erode_or_dilate
+                boxes[index][2] += sam_options.dino_erode_or_dilate
+                boxes[index][3] += sam_options.dino_erode_or_dilate
+
+        if sam_options.dino_debug:
+            from PIL import ImageDraw, Image
+            debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+            draw = ImageDraw.Draw(debug_dino_image)
+            for box in boxes.numpy():
+                draw.rectangle(box.tolist(), fill="white")
+            return np.array(debug_dino_image), dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
+        transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
+        masks, _, _ = sam_predictor.predict_torch(
+            point_coords=None,
+            point_labels=None,
+            boxes=transformed_boxes,
+            multimask_output=False,
+        )
+
+        masks = optimize_masks(masks)
+        sam_detection_count = len(masks)
+        if sam_options.max_detections == 0:
+            sam_options.max_detections = sys.maxsize
+        sam_objects = min(len(logits), sam_options.max_detections)
+        for obj_ind in range(sam_objects):
+            mask_tensor = masks[obj_ind][0]
+            final_mask_tensor += mask_tensor
+            sam_detection_on_mask_count += 1
+
+    final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy()
+    mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255
+    mask_image = np.array(mask_image, dtype=np.uint8)
+    return mask_image, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
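
The box arithmetic above converts GroundingDINO's normalized center-x/center-y/width/height output into absolute corner coordinates. A worked example with assumed values:

    import torch

    boxes = torch.tensor([[0.5, 0.5, 0.2, 0.4]])    # one cxcywh box from DINO
    W, H = 640, 480
    boxes = boxes * torch.Tensor([W, H, W, H])      # [[320., 240., 128., 192.]]
    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2  # top-left     -> [256., 144.]
    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]      # bottom-right -> [384., 336.]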

extras/sam/predictor.py (new file, 288 lines)

@ -0,0 +1,288 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import torch

from ldm_patched.modules import model_management
from ldm_patched.modules.model_patcher import ModelPatcher
from segment_anything.modeling import Sam
from typing import Optional, Tuple
from segment_anything.utils.transforms import ResizeLongestSide


class SamPredictor:
    def __init__(
            self,
            model: Sam,
            load_device=model_management.text_encoder_device(),
            offload_device=model_management.text_encoder_offload_device()
    ) -> None:
        """
        Uses SAM to calculate the image embedding for an image, and then
        allows repeated, efficient mask prediction given prompts.

        Arguments:
          model (Sam): The model to use for mask prediction.
        """
        super().__init__()
        self.load_device = load_device
        self.offload_device = offload_device
        # can't use model.half() here as slow_conv2d_cpu is not implemented for half
        model.to(self.offload_device)
        self.patcher = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)
        self.transform = ResizeLongestSide(model.image_encoder.img_size)
        self.reset_image()

    def set_image(
            self,
            image: np.ndarray,
            image_format: str = "RGB",
    ) -> None:
        """
        Calculates the image embeddings for the provided image, allowing
        masks to be predicted with the 'predict' method.

        Arguments:
          image (np.ndarray): The image for calculating masks. Expects an
            image in HWC uint8 format, with pixel values in [0, 255].
          image_format (str): The color format of the image, in ['RGB', 'BGR'].
        """
        assert image_format in [
            "RGB",
            "BGR",
        ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
        if image_format != self.patcher.model.image_format:
            image = image[..., ::-1]

        # Transform the image to the form expected by the model
        input_image = self.transform.apply_image(image)
        input_image_torch = torch.as_tensor(input_image, device=self.load_device)
        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]

        self.set_torch_image(input_image_torch, image.shape[:2])

    @torch.no_grad()
    def set_torch_image(
            self,
            transformed_image: torch.Tensor,
            original_image_size: Tuple[int, ...],
    ) -> None:
        """
        Calculates the image embeddings for the provided image, allowing
        masks to be predicted with the 'predict' method. Expects the input
        image to be already transformed to the format expected by the model.

        Arguments:
          transformed_image (torch.Tensor): The input image, with shape
            1x3xHxW, which has been transformed with ResizeLongestSide.
          original_image_size (tuple(int, int)): The size of the image
            before transformation, in (H, W) format.
        """
        assert (
            len(transformed_image.shape) == 4
            and transformed_image.shape[1] == 3
            and max(*transformed_image.shape[2:]) == self.patcher.model.image_encoder.img_size
        ), f"set_torch_image input must be BCHW with long side {self.patcher.model.image_encoder.img_size}."
        self.reset_image()

        self.original_size = original_image_size
        self.input_size = tuple(transformed_image.shape[-2:])
        model_management.load_model_gpu(self.patcher)
        input_image = self.patcher.model.preprocess(transformed_image.to(self.load_device))
        self.features = self.patcher.model.image_encoder(input_image)
        self.is_image_set = True

    def predict(
            self,
            point_coords: Optional[np.ndarray] = None,
            point_labels: Optional[np.ndarray] = None,
            box: Optional[np.ndarray] = None,
            mask_input: Optional[np.ndarray] = None,
            multimask_output: bool = True,
            return_logits: bool = False,
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Predict masks for the given input prompts, using the currently set image.

        Arguments:
          point_coords (np.ndarray or None): A Nx2 array of point prompts to the
            model. Each point is in (X,Y) in pixels.
          point_labels (np.ndarray or None): A length N array of labels for the
            point prompts. 1 indicates a foreground point and 0 indicates a
            background point.
          box (np.ndarray or None): A length 4 array giving a box prompt to the
            model, in XYXY format.
          mask_input (np.ndarray): A low resolution mask input to the model, typically
            coming from a previous prediction iteration. Has form 1xHxW, where
            for SAM, H=W=256.
          multimask_output (bool): If true, the model will return three masks.
            For ambiguous input prompts (such as a single click), this will often
            produce better masks than a single prediction. If only a single
            mask is needed, the model's predicted quality score can be used
            to select the best mask. For non-ambiguous prompts, such as multiple
            input prompts, multimask_output=False can give better results.
          return_logits (bool): If true, returns un-thresholded mask logits
            instead of a binary mask.

        Returns:
          (np.ndarray): The output masks in CxHxW format, where C is the
            number of masks, and (H, W) is the original image size.
          (np.ndarray): An array of length C containing the model's
            predictions for the quality of each mask.
          (np.ndarray): An array of shape CxHxW, where C is the number
            of masks and H=W=256. These low resolution logits can be passed to
            a subsequent iteration as mask input.
        """
        if not self.is_image_set:
            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")

        # Transform input prompts
        coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
        if point_coords is not None:
            assert (
                point_labels is not None
            ), "point_labels must be supplied if point_coords is supplied."
            point_coords = self.transform.apply_coords(point_coords, self.original_size)
            coords_torch = torch.as_tensor(point_coords, dtype=torch.float, device=self.load_device)
            labels_torch = torch.as_tensor(point_labels, dtype=torch.int, device=self.load_device)
            coords_torch, labels_torch = coords_torch[None, :, :], labels_torch[None, :]
        if box is not None:
            box = self.transform.apply_boxes(box, self.original_size)
            box_torch = torch.as_tensor(box, dtype=torch.float, device=self.load_device)
            box_torch = box_torch[None, :]
        if mask_input is not None:
            mask_input_torch = torch.as_tensor(mask_input, dtype=torch.float, device=self.load_device)
            mask_input_torch = mask_input_torch[None, :, :, :]

        masks, iou_predictions, low_res_masks = self.predict_torch(
            coords_torch,
            labels_torch,
            box_torch,
            mask_input_torch,
            multimask_output,
            return_logits=return_logits,
        )

        masks = masks[0].detach().cpu().numpy()
        iou_predictions = iou_predictions[0].detach().cpu().numpy()
        low_res_masks = low_res_masks[0].detach().cpu().numpy()
        return masks, iou_predictions, low_res_masks

    @torch.no_grad()
    def predict_torch(
            self,
            point_coords: Optional[torch.Tensor],
            point_labels: Optional[torch.Tensor],
            boxes: Optional[torch.Tensor] = None,
            mask_input: Optional[torch.Tensor] = None,
            multimask_output: bool = True,
            return_logits: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Predict masks for the given input prompts, using the currently set image.
        Input prompts are batched torch tensors and are expected to already be
        transformed to the input frame using ResizeLongestSide.

        Arguments:
          point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
            model. Each point is in (X,Y) in pixels.
          point_labels (torch.Tensor or None): A BxN array of labels for the
            point prompts. 1 indicates a foreground point and 0 indicates a
            background point.
          boxes (torch.Tensor or None): A Bx4 array giving a box prompt to the
            model, in XYXY format.
          mask_input (torch.Tensor): A low resolution mask input to the model, typically
            coming from a previous prediction iteration. Has form Bx1xHxW, where
            for SAM, H=W=256. Masks returned by a previous iteration of the
            predict method do not need further transformation.
          multimask_output (bool): If true, the model will return three masks.
            For ambiguous input prompts (such as a single click), this will often
            produce better masks than a single prediction. If only a single
            mask is needed, the model's predicted quality score can be used
            to select the best mask. For non-ambiguous prompts, such as multiple
            input prompts, multimask_output=False can give better results.
          return_logits (bool): If true, returns un-thresholded mask logits
            instead of a binary mask.

        Returns:
          (torch.Tensor): The output masks in BxCxHxW format, where C is the
            number of masks, and (H, W) is the original image size.
          (torch.Tensor): An array of shape BxC containing the model's
            predictions for the quality of each mask.
          (torch.Tensor): An array of shape BxCxHxW, where C is the number
            of masks and H=W=256. These low res logits can be passed to
            a subsequent iteration as mask input.
        """
        if not self.is_image_set:
            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")

        if point_coords is not None:
            points = (point_coords.to(self.load_device), point_labels.to(self.load_device))
        else:
            points = None

        # load
        if boxes is not None:
            boxes = boxes.to(self.load_device)
        if mask_input is not None:
            mask_input = mask_input.to(self.load_device)

        model_management.load_model_gpu(self.patcher)

        # Embed prompts
        sparse_embeddings, dense_embeddings = self.patcher.model.prompt_encoder(
            points=points,
            boxes=boxes,
            masks=mask_input,
        )

        # Predict masks
        low_res_masks, iou_predictions = self.patcher.model.mask_decoder(
            image_embeddings=self.features,
            image_pe=self.patcher.model.prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings=sparse_embeddings,
            dense_prompt_embeddings=dense_embeddings,
            multimask_output=multimask_output,
        )

        # Upscale the masks to the original image resolution
        masks = self.patcher.model.postprocess_masks(low_res_masks, self.input_size, self.original_size)

        if not return_logits:
            masks = masks > self.patcher.model.mask_threshold

        return masks, iou_predictions, low_res_masks

    def get_image_embedding(self) -> torch.Tensor:
        """
        Returns the image embeddings for the currently set image, with
        shape 1xCxHxW, where C is the embedding dimension and (H,W) are
        the embedding spatial dimension of SAM (typically C=256, H=W=64).
        """
        if not self.is_image_set:
            raise RuntimeError(
                "An image must be set with .set_image(...) to generate an embedding."
            )
        assert self.features is not None, "Features must exist if an image has been set."
        return self.features

    @property
    def device(self) -> torch.device:
        return self.patcher.model.device

    def reset_image(self) -> None:
        """Resets the currently set image."""
        self.is_image_set = False
        self.features = None
        self.orig_h = None
        self.orig_w = None
        self.input_h = None
        self.input_w = None
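
A minimal usage sketch for the patched predictor (not part of the commit; assumes the vit_b checkpoint is already on disk, e.g. via modules.config.download_sam_model('vit_b')):

    import numpy as np
    from segment_anything import sam_model_registry
    from extras.sam.predictor import SamPredictor

    sam = sam_model_registry['vit_b'](checkpoint='sam_vit_b_01ec64.pth')
    predictor = SamPredictor(sam)  # model is kept on the offload device until needed
    predictor.set_image(np.zeros((480, 640, 3), dtype=np.uint8))  # HWC uint8
    masks, scores, low_res = predictor.predict(box=np.array([256, 144, 384, 336]))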


@ -1 +1 @@
-version = '2.4.3 (mashb1t)'
+version = '2.5.0-rc6 (mashb1t)'


@ -44,14 +44,7 @@
"Top": "Top",
"Bottom": "Bottom",
"* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)",
"Mask generation model": "Mask generation model",
"Cloth category": "Cloth category",
"Segmentation prompt": "Segmentation prompt",
"Advanced options": "Advanced options",
"SAM model": "SAM model",
"Quantization": "Quantization",
"Box Threshold": "Box Threshold",
"Text Threshold": "Text Threshold",
"Generate mask from image": "Generate mask from image",
"Setting": "Setting",
"Style": "Style",
@ -377,10 +370,14 @@
"Disable preview during generation.": "Disable preview during generation.",
"Disable Intermediate Results": "Disable Intermediate Results",
"Disable intermediate results during generation, only show final gallery.": "Disable intermediate results during generation, only show final gallery.",
"Debug Inpaint Preprocessing": "Debug Inpaint Preprocessing",
"Debug GroundingDINO": "Debug GroundingDINO",
"Used for SAM object detection and box generation": "Used for SAM object detection and box generation",
"GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate",
"Inpaint Engine": "Inpaint Engine",
"v1": "v1",
"Version of Fooocus inpaint model": "Version of Fooocus inpaint model",
"v2.5": "v2.5",
"v2.6": "v2.6",
"Control Debug": "Control Debug",
"Debug Preprocessors": "Debug Preprocessors",
"Mixing Image Prompt and Vary/Upscale": "Mixing Image Prompt and Vary/Upscale",
@ -410,5 +407,63 @@
"Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.": "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.",
"fooocus (json)": "fooocus (json)",
"a1111 (plain text)": "a1111 (plain text)",
"Unsupported image type in input": "Unsupported image type in input"
"Unsupported image type in input": "Unsupported image type in input",
"Enhance": "Enhance",
"Detection prompt": "Detection prompt",
"Detection Prompt Quick List": "Detection Prompt Quick List",
"Maximum number of detections": "Maximum number of detections",
"Base image for enhance": "Base image for enhance",
"Order of Processing": "Order of Processing",
"Use before for enhancement of small details and after for large areas.": "Use before for enhancement of small details and after for large areas.",
"Before First Enhancement": "Before First Enhancement",
"After Last Enhancement": "After Last Enhancement",
"Prompt Type": "Prompt Type",
"Choose which prompt to use for Upscale or Variation.": "Choose which prompt to use for Upscale or Variation.",
"Original Prompts": "Original Prompts",
"Last Filled Enhancement Prompts": "Last Filled Enhancement Prompts",
"Enable": "Enable",
"Describe what you want to detect.": "Describe what you want to detect.",
"Enhancement positive prompt": "Enhancement positive prompt",
"Uses original prompt instead if empty.": "Uses original prompt instead if empty.",
"Enhancement negative prompt": "Enhancement negative prompt",
"Uses original negative prompt instead if empty.": "Uses original negative prompt instead if empty.",
"Detection": "Detection",
"u2net": "u2net",
"u2netp": "u2netp",
"u2net_human_seg": "u2net_human_seg",
"u2net_cloth_seg": "u2net_cloth_seg",
"silueta": "silueta",
"isnet-general-use": "isnet-general-use",
"isnet-anime": "isnet-anime",
"sam": "sam",
"Mask generation model": "Mask generation model",
"Cloth category": "Cloth category",
"Use singular whenever possible": "Use singular whenever possible",
"full": "full",
"upper": "upper",
"lower": "lower",
"SAM Options": "SAM Options",
"SAM model": "SAM model",
"vit_b": "vit_b",
"vit_l": "vit_l",
"vit_h": "vit_h",
"Box Threshold": "Box Threshold",
"Text Threshold": "Text Threshold",
"Set to 0 to detect all": "Set to 0 to detect all",
"Inpaint": "Inpaint",
"Inpaint or Outpaint (default)": "Inpaint or Outpaint (default)",
"Improve Detail (face, hand, eyes, etc.)": "Improve Detail (face, hand, eyes, etc.)",
"Modify Content (add objects, change background, etc.)": "Modify Content (add objects, change background, etc.)",
"Disable initial latent in inpaint": "Disable initial latent in inpaint",
"Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.",
"Inpaint Denoising Strength": "Inpaint Denoising Strength",
"Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
"Inpaint Respective Field": "Inpaint Respective Field",
"The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
"Mask Erode or Dilate": "Mask Erode or Dilate",
"Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)": "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)",
"Invert Mask": "Invert Mask",
"Debug Enhance Masks": "Debug Enhance Masks",
"Show enhance masks in preview and final results": "Show enhance masks in preview and final results",
"Use GroundingDINO boxes instead of more detailed SAM masks": "Use GroundingDINO boxes instead of more detailed SAM masks"
}

File diff suppressed because it is too large.


@ -201,6 +201,7 @@ path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../mo
path_safety_checker_models = get_dir_or_set_default('path_safety_checker_models', '../models/safety_checker_models/')
path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/')
path_safety_checker = get_dir_or_set_default('path_safety_checker', '../models/safety_checker/')
path_sam = get_dir_or_set_default('path_sam', '../models/sam/')
path_outputs = get_path_output()
@ -500,6 +501,50 @@ example_inpaint_prompts = get_config_item_or_set_default(
validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
expected_type=list
)
example_enhance_detection_prompts = get_config_item_or_set_default(
    key='example_enhance_detection_prompts',
    default_value=[
        'face', 'eye', 'mouth', 'hair', 'hand', 'body'
    ],
    validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
    expected_type=list
)
default_enhance_tabs = get_config_item_or_set_default(
    key='default_enhance_tabs',
    default_value=3,
    validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
    expected_type=int
)
default_enhance_checkbox = get_config_item_or_set_default(
    key='default_enhance_checkbox',
    default_value=False,
    validator=lambda x: isinstance(x, bool),
    expected_type=bool
)
default_enhance_uov_method = get_config_item_or_set_default(
    key='default_enhance_uov_method',
    default_value=modules.flags.disabled,
    validator=lambda x: x in modules.flags.uov_list,
    expected_type=str
)
default_enhance_uov_processing_order = get_config_item_or_set_default(
    key='default_enhance_uov_processing_order',
    default_value=modules.flags.enhancement_uov_before,
    validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
    expected_type=str
)
default_enhance_uov_prompt_type = get_config_item_or_set_default(
    key='default_enhance_uov_prompt_type',
    default_value=modules.flags.enhancement_uov_prompt_type_original,
    validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
    expected_type=str
)
default_sam_max_detections = get_config_item_or_set_default(
    key='default_sam_max_detections',
    default_value=0,
    validator=lambda x: isinstance(x, int) and 0 <= x <= 10,
    expected_type=int
)
default_black_out_nsfw = get_config_item_or_set_default(
key='default_black_out_nsfw',
default_value=False,
@ -526,13 +571,8 @@ metadata_created_by = get_config_item_or_set_default(
)
example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
+example_enhance_detection_prompts = [[x] for x in example_enhance_detection_prompts]

-default_black_out_nsfw = get_config_item_or_set_default(
-    key='default_black_out_nsfw',
-    default_value=False,
-    validator=lambda x: isinstance(x, bool),
-    expected_type=bool
-)
default_inpaint_mask_model = get_config_item_or_set_default(
key='default_inpaint_mask_model',
default_value='isnet-general-use',
@ -540,6 +580,13 @@ default_inpaint_mask_model = get_config_item_or_set_default(
expected_type=str
)
default_enhance_inpaint_mask_model = get_config_item_or_set_default(
    key='default_enhance_inpaint_mask_model',
    default_value='sam',
    validator=lambda x: x in modules.flags.inpaint_mask_models,
    expected_type=str
)
default_inpaint_mask_cloth_category = get_config_item_or_set_default(
key='default_inpaint_mask_cloth_category',
default_value='full',
@ -549,8 +596,8 @@ default_inpaint_mask_cloth_category = get_config_item_or_set_default(
default_inpaint_mask_sam_model = get_config_item_or_set_default(
key='default_inpaint_mask_sam_model',
-    default_value='sam_vit_b_01ec64',
+    default_value='vit_b',
     validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
expected_type=str
)
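
All of the new defaults above read from the user config; an illustrative config.txt fragment overriding them (values are the shipped defaults, assembled here for reference):

    {
        "default_enhance_tabs": 3,
        "default_enhance_checkbox": false,
        "default_enhance_uov_method": "Disabled",
        "default_enhance_uov_processing_order": "Before First Enhancement",
        "default_enhance_uov_prompt_type": "Original Prompts",
        "default_sam_max_detections": 0,
        "default_enhance_inpaint_mask_model": "sam",
        "default_inpaint_mask_sam_model": "vit_b"
    }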
@ -789,4 +836,43 @@ def downloading_safety_checker_model():
return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin')
def download_sam_model(sam_model: str) -> str:
    match sam_model:
        case 'vit_b':
            return downloading_sam_vit_b()
        case 'vit_l':
            return downloading_sam_vit_l()
        case 'vit_h':
            return downloading_sam_vit_h()
        case _:
            raise ValueError(f"sam model {sam_model} does not exist.")


def downloading_sam_vit_b():
    load_file_from_url(
        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_b_01ec64.pth',
        model_dir=path_sam,
        file_name='sam_vit_b_01ec64.pth'
    )
    return os.path.join(path_sam, 'sam_vit_b_01ec64.pth')


def downloading_sam_vit_l():
    load_file_from_url(
        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_l_0b3195.pth',
        model_dir=path_sam,
        file_name='sam_vit_l_0b3195.pth'
    )
    return os.path.join(path_sam, 'sam_vit_l_0b3195.pth')


def downloading_sam_vit_h():
    load_file_from_url(
        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_h_4b8939.pth',
        model_dir=path_sam,
        file_name='sam_vit_h_4b8939.pth'
    )
    return os.path.join(path_sam, 'sam_vit_h_4b8939.pth')
update_files()
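
Usage sketch: resolving a checkpoint path through the new helper (downloads on first call, then returns the cached path):

    checkpoint = modules.config.download_sam_model('vit_b')
    # -> <path_sam>/sam_vit_b_01ec64.pth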


@ -8,9 +8,15 @@ upscale_15 = 'Upscale (1.5x)'
upscale_2 = 'Upscale (2x)'
upscale_fast = 'Upscale (Fast 2x)'
-uov_list = [
-    disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
-]
+uov_list = [disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast]
enhancement_uov_before = "Before First Enhancement"
enhancement_uov_after = "After Last Enhancement"
enhancement_uov_processing_order = [enhancement_uov_before, enhancement_uov_after]
enhancement_uov_prompt_type_original = 'Original Prompts'
enhancement_uov_prompt_type_last_filled = 'Last Filled Enhancement Prompts'
enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last_filled]
CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]
@ -76,7 +82,7 @@ output_formats = ['png', 'jpeg', 'webp']
inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_h_4b8939', 'sam_vit_l_0b3195']
+inpaint_mask_sam_model = ['vit_b', 'vit_l', 'vit_h']
inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
inpaint_option_default = 'Inpaint or Outpaint (default)'
@ -107,7 +113,6 @@ metadata_scheme = [
]
controlnet_image_count = 4
preparation_step_count = 13
class OutputFormat(Enum):
@ -163,14 +168,6 @@ class Performance(Enum):
def values(cls) -> list:
return list(map(lambda c: c.value, cls))
-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
@classmethod
def by_steps(cls, steps: int | str):
return cls[Steps(int(steps)).name]


@ -390,6 +390,9 @@ def get_enabled_loras(loras: list, remove_none=True) -> list:
def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5,
skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True,
lora_filenames=None) -> tuple[List[Tuple[AnyStr, float]], str]:
# prevent unintended side effects when returning without detection
loras = loras.copy()
if lora_filenames is None:
lora_filenames = []
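
The defensive copy guards against mutating the caller's list; a sketch of the aliasing it prevents (hypothetical prompt and filenames):

    user_loras = [('style.safetensors', 0.8)]
    updated_loras, cleaned_prompt = parse_lora_references_from_prompt(
        'portrait <lora:detail:0.5>', user_loras, skip_file_check=True)
    # with loras.copy(), user_loras is still [('style.safetensors', 0.8)];
    # before this fix, detected LoRAs were appended to the caller's list as well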


@ -13,10 +13,10 @@ omegaconf==2.2.3
gradio==3.41.2
pygit2==1.12.2
opencv-contrib-python==4.8.0.74
diffusers==0.25.1
httpx==0.24.1
onnxruntime==1.16.3
timm==0.9.2
-translators==5.8.9
-rembg==2.0.53
-groundingdino-py==0.4.0
+translators==5.9.2
+rembg==2.0.57
+groundingdino-py==0.4.0
+segment_anything==1.0

webui.py (314 changed lines)

@ -16,6 +16,7 @@ import modules.meta_parser
import args_manager
import copy
import launch
from extras.inpaint_mask import SAMOptions
from modules.sdxl_styles import legal_style_names
from modules.private_logger import get_current_html_path
@ -89,6 +90,34 @@ def generate_clicked(task: worker.AsyncTask):
return
+def inpaint_mode_change(mode):
+    assert mode in modules.flags.inpaint_options
+
+    # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+    # inpaint_disable_initial_latent, inpaint_engine,
+    # inpaint_strength, inpaint_respective_field
+    if mode == modules.flags.inpaint_option_detail:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
+            False, 'None', 0.5, 0.0
+        ]
+
+    if mode == modules.flags.inpaint_option_modify:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+            True, modules.config.default_inpaint_engine_version, 1.0, 0.0
+        ]
+
+    return [
+        gr.update(visible=False, value=''), gr.update(visible=True),
+        gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+        False, modules.config.default_inpaint_engine_version, 1.0, 0.618
+    ]
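
Call sketch showing how the returned list lines up with the seven outputs wired to it below (values taken from the detail branch above):

    updates = inpaint_mode_change(modules.flags.inpaint_option_detail)
    # -> [additional_prompt shown, outpaint selections hidden/cleared,
    #     example prompts dataset shown, disable_initial_latent=False,
    #     engine='None', strength=0.5, respective_field=0.0]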
reload_javascript()
title = f'Fooocus {fooocus_version.version}'
@ -146,6 +175,7 @@ with shared.gradio_root:
skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
with gr.Row(elem_classes='advanced_check_row'):
input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
enhance_checkbox = gr.Checkbox(label='Enhance', value=modules.config.default_enhance_checkbox, container=False, elem_classes='min_check')
advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
with gr.Row(visible=False) as image_input_panel:
with gr.Tabs():
@ -223,44 +253,56 @@ with shared.gradio_root:
choices=flags.inpaint_mask_cloth_category,
value=modules.config.default_inpaint_mask_cloth_category,
visible=False)
-inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False)
+inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible', placeholder='Describe what you want to detect.')
+example_inpaint_mask_dino_prompt_text = gr.Dataset(
+    samples=modules.config.example_enhance_detection_prompts,
+    label='Detection Prompt Quick List',
+    components=[inpaint_mask_dino_prompt_text],
+    visible=modules.config.default_inpaint_mask_model == 'sam')
+example_inpaint_mask_dino_prompt_text.click(lambda x: x[0],
+                                            inputs=example_inpaint_mask_dino_prompt_text,
+                                            outputs=inpaint_mask_dino_prompt_text,
+                                            show_progress=False, queue=False)
 with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
     inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
-    inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)
     inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
     inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
+    inpaint_mask_sam_max_detections = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True)
 generate_mask_button = gr.Button(value='Generate mask from image')

-def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold):
+def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_detections, dino_erode_or_dilate, dino_debug):
     from extras.inpaint_mask import generate_mask_from_image
     extras = {}
+    sam_options = None
     if mask_model == 'u2net_cloth_seg':
         extras['cloth_category'] = cloth_category
     elif mask_model == 'sam':
-        extras['sam_prompt_text'] = sam_prompt_text
-        extras['sam_model'] = sam_model
-        extras['sam_quant'] = sam_quant
-        extras['box_threshold'] = box_threshold
-        extras['text_threshold'] = text_threshold
+        sam_options = SAMOptions(
+            dino_prompt=dino_prompt_text,
+            dino_box_threshold=box_threshold,
+            dino_text_threshold=text_threshold,
+            dino_erode_or_dilate=dino_erode_or_dilate,
+            dino_debug=dino_debug,
+            max_detections=sam_max_detections,
+            model_type=sam_model
+        )

-    return generate_mask_from_image(image, mask_model, extras)
+    mask, _, _, _ = generate_mask_from_image(image, mask_model, extras, sam_options)
+    return mask

-generate_mask_button.click(fn=generate_mask,
-                           inputs=[
-                               inpaint_input_image, inpaint_mask_model,
-                               inpaint_mask_cloth_category,
-                               inpaint_mask_sam_prompt_text,
-                               inpaint_mask_sam_model,
-                               inpaint_mask_sam_quant,
-                               inpaint_mask_box_threshold,
-                               inpaint_mask_text_threshold
-                           ],
-                           outputs=inpaint_mask_image, show_progress=True, queue=True)

-inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
+inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+                          [gr.update(visible=x == 'sam')] * 2 +
+                          [gr.Dataset.update(visible=x == 'sam',
+                                             samples=modules.config.example_enhance_detection_prompts)],
                           inputs=inpaint_mask_model,
-                          outputs=[inpaint_mask_cloth_category, inpaint_mask_sam_prompt_text, inpaint_mask_advanced_options],
+                          outputs=[inpaint_mask_cloth_category,
+                                   inpaint_mask_dino_prompt_text,
+                                   inpaint_mask_advanced_options,
+                                   example_inpaint_mask_dino_prompt_text],
                           queue=False, show_progress=False)
with gr.TabItem(label='Describe') as desc_tab:
@ -283,6 +325,12 @@ with shared.gradio_root:
desc_input_image.upload(trigger_show_image_properties, inputs=desc_input_image,
outputs=desc_image_size, show_progress=False, queue=False)
with gr.TabItem(label='Enhance') as enhance_tab:
with gr.Row():
with gr.Column():
enhance_input_image = grh.Image(label='Base image for enhance', source='upload', type='numpy')
gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
with gr.TabItem(label='Metadata') as metadata_tab:
with gr.Column():
metadata_input_image = grh.Image(label='For images created by Fooocus', source='upload', type='filepath')
@ -304,6 +352,153 @@ with shared.gradio_root:
metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image,
outputs=metadata_json, queue=False, show_progress=True)
with gr.Row(visible=modules.config.default_enhance_checkbox) as enhance_input_panel:
with gr.Tabs():
with gr.TabItem(label='Upscale or Variation'):
with gr.Row():
with gr.Column():
enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list,
value=modules.config.default_enhance_uov_method)
enhance_uov_processing_order = gr.Radio(label='Order of Processing',
info='Use before for enhancement of small details and after for large areas.',
choices=flags.enhancement_uov_processing_order,
value=modules.config.default_enhance_uov_processing_order)
enhance_uov_prompt_type = gr.Radio(label='Prompt',
info='Choose which prompt to use for Upscale or Variation.',
choices=flags.enhancement_uov_prompt_types,
value=modules.config.default_enhance_uov_prompt_type,
visible=modules.config.default_enhance_uov_processing_order == flags.enhancement_uov_after)
enhance_uov_processing_order.change(lambda x: gr.update(visible=x == flags.enhancement_uov_after),
inputs=enhance_uov_processing_order,
outputs=enhance_uov_prompt_type,
queue=False, show_progress=False)
gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
enhance_ctrls = []
for index in range(modules.config.default_enhance_tabs):
with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
container=False)
enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
info='Use singular whenever possible',
placeholder='Describe what you want to detect.',
interactive=True,
visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
example_enhance_mask_dino_prompt_text = gr.Dataset(
samples=modules.config.example_enhance_detection_prompts,
label='Detection Prompt Quick List',
components=[enhance_mask_dino_prompt_text],
visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
example_enhance_mask_dino_prompt_text.click(lambda x: x[0],
inputs=example_enhance_mask_dino_prompt_text,
outputs=enhance_mask_dino_prompt_text,
show_progress=False, queue=False)
enhance_prompt = gr.Textbox(label="Enhancement positive prompt",
placeholder="Uses original prompt instead if empty.",
elem_id='enhance_prompt')
enhance_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
placeholder="Uses original negative prompt instead if empty.",
elem_id='enhance_negative_prompt')
with gr.Accordion("Detection", open=False):
# TODO check if limiting to SAM is better
enhance_mask_model = gr.Dropdown(label='Mask generation model',
choices=flags.inpaint_mask_models,
value=modules.config.default_enhance_inpaint_mask_model)
enhance_mask_cloth_category = gr.Dropdown(label='Cloth category',
choices=flags.inpaint_mask_cloth_category,
value=modules.config.default_inpaint_mask_cloth_category,
visible=modules.config.default_enhance_inpaint_mask_model == 'u2net_cloth_seg',
interactive=True)
with gr.Accordion("SAM Options",
visible=modules.config.default_enhance_inpaint_mask_model == 'sam',
open=False) as sam_options:
enhance_mask_sam_model = gr.Dropdown(label='SAM model',
choices=flags.inpaint_mask_sam_model,
value=modules.config.default_inpaint_mask_sam_model,
interactive=True)
enhance_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0,
maximum=1.0, value=0.3, step=0.05,
interactive=True)
enhance_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
maximum=1.0, value=0.25, step=0.05,
interactive=True)
enhance_mask_sam_max_detections = gr.Slider(label="Maximum number of detections",
info="Set to 0 to detect all",
minimum=0, maximum=10,
value=modules.config.default_sam_max_detections,
step=1, interactive=True)
with gr.Accordion("Inpaint", visible=True, open=False):
enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
value=modules.flags.inpaint_option_default,
label='Method', interactive=True)
enhance_inpaint_disable_initial_latent = gr.Checkbox(
label='Disable initial latent in inpaint', value=False)
enhance_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
value=modules.config.default_inpaint_engine_version,
choices=flags.inpaint_engine_versions,
info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
enhance_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
minimum=0.0, maximum=1.0, step=0.001,
value=1.0,
info='Same as the denoising strength in A1111 inpaint. '
'Only used in inpaint, not used in outpaint. '
'(Outpaint always use 1.0)')
enhance_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
minimum=0.0, maximum=1.0, step=0.001,
value=0.618,
info='The area to inpaint. '
'Value 0 is same as "Only Masked" in A1111. '
'Value 1 is same as "Whole Image" in A1111. '
'Only used in inpaint, not used in outpaint. '
'(Outpaint always use 1.0)')
enhance_inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
minimum=-64, maximum=64, step=1, value=0,
info='Positive value will make white area in the mask larger, '
'negative value will make white area smaller. '
'(default is 0, always processed before any mask invert)')
enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
enhance_ctrls += [
enhance_enabled,
enhance_mask_dino_prompt_text,
enhance_prompt,
enhance_negative_prompt,
enhance_mask_model,
enhance_mask_sam_model,
enhance_mask_text_threshold,
enhance_mask_box_threshold,
enhance_mask_sam_max_detections,
enhance_inpaint_disable_initial_latent,
enhance_inpaint_engine,
enhance_inpaint_strength,
enhance_inpaint_respective_field,
enhance_inpaint_erode_or_dilate,
enhance_mask_invert
]
enhance_inpaint_mode.input(inpaint_mode_change, inputs=enhance_inpaint_mode, outputs=[
inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
enhance_inpaint_strength, enhance_inpaint_respective_field
], show_progress=False, queue=False)
enhance_mask_model.change(
lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
[gr.update(visible=x == 'sam')] * 2 +
[gr.Dataset.update(visible=x == 'sam',
samples=modules.config.example_enhance_detection_prompts)],
inputs=enhance_mask_model,
outputs=[enhance_mask_cloth_category, enhance_mask_dino_prompt_text, sam_options,
example_enhance_mask_dino_prompt_text],
queue=False, show_progress=False)
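
Each enabled tab appends the same 15 controls to enhance_ctrls, so the consuming side must unpack the flat value list in matching chunks; a hypothetical helper illustrating the contract (names assumed, not from the diff):

    ENHANCE_CTRL_COUNT = 15  # controls appended per enhance tab above

    def split_enhance_ctrls(values):
        # values: flat list of all enhance-tab control values, in UI order
        for i in range(0, len(values), ENHANCE_CTRL_COUNT):
            yield values[i:i + ENHANCE_CTRL_COUNT]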
switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
down_js = "() => {viewer_to_bottom();}"
@ -316,7 +511,10 @@ with shared.gradio_root:
inpaint_tab.select(lambda: 'inpaint', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
ip_tab.select(lambda: 'ip', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
enhance_tab.select(lambda: 'enhance', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox,
outputs=enhance_input_panel, queue=False, show_progress=False, _js=switch_js)
with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column:
with gr.Tab(label='Settings'):
@ -379,7 +577,7 @@ with shared.gradio_root:
def update_history_link():
if args_manager.args.disable_image_log:
return gr.update(value='')
return gr.update(value=f'<a href="file={get_current_html_path(output_format)}" target="_blank">\U0001F4DA History Log</a>')
history_link = gr.HTML()
@ -544,7 +742,7 @@ with shared.gradio_root:
info='Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.',
visible=modules.config.default_save_metadata_to_images)
save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme],
queue=False, show_progress=False)
with gr.Tab(label='Control'):
@ -570,11 +768,15 @@ with shared.gradio_root:
with gr.Tab(label='Inpaint'):
debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False)
debugging_enhance_masks_checkbox = gr.Checkbox(label='Debug Enhance Masks', value=False,
info='Show enhance masks in preview and final results')
debugging_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
info='Use GroundingDINO boxes instead of more detailed SAM masks')
inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
inpaint_engine = gr.Dropdown(label='Inpaint Engine',
value=modules.config.default_inpaint_engine_version,
choices=flags.inpaint_engine_versions,
-                             info='Version of Fooocus inpaint model')
+                             info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
minimum=0.0, maximum=1.0, step=0.001, value=1.0,
info='Same as the denoising strength in A1111 inpaint. '
@ -590,8 +792,13 @@ with shared.gradio_root:
inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
minimum=-64, maximum=64, step=1, value=0,
info='Positive value will make white area in the mask larger, '
-                                    'negative value will make white area smaller.'
-                                    '(default is 0, always process before any mask invert)')
+                                    'negative value will make white area smaller. '
+                                    '(default is 0, always processed before any mask invert)')
+dino_erode_or_dilate = gr.Slider(label='GroundingDINO Box Erode or Dilate',
+                                 minimum=-64, maximum=64, step=1, value=0,
+                                 info='Positive value will make white area in the mask larger, '
+                                      'negative value will make white area smaller. '
+                                      '(default is 0, processed before SAM)')
inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False)
invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False)
@ -701,46 +908,26 @@ with shared.gradio_root:
adm_scaler_negative, refiner_switch, refiner_model, sampler_name,
scheduler_name, adaptive_cfg, refiner_swap_method, negative_prompt, disable_intermediate_results
], queue=False, show_progress=False)
output_format.input(lambda x: gr.update(output_format=x), inputs=output_format)
advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column,
queue=False, show_progress=False) \
.then(fn=lambda: None, _js='refresh_grid_delayed', queue=False, show_progress=False)
-def inpaint_mode_change(mode):
-    assert mode in modules.flags.inpaint_options
-
-    # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
-    # inpaint_disable_initial_latent, inpaint_engine,
-    # inpaint_strength, inpaint_respective_field
-    if mode == modules.flags.inpaint_option_detail:
-        return [
-            gr.update(visible=True), gr.update(visible=False, value=[]),
-            gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
-            False, 'None', 0.5, 0.0
-        ]
-
-    if mode == modules.flags.inpaint_option_modify:
-        return [
-            gr.update(visible=True), gr.update(visible=False, value=[]),
-            gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
-            True, modules.config.default_inpaint_engine_version, 1.0, 0.0
-        ]
-
-    return [
-        gr.update(visible=False, value=''), gr.update(visible=True),
-        gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
-        False, modules.config.default_inpaint_engine_version, 1.0, 0.618
-    ]

 inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[
     inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
     inpaint_disable_initial_latent, inpaint_engine,
     inpaint_strength, inpaint_respective_field
 ], show_progress=False, queue=False)

+generate_mask_button.click(fn=generate_mask,
+                           inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
+                                   inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
+                                   inpaint_mask_box_threshold, inpaint_mask_text_threshold,
+                                   inpaint_mask_sam_max_detections, dino_erode_or_dilate, debugging_dino],
+                           outputs=inpaint_mask_image, show_progress=True, queue=True)
ctrls = [currentTask, generate_image_grid]
ctrls += [
prompt, negative_prompt, translate_prompts, style_selections,
@ -766,6 +953,10 @@ with shared.gradio_root:
ctrls += [save_metadata_to_images, metadata_scheme]
ctrls += ip_ctrls
ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox,
enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order,
enhance_uov_prompt_type]
ctrls += enhance_ctrls
def parse_meta(raw_prompt_txt, is_generating):
loaded_json = None
@ -828,15 +1019,18 @@ with shared.gradio_root:
desc_btn.click(trigger_describe, inputs=[desc_method, desc_input_image],
outputs=[prompt, style_selections], show_progress=True, queue=True)
-if args_manager.args.enable_describe_uov_image:
-    def trigger_uov_describe(mode, img, prompt):
+if args_manager.args.enable_auto_describe_image:
+    def trigger_auto_describe(mode, img, prompt):
         # keep prompt if not empty
         if prompt == '':
             return trigger_describe(mode, img)
         return gr.update(), gr.update()

-    uov_input_image.upload(trigger_uov_describe, inputs=[desc_method, uov_input_image, prompt],
-                           outputs=[prompt, style_selections], show_progress=True, queue=True)
+    uov_input_image.upload(trigger_auto_describe, inputs=[desc_method, uov_input_image, prompt],
+                           outputs=[prompt, style_selections], show_progress=True, queue=True)
+
+    enhance_input_image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox, queue=False, show_progress=False) \
+        .then(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt], outputs=[prompt, style_selections], show_progress=True, queue=True)
def dump_default_english_config():
from modules.localization import dump_english_config