Merge branch 'feature/multi-step-rendering'

commit 89f8725228
@@ -28,8 +28,8 @@ args_parser.parser.add_argument("--disable-metadata", action='store_true',
 args_parser.parser.add_argument("--disable-preset-download", action='store_true',
                                 help="Disables downloading models for presets", default=False)

-args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true',
-                                help="Disables automatic description of uov images when prompt is empty", default=False)
+args_parser.parser.add_argument("--enable-auto-describe-image", action='store_true',
+                                help="Enables automatic description of uov and enhance image when prompt is empty", default=False)

 args_parser.parser.add_argument("--always-download-new-model", action='store_true',
                                 help="Always download newer models ", default=False)
@@ -99,7 +99,7 @@ div:has(> #positive_prompt) {
 }

 .advanced_check_row {
-  width: 250px !important;
+  width: 330px !important;
 }

 .min_check {
@@ -0,0 +1,24 @@
+# https://github.com/sail-sg/EditAnything/blob/main/sam2groundingdino_edit.py
+
+import numpy as np
+from PIL import Image
+
+from extras.inpaint_mask import SAMOptions, generate_mask_from_image
+
+original_image = Image.open('cat.webp')
+image = np.array(original_image, dtype=np.uint8)
+
+sam_options = SAMOptions(
+    dino_prompt='eye',
+    dino_box_threshold=0.3,
+    dino_text_threshold=0.25,
+    dino_erode_or_dilate=0,
+    dino_debug=False,
+    max_detections=2,
+    model_type='vit_b'
+)
+
+mask_image, _, _, _ = generate_mask_from_image(image, sam_options=sam_options)
+
+merged_masks_img = Image.fromarray(mask_image)
+merged_masks_img.show()
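Note: the three discarded values in the example above are detection counters. A small sketch of reading them instead, with names taken from the generate_mask_from_image signature further down in this diff:

# sketch: unpacking the counters instead of discarding them
mask_image, dino_count, sam_count, on_mask_count = generate_mask_from_image(image, sam_options=sam_options)
print(f'DINO boxes: {dino_count}, SAM masks: {sam_count}, masks merged: {on_mask_count}')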
@@ -25,7 +25,7 @@ class GroundingDinoModel(Model):
             caption: str,
             box_threshold: float = 0.35,
             text_threshold: float = 0.25
-    ) -> Tuple[sv.Detections, List[str]]:
+    ) -> Tuple[sv.Detections, torch.Tensor, torch.Tensor, List[str]]:
         if self.model is None:
             filename = load_file_from_url(
                 url="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth",
@@ -56,7 +56,7 @@ class GroundingDinoModel(Model):
                 source_w=source_w,
                 boxes=boxes,
                 logits=logits)
-        return detections, phrases
+        return detections, boxes, logits, phrases


    def predict(
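Note: callers of default_groundingdino now unpack four values instead of two. A minimal sketch of the new call shape (variable names assumed; the box format follows the conversion code in extras/inpaint_mask.py below):

# sketch: consuming the widened return value
detections, boxes, logits, phrases = default_groundingdino(
    image=image,          # HWC uint8 np.ndarray
    caption='eye',
    box_threshold=0.3,
    text_threshold=0.25
)
# boxes: normalized (cx, cy, w, h) tensors; logits: one confidence per box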
@@ -41,7 +41,7 @@ class Censor:
         model_management.load_model_gpu(self.safety_checker_model)

         single = False
-        if not isinstance(images, list) or isinstance(images, np.ndarray):
+        if not isinstance(images, (list, np.ndarray)):
             images = [images]
             single = True

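Note: this is a real bug fix, not a style change. Under the old condition an np.ndarray always satisfied not isinstance(images, list), so a batch array was wrapped as if it were a single image; the tuple form lets lists and arrays pass through as batches. A toy check of the difference:

# sketch: comparing the two conditions on a batch array
import numpy as np

def old_cond(images):
    return not isinstance(images, list) or isinstance(images, np.ndarray)

def new_cond(images):
    return not isinstance(images, (list, np.ndarray))

batch = np.zeros((2, 8, 8, 3), dtype=np.uint8)      # two stacked images
print(old_cond(batch), new_cond(batch))             # True False: old code wrapped the whole batch
print(old_cond([batch[0]]), new_cond([batch[0]]))   # False False: lists agree either way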
@@ -1,42 +1,130 @@
 from PIL import Image
+import sys

+import modules.config
 import numpy as np
 import torch
-from rembg import remove, new_session
 from extras.GroundingDINO.util.inference import default_groundingdino

-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+from extras.sam.predictor import SamPredictor
+from rembg import remove, new_session
+from segment_anything import sam_model_registry
+from segment_anything.utils.amg import remove_small_regions


-def run_grounded_sam(input_image, text_prompt, box_threshold, text_threshold):
+class SAMOptions:
+    def __init__(self,
+                 # GroundingDINO
+                 dino_prompt: str = '',
+                 dino_box_threshold=0.3,
+                 dino_text_threshold=0.25,
+                 dino_erode_or_dilate=0,
+                 dino_debug=False,
+
-    # run grounding dino model
-    boxes, _ = default_groundingdino(
-        image=np.array(input_image),
-        caption=text_prompt,
-        box_threshold=box_threshold,
-        text_threshold=text_threshold
-    )
-
-    return boxes.xyxy
+                 # SAM
+                 max_detections=2,
+                 model_type='vit_b'
+                 ):
+        self.dino_prompt = dino_prompt
+        self.dino_box_threshold = dino_box_threshold
+        self.dino_text_threshold = dino_text_threshold
+        self.dino_erode_or_dilate = dino_erode_or_dilate
+        self.dino_debug = dino_debug
+        self.max_detections = max_detections
+        self.model_type = model_type


-def generate_mask_from_image(image, mask_model, extras):
+def optimize_masks(masks: torch.Tensor) -> torch.Tensor:
+    """
+    removes small disconnected regions and holes
+    """
+    fine_masks = []
+    for mask in masks.to('cpu').numpy():  # masks: [num_masks, 1, h, w]
+        fine_masks.append(remove_small_regions(mask[0], 400, mode="holes")[0])
+    masks = np.stack(fine_masks, axis=0)[:, np.newaxis]
+    return torch.from_numpy(masks)
+
+
+def generate_mask_from_image(image: np.ndarray, mask_model: str = 'sam', extras=None,
+                             sam_options: SAMOptions | None = SAMOptions()) -> tuple[np.ndarray | None, int | None, int | None, int | None]:
+    dino_detection_count = 0
+    sam_detection_count = 0
+    sam_detection_on_mask_count = 0
+
     if image is None:
-        return
+        return None, dino_detection_count, sam_detection_count, sam_detection_on_mask_count

     if extras is None:
         extras = {}

     if 'image' in image:
         image = image['image']

-    if mask_model == 'sam':
-        boxes = run_grounded_sam(Image.fromarray(image), extras['sam_prompt_text'], box_threshold=extras['box_threshold'], text_threshold=extras['text_threshold'])
-        boxes = np.array([[0, 0, image.shape[1], image.shape[0]]]) if len(boxes) == 0 else boxes
-        extras['sam_prompt'] = []
-        for idx, box in enumerate(boxes):
-            extras['sam_prompt'] += [{"type": "rectangle", "data": box.tolist()}]
+    if mask_model != 'sam' or sam_options is None:
+        result = remove(
+            image,
+            session=new_session(mask_model, **extras),
+            only_mask=True,
+            **extras
+        )

-    return remove(
-        image,
-        session=new_session(mask_model, **extras),
-        only_mask=True,
-        **extras
-    )
+        return result, dino_detection_count, sam_detection_count, sam_detection_on_mask_count

+    detections, boxes, logits, phrases = default_groundingdino(
+        image=image,
+        caption=sam_options.dino_prompt,
+        box_threshold=sam_options.dino_box_threshold,
+        text_threshold=sam_options.dino_text_threshold
+    )
+
+    H, W = image.shape[0], image.shape[1]
+    boxes = boxes * torch.Tensor([W, H, W, H])
+    boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2
+    boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]
+
+    sam_checkpoint = modules.config.download_sam_model(sam_options.model_type)
+    sam = sam_model_registry[sam_options.model_type](checkpoint=sam_checkpoint)
+
+    sam_predictor = SamPredictor(sam)
+    final_mask_tensor = torch.zeros((image.shape[0], image.shape[1]))
+    dino_detection_count = boxes.size(0)
+
+    if dino_detection_count > 0:
+        sam_predictor.set_image(image)
+
+        if sam_options.dino_erode_or_dilate != 0:
+            for index in range(boxes.size(0)):
+                assert boxes.size(1) == 4
+                boxes[index][0] -= sam_options.dino_erode_or_dilate
+                boxes[index][1] -= sam_options.dino_erode_or_dilate
+                boxes[index][2] += sam_options.dino_erode_or_dilate
+                boxes[index][3] += sam_options.dino_erode_or_dilate
+
+        if sam_options.dino_debug:
+            from PIL import ImageDraw, Image
+            debug_dino_image = Image.new("RGB", (image.shape[1], image.shape[0]), color="black")
+            draw = ImageDraw.Draw(debug_dino_image)
+            for box in boxes.numpy():
+                draw.rectangle(box.tolist(), fill="white")
+            return np.array(debug_dino_image), dino_detection_count, sam_detection_count, sam_detection_on_mask_count
+
+        transformed_boxes = sam_predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
+        masks, _, _ = sam_predictor.predict_torch(
+            point_coords=None,
+            point_labels=None,
+            boxes=transformed_boxes,
+            multimask_output=False,
+        )
+
+        masks = optimize_masks(masks)
+        sam_detection_count = len(masks)
+        if sam_options.max_detections == 0:
+            sam_options.max_detections = sys.maxsize
+        sam_objects = min(len(logits), sam_options.max_detections)
+        for obj_ind in range(sam_objects):
+            mask_tensor = masks[obj_ind][0]
+            final_mask_tensor += mask_tensor
+            sam_detection_on_mask_count += 1
+
+    final_mask_tensor = (final_mask_tensor > 0).to('cpu').numpy()
+    mask_image = np.dstack((final_mask_tensor, final_mask_tensor, final_mask_tensor)) * 255
+    mask_image = np.array(mask_image, dtype=np.uint8)
+    return mask_image, dino_detection_count, sam_detection_count, sam_detection_on_mask_count
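Note on the box arithmetic in the hunk above: GroundingDINO returns boxes as normalized (cx, cy, w, h); the three tensor lines scale them to pixels and convert them to corner (x1, y1, x2, y2) format before handing them to SAM. The same arithmetic in isolation, with toy values:

import torch

# sketch: normalized center boxes -> pixel corner boxes
W, H = 640, 480
boxes = torch.tensor([[0.5, 0.5, 0.25, 0.5]])    # (cx, cy, w, h) in [0, 1]
boxes = boxes * torch.Tensor([W, H, W, H])        # scale to pixel units
boxes[:, :2] = boxes[:, :2] - boxes[:, 2:] / 2    # center -> top-left corner
boxes[:, 2:] = boxes[:, 2:] + boxes[:, :2]        # width/height -> bottom-right corner
print(boxes)  # tensor([[240., 120., 400., 360.]])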
@@ -0,0 +1,288 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import numpy as np
+import torch
+from ldm_patched.modules import model_management
+from ldm_patched.modules.model_patcher import ModelPatcher
+
+from segment_anything.modeling import Sam
+
+from typing import Optional, Tuple
+
+from segment_anything.utils.transforms import ResizeLongestSide
+
+
+class SamPredictor:
+    def __init__(
+        self,
+        model: Sam,
+        load_device=model_management.text_encoder_device(),
+        offload_device=model_management.text_encoder_offload_device()
+    ) -> None:
+        """
+        Uses SAM to calculate the image embedding for an image, and then
+        allow repeated, efficient mask prediction given prompts.
+
+        Arguments:
+          model (Sam): The model to use for mask prediction.
+        """
+        super().__init__()
+
+        self.load_device = load_device
+        self.offload_device = offload_device
+        # can't use model.half() here as slow_conv2d_cpu is not implemented for half
+        model.to(self.offload_device)
+
+        self.patcher = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)
+
+        self.transform = ResizeLongestSide(model.image_encoder.img_size)
+        self.reset_image()
+
+    def set_image(
+        self,
+        image: np.ndarray,
+        image_format: str = "RGB",
+    ) -> None:
+        """
+        Calculates the image embeddings for the provided image, allowing
+        masks to be predicted with the 'predict' method.
+
+        Arguments:
+          image (np.ndarray): The image for calculating masks. Expects an
+            image in HWC uint8 format, with pixel values in [0, 255].
+          image_format (str): The color format of the image, in ['RGB', 'BGR'].
+        """
+        assert image_format in [
+            "RGB",
+            "BGR",
+        ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
+        if image_format != self.patcher.model.image_format:
+            image = image[..., ::-1]
+
+        # Transform the image to the form expected by the model
+        input_image = self.transform.apply_image(image)
+        input_image_torch = torch.as_tensor(input_image, device=self.load_device)
+        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]
+
+        self.set_torch_image(input_image_torch, image.shape[:2])
+
+    @torch.no_grad()
+    def set_torch_image(
+        self,
+        transformed_image: torch.Tensor,
+        original_image_size: Tuple[int, ...],
+    ) -> None:
+        """
+        Calculates the image embeddings for the provided image, allowing
+        masks to be predicted with the 'predict' method. Expects the input
+        image to be already transformed to the format expected by the model.
+
+        Arguments:
+          transformed_image (torch.Tensor): The input image, with shape
+            1x3xHxW, which has been transformed with ResizeLongestSide.
+          original_image_size (tuple(int, int)): The size of the image
+            before transformation, in (H, W) format.
+        """
+        assert (
+            len(transformed_image.shape) == 4
+            and transformed_image.shape[1] == 3
+            and max(*transformed_image.shape[2:]) == self.patcher.model.image_encoder.img_size
+        ), f"set_torch_image input must be BCHW with long side {self.patcher.model.image_encoder.img_size}."
+        self.reset_image()
+
+        self.original_size = original_image_size
+        self.input_size = tuple(transformed_image.shape[-2:])
+        model_management.load_model_gpu(self.patcher)
+        input_image = self.patcher.model.preprocess(transformed_image.to(self.load_device))
+        self.features = self.patcher.model.image_encoder(input_image)
+        self.is_image_set = True
+
+    def predict(
+        self,
+        point_coords: Optional[np.ndarray] = None,
+        point_labels: Optional[np.ndarray] = None,
+        box: Optional[np.ndarray] = None,
+        mask_input: Optional[np.ndarray] = None,
+        multimask_output: bool = True,
+        return_logits: bool = False,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Predict masks for the given input prompts, using the currently set image.
+
+        Arguments:
+          point_coords (np.ndarray or None): A Nx2 array of point prompts to the
+            model. Each point is in (X,Y) in pixels.
+          point_labels (np.ndarray or None): A length N array of labels for the
+            point prompts. 1 indicates a foreground point and 0 indicates a
+            background point.
+          box (np.ndarray or None): A length 4 array given a box prompt to the
+            model, in XYXY format.
+          mask_input (np.ndarray): A low resolution mask input to the model, typically
+            coming from a previous prediction iteration. Has form 1xHxW, where
+            for SAM, H=W=256.
+          multimask_output (bool): If true, the model will return three masks.
+            For ambiguous input prompts (such as a single click), this will often
+            produce better masks than a single prediction. If only a single
+            mask is needed, the model's predicted quality score can be used
+            to select the best mask. For non-ambiguous prompts, such as multiple
+            input prompts, multimask_output=False can give better results.
+          return_logits (bool): If true, returns un-thresholded masks logits
+            instead of a binary mask.
+
+        Returns:
+          (np.ndarray): The output masks in CxHxW format, where C is the
+            number of masks, and (H, W) is the original image size.
+          (np.ndarray): An array of length C containing the model's
+            predictions for the quality of each mask.
+          (np.ndarray): An array of shape CxHxW, where C is the number
+            of masks and H=W=256. These low resolution logits can be passed to
+            a subsequent iteration as mask input.
+        """
+        if not self.is_image_set:
+            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+
+        # Transform input prompts
+        coords_torch, labels_torch, box_torch, mask_input_torch = None, None, None, None
+        if point_coords is not None:
+            assert (
+                point_labels is not None
+            ), "point_labels must be supplied if point_coords is supplied."
+            point_coords = self.transform.apply_coords(point_coords, self.original_size)
+            coords_torch = torch.as_tensor(point_coords, dtype=torch.float, device=self.load_device)
+            labels_torch = torch.as_tensor(point_labels, dtype=torch.int, device=self.load_device)
+            coords_torch, labels_torch = coords_torch[None, :, :], labels_torch[None, :]
+        if box is not None:
+            box = self.transform.apply_boxes(box, self.original_size)
+            box_torch = torch.as_tensor(box, dtype=torch.float, device=self.load_device)
+            box_torch = box_torch[None, :]
+        if mask_input is not None:
+            mask_input_torch = torch.as_tensor(mask_input, dtype=torch.float, device=self.load_device)
+            mask_input_torch = mask_input_torch[None, :, :, :]
+
+        masks, iou_predictions, low_res_masks = self.predict_torch(
+            coords_torch,
+            labels_torch,
+            box_torch,
+            mask_input_torch,
+            multimask_output,
+            return_logits=return_logits,
+        )
+
+        masks = masks[0].detach().cpu().numpy()
+        iou_predictions = iou_predictions[0].detach().cpu().numpy()
+        low_res_masks = low_res_masks[0].detach().cpu().numpy()
+        return masks, iou_predictions, low_res_masks
+
+    @torch.no_grad()
+    def predict_torch(
+        self,
+        point_coords: Optional[torch.Tensor],
+        point_labels: Optional[torch.Tensor],
+        boxes: Optional[torch.Tensor] = None,
+        mask_input: Optional[torch.Tensor] = None,
+        multimask_output: bool = True,
+        return_logits: bool = False,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """
+        Predict masks for the given input prompts, using the currently set image.
+        Input prompts are batched torch tensors and are expected to already be
+        transformed to the input frame using ResizeLongestSide.
+
+        Arguments:
+          point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
+            model. Each point is in (X,Y) in pixels.
+          point_labels (torch.Tensor or None): A BxN array of labels for the
+            point prompts. 1 indicates a foreground point and 0 indicates a
+            background point.
+          box (np.ndarray or None): A Bx4 array given a box prompt to the
+            model, in XYXY format.
+          mask_input (np.ndarray): A low resolution mask input to the model, typically
+            coming from a previous prediction iteration. Has form Bx1xHxW, where
+            for SAM, H=W=256. Masks returned by a previous iteration of the
+            predict method do not need further transformation.
+          multimask_output (bool): If true, the model will return three masks.
+            For ambiguous input prompts (such as a single click), this will often
+            produce better masks than a single prediction. If only a single
+            mask is needed, the model's predicted quality score can be used
+            to select the best mask. For non-ambiguous prompts, such as multiple
+            input prompts, multimask_output=False can give better results.
+          return_logits (bool): If true, returns un-thresholded masks logits
+            instead of a binary mask.
+
+        Returns:
+          (torch.Tensor): The output masks in BxCxHxW format, where C is the
+            number of masks, and (H, W) is the original image size.
+          (torch.Tensor): An array of shape BxC containing the model's
+            predictions for the quality of each mask.
+          (torch.Tensor): An array of shape BxCxHxW, where C is the number
+            of masks and H=W=256. These low res logits can be passed to
+            a subsequent iteration as mask input.
+        """
+        if not self.is_image_set:
+            raise RuntimeError("An image must be set with .set_image(...) before mask prediction.")
+
+        if point_coords is not None:
+            points = (point_coords.to(self.load_device), point_labels.to(self.load_device))
+        else:
+            points = None
+
+        # load
+        if boxes is not None:
+            boxes = boxes.to(self.load_device)
+        if mask_input is not None:
+            mask_input = mask_input.to(self.load_device)
+        model_management.load_model_gpu(self.patcher)
+
+        # Embed prompts
+        sparse_embeddings, dense_embeddings = self.patcher.model.prompt_encoder(
+            points=points,
+            boxes=boxes,
+            masks=mask_input,
+        )
+
+        # Predict masks
+        low_res_masks, iou_predictions = self.patcher.model.mask_decoder(
+            image_embeddings=self.features,
+            image_pe=self.patcher.model.prompt_encoder.get_dense_pe(),
+            sparse_prompt_embeddings=sparse_embeddings,
+            dense_prompt_embeddings=dense_embeddings,
+            multimask_output=multimask_output,
+        )
+
+        # Upscale the masks to the original image resolution
+        masks = self.patcher.model.postprocess_masks(low_res_masks, self.input_size, self.original_size)
+
+        if not return_logits:
+            masks = masks > self.patcher.model.mask_threshold
+
+        return masks, iou_predictions, low_res_masks
+
+    def get_image_embedding(self) -> torch.Tensor:
+        """
+        Returns the image embeddings for the currently set image, with
+        shape 1xCxHxW, where C is the embedding dimension and (H,W) are
+        the embedding spatial dimension of SAM (typically C=256, H=W=64).
+        """
+        if not self.is_image_set:
+            raise RuntimeError(
+                "An image must be set with .set_image(...) to generate an embedding."
+            )
+        assert self.features is not None, "Features must exist if an image has been set."
+        return self.features
+
+    @property
+    def device(self) -> torch.device:
+        return self.patcher.model.device
+
+    def reset_image(self) -> None:
+        """Resets the currently set image."""
+        self.is_image_set = False
+        self.features = None
+        self.orig_h = None
+        self.orig_w = None
+        self.input_h = None
+        self.input_w = None
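Note: a minimal usage sketch of this patcher-backed predictor, mirroring how generate_mask_from_image drives it above (the checkpoint filename and the vit_b choice are assumptions):

import numpy as np
import torch
from segment_anything import sam_model_registry
from extras.sam.predictor import SamPredictor

# sketch: box-prompted prediction (checkpoint path assumed to be downloaded already)
sam = sam_model_registry['vit_b'](checkpoint='sam_vit_b_01ec64.pth')
predictor = SamPredictor(sam)

image = np.zeros((480, 640, 3), dtype=np.uint8)   # HWC uint8, as set_image expects
predictor.set_image(image)

boxes = torch.tensor([[240., 120., 400., 360.]])  # pixel XYXY
transformed = predictor.transform.apply_boxes_torch(boxes, image.shape[:2])
masks, scores, low_res = predictor.predict_torch(
    point_coords=None,
    point_labels=None,
    boxes=transformed,
    multimask_output=False,
)
print(masks.shape)  # (1, 1, 480, 640) boolean masks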
@@ -1 +1 @@
-version = '2.4.3 (mashb1t)'
+version = '2.5.0-rc6 (mashb1t)'
@@ -44,14 +44,7 @@
     "Top": "Top",
     "Bottom": "Bottom",
     "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)": "* \"Inpaint or Outpaint\" is powered by the sampler \"DPMPP Fooocus Seamless 2M SDE Karras Inpaint Sampler\" (beta)",
-    "Mask generation model": "Mask generation model",
-    "Cloth category": "Cloth category",
-    "Segmentation prompt": "Segmentation prompt",
     "Advanced options": "Advanced options",
-    "SAM model": "SAM model",
-    "Quantization": "Quantization",
-    "Box Threshold": "Box Threshold",
-    "Text Threshold": "Text Threshold",
     "Generate mask from image": "Generate mask from image",
     "Setting": "Setting",
     "Style": "Style",
@@ -377,10 +370,14 @@
     "Disable preview during generation.": "Disable preview during generation.",
     "Disable Intermediate Results": "Disable Intermediate Results",
     "Disable intermediate results during generation, only show final gallery.": "Disable intermediate results during generation, only show final gallery.",
     "Debug Inpaint Preprocessing": "Debug Inpaint Preprocessing",
+    "Debug GroundingDINO": "Debug GroundingDINO",
+    "Used for SAM object detection and box generation": "Used for SAM object detection and box generation",
+    "GroundingDINO Box Erode or Dilate": "GroundingDINO Box Erode or Dilate",
     "Inpaint Engine": "Inpaint Engine",
     "v1": "v1",
-    "Version of Fooocus inpaint model": "Version of Fooocus inpaint model",
+    "v2.5": "v2.5",
+    "v2.6": "v2.6",
     "Control Debug": "Control Debug",
     "Debug Preprocessors": "Debug Preprocessors",
     "Mixing Image Prompt and Vary/Upscale": "Mixing Image Prompt and Vary/Upscale",
@@ -410,5 +407,63 @@
     "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.": "Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.",
     "fooocus (json)": "fooocus (json)",
     "a1111 (plain text)": "a1111 (plain text)",
-    "Unsupported image type in input": "Unsupported image type in input"
+    "Unsupported image type in input": "Unsupported image type in input",
+    "Enhance": "Enhance",
+    "Detection prompt": "Detection prompt",
+    "Detection Prompt Quick List": "Detection Prompt Quick List",
+    "Maximum number of detections": "Maximum number of detections",
+    "Base image for enhance": "Base image for enhance",
+    "Order of Processing": "Order of Processing",
+    "Use before for enhancement of small details and after for large areas.": "Use before for enhancement of small details and after for large areas.",
+    "Before First Enhancement": "Before First Enhancement",
+    "After Last Enhancement": "After Last Enhancement",
+    "Prompt Type": "Prompt Type",
+    "Choose which prompt to use for Upscale or Variation.": "Choose which prompt to use for Upscale or Variation.",
+    "Original Prompts": "Original Prompts",
+    "Last Filled Enhancement Prompts": "Last Filled Enhancement Prompts",
+    "Enable": "Enable",
+    "Describe what you want to detect.": "Describe what you want to detect.",
+    "Enhancement positive prompt": "Enhancement positive prompt",
+    "Uses original prompt instead if empty.": "Uses original prompt instead if empty.",
+    "Enhancement negative prompt": "Enhancement negative prompt",
+    "Uses original negative prompt instead if empty.": "Uses original negative prompt instead if empty.",
+    "Detection": "Detection",
+    "u2net": "u2net",
+    "u2netp": "u2netp",
+    "u2net_human_seg": "u2net_human_seg",
+    "u2net_cloth_seg": "u2net_cloth_seg",
+    "silueta": "silueta",
+    "isnet-general-use": "isnet-general-use",
+    "isnet-anime": "isnet-anime",
+    "sam": "sam",
+    "Mask generation model": "Mask generation model",
+    "Cloth category": "Cloth category",
+    "Use singular whenever possible": "Use singular whenever possible",
+    "full": "full",
+    "upper": "upper",
+    "lower": "lower",
+    "SAM Options": "SAM Options",
+    "SAM model": "SAM model",
+    "vit_b": "vit_b",
+    "vit_l": "vit_l",
+    "vit_h": "vit_h",
+    "Box Threshold": "Box Threshold",
+    "Text Threshold": "Text Threshold",
+    "Set to 0 to detect all": "Set to 0 to detect all",
+    "Inpaint": "Inpaint",
+    "Inpaint or Outpaint (default)": "Inpaint or Outpaint (default)",
+    "Improve Detail (face, hand, eyes, etc.)": "Improve Detail (face, hand, eyes, etc.)",
+    "Modify Content (add objects, change background, etc.)": "Modify Content (add objects, change background, etc.)",
+    "Disable initial latent in inpaint": "Disable initial latent in inpaint",
+    "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.": "Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.",
+    "Inpaint Denoising Strength": "Inpaint Denoising Strength",
+    "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "Same as the denoising strength in A1111 inpaint. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
+    "Inpaint Respective Field": "Inpaint Respective Field",
+    "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)": "The area to inpaint. Value 0 is same as \"Only Masked\" in A1111. Value 1 is same as \"Whole Image\" in A1111. Only used in inpaint, not used in outpaint. (Outpaint always use 1.0)",
+    "Mask Erode or Dilate": "Mask Erode or Dilate",
+    "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)": "Positive value will make white area in the mask larger, negative value will make white area smaller. (default is 0, always processed before any mask invert)",
+    "Invert Mask": "Invert Mask",
+    "Debug Enhance Masks": "Debug Enhance Masks",
+    "Show enhance masks in preview and final results": "Show enhance masks in preview and final results",
+    "Use GroundingDINO boxes instead of more detailed SAM masks": "Use GroundingDINO boxes instead of more detailed SAM masks"
 }
(File diff suppressed because it is too large)
@@ -201,6 +201,7 @@ path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../mo
-path_safety_checker_models = get_dir_or_set_default('path_safety_checker_models', '../models/safety_checker_models/')
 path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/')
+path_safety_checker = get_dir_or_set_default('path_safety_checker', '../models/safety_checker/')
+path_sam = get_dir_or_set_default('path_sam', '../models/sam/')
 path_outputs = get_path_output()


@@ -500,6 +501,50 @@ example_inpaint_prompts = get_config_item_or_set_default(
     validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
     expected_type=list
 )
+example_enhance_detection_prompts = get_config_item_or_set_default(
+    key='example_enhance_detection_prompts',
+    default_value=[
+        'face', 'eye', 'mouth', 'hair', 'hand', 'body'
+    ],
+    validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x),
+    expected_type=list
+)
+default_enhance_tabs = get_config_item_or_set_default(
+    key='default_enhance_tabs',
+    default_value=3,
+    validator=lambda x: isinstance(x, int) and 1 <= x <= 5,
+    expected_type=int
+)
+default_enhance_checkbox = get_config_item_or_set_default(
+    key='default_enhance_checkbox',
+    default_value=False,
+    validator=lambda x: isinstance(x, bool),
+    expected_type=bool
+)
+default_enhance_uov_method = get_config_item_or_set_default(
+    key='default_enhance_uov_method',
+    default_value=modules.flags.disabled,
+    validator=lambda x: x in modules.flags.uov_list,
+    expected_type=str
+)
+default_enhance_uov_processing_order = get_config_item_or_set_default(
+    key='default_enhance_uov_processing_order',
+    default_value=modules.flags.enhancement_uov_before,
+    validator=lambda x: x in modules.flags.enhancement_uov_processing_order,
+    expected_type=str
+)
+default_enhance_uov_prompt_type = get_config_item_or_set_default(
+    key='default_enhance_uov_prompt_type',
+    default_value=modules.flags.enhancement_uov_prompt_type_original,
+    validator=lambda x: x in modules.flags.enhancement_uov_prompt_types,
+    expected_type=str
+)
+default_sam_max_detections = get_config_item_or_set_default(
+    key='default_sam_max_detections',
+    default_value=0,
+    validator=lambda x: isinstance(x, int) and 0 <= x <= 10,
+    expected_type=int
+)
 default_black_out_nsfw = get_config_item_or_set_default(
     key='default_black_out_nsfw',
     default_value=False,
@@ -526,13 +571,8 @@ metadata_created_by = get_config_item_or_set_default(
 )

 example_inpaint_prompts = [[x] for x in example_inpaint_prompts]
+example_enhance_detection_prompts = [[x] for x in example_enhance_detection_prompts]

-default_black_out_nsfw = get_config_item_or_set_default(
-    key='default_black_out_nsfw',
-    default_value=False,
-    validator=lambda x: isinstance(x, bool),
-    expected_type=bool
-)
 default_inpaint_mask_model = get_config_item_or_set_default(
     key='default_inpaint_mask_model',
     default_value='isnet-general-use',
@@ -540,6 +580,13 @@ default_inpaint_mask_model = get_config_item_or_set_default(
     expected_type=str
 )

+default_enhance_inpaint_mask_model = get_config_item_or_set_default(
+    key='default_enhance_inpaint_mask_model',
+    default_value='sam',
+    validator=lambda x: x in modules.flags.inpaint_mask_models,
+    expected_type=str
+)
+
 default_inpaint_mask_cloth_category = get_config_item_or_set_default(
     key='default_inpaint_mask_cloth_category',
     default_value='full',
@@ -549,8 +596,8 @@ default_inpaint_mask_cloth_category = get_config_item_or_set_default(

 default_inpaint_mask_sam_model = get_config_item_or_set_default(
     key='default_inpaint_mask_sam_model',
-    default_value='sam_vit_b_01ec64',
-    validator=lambda x: x in modules.flags.inpaint_mask_sam_model,
+    default_value='vit_b',
+    validator=lambda x: x in [y[1] for y in modules.flags.inpaint_mask_sam_model if y[1] == x],
     expected_type=str
 )

@@ -789,4 +836,43 @@ def downloading_safety_checker_model():
     return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin')


+def download_sam_model(sam_model: str) -> str:
+    match sam_model:
+        case 'vit_b':
+            return downloading_sam_vit_b()
+        case 'vit_l':
+            return downloading_sam_vit_l()
+        case 'vit_h':
+            return downloading_sam_vit_h()
+        case _:
+            raise ValueError(f"sam model {sam_model} does not exist.")
+
+
+def downloading_sam_vit_b():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_b_01ec64.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_b_01ec64.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_b_01ec64.pth')
+
+
+def downloading_sam_vit_l():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_l_0b3195.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_l_0b3195.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_l_0b3195.pth')
+
+
+def downloading_sam_vit_h():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/sam_vit_h_4b8939.pth',
+        model_dir=path_sam,
+        file_name='sam_vit_h_4b8939.pth'
+    )
+    return os.path.join(path_sam, 'sam_vit_h_4b8939.pth')
+
+
 update_files()
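Note: the match/case dispatch requires Python 3.10 or newer. An equivalent dict-based dispatch for older interpreters, as a sketch built on the three helpers above (the compat function name is hypothetical):

# sketch: dict-based dispatch equivalent to the match statement above
_SAM_DOWNLOADERS = {
    'vit_b': downloading_sam_vit_b,
    'vit_l': downloading_sam_vit_l,
    'vit_h': downloading_sam_vit_h,
}

def download_sam_model_compat(sam_model: str) -> str:
    try:
        return _SAM_DOWNLOADERS[sam_model]()
    except KeyError:
        raise ValueError(f"sam model {sam_model} does not exist.")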
@@ -8,9 +8,15 @@ upscale_15 = 'Upscale (1.5x)'
 upscale_2 = 'Upscale (2x)'
 upscale_fast = 'Upscale (Fast 2x)'

-uov_list = [
-    disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast
-]
+uov_list = [disabled, subtle_variation, strong_variation, upscale_15, upscale_2, upscale_fast]
+
+enhancement_uov_before = "Before First Enhancement"
+enhancement_uov_after = "After Last Enhancement"
+enhancement_uov_processing_order = [enhancement_uov_before, enhancement_uov_after]
+
+enhancement_uov_prompt_type_original = 'Original Prompts'
+enhancement_uov_prompt_type_last_filled = 'Last Filled Enhancement Prompts'
+enhancement_uov_prompt_types = [enhancement_uov_prompt_type_original, enhancement_uov_prompt_type_last_filled]

 CIVITAI_NO_KARRAS = ["euler", "euler_ancestral", "heun", "dpm_fast", "dpm_adaptive", "ddim", "uni_pc"]

@@ -76,7 +82,7 @@ output_formats = ['png', 'jpeg', 'webp']

 inpaint_mask_models = ['u2net', 'u2netp', 'u2net_human_seg', 'u2net_cloth_seg', 'silueta', 'isnet-general-use', 'isnet-anime', 'sam']
 inpaint_mask_cloth_category = ['full', 'upper', 'lower']
-inpaint_mask_sam_model = ['sam_vit_b_01ec64', 'sam_vit_h_4b8939', 'sam_vit_l_0b3195']
+inpaint_mask_sam_model = ['vit_b', 'vit_l', 'vit_h']

 inpaint_engine_versions = ['None', 'v1', 'v2.5', 'v2.6']
 inpaint_option_default = 'Inpaint or Outpaint (default)'
@@ -107,7 +113,6 @@ metadata_scheme = [
 ]

 controlnet_image_count = 4
-preparation_step_count = 13


 class OutputFormat(Enum):
@@ -163,14 +168,6 @@ class Performance(Enum):
     def values(cls) -> list:
         return list(map(lambda c: c.value, cls))

-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
-    @classmethod
-    def values(cls) -> list:
-        return list(map(lambda c: c.value, cls))
-
     @classmethod
     def by_steps(cls, steps: int | str):
         return cls[Steps(int(steps)).name]
@@ -390,6 +390,9 @@ def get_enabled_loras(loras: list, remove_none=True) -> list:
 def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5,
                                       skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True,
                                       lora_filenames=None) -> tuple[List[Tuple[AnyStr, float]], str]:
+    # prevent unintended side effects when returning without detection
+    loras = loras.copy()
+
     if lora_filenames is None:
         lora_filenames = []

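Note on the added loras.copy(): without it, in-place list mutations later in the function leak back to the caller through the shared reference. A toy illustration (file names invented):

# sketch: why the defensive copy matters
def appends_without_copy(loras):
    loras += [('extra_lora.safetensors', 0.5)]   # mutates the caller's list in place
    return loras

def appends_with_copy(loras):
    loras = loras.copy()                          # later edits stay local
    loras += [('extra_lora.safetensors', 0.5)]
    return loras

caller_loras = [('base_lora.safetensors', 1.0)]
appends_without_copy(caller_loras)
print(len(caller_loras))  # 2 -> caller state changed

caller_loras = [('base_lora.safetensors', 1.0)]
appends_with_copy(caller_loras)
print(len(caller_loras))  # 1 -> caller state preserved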
@@ -13,10 +13,10 @@ omegaconf==2.2.3
 gradio==3.41.2
 pygit2==1.12.2
 opencv-contrib-python==4.8.0.74
 diffusers==0.25.1
 httpx==0.24.1
 onnxruntime==1.16.3
 timm==0.9.2
-translators==5.8.9
-rembg==2.0.53
-groundingdino-py==0.4.0
+translators==5.9.2
+rembg==2.0.57
+groundingdino-py==0.4.0
+segment_anything==1.0
webui.py
@@ -16,6 +16,7 @@ import modules.meta_parser
 import args_manager
 import copy
 import launch
+from extras.inpaint_mask import SAMOptions

 from modules.sdxl_styles import legal_style_names
 from modules.private_logger import get_current_html_path
@@ -89,6 +90,34 @@ def generate_clicked(task: worker.AsyncTask):
         return


+def inpaint_mode_change(mode):
+    assert mode in modules.flags.inpaint_options
+
+    # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+    # inpaint_disable_initial_latent, inpaint_engine,
+    # inpaint_strength, inpaint_respective_field
+
+    if mode == modules.flags.inpaint_option_detail:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
+            False, 'None', 0.5, 0.0
+        ]
+
+    if mode == modules.flags.inpaint_option_modify:
+        return [
+            gr.update(visible=True), gr.update(visible=False, value=[]),
+            gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+            True, modules.config.default_inpaint_engine_version, 1.0, 0.0
+        ]
+
+    return [
+        gr.update(visible=False, value=''), gr.update(visible=True),
+        gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
+        False, modules.config.default_inpaint_engine_version, 1.0, 0.618
+    ]
+
+
 reload_javascript()

 title = f'Fooocus {fooocus_version.version}'
@@ -146,6 +175,7 @@ with shared.gradio_root:
         skip_button.click(skip_clicked, inputs=currentTask, outputs=currentTask, queue=False, show_progress=False)
     with gr.Row(elem_classes='advanced_check_row'):
         input_image_checkbox = gr.Checkbox(label='Input Image', value=False, container=False, elem_classes='min_check')
+        enhance_checkbox = gr.Checkbox(label='Enhance', value=modules.config.default_enhance_checkbox, container=False, elem_classes='min_check')
         advanced_checkbox = gr.Checkbox(label='Advanced', value=modules.config.default_advanced_checkbox, container=False, elem_classes='min_check')
     with gr.Row(visible=False) as image_input_panel:
         with gr.Tabs():
@@ -223,44 +253,56 @@ with shared.gradio_root:
                             choices=flags.inpaint_mask_cloth_category,
                             value=modules.config.default_inpaint_mask_cloth_category,
                             visible=False)
-                        inpaint_mask_sam_prompt_text = gr.Textbox(label='Segmentation prompt', value='', visible=False)
+                        inpaint_mask_dino_prompt_text = gr.Textbox(label='Detection prompt', value='', visible=False, info='Use singular whenever possible', placeholder='Describe what you want to detect.')
+                        example_inpaint_mask_dino_prompt_text = gr.Dataset(
+                            samples=modules.config.example_enhance_detection_prompts,
+                            label='Detection Prompt Quick List',
+                            components=[inpaint_mask_dino_prompt_text],
+                            visible=modules.config.default_inpaint_mask_model == 'sam')
+                        example_inpaint_mask_dino_prompt_text.click(lambda x: x[0],
+                                                                    inputs=example_inpaint_mask_dino_prompt_text,
+                                                                    outputs=inpaint_mask_dino_prompt_text,
+                                                                    show_progress=False, queue=False)

                         with gr.Accordion("Advanced options", visible=False, open=False) as inpaint_mask_advanced_options:
                             inpaint_mask_sam_model = gr.Dropdown(label='SAM model', choices=flags.inpaint_mask_sam_model, value=modules.config.default_inpaint_mask_sam_model)
-                            inpaint_mask_sam_quant = gr.Checkbox(label='Quantization', value=False)
                             inpaint_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.05)
                             inpaint_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.05)
+                            inpaint_mask_sam_max_detections = gr.Slider(label="Maximum number of detections", info="Set to 0 to detect all", minimum=0, maximum=10, value=modules.config.default_sam_max_detections, step=1, interactive=True)
                         generate_mask_button = gr.Button(value='Generate mask from image')

-                        def generate_mask(image, mask_model, cloth_category, sam_prompt_text, sam_model, sam_quant, box_threshold, text_threshold):
+                        def generate_mask(image, mask_model, cloth_category, dino_prompt_text, sam_model, box_threshold, text_threshold, sam_max_detections, dino_erode_or_dilate, dino_debug):
                             from extras.inpaint_mask import generate_mask_from_image

                             extras = {}
+                            sam_options = None
                             if mask_model == 'u2net_cloth_seg':
                                 extras['cloth_category'] = cloth_category
                             elif mask_model == 'sam':
-                                extras['sam_prompt_text'] = sam_prompt_text
-                                extras['sam_model'] = sam_model
-                                extras['sam_quant'] = sam_quant
-                                extras['box_threshold'] = box_threshold
-                                extras['text_threshold'] = text_threshold
+                                sam_options = SAMOptions(
+                                    dino_prompt=dino_prompt_text,
+                                    dino_box_threshold=box_threshold,
+                                    dino_text_threshold=text_threshold,
+                                    dino_erode_or_dilate=dino_erode_or_dilate,
+                                    dino_debug=dino_debug,
+                                    max_detections=sam_max_detections,
+                                    model_type=sam_model
+                                )

-                            return generate_mask_from_image(image, mask_model, extras)
+                            mask, _, _, _ = generate_mask_from_image(image, mask_model, extras, sam_options)

-                        generate_mask_button.click(fn=generate_mask,
-                                                   inputs=[
-                                                       inpaint_input_image, inpaint_mask_model,
-                                                       inpaint_mask_cloth_category,
-                                                       inpaint_mask_sam_prompt_text,
-                                                       inpaint_mask_sam_model,
-                                                       inpaint_mask_sam_quant,
-                                                       inpaint_mask_box_threshold,
-                                                       inpaint_mask_text_threshold
-                                                   ],
-                                                   outputs=inpaint_mask_image, show_progress=True, queue=True)
+                            return mask

-                        inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg'), gr.update(visible=x == 'sam'), gr.update(visible=x == 'sam')],
+                        inpaint_mask_model.change(lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+                                                            [gr.update(visible=x == 'sam')] * 2 +
+                                                            [gr.Dataset.update(visible=x == 'sam',
+                                                                               samples=modules.config.example_enhance_detection_prompts)],
                                                   inputs=inpaint_mask_model,
-                                                  outputs=[inpaint_mask_cloth_category, inpaint_mask_sam_prompt_text, inpaint_mask_advanced_options],
+                                                  outputs=[inpaint_mask_cloth_category,
+                                                           inpaint_mask_dino_prompt_text,
+                                                           inpaint_mask_advanced_options,
+                                                           example_inpaint_mask_dino_prompt_text],
                                                   queue=False, show_progress=False)

                 with gr.TabItem(label='Describe') as desc_tab:
@@ -283,6 +325,12 @@ with shared.gradio_root:
                     desc_input_image.upload(trigger_show_image_properties, inputs=desc_input_image,
                                             outputs=desc_image_size, show_progress=False, queue=False)

+                with gr.TabItem(label='Enhance') as enhance_tab:
+                    with gr.Row():
+                        with gr.Column():
+                            enhance_input_image = grh.Image(label='Base image for enhance', source='upload', type='numpy')
+                    gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
+
                 with gr.TabItem(label='Metadata') as metadata_tab:
                     with gr.Column():
                         metadata_input_image = grh.Image(label='For images created by Fooocus', source='upload', type='filepath')
@@ -304,6 +352,153 @@ with shared.gradio_root:
                     metadata_input_image.upload(trigger_metadata_preview, inputs=metadata_input_image,
                                                 outputs=metadata_json, queue=False, show_progress=True)

+        with gr.Row(visible=modules.config.default_enhance_checkbox) as enhance_input_panel:
+            with gr.Tabs():
+                with gr.TabItem(label='Upscale or Variation'):
+                    with gr.Row():
+                        with gr.Column():
+                            enhance_uov_method = gr.Radio(label='Upscale or Variation:', choices=flags.uov_list,
+                                                          value=modules.config.default_enhance_uov_method)
+                            enhance_uov_processing_order = gr.Radio(label='Order of Processing',
+                                                                    info='Use before for enhancement of small details and after for large areas.',
+                                                                    choices=flags.enhancement_uov_processing_order,
+                                                                    value=modules.config.default_enhance_uov_processing_order)
+                            enhance_uov_prompt_type = gr.Radio(label='Prompt',
+                                                               info='Choose which prompt to use for Upscale or Variation.',
+                                                               choices=flags.enhancement_uov_prompt_types,
+                                                               value=modules.config.default_enhance_uov_prompt_type,
+                                                               visible=modules.config.default_enhance_uov_processing_order == flags.enhancement_uov_after)
+
+                            enhance_uov_processing_order.change(lambda x: gr.update(visible=x == flags.enhancement_uov_after),
+                                                                inputs=enhance_uov_processing_order,
+                                                                outputs=enhance_uov_prompt_type,
+                                                                queue=False, show_progress=False)
+                    gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
+                enhance_ctrls = []
+                for index in range(modules.config.default_enhance_tabs):
+                    with gr.TabItem(label=f'#{index + 1}') as enhance_tab_item:
+                        enhance_enabled = gr.Checkbox(label='Enable', value=False, elem_classes='min_check',
+                                                      container=False)
+
+                        enhance_mask_dino_prompt_text = gr.Textbox(label='Detection prompt',
+                                                                   info='Use singular whenever possible',
+                                                                   placeholder='Describe what you want to detect.',
+                                                                   interactive=True,
+                                                                   visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+                        example_enhance_mask_dino_prompt_text = gr.Dataset(
+                            samples=modules.config.example_enhance_detection_prompts,
+                            label='Detection Prompt Quick List',
+                            components=[enhance_mask_dino_prompt_text],
+                            visible=modules.config.default_enhance_inpaint_mask_model == 'sam')
+                        example_enhance_mask_dino_prompt_text.click(lambda x: x[0],
+                                                                    inputs=example_enhance_mask_dino_prompt_text,
+                                                                    outputs=enhance_mask_dino_prompt_text,
+                                                                    show_progress=False, queue=False)
+
+                        enhance_prompt = gr.Textbox(label="Enhancement positive prompt",
+                                                    placeholder="Uses original prompt instead if empty.",
+                                                    elem_id='enhance_prompt')
+                        enhance_negative_prompt = gr.Textbox(label="Enhancement negative prompt",
+                                                             placeholder="Uses original negative prompt instead if empty.",
+                                                             elem_id='enhance_negative_prompt')
+
+                        with gr.Accordion("Detection", open=False):
+                            # TODO check if limiting to SAM is better
+                            enhance_mask_model = gr.Dropdown(label='Mask generation model',
+                                                             choices=flags.inpaint_mask_models,
+                                                             value=modules.config.default_enhance_inpaint_mask_model)
+                            enhance_mask_cloth_category = gr.Dropdown(label='Cloth category',
+                                                                      choices=flags.inpaint_mask_cloth_category,
+                                                                      value=modules.config.default_inpaint_mask_cloth_category,
+                                                                      visible=modules.config.default_enhance_inpaint_mask_model == 'u2net_cloth_seg',
+                                                                      interactive=True)
+
+                            with gr.Accordion("SAM Options",
+                                              visible=modules.config.default_enhance_inpaint_mask_model == 'sam',
+                                              open=False) as sam_options:
+                                enhance_mask_sam_model = gr.Dropdown(label='SAM model',
+                                                                     choices=flags.inpaint_mask_sam_model,
+                                                                     value=modules.config.default_inpaint_mask_sam_model,
+                                                                     interactive=True)
+                                enhance_mask_box_threshold = gr.Slider(label="Box Threshold", minimum=0.0,
+                                                                       maximum=1.0, value=0.3, step=0.05,
+                                                                       interactive=True)
+                                enhance_mask_text_threshold = gr.Slider(label="Text Threshold", minimum=0.0,
+                                                                        maximum=1.0, value=0.25, step=0.05,
+                                                                        interactive=True)
+                                enhance_mask_sam_max_detections = gr.Slider(label="Maximum number of detections",
+                                                                            info="Set to 0 to detect all",
+                                                                            minimum=0, maximum=10,
+                                                                            value=modules.config.default_sam_max_detections,
+                                                                            step=1, interactive=True)
+
+                        with gr.Accordion("Inpaint", visible=True, open=False):
+                            enhance_inpaint_mode = gr.Dropdown(choices=modules.flags.inpaint_options,
+                                                               value=modules.flags.inpaint_option_default,
+                                                               label='Method', interactive=True)
+                            enhance_inpaint_disable_initial_latent = gr.Checkbox(
+                                label='Disable initial latent in inpaint', value=False)
+                            enhance_inpaint_engine = gr.Dropdown(label='Inpaint Engine',
+                                                                 value=modules.config.default_inpaint_engine_version,
+                                                                 choices=flags.inpaint_engine_versions,
+                                                                 info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
+                            enhance_inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
+                                                                 minimum=0.0, maximum=1.0, step=0.001,
+                                                                 value=1.0,
+                                                                 info='Same as the denoising strength in A1111 inpaint. '
+                                                                      'Only used in inpaint, not used in outpaint. '
+                                                                      '(Outpaint always use 1.0)')
+                            enhance_inpaint_respective_field = gr.Slider(label='Inpaint Respective Field',
+                                                                         minimum=0.0, maximum=1.0, step=0.001,
+                                                                         value=0.618,
+                                                                         info='The area to inpaint. '
+                                                                              'Value 0 is same as "Only Masked" in A1111. '
+                                                                              'Value 1 is same as "Whole Image" in A1111. '
+                                                                              'Only used in inpaint, not used in outpaint. '
+                                                                              '(Outpaint always use 1.0)')
+                            enhance_inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
+                                                                        minimum=-64, maximum=64, step=1, value=0,
+                                                                        info='Positive value will make white area in the mask larger, '
+                                                                             'negative value will make white area smaller. '
+                                                                             '(default is 0, always processed before any mask invert)')
+                            enhance_mask_invert = gr.Checkbox(label='Invert Mask', value=False)
+
+                        gr.HTML('<a href="https://github.com/mashb1t/Fooocus/discussions/42" target="_blank">\U0001F4D4 Document</a>')
+
+                        enhance_ctrls += [
+                            enhance_enabled,
+                            enhance_mask_dino_prompt_text,
+                            enhance_prompt,
+                            enhance_negative_prompt,
+                            enhance_mask_model,
+                            enhance_mask_sam_model,
+                            enhance_mask_text_threshold,
+                            enhance_mask_box_threshold,
+                            enhance_mask_sam_max_detections,
+                            enhance_inpaint_disable_initial_latent,
+                            enhance_inpaint_engine,
+                            enhance_inpaint_strength,
+                            enhance_inpaint_respective_field,
+                            enhance_inpaint_erode_or_dilate,
+                            enhance_mask_invert
+                        ]
+
+                        enhance_inpaint_mode.input(inpaint_mode_change, inputs=enhance_inpaint_mode, outputs=[
+                            inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
+                            enhance_inpaint_disable_initial_latent, enhance_inpaint_engine,
+                            enhance_inpaint_strength, enhance_inpaint_respective_field
+                        ], show_progress=False, queue=False)
+
+                        enhance_mask_model.change(
+                            lambda x: [gr.update(visible=x == 'u2net_cloth_seg')] +
+                                      [gr.update(visible=x == 'sam')] * 2 +
+                                      [gr.Dataset.update(visible=x == 'sam',
+                                                         samples=modules.config.example_enhance_detection_prompts)],
+                            inputs=enhance_mask_model,
+                            outputs=[enhance_mask_cloth_category, enhance_mask_dino_prompt_text, sam_options,
+                                     example_enhance_mask_dino_prompt_text],
+                            queue=False, show_progress=False)
+
         switch_js = "(x) => {if(x){viewer_to_bottom(100);viewer_to_bottom(500);}else{viewer_to_top();} return x;}"
         down_js = "() => {viewer_to_bottom();}"

@@ -316,7 +511,10 @@ with shared.gradio_root:
             inpaint_tab.select(lambda: 'inpaint', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             ip_tab.select(lambda: 'ip', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
+            enhance_tab.select(lambda: 'enhance', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
             metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False)
+            enhance_checkbox.change(lambda x: gr.update(visible=x), inputs=enhance_checkbox,
+                                    outputs=enhance_input_panel, queue=False, show_progress=False, _js=switch_js)

         with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column:
             with gr.Tab(label='Settings'):
@@ -379,7 +577,7 @@ with shared.gradio_root:
                 def update_history_link():
                     if args_manager.args.disable_image_log:
                         return gr.update(value='')

                     return gr.update(value=f'<a href="file={get_current_html_path(output_format)}" target="_blank">\U0001F4DA History Log</a>')

                 history_link = gr.HTML()
@@ -544,7 +742,7 @@ with shared.gradio_root:
                     info='Image Prompt parameters are not included. Use png and a1111 for compatibility with Civitai.',
                     visible=modules.config.default_save_metadata_to_images)

-                save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme],
+                save_metadata_to_images.change(lambda x: gr.update(visible=x), inputs=[save_metadata_to_images], outputs=[metadata_scheme],
                                                queue=False, show_progress=False)

                 with gr.Tab(label='Control'):
@@ -570,11 +768,15 @@ with shared.gradio_root:

                 with gr.Tab(label='Inpaint'):
                     debugging_inpaint_preprocessor = gr.Checkbox(label='Debug Inpaint Preprocessing', value=False)
+                    debugging_enhance_masks_checkbox = gr.Checkbox(label='Debug Enhance Masks', value=False,
+                                                                   info='Show enhance masks in preview and final results')
+                    debugging_dino = gr.Checkbox(label='Debug GroundingDINO', value=False,
+                                                 info='Use GroundingDINO boxes instead of more detailed SAM masks')
                     inpaint_disable_initial_latent = gr.Checkbox(label='Disable initial latent in inpaint', value=False)
                     inpaint_engine = gr.Dropdown(label='Inpaint Engine',
                                                  value=modules.config.default_inpaint_engine_version,
                                                  choices=flags.inpaint_engine_versions,
-                                                 info='Version of Fooocus inpaint model')
+                                                 info='Version of Fooocus inpaint model. If set, use performance Quality or Speed (no performance LoRAs) for best results.')
                     inpaint_strength = gr.Slider(label='Inpaint Denoising Strength',
                                                  minimum=0.0, maximum=1.0, step=0.001, value=1.0,
                                                  info='Same as the denoising strength in A1111 inpaint. '
@@ -590,8 +792,13 @@ with shared.gradio_root:
                     inpaint_erode_or_dilate = gr.Slider(label='Mask Erode or Dilate',
                                                         minimum=-64, maximum=64, step=1, value=0,
                                                         info='Positive value will make white area in the mask larger, '
-                                                             'negative value will make white area smaller.'
-                                                             '(default is 0, always process before any mask invert)')
+                                                             'negative value will make white area smaller. '
+                                                             '(default is 0, always processed before any mask invert)')
+                    dino_erode_or_dilate = gr.Slider(label='GroundingDINO Box Erode or Dilate',
+                                                     minimum=-64, maximum=64, step=1, value=0,
+                                                     info='Positive value will make white area in the mask larger, '
+                                                          'negative value will make white area smaller. '
+                                                          '(default is 0, processed before SAM)')
                     inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False)
                     invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False)

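Note: both sliders share the same semantics: positive values grow the white mask area, negative values shrink it. A toy illustration with OpenCV morphology (whether the backend uses cv2 for this step is not shown in this hunk):

import cv2
import numpy as np

# sketch: what positive (dilate) vs. negative (erode) values do to a binary mask
mask = np.zeros((7, 7), dtype=np.uint8)
mask[3, 3] = 255                                   # single white pixel
kernel = np.ones((3, 3), dtype=np.uint8)
dilated = cv2.dilate(mask, kernel, iterations=1)   # positive value: white area grows
eroded = cv2.erode(dilated, kernel, iterations=1)  # negative value: white area shrinks
print(int(dilated.sum() / 255), int(eroded.sum() / 255))  # 9 1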
@@ -701,46 +908,26 @@ with shared.gradio_root:
                 adm_scaler_negative, refiner_switch, refiner_model, sampler_name,
                 scheduler_name, adaptive_cfg, refiner_swap_method, negative_prompt, disable_intermediate_results
             ], queue=False, show_progress=False)


         output_format.input(lambda x: gr.update(output_format=x), inputs=output_format)


         advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, advanced_column,
                                  queue=False, show_progress=False) \
             .then(fn=lambda: None, _js='refresh_grid_delayed', queue=False, show_progress=False)

-        def inpaint_mode_change(mode):
-            assert mode in modules.flags.inpaint_options
-
-            # inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
-            # inpaint_disable_initial_latent, inpaint_engine,
-            # inpaint_strength, inpaint_respective_field
-
-            if mode == modules.flags.inpaint_option_detail:
-                return [
-                    gr.update(visible=True), gr.update(visible=False, value=[]),
-                    gr.Dataset.update(visible=True, samples=modules.config.example_inpaint_prompts),
-                    False, 'None', 0.5, 0.0
-                ]
-
-            if mode == modules.flags.inpaint_option_modify:
-                return [
-                    gr.update(visible=True), gr.update(visible=False, value=[]),
-                    gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
-                    True, modules.config.default_inpaint_engine_version, 1.0, 0.0
-                ]
-
-            return [
-                gr.update(visible=False, value=''), gr.update(visible=True),
-                gr.Dataset.update(visible=False, samples=modules.config.example_inpaint_prompts),
-                False, modules.config.default_inpaint_engine_version, 1.0, 0.618
-            ]
-
         inpaint_mode.input(inpaint_mode_change, inputs=inpaint_mode, outputs=[
             inpaint_additional_prompt, outpaint_selections, example_inpaint_prompts,
             inpaint_disable_initial_latent, inpaint_engine,
             inpaint_strength, inpaint_respective_field
         ], show_progress=False, queue=False)

+        generate_mask_button.click(fn=generate_mask,
+                                   inputs=[inpaint_input_image, inpaint_mask_model, inpaint_mask_cloth_category,
+                                           inpaint_mask_dino_prompt_text, inpaint_mask_sam_model,
+                                           inpaint_mask_box_threshold, inpaint_mask_text_threshold,
+                                           inpaint_mask_sam_max_detections, dino_erode_or_dilate, debugging_dino],
+                                   outputs=inpaint_mask_image, show_progress=True, queue=True)
+
         ctrls = [currentTask, generate_image_grid]
         ctrls += [
             prompt, negative_prompt, translate_prompts, style_selections,
@@ -766,6 +953,10 @@ with shared.gradio_root:
     ctrls += [save_metadata_to_images, metadata_scheme]

     ctrls += ip_ctrls
+    ctrls += [debugging_dino, dino_erode_or_dilate, debugging_enhance_masks_checkbox,
+              enhance_input_image, enhance_checkbox, enhance_uov_method, enhance_uov_processing_order,
+              enhance_uov_prompt_type]
+    ctrls += enhance_ctrls

     def parse_meta(raw_prompt_txt, is_generating):
         loaded_json = None
@@ -828,15 +1019,18 @@ with shared.gradio_root:
     desc_btn.click(trigger_describe, inputs=[desc_method, desc_input_image],
                    outputs=[prompt, style_selections], show_progress=True, queue=True)

-    if args_manager.args.enable_describe_uov_image:
-        def trigger_uov_describe(mode, img, prompt):
+    if args_manager.args.enable_auto_describe_image:
+        def trigger_auto_describe(mode, img, prompt):
             # keep prompt if not empty
             if prompt == '':
                 return trigger_describe(mode, img)
             return gr.update(), gr.update()

-        uov_input_image.upload(trigger_uov_describe, inputs=[desc_method, uov_input_image, prompt],
-                               outputs=[prompt, style_selections], show_progress=True, queue=True)
+        uov_input_image.upload(trigger_auto_describe, inputs=[desc_method, uov_input_image, prompt],
+                               outputs=[prompt, style_selections], show_progress=True, queue=True)
+
+        enhance_input_image.upload(lambda: gr.update(value=True), outputs=enhance_checkbox, queue=False, show_progress=False) \
+            .then(trigger_auto_describe, inputs=[desc_method, enhance_input_image, prompt], outputs=[prompt, style_selections], show_progress=True, queue=True)

 def dump_default_english_config():
     from modules.localization import dump_english_config