diff --git a/extras/adetailer/args.py b/extras/adetailer/args.py deleted file mode 100644 index 08ad4a3a..00000000 --- a/extras/adetailer/args.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import annotations - -from collections import UserList -from dataclasses import dataclass -from functools import cached_property, partial -from typing import Any, Literal, NamedTuple, Optional - -try: - from pydantic.v1 import ( - BaseModel, - Extra, - NonNegativeFloat, - NonNegativeInt, - PositiveInt, - confloat, - conint, - validator, - ) -except ImportError: - from pydantic import ( - BaseModel, - Extra, - NonNegativeFloat, - NonNegativeInt, - PositiveInt, - confloat, - conint, - validator, - ) - - -@dataclass -class SkipImg2ImgOrig: - steps: int - sampler_name: str - width: int - height: int - - -class Arg(NamedTuple): - attr: str - name: str - - -class ArgsList(UserList): - @cached_property - def attrs(self) -> tuple[str, ...]: - return tuple(attr for attr, _ in self) - - @cached_property - def names(self) -> tuple[str, ...]: - return tuple(name for _, name in self) - - -class ADetailerArgs(BaseModel, extra=Extra.forbid): - ad_model: str = "None" - ad_model_classes: str = "" - ad_tap_enable: bool = True - ad_prompt: str = "" - ad_negative_prompt: str = "" - ad_confidence: confloat(ge=0.0, le=1.0) = 0.3 - ad_mask_k_largest: NonNegativeInt = 0 - ad_mask_min_ratio: confloat(ge=0.0, le=1.0) = 0.0 - ad_mask_max_ratio: confloat(ge=0.0, le=1.0) = 1.0 - ad_dilate_erode: int = 4 - ad_x_offset: int = 0 - ad_y_offset: int = 0 - ad_mask_merge_invert: Literal["None", "Merge", "Merge and Invert"] = "None" - ad_mask_blur: NonNegativeInt = 4 - ad_denoising_strength: confloat(ge=0.0, le=1.0) = 0.4 - ad_inpaint_only_masked: bool = True - ad_inpaint_only_masked_padding: NonNegativeInt = 32 - ad_use_inpaint_width_height: bool = False - ad_inpaint_width: PositiveInt = 512 - ad_inpaint_height: PositiveInt = 512 - ad_use_steps: bool = False - ad_steps: PositiveInt = 28 - ad_use_cfg_scale: bool = False - ad_cfg_scale: NonNegativeFloat = 7.0 - ad_use_checkpoint: bool = False - ad_checkpoint: Optional[str] = None - ad_use_vae: bool = False - ad_vae: Optional[str] = None - ad_use_sampler: bool = False - ad_sampler: str = "DPM++ 2M Karras" - ad_scheduler: str = "Use same scheduler" - ad_use_noise_multiplier: bool = False - ad_noise_multiplier: confloat(ge=0.5, le=1.5) = 1.0 - ad_use_clip_skip: bool = False - ad_clip_skip: conint(ge=1, le=12) = 1 - ad_restore_face: bool = False - ad_controlnet_model: str = "None" - ad_controlnet_module: str = "None" - ad_controlnet_weight: confloat(ge=0.0, le=1.0) = 1.0 - ad_controlnet_guidance_start: confloat(ge=0.0, le=1.0) = 0.0 - ad_controlnet_guidance_end: confloat(ge=0.0, le=1.0) = 1.0 - is_api: bool = True - - @validator("is_api", pre=True) - def is_api_validator(cls, v: Any): # noqa: N805 - "tuple is json serializable but cannot be made with json deserialize." - return type(v) is not tuple - - @staticmethod - def ppop( - p: dict[str, Any], - key: str, - pops: list[str] | None = None, - cond: Any = None, - ) -> None: - if pops is None: - pops = [key] - if key not in p: - return - value = p[key] - cond = (not bool(value)) if cond is None else value == cond - - if cond: - for k in pops: - p.pop(k, None) - - def extra_params(self, suffix: str = "") -> dict[str, Any]: - if self.need_skip(): - return {} - - p = {name: getattr(self, attr) for attr, name in ALL_ARGS} - ppop = partial(self.ppop, p) - - ppop("ADetailer model classes") - ppop("ADetailer prompt") - ppop("ADetailer negative prompt") - p.pop("ADetailer tap enable", None) # always pop - ppop("ADetailer mask only top k largest", cond=0) - ppop("ADetailer mask min ratio", cond=0.0) - ppop("ADetailer mask max ratio", cond=1.0) - ppop("ADetailer x offset", cond=0) - ppop("ADetailer y offset", cond=0) - ppop("ADetailer mask merge invert", cond="None") - ppop("ADetailer inpaint only masked", ["ADetailer inpaint padding"]) - ppop( - "ADetailer use inpaint width height", - [ - "ADetailer use inpaint width height", - "ADetailer inpaint width", - "ADetailer inpaint height", - ], - ) - ppop( - "ADetailer use separate steps", - ["ADetailer use separate steps", "ADetailer steps"], - ) - ppop( - "ADetailer use separate CFG scale", - ["ADetailer use separate CFG scale", "ADetailer CFG scale"], - ) - ppop( - "ADetailer use separate checkpoint", - ["ADetailer use separate checkpoint", "ADetailer checkpoint"], - ) - ppop( - "ADetailer use separate VAE", - ["ADetailer use separate VAE", "ADetailer VAE"], - ) - ppop( - "ADetailer use separate sampler", - [ - "ADetailer use separate sampler", - "ADetailer sampler", - "ADetailer scheduler", - ], - ) - ppop("ADetailer scheduler", cond="Use same scheduler") - ppop( - "ADetailer use separate noise multiplier", - ["ADetailer use separate noise multiplier", "ADetailer noise multiplier"], - ) - - ppop( - "ADetailer use separate CLIP skip", - ["ADetailer use separate CLIP skip", "ADetailer CLIP skip"], - ) - - ppop("ADetailer restore face") - ppop( - "ADetailer ControlNet model", - [ - "ADetailer ControlNet model", - "ADetailer ControlNet module", - "ADetailer ControlNet weight", - "ADetailer ControlNet guidance start", - "ADetailer ControlNet guidance end", - ], - cond="None", - ) - ppop("ADetailer ControlNet module", cond="None") - ppop("ADetailer ControlNet weight", cond=1.0) - ppop("ADetailer ControlNet guidance start", cond=0.0) - ppop("ADetailer ControlNet guidance end", cond=1.0) - - if suffix: - p = {k + suffix: v for k, v in p.items()} - - return p - - def is_mediapipe(self) -> bool: - return self.ad_model.lower().startswith("mediapipe") - - def need_skip(self) -> bool: - return self.ad_model == "None" or self.ad_tap_enable is False - - -_all_args = [ - ("ad_model", "ADetailer model"), - ("ad_model_classes", "ADetailer model classes"), - ("ad_tap_enable", "ADetailer tap enable"), - ("ad_prompt", "ADetailer prompt"), - ("ad_negative_prompt", "ADetailer negative prompt"), - ("ad_confidence", "ADetailer confidence"), - ("ad_mask_k_largest", "ADetailer mask only top k largest"), - ("ad_mask_min_ratio", "ADetailer mask min ratio"), - ("ad_mask_max_ratio", "ADetailer mask max ratio"), - ("ad_x_offset", "ADetailer x offset"), - ("ad_y_offset", "ADetailer y offset"), - ("ad_dilate_erode", "ADetailer dilate erode"), - ("ad_mask_merge_invert", "ADetailer mask merge invert"), - ("ad_mask_blur", "ADetailer mask blur"), - ("ad_denoising_strength", "ADetailer denoising strength"), - ("ad_inpaint_only_masked", "ADetailer inpaint only masked"), - ("ad_inpaint_only_masked_padding", "ADetailer inpaint padding"), - ("ad_use_inpaint_width_height", "ADetailer use inpaint width height"), - ("ad_inpaint_width", "ADetailer inpaint width"), - ("ad_inpaint_height", "ADetailer inpaint height"), - ("ad_use_steps", "ADetailer use separate steps"), - ("ad_steps", "ADetailer steps"), - ("ad_use_cfg_scale", "ADetailer use separate CFG scale"), - ("ad_cfg_scale", "ADetailer CFG scale"), - ("ad_use_checkpoint", "ADetailer use separate checkpoint"), - ("ad_checkpoint", "ADetailer checkpoint"), - ("ad_use_vae", "ADetailer use separate VAE"), - ("ad_vae", "ADetailer VAE"), - ("ad_use_sampler", "ADetailer use separate sampler"), - ("ad_sampler", "ADetailer sampler"), - ("ad_scheduler", "ADetailer scheduler"), - ("ad_use_noise_multiplier", "ADetailer use separate noise multiplier"), - ("ad_noise_multiplier", "ADetailer noise multiplier"), - ("ad_use_clip_skip", "ADetailer use separate CLIP skip"), - ("ad_clip_skip", "ADetailer CLIP skip"), - ("ad_restore_face", "ADetailer restore face"), - ("ad_controlnet_model", "ADetailer ControlNet model"), - ("ad_controlnet_module", "ADetailer ControlNet module"), - ("ad_controlnet_weight", "ADetailer ControlNet weight"), - ("ad_controlnet_guidance_start", "ADetailer ControlNet guidance start"), - ("ad_controlnet_guidance_end", "ADetailer ControlNet guidance end"), -] - -_args = [Arg(*args) for args in _all_args] -ALL_ARGS = ArgsList(_args) - -BBOX_SORTBY = [ - "None", - "Position (left to right)", - "Position (center to edge)", - "Area (large to small)", -] -MASK_MERGE_INVERT = ["None", "Merge", "Merge and Invert"] - -_script_default = ( - "dynamic_prompting", - "dynamic_thresholding", - "wildcard_recursive", - "wildcards", - "lora_block_weight", - "negpip", -) -SCRIPT_DEFAULT = ",".join(sorted(_script_default)) - -_builtin_script = ("soft_inpainting", "hypertile_script") -BUILTIN_SCRIPT = ",".join(sorted(_builtin_script)) \ No newline at end of file diff --git a/extras/adetailer/common.py b/extras/adetailer/common.py deleted file mode 100644 index f80103fc..00000000 --- a/extras/adetailer/common.py +++ /dev/null @@ -1,161 +0,0 @@ -from __future__ import annotations - -import os -from collections import OrderedDict -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generic, Optional, TypeVar - -from huggingface_hub import hf_hub_download -from PIL import Image, ImageDraw -from torchvision.transforms.functional import to_pil_image - -REPO_ID = "Bingsu/adetailer" - -T = TypeVar("T", int, float) - - -@dataclass -class PredictOutput(Generic[T]): - bboxes: list[list[T]] = field(default_factory=list) - masks: list[Image.Image] = field(default_factory=list) - preview: Optional[Image.Image] = None - - -def hf_download(file: str, repo_id: str = REPO_ID) -> str: - try: - path = hf_hub_download(repo_id, file) - except Exception: - print(f"[ADetailer] Failed to load model {file!r} from huggingface") - path = "INVALID" - return path - - -def safe_mkdir(path: str | os.PathLike[str]) -> None: - path = Path(path) - if not path.exists() and path.parent.exists() and os.access(path.parent, os.W_OK): - path.mkdir() - - -def scan_model_dir(path: Path) -> list[Path]: - if not path.is_dir(): - return [] - return [p for p in path.rglob("*") if p.is_file() and p.suffix == ".pt"] - - -def download_models(*names: str) -> dict[str, str]: - models = OrderedDict() - with ThreadPoolExecutor() as executor: - for name in names: - if "-world" in name: - models[name] = executor.submit( - hf_download, name, repo_id="Bingsu/yolo-world-mirror" - ) - else: - models[name] = executor.submit(hf_download, name) - return {name: future.result() for name, future in models.items()} - - -def get_models( - *dirs: str | os.PathLike[str], huggingface: bool = True -) -> OrderedDict[str, str]: - model_paths = [] - - for dir_ in dirs: - if not dir_: - continue - model_paths.extend(scan_model_dir(Path(dir_))) - - models = OrderedDict() - if huggingface: - to_download = [ - "face_yolov8n.pt", - "face_yolov8s.pt", - "hand_yolov8n.pt", - "person_yolov8n-seg.pt", - "person_yolov8s-seg.pt", - "yolov8x-worldv2.pt", - ] - models.update(download_models(*to_download)) - - models.update( - { - "mediapipe_face_full": "mediapipe_face_full", - "mediapipe_face_short": "mediapipe_face_short", - "mediapipe_face_mesh": "mediapipe_face_mesh", - "mediapipe_face_mesh_eyes_only": "mediapipe_face_mesh_eyes_only", - } - ) - - invalid_keys = [k for k, v in models.items() if v == "INVALID"] - for key in invalid_keys: - models.pop(key) - - for path in model_paths: - if path.name in models: - continue - models[path.name] = str(path) - - return models - - -def create_mask_from_bbox( - bboxes: list[list[float]], shape: tuple[int, int] -) -> list[Image.Image]: - """ - Parameters - ---------- - bboxes: list[list[float]] - list of [x1, y1, x2, y2] - bounding boxes - shape: tuple[int, int] - shape of the image (width, height) - - Returns - ------- - masks: list[Image.Image] - A list of masks - - """ - masks = [] - for bbox in bboxes: - mask = Image.new("L", shape, 0) - mask_draw = ImageDraw.Draw(mask) - mask_draw.rectangle(bbox, fill=255) - masks.append(mask) - return masks - - -def create_bbox_from_mask( - masks: list[Image.Image], shape: tuple[int, int] -) -> list[list[int]]: - """ - Parameters - ---------- - masks: list[Image.Image] - A list of masks - shape: tuple[int, int] - shape of the image (width, height) - - Returns - ------- - bboxes: list[list[float]] - A list of bounding boxes - - """ - bboxes = [] - for mask in masks: - mask = mask.resize(shape) - bbox = mask.getbbox() - if bbox is not None: - bboxes.append(list(bbox)) - return bboxes - - -def ensure_pil_image(image: Any, mode: str = "RGB") -> Image.Image: - if not isinstance(image, Image.Image): - image = to_pil_image(image) - if image.mode != mode: - image = image.convert(mode) - return image \ No newline at end of file diff --git a/extras/adetailer/mask.py b/extras/adetailer/mask.py deleted file mode 100644 index 2faee71a..00000000 --- a/extras/adetailer/mask.py +++ /dev/null @@ -1,269 +0,0 @@ -from __future__ import annotations - -from enum import IntEnum -from functools import partial, reduce -from math import dist -from typing import Any, TypeVar - -import cv2 -import numpy as np -from PIL import Image, ImageChops - -from extras.adetailer.args import MASK_MERGE_INVERT -from extras.adetailer.common import ensure_pil_image, PredictOutput - - -class SortBy(IntEnum): - NONE = 0 - LEFT_TO_RIGHT = 1 - CENTER_TO_EDGE = 2 - AREA = 3 - - -class MergeInvert(IntEnum): - NONE = 0 - MERGE = 1 - MERGE_INVERT = 2 - - -T = TypeVar("T", int, float) - - -def _dilate(arr: np.ndarray, value: int) -> np.ndarray: - kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value)) - return cv2.dilate(arr, kernel, iterations=1) - - -def _erode(arr: np.ndarray, value: int) -> np.ndarray: - kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value)) - return cv2.erode(arr, kernel, iterations=1) - - -def dilate_erode(img: Image.Image, value: int) -> Image.Image: - """ - The dilate_erode function takes an image and a value. - If the value is positive, it dilates the image by that amount. - If the value is negative, it erodes the image by that amount. - - Parameters - ---------- - img: PIL.Image.Image - the image to be processed - value: int - kernel size of dilation or erosion - - Returns - ------- - PIL.Image.Image - The image that has been dilated or eroded - """ - if value == 0: - return img - - arr = np.array(img) - arr = _dilate(arr, value) if value > 0 else _erode(arr, -value) - - return Image.fromarray(arr) - - -def offset(img: Image.Image, x: int = 0, y: int = 0) -> Image.Image: - """ - The offset function takes an image and offsets it by a given x(→) and y(↑) value. - - Parameters - ---------- - mask: Image.Image - Pass the mask image to the function - x: int - → - y: int - ↑ - - Returns - ------- - PIL.Image.Image - A new image that is offset by x and y - """ - return ImageChops.offset(img, x, -y) - - -def is_all_black(img: Image.Image | np.ndarray) -> bool: - if isinstance(img, Image.Image): - img = np.array(ensure_pil_image(img, "L")) - return cv2.countNonZero(img) == 0 - - -def has_intersection(im1: Any, im2: Any) -> bool: - arr1 = np.array(ensure_pil_image(im1, "L")) - arr2 = np.array(ensure_pil_image(im2, "L")) - return not is_all_black(cv2.bitwise_and(arr1, arr2)) - - -def bbox_area(bbox: list[T]) -> T: - return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - - -def mask_preprocess( - masks: list[Image.Image], - kernel: int = 0, - x_offset: int = 0, - y_offset: int = 0, - merge_invert: int | MergeInvert | str = MergeInvert.NONE, -) -> list[Image.Image]: - """ - The mask_preprocess function takes a list of masks and preprocesses them. - It dilates and erodes the masks, and offsets them by x_offset and y_offset. - - Parameters - ---------- - masks: list[Image.Image] - A list of masks - kernel: int - kernel size of dilation or erosion - x_offset: int - → - y_offset: int - ↑ - - Returns - ------- - list[Image.Image] - A list of processed masks - """ - if not masks: - return [] - - if x_offset != 0 or y_offset != 0: - masks = [offset(m, x_offset, y_offset) for m in masks] - - if kernel != 0: - masks = [dilate_erode(m, kernel) for m in masks] - masks = [m for m in masks if not is_all_black(m)] - - return mask_merge_invert(masks, mode=merge_invert) - - -# Bbox sorting -def _key_left_to_right(bbox: list[T]) -> T: - """ - Left to right - - Parameters - ---------- - bbox: list[int] | list[float] - list of [x1, y1, x2, y2] - """ - return bbox[0] - - -def _key_center_to_edge(bbox: list[T], *, center: tuple[float, float]) -> float: - """ - Center to edge - - Parameters - ---------- - bbox: list[int] | list[float] - list of [x1, y1, x2, y2] - image: Image.Image - the image - """ - bbox_center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2) - return dist(center, bbox_center) - - -def _key_area(bbox: list[T]) -> T: - """ - Large to small - - Parameters - ---------- - bbox: list[int] | list[float] - list of [x1, y1, x2, y2] - """ - return -bbox_area(bbox) - - -def sort_bboxes( - pred: PredictOutput[T], order: int | SortBy = SortBy.NONE -) -> PredictOutput[T]: - if order == SortBy.NONE or len(pred.bboxes) <= 1: - return pred - - if order == SortBy.LEFT_TO_RIGHT: - key = _key_left_to_right - elif order == SortBy.CENTER_TO_EDGE: - width, height = pred.preview.size - center = (width / 2, height / 2) - key = partial(_key_center_to_edge, center=center) - elif order == SortBy.AREA: - key = _key_area - else: - raise RuntimeError - - items = len(pred.bboxes) - idx = sorted(range(items), key=lambda i: key(pred.bboxes[i])) - pred.bboxes = [pred.bboxes[i] for i in idx] - pred.masks = [pred.masks[i] for i in idx] - return pred - - -# Filter by ratio -def is_in_ratio(bbox: list[T], low: float, high: float, orig_area: int) -> bool: - area = bbox_area(bbox) - return low <= area / orig_area <= high - - -def filter_by_ratio( - pred: PredictOutput[T], low: float, high: float -) -> PredictOutput[T]: - if not pred.bboxes: - return pred - - w, h = pred.preview.size - orig_area = w * h - items = len(pred.bboxes) - idx = [i for i in range(items) if is_in_ratio(pred.bboxes[i], low, high, orig_area)] - pred.bboxes = [pred.bboxes[i] for i in idx] - pred.masks = [pred.masks[i] for i in idx] - return pred - - -def filter_k_largest(pred: PredictOutput[T], k: int = 0) -> PredictOutput[T]: - if not pred.bboxes or k == 0: - return pred - areas = [bbox_area(bbox) for bbox in pred.bboxes] - idx = np.argsort(areas)[-k:] - idx = idx[::-1] - pred.bboxes = [pred.bboxes[i] for i in idx] - pred.masks = [pred.masks[i] for i in idx] - return pred - - -# Merge / Invert -def mask_merge(masks: list[Image.Image]) -> list[Image.Image]: - arrs = [np.array(m) for m in masks] - arr = reduce(cv2.bitwise_or, arrs) - return [Image.fromarray(arr)] - - -def mask_invert(masks: list[Image.Image]) -> list[Image.Image]: - return [ImageChops.invert(m) for m in masks] - - -def mask_merge_invert( - masks: list[Image.Image], mode: int | MergeInvert | str -) -> list[Image.Image]: - if isinstance(mode, str): - mode = MASK_MERGE_INVERT.index(mode) - - if mode == MergeInvert.NONE or not masks: - return masks - - if mode == MergeInvert.MERGE: - return mask_merge(masks) - - if mode == MergeInvert.MERGE_INVERT: - merged = mask_merge(masks) - return mask_invert(merged) - - raise RuntimeError \ No newline at end of file diff --git a/extras/adetailer/script.py b/extras/adetailer/script.py deleted file mode 100644 index 05a4110e..00000000 --- a/extras/adetailer/script.py +++ /dev/null @@ -1,53 +0,0 @@ -from extras.adetailer.args import ADetailerArgs -from extras.adetailer.common import get_models, PredictOutput -from extras.adetailer.mask import filter_by_ratio, filter_k_largest, sort_bboxes, mask_preprocess -from modules import config - -model_mapping = get_models( - config.path_adetailer, - huggingface=True, -) - - -def get_ad_model(name: str): - if name not in model_mapping: - msg = f"[-] ADetailer: Model {name!r} not found. Available models: {list(model_mapping.keys())}" - raise ValueError(msg) - return model_mapping[name] - - -def pred_preprocessing(p, pred: PredictOutput, args: ADetailerArgs, inpaint_only_masked=False): - pred = filter_by_ratio( - pred, low=args.ad_mask_min_ratio, high=args.ad_mask_max_ratio - ) - pred = filter_k_largest(pred, k=args.ad_mask_k_largest) - pred = sort_bboxes(pred) - masks = mask_preprocess( - pred.masks, - kernel=args.ad_dilate_erode, - x_offset=args.ad_x_offset, - y_offset=args.ad_y_offset, - merge_invert=args.ad_mask_merge_invert, - ) - - #if inpaint_only_masked: - # image_mask = self.get_image_mask(p) - # masks = self.inpaint_mask_filter(image_mask, masks) - return masks - - - # def get_image_mask(p) -> Image.Image: - # mask = p.image_mask - # if getattr(p, "inpainting_mask_invert", False): - # mask = ImageChops.invert(mask) - # mask = create_binary_mask(mask) - # - # if is_skip_img2img(p): - # if hasattr(p, "init_images") and p.init_images: - # width, height = p.init_images[0].size - # else: - # msg = "[-] ADetailer: no init_images." - # raise RuntimeError(msg) - # else: - # width, height = p.width, p.height - # return images.resize_image(p.resize_mode, mask, width, height) \ No newline at end of file diff --git a/extras/adetailer/ultralytics_predict.py b/extras/adetailer/ultralytics_predict.py deleted file mode 100644 index b028ea83..00000000 --- a/extras/adetailer/ultralytics_predict.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import TYPE_CHECKING - -import cv2 -from PIL import Image -from torchvision.transforms.functional import to_pil_image - -from extras.adetailer.common import PredictOutput, create_mask_from_bbox - -if TYPE_CHECKING: - import torch - from ultralytics import YOLO, YOLOWorld - - -def ultralytics_predict( - model_path: str | Path, - image: Image.Image, - confidence: float = 0.3, - device: str = "", - classes: str = "", -) -> PredictOutput[float]: - from ultralytics import YOLO - - model = YOLO(model_path) - apply_classes(model, model_path, classes) - pred = model(image, conf=confidence, device=device) - - bboxes = pred[0].boxes.xyxy.cpu().numpy() - if bboxes.size == 0: - return PredictOutput() - bboxes = bboxes.tolist() - - if pred[0].masks is None: - masks = create_mask_from_bbox(bboxes, image.size) - else: - masks = mask_to_pil(pred[0].masks.data, image.size) - preview = pred[0].plot() - preview = cv2.cvtColor(preview, cv2.COLOR_BGR2RGB) - preview = Image.fromarray(preview) - - return PredictOutput(bboxes=bboxes, masks=masks, preview=preview) - - -def apply_classes(model: YOLO | YOLOWorld, model_path: str | Path, classes: str): - if not classes or "-world" not in Path(model_path).stem: - return - parsed = [c.strip() for c in classes.split(",") if c.strip()] - if parsed: - model.set_classes(parsed) - - -def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]: - """ - Parameters - ---------- - masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W). - The device can be CUDA, but `to_pil_image` takes care of that. - - shape: tuple[int, int] - (W, H) of the original image - """ - n = masks.shape[0] - return [to_pil_image(masks[i], mode="L").resize(shape) for i in range(n)] - - diff --git a/modules/async_worker.py b/modules/async_worker.py index e8d536db..c7a2afa4 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -1,4 +1,6 @@ import threading + +from extras.inpaint_mask import generate_mask_from_image, SAMOptions from modules.patch import PatchSettings, patch_settings, patch_all patch_all() @@ -1014,51 +1016,35 @@ def worker(): current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent, switch, task, tasks, tiled, use_expansion, width, height) - # adetailer - progressbar(async_task, current_progress, 'Processing adetailer ...') + # stage2 + progressbar(async_task, current_progress, 'Processing stage2 ...') final_unet = pipeline.final_unet.clone() for img in imgs: - from extras.adetailer.ultralytics_predict import ultralytics_predict - predictor = ultralytics_predict - from extras.adetailer.script import get_ad_model - ad_model = get_ad_model('face_yolov8n.pt') + # TODO add stage2 check and options from inputs here + mask = generate_mask_from_image(img, sam_options=SAMOptions( + dino_prompt='eye' + )) - kwargs = {} - kwargs["device"] = torch.device('cpu') - kwargs["classes"] = "" - from PIL import Image - img2 = Image.fromarray(img) - pred = predictor(ad_model, img2, **kwargs) - - if pred.preview is None: - print('[ADetailer] nothing detected on image') - continue - - from extras.adetailer.args import ADetailerArgs - args = ADetailerArgs() - from extras.adetailer.script import pred_preprocessing - masks = pred_preprocessing(img, pred, args) - merged_masks = np.maximum(*[np.array(mask) for mask in masks]) - async_task.yields.append(['preview', (current_progress, 'Loading ...', merged_masks)]) - # TODO also show do_not_show_finished_images=len(tasks) == 1 when adetailer is on - yield_result(async_task, merged_masks, async_task.black_out_nsfw, False, + async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)]) + # TODO also show do_not_show_finished_images=len(tasks) == 1 + yield_result(async_task, mask, async_task.black_out_nsfw, False, do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results) # TODO make configurable - denoising_strength_adetailer = 0.3 - inpaint_respective_field_adetailer = 0.0 - inpaint_head_model_path_adetailer = None - inpaint_parameterized_adetailer = False - goals_adetailer = ['inpaint'] - denoising_strength_adetailer, initial_latent_adetailer, width_adetailer, height_adetailer = apply_inpaint( - async_task, None, inpaint_head_model_path_adetailer, img, merged_masks, - inpaint_parameterized_adetailer, denoising_strength_adetailer, - inpaint_respective_field_adetailer, switch, current_progress, True) + denoising_strength_stage2 = 0.3 + inpaint_respective_field_stage2 = 0.0 + inpaint_head_model_path_stage2 = None + inpaint_parameterized_stage2 = False + goals_stage2 = ['inpaint'] + denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint( + async_task, None, inpaint_head_model_path_stage2, img, mask, + inpaint_parameterized_stage2, denoising_strength_stage2, + inpaint_respective_field_stage2, switch, current_progress, True) process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path, - current_task_id, denoising_strength_adetailer, final_scheduler_name, goals_adetailer, - initial_latent_adetailer, switch, task, tasks, tiled, use_expansion, width_adetailer, - height_adetailer) + current_task_id, denoising_strength_stage2, final_scheduler_name, goals_stage2, + initial_latent_stage2, switch, task, tasks, tiled, use_expansion, width_stage2, + height_stage2) # reset unet and inpaint_worker pipeline.final_unet = final_unet