refactor: remove adetailer code

2024-06-10 23:46:25 +02:00 · 2024-06-10 23:46:25 +02:00 · ead1ed617a
parent 29967d3a18
commit ead1ed617a
6 changed files with 23 additions and 865 deletions
--- a/extras/adetailer/args.py
+++ b/extras/adetailer/args.py
@ -1,278 +0,0 @@
 from __future__ import annotations
 from collections import UserList
 from dataclasses import dataclass
 from functools import cached_property, partial
 from typing import Any, Literal, NamedTuple, Optional
 try:
    from pydantic.v1 import (
        BaseModel,
        Extra,
        NonNegativeFloat,
        NonNegativeInt,
        PositiveInt,
        confloat,
        conint,
        validator,
    )
 except ImportError:
    from pydantic import (
        BaseModel,
        Extra,
        NonNegativeFloat,
        NonNegativeInt,
        PositiveInt,
        confloat,
        conint,
        validator,
    )
@dataclass
 class SkipImg2ImgOrig:
    # Plain record of four generation settings.
    # NOTE(review): presumably the original values remembered when the
    # img2img pass is skipped — confirm against the callers.
    steps: int
    sampler_name: str
    width: int
    height: int
 class Arg(NamedTuple):
    """Pair of a model attribute name and its human-readable parameter name."""
    attr: str
    name: str
 class ArgsList(UserList):
    """List of ``(attr, name)`` pairs exposing each column as a tuple.

    Both views are computed on first access and cached afterwards, so
    mutating the list later does not refresh them.
    """
    @cached_property
    def attrs(self) -> tuple[str, ...]:
        """First element of every pair, in list order."""
        firsts = [attr for attr, _name in self]
        return tuple(firsts)
    @cached_property
    def names(self) -> tuple[str, ...]:
        """Second element of every pair, in list order."""
        seconds = [name for _attr, name in self]
        return tuple(seconds)
 class ADetailerArgs(BaseModel, extra=Extra.forbid):
    """Validated settings for one ADetailer pass.

    Unknown fields are rejected (``extra=Extra.forbid``). ``extra_params``
    turns an instance into a display-name keyed dict, omitting entries that
    are disabled or still at the value it treats as "nothing to report".
    """
    # The string "None" means "no model selected"; see need_skip().
    ad_model: str = "None"
    ad_model_classes: str = ""
    ad_tap_enable: bool = True
    ad_prompt: str = ""
    ad_negative_prompt: str = ""
    ad_confidence: confloat(ge=0.0, le=1.0) = 0.3
    ad_mask_k_largest: NonNegativeInt = 0
    ad_mask_min_ratio: confloat(ge=0.0, le=1.0) = 0.0
    ad_mask_max_ratio: confloat(ge=0.0, le=1.0) = 1.0
    ad_dilate_erode: int = 4
    ad_x_offset: int = 0
    ad_y_offset: int = 0
    ad_mask_merge_invert: Literal["None", "Merge", "Merge and Invert"] = "None"
    ad_mask_blur: NonNegativeInt = 4
    ad_denoising_strength: confloat(ge=0.0, le=1.0) = 0.4
    ad_inpaint_only_masked: bool = True
    ad_inpaint_only_masked_padding: NonNegativeInt = 32
    # Each ad_use_* flag below is reported by extra_params together with its
    # companion value only when the flag is truthy.
    ad_use_inpaint_width_height: bool = False
    ad_inpaint_width: PositiveInt = 512
    ad_inpaint_height: PositiveInt = 512
    ad_use_steps: bool = False
    ad_steps: PositiveInt = 28
    ad_use_cfg_scale: bool = False
    ad_cfg_scale: NonNegativeFloat = 7.0
    ad_use_checkpoint: bool = False
    ad_checkpoint: Optional[str] = None
    ad_use_vae: bool = False
    ad_vae: Optional[str] = None
    ad_use_sampler: bool = False
    ad_sampler: str = "DPM++ 2M Karras"
    ad_scheduler: str = "Use same scheduler"
    ad_use_noise_multiplier: bool = False
    ad_noise_multiplier: confloat(ge=0.5, le=1.5) = 1.0
    ad_use_clip_skip: bool = False
    ad_clip_skip: conint(ge=1, le=12) = 1
    ad_restore_face: bool = False
    ad_controlnet_model: str = "None"
    ad_controlnet_module: str = "None"
    ad_controlnet_weight: confloat(ge=0.0, le=1.0) = 1.0
    ad_controlnet_guidance_start: confloat(ge=0.0, le=1.0) = 0.0
    ad_controlnet_guidance_end: confloat(ge=0.0, le=1.0) = 1.0
    # Not taken from the input as-is: the pre-validator below replaces any
    # supplied value with "the supplied value is not a tuple".
    is_api: bool = True
    @validator("is_api", pre=True)
    def is_api_validator(cls, v: Any):  # noqa: N805
        "tuple is json serializable but cannot be made with json deserialize."
        # Exact type check on purpose: only a genuine tuple yields False.
        return type(v) is not tuple
    @staticmethod
    def ppop(
        p: dict[str, Any],
        key: str,
        pops: list[str] | None = None,
        cond: Any = None,
    ) -> None:
        """Conditionally remove entries from ``p`` in place.

        Does nothing when ``key`` is absent. With ``cond=None`` the trigger
        is ``p[key]`` being falsy; otherwise the trigger is
        ``p[key] == cond``. When triggered, every key listed in ``pops``
        (default: just ``key``) is removed; missing keys are ignored.
        """
        if pops is None:
            pops = [key]
        if key not in p:
            return
        value = p[key]
        cond = (not bool(value)) if cond is None else value == cond
        if cond:
            for k in pops:
                p.pop(k, None)
    def extra_params(self, suffix: str = "") -> dict[str, Any]:
        """Return the settings as a ``{display name: value}`` dict.

        Returns an empty dict when ``need_skip()`` is true. Otherwise starts
        from every ``(attr, name)`` pair in ``ALL_ARGS`` and prunes entries
        with ``ppop``. ``suffix`` is appended to every remaining key.
        """
        if self.need_skip():
            return {}
        p = {name: getattr(self, attr) for attr, name in ALL_ARGS}
        ppop = partial(self.ppop, p)
        ppop("ADetailer model classes")
        ppop("ADetailer prompt")
        ppop("ADetailer negative prompt")
        p.pop("ADetailer tap enable", None)  # always pop
        ppop("ADetailer mask only top k largest", cond=0)
        ppop("ADetailer mask min ratio", cond=0.0)
        ppop("ADetailer mask max ratio", cond=1.0)
        ppop("ADetailer x offset", cond=0)
        ppop("ADetailer y offset", cond=0)
        ppop("ADetailer mask merge invert", cond="None")
        # Only the padding is dropped here; the flag itself stays in the dict.
        ppop("ADetailer inpaint only masked", ["ADetailer inpaint padding"])
        ppop(
            "ADetailer use inpaint width height",
            [
                "ADetailer use inpaint width height",
                "ADetailer inpaint width",
                "ADetailer inpaint height",
            ],
        )
        ppop(
            "ADetailer use separate steps",
            ["ADetailer use separate steps", "ADetailer steps"],
        )
        ppop(
            "ADetailer use separate CFG scale",
            ["ADetailer use separate CFG scale", "ADetailer CFG scale"],
        )
        ppop(
            "ADetailer use separate checkpoint",
            ["ADetailer use separate checkpoint", "ADetailer checkpoint"],
        )
        ppop(
            "ADetailer use separate VAE",
            ["ADetailer use separate VAE", "ADetailer VAE"],
        )
        ppop(
            "ADetailer use separate sampler",
            [
                "ADetailer use separate sampler",
                "ADetailer sampler",
                "ADetailer scheduler",
            ],
        )
        # Order matters: if the scheduler survived the step above, it is
        # still dropped when it holds the "Use same scheduler" placeholder.
        ppop("ADetailer scheduler", cond="Use same scheduler")
        ppop(
            "ADetailer use separate noise multiplier",
            ["ADetailer use separate noise multiplier", "ADetailer noise multiplier"],
        )
        ppop(
            "ADetailer use separate CLIP skip",
            ["ADetailer use separate CLIP skip", "ADetailer CLIP skip"],
        )
        ppop("ADetailer restore face")
        # A ControlNet model of "None" removes the whole ControlNet group;
        # the four calls after it then find no key and do nothing.
        ppop(
            "ADetailer ControlNet model",
            [
                "ADetailer ControlNet model",
                "ADetailer ControlNet module",
                "ADetailer ControlNet weight",
                "ADetailer ControlNet guidance start",
                "ADetailer ControlNet guidance end",
            ],
            cond="None",
        )
        ppop("ADetailer ControlNet module", cond="None")
        ppop("ADetailer ControlNet weight", cond=1.0)
        ppop("ADetailer ControlNet guidance start", cond=0.0)
        ppop("ADetailer ControlNet guidance end", cond=1.0)
        if suffix:
            p = {k + suffix: v for k, v in p.items()}
        return p
    def is_mediapipe(self) -> bool:
        """Return True when ``ad_model`` starts with "mediapipe" (case-insensitive)."""
        return self.ad_model.lower().startswith("mediapipe")
    def need_skip(self) -> bool:
        """Return True when no model is selected or ``ad_tap_enable`` is False."""
        return self.ad_model == "None" or self.ad_tap_enable is False
 # (attribute name on ADetailerArgs, display name) pairs. The display name is
 # the dictionary key that ADetailerArgs.extra_params emits for the attribute.
 _all_args = [
    ("ad_model", "ADetailer model"),
    ("ad_model_classes", "ADetailer model classes"),
    ("ad_tap_enable", "ADetailer tap enable"),
    ("ad_prompt", "ADetailer prompt"),
    ("ad_negative_prompt", "ADetailer negative prompt"),
    ("ad_confidence", "ADetailer confidence"),
    ("ad_mask_k_largest", "ADetailer mask only top k largest"),
    ("ad_mask_min_ratio", "ADetailer mask min ratio"),
    ("ad_mask_max_ratio", "ADetailer mask max ratio"),
    ("ad_x_offset", "ADetailer x offset"),
    ("ad_y_offset", "ADetailer y offset"),
    ("ad_dilate_erode", "ADetailer dilate erode"),
    ("ad_mask_merge_invert", "ADetailer mask merge invert"),
    ("ad_mask_blur", "ADetailer mask blur"),
    ("ad_denoising_strength", "ADetailer denoising strength"),
    ("ad_inpaint_only_masked", "ADetailer inpaint only masked"),
    ("ad_inpaint_only_masked_padding", "ADetailer inpaint padding"),
    ("ad_use_inpaint_width_height", "ADetailer use inpaint width height"),
    ("ad_inpaint_width", "ADetailer inpaint width"),
    ("ad_inpaint_height", "ADetailer inpaint height"),
    ("ad_use_steps", "ADetailer use separate steps"),
    ("ad_steps", "ADetailer steps"),
    ("ad_use_cfg_scale", "ADetailer use separate CFG scale"),
    ("ad_cfg_scale", "ADetailer CFG scale"),
    ("ad_use_checkpoint", "ADetailer use separate checkpoint"),
    ("ad_checkpoint", "ADetailer checkpoint"),
    ("ad_use_vae", "ADetailer use separate VAE"),
    ("ad_vae", "ADetailer VAE"),
    ("ad_use_sampler", "ADetailer use separate sampler"),
    ("ad_sampler", "ADetailer sampler"),
    ("ad_scheduler", "ADetailer scheduler"),
    ("ad_use_noise_multiplier", "ADetailer use separate noise multiplier"),
    ("ad_noise_multiplier", "ADetailer noise multiplier"),
    ("ad_use_clip_skip", "ADetailer use separate CLIP skip"),
    ("ad_clip_skip", "ADetailer CLIP skip"),
    ("ad_restore_face", "ADetailer restore face"),
    ("ad_controlnet_model", "ADetailer ControlNet model"),
    ("ad_controlnet_module", "ADetailer ControlNet module"),
    ("ad_controlnet_weight", "ADetailer ControlNet weight"),
    ("ad_controlnet_guidance_start", "ADetailer ControlNet guidance start"),
    ("ad_controlnet_guidance_end", "ADetailer ControlNet guidance end"),
 ]
 # Wrap the raw tuples as Arg records and expose them through ArgsList.
 _args = [Arg(*args) for args in _all_args]
 ALL_ARGS = ArgsList(_args)
 # Display labels for the bounding-box sort orders.
 # NOTE(review): the order appears to mirror the SortBy values 0-3 in mask.py — confirm.
 BBOX_SORTBY = [
    "None",
    "Position (left to right)",
    "Position (center to edge)",
    "Area (large to small)",
 ]
 # Labels for the merge/invert modes; mask.mask_merge_invert converts a string
 # mode to its index in this list.
 MASK_MERGE_INVERT = ["None", "Merge", "Merge and Invert"]
 _script_default = (
    "dynamic_prompting",
    "dynamic_thresholding",
    "wildcard_recursive",
    "wildcards",
    "lora_block_weight",
    "negpip",
 )
 # Script names joined into one comma-separated string, sorted alphabetically.
 SCRIPT_DEFAULT = ",".join(sorted(_script_default))
 _builtin_script = ("soft_inpainting", "hypertile_script")
 # Same comma-separated, sorted form for the second group of script names.
 BUILTIN_SCRIPT = ",".join(sorted(_builtin_script))
--- a/extras/adetailer/common.py
+++ b/extras/adetailer/common.py
@ -1,161 +0,0 @@
 from __future__ import annotations
 import os
 from collections import OrderedDict
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Generic, Optional, TypeVar
 from huggingface_hub import hf_hub_download
 from PIL import Image, ImageDraw
 from torchvision.transforms.functional import to_pil_image
 # Default Hugging Face repository id used by hf_download.
 REPO_ID = "Bingsu/adetailer"
 # Coordinate type of PredictOutput bounding boxes: int or float.
 T = TypeVar("T", int, float)
@dataclass
 class PredictOutput(Generic[T]):
    # Result container for one detection run; the defaults describe an
    # empty result (no boxes, no masks, no preview).
    # bboxes and masks are indexed together by the filtering/sorting helpers,
    # so entry i of one belongs to entry i of the other.
    bboxes: list[list[T]] = field(default_factory=list)
    masks: list[Image.Image] = field(default_factory=list)
    # None when nothing was produced; otherwise an image whose size the
    # helpers use as the reference (width, height).
    preview: Optional[Image.Image] = None
 def hf_download(file: str, repo_id: str = REPO_ID) -> str:
    """Download ``file`` from a Hugging Face repo and return the resulting path.

    Any failure is reported on stdout and signalled by returning the
    sentinel string ``"INVALID"`` instead of raising.
    """
    try:
        return hf_hub_download(repo_id, file)
    except Exception:
        # Deliberately best-effort: the caller filters out "INVALID" entries.
        print(f"[ADetailer] Failed to load model {file!r} from huggingface")
        return "INVALID"
 def safe_mkdir(path: str | os.PathLike[str]) -> None:
    """Create ``path`` as a directory only when that is plainly possible.

    Nothing happens if ``path`` already exists, if its parent directory
    is missing, or if the parent is not writable.
    """
    target = Path(path)
    if target.exists():
        return
    parent = target.parent
    if not parent.exists():
        return
    if not os.access(parent, os.W_OK):
        return
    target.mkdir()
 def scan_model_dir(path: Path) -> list[Path]:
    """Recursively collect every regular file with the ``.pt`` suffix under ``path``.

    Returns an empty list when ``path`` is not a directory.
    """
    found: list[Path] = []
    if path.is_dir():
        for candidate in path.rglob("*"):
            if candidate.is_file() and candidate.suffix == ".pt":
                found.append(candidate)
    return found
 def download_models(*names: str) -> dict[str, str]:
    """Download all ``names`` concurrently and map each name to its result.

    Names containing ``"-world"`` are fetched from the
    ``Bingsu/yolo-world-mirror`` repo; all others use ``hf_download``'s
    default repo. Each value is whatever ``hf_download`` returned for that
    name. Key order follows ``names``.
    """
    pending = OrderedDict()
    with ThreadPoolExecutor() as pool:
        for model_name in names:
            extra = {"repo_id": "Bingsu/yolo-world-mirror"} if "-world" in model_name else {}
            pending[model_name] = pool.submit(hf_download, model_name, **extra)
    # Leaving the ``with`` block waits for every submitted job to finish.
    return {model_name: job.result() for model_name, job in pending.items()}
 def get_models(
    *dirs: str | os.PathLike[str], huggingface: bool = True
 ) -> OrderedDict[str, str]:
    """Build the ordered ``{model name: path or identifier}`` registry.

    Entries are added in this order: downloaded models (only when
    ``huggingface`` is true), the four mediapipe identifiers (mapped to
    themselves), then ``.pt`` files found under ``dirs``. Failed downloads
    are removed, and a local file never replaces an existing entry with
    the same file name. Falsy entries in ``dirs`` are ignored.
    """
    local_files: list[Path] = []
    for directory in filter(None, dirs):
        local_files += scan_model_dir(Path(directory))
    registry = OrderedDict()
    if huggingface:
        registry.update(
            download_models(
                "face_yolov8n.pt",
                "face_yolov8s.pt",
                "hand_yolov8n.pt",
                "person_yolov8n-seg.pt",
                "person_yolov8s-seg.pt",
                "yolov8x-worldv2.pt",
            )
        )
    for builtin in (
        "mediapipe_face_full",
        "mediapipe_face_short",
        "mediapipe_face_mesh",
        "mediapipe_face_mesh_eyes_only",
    ):
        registry[builtin] = builtin
    # Collect first, delete afterwards: the dict must not change while iterated.
    failed = [key for key, value in registry.items() if value == "INVALID"]
    for key in failed:
        del registry[key]
    for file_path in local_files:
        registry.setdefault(file_path.name, str(file_path))
    return registry
 def create_mask_from_bbox(
    bboxes: list[list[float]], shape: tuple[int, int]
 ) -> list[Image.Image]:
    """
    Draw one filled-rectangle mask per bounding box.

    Parameters
    ----------
        bboxes: list[list[float]]
            list of [x1, y1, x2, y2] bounding boxes
        shape: tuple[int, int]
            size of each mask image (width, height)
    Returns
    -------
        masks: list[Image.Image]
            One "L" mode image per box: 0 everywhere, 255 inside the box
    """
    def _filled_rectangle(box):
        canvas = Image.new("L", shape, 0)
        ImageDraw.Draw(canvas).rectangle(box, fill=255)
        return canvas
    return [_filled_rectangle(box) for box in bboxes]
 def create_bbox_from_mask(
    masks: list[Image.Image], shape: tuple[int, int]
 ) -> list[list[int]]:
    """
    Compute the bounding box of every mask after resizing it to ``shape``.

    Parameters
    ----------
        masks: list[Image.Image]
            A list of masks
        shape: tuple[int, int]
            size the masks are resized to (width, height)
    Returns
    -------
        bboxes: list[list[int]]
            One box per mask whose ``getbbox()`` is not None; masks
            without a bounding box are skipped
    """
    resized = (mask.resize(shape) for mask in masks)
    boxes = (item.getbbox() for item in resized)
    return [list(box) for box in boxes if box is not None]
 def ensure_pil_image(image: Any, mode: str = "RGB") -> Image.Image:
    """Return ``image`` as a PIL image in the requested ``mode``.

    Non-PIL inputs go through ``to_pil_image`` first; the result is
    converted only when its mode differs from ``mode``.
    """
    pil = image if isinstance(image, Image.Image) else to_pil_image(image)
    return pil if pil.mode == mode else pil.convert(mode)
--- a/extras/adetailer/mask.py
+++ b/extras/adetailer/mask.py
@ -1,269 +0,0 @@
 from __future__ import annotations
 from enum import IntEnum
 from functools import partial, reduce
 from math import dist
 from typing import Any, TypeVar
 import cv2
 import numpy as np
 from PIL import Image, ImageChops
 from extras.adetailer.args import MASK_MERGE_INVERT
 from extras.adetailer.common import ensure_pil_image, PredictOutput
 class SortBy(IntEnum):
    """Bounding-box orderings understood by ``sort_bboxes``."""
    NONE = 0  # keep the incoming order
    LEFT_TO_RIGHT = 1  # ascending x1
    CENTER_TO_EDGE = 2  # ascending distance from the preview centre
    AREA = 3  # descending box area
 class MergeInvert(IntEnum):
    """Mask post-processing modes used by ``mask_merge_invert``.

    String modes are converted with ``MASK_MERGE_INVERT.index`` before being
    compared with these members, so the values must match that list's indexes.
    """
    NONE = 0  # masks are returned unchanged
    MERGE = 1  # all masks are OR-ed into a single mask
    MERGE_INVERT = 2  # merged as above, then inverted
 # Coordinate type of a bounding box: int or float.
 T = TypeVar("T", int, float)
 def _dilate(arr: np.ndarray, value: int) -> np.ndarray:
    """Dilate ``arr`` once with a ``value`` x ``value`` rectangular kernel."""
    rect = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
    dilated = cv2.dilate(arr, rect, iterations=1)
    return dilated
 def _erode(arr: np.ndarray, value: int) -> np.ndarray:
    """Erode ``arr`` once with a ``value`` x ``value`` rectangular kernel."""
    rect = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
    eroded = cv2.erode(arr, rect, iterations=1)
    return eroded
 def dilate_erode(img: Image.Image, value: int) -> Image.Image:
    """
    Grow or shrink the bright regions of an image.

    A positive ``value`` dilates, a negative one erodes with kernel size
    ``-value``, and zero returns ``img`` itself untouched.

    Parameters
    ----------
        img: PIL.Image.Image
            the image to be processed
        value: int
            signed kernel size of the dilation (> 0) or erosion (< 0)
    Returns
    -------
        PIL.Image.Image
            The processed image, or the very same object when value is 0
    """
    if value == 0:
        return img
    pixels = np.array(img)
    if value > 0:
        processed = _dilate(pixels, value)
    else:
        processed = _erode(pixels, -value)
    return Image.fromarray(processed)
 def offset(img: Image.Image, x: int = 0, y: int = 0) -> Image.Image:
    """
    Shift an image by x (→) and y (↑) using ``ImageChops.offset``.

    Parameters
    ----------
        img: Image.Image
            The image (typically a mask) to shift
        x: int
            →
        y: int
            ↑ (negated before the call, because ``ImageChops.offset``
            counts positive vertical offsets downwards)
    Returns
    -------
        PIL.Image.Image
            A new image that is offset by x and y
    """
    vertical = -y
    shifted = ImageChops.offset(img, x, vertical)
    return shifted
 def is_all_black(img: Image.Image | np.ndarray) -> bool:
    """Return True when the image contains no non-zero pixel.

    PIL images are converted to mode "L" first; arrays are used as given.
    """
    pixels = np.array(ensure_pil_image(img, "L")) if isinstance(img, Image.Image) else img
    nonzero = cv2.countNonZero(pixels)
    return nonzero == 0
 def has_intersection(im1: Any, im2: Any) -> bool:
    """Return True when the two images share at least one non-zero pixel.

    Both inputs are converted to mode "L" and combined with a bitwise AND.
    """
    first, second = (np.array(ensure_pil_image(im, "L")) for im in (im1, im2))
    overlap = cv2.bitwise_and(first, second)
    return not is_all_black(overlap)
 def bbox_area(bbox: list[T]) -> T:
    """Return width times height of an ``[x1, y1, x2, y2]`` box."""
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    return width * height
 def mask_preprocess(
    masks: list[Image.Image],
    kernel: int = 0,
    x_offset: int = 0,
    y_offset: int = 0,
    merge_invert: int | MergeInvert | str = MergeInvert.NONE,
 ) -> list[Image.Image]:
    """
    Offset, dilate/erode and finally merge/invert a list of masks.

    The steps run in that order. The offset step is skipped when both
    offsets are 0. The dilate/erode step is skipped when ``kernel`` is 0;
    when it does run, masks that end up completely black are dropped.

    Parameters
    ----------
        masks: list[Image.Image]
            A list of masks
        kernel: int
            signed kernel size passed to ``dilate_erode``
        x_offset: int
            →
        y_offset: int
            ↑
        merge_invert: int | MergeInvert | str
            mode forwarded to ``mask_merge_invert``
    Returns
    -------
        list[Image.Image]
            A list of processed masks; ``[]`` when ``masks`` is empty
    """
    if not masks:
        return []
    processed = masks
    if not (x_offset == 0 and y_offset == 0):
        processed = [offset(item, x_offset, y_offset) for item in processed]
    if kernel != 0:
        survivors = []
        for item in processed:
            morphed = dilate_erode(item, kernel)
            if not is_all_black(morphed):
                survivors.append(morphed)
        processed = survivors
    return mask_merge_invert(processed, mode=merge_invert)
 # Bbox sorting
 def _key_left_to_right(bbox: list[T]) -> T:
    """
    Sort key for left-to-right ordering: the box's x1.

    Parameters
    ----------
    bbox: list[int] | list[float]
        list of [x1, y1, x2, y2]
    """
    left_edge = bbox[0]
    return left_edge
 def _key_center_to_edge(bbox: list[T], *, center: tuple[float, float]) -> float:
    """
    Sort key for center-to-edge ordering: distance from ``center`` to the
    box's midpoint.

    Parameters
    ----------
    bbox: list[int] | list[float]
        list of [x1, y1, x2, y2]
    center: tuple[float, float]
        (x, y) reference point
    """
    mid_x = (bbox[0] + bbox[2]) / 2
    mid_y = (bbox[1] + bbox[3]) / 2
    return dist(center, (mid_x, mid_y))
 def _key_area(bbox: list[T]) -> T:
    """
    Sort key for large-to-small ordering: the negated box area.

    Parameters
    ----------
    bbox: list[int] | list[float]
        list of [x1, y1, x2, y2]
    """
    area = bbox_area(bbox)
    return -area
 def sort_bboxes(
    pred: PredictOutput[T], order: int | SortBy = SortBy.NONE
 ) -> PredictOutput[T]:
    """Reorder ``pred.bboxes`` and ``pred.masks`` in place and return ``pred``.

    Nothing changes for ``SortBy.NONE`` or when there is at most one box.
    ``SortBy.CENTER_TO_EDGE`` measures from the centre of ``pred.preview``.
    Boxes with equal keys keep their relative order.

    Raises
    ------
    RuntimeError
        If ``order`` is not one of the ``SortBy`` values.
    """
    if order == SortBy.NONE or len(pred.bboxes) <= 1:
        return pred
    if order == SortBy.AREA:
        sort_key = _key_area
    elif order == SortBy.CENTER_TO_EDGE:
        width, height = pred.preview.size
        sort_key = partial(_key_center_to_edge, center=(width / 2, height / 2))
    elif order == SortBy.LEFT_TO_RIGHT:
        sort_key = _key_left_to_right
    else:
        raise RuntimeError
    ranked = sorted(enumerate(pred.bboxes), key=lambda pair: sort_key(pair[1]))
    positions = [position for position, _box in ranked]
    # Masks are permuted with the same indexes so they stay paired with boxes.
    pred.bboxes = [box for _position, box in ranked]
    pred.masks = [pred.masks[position] for position in positions]
    return pred
 # Filter by ratio
 def is_in_ratio(bbox: list[T], low: float, high: float, orig_area: int) -> bool:
    """Return True when box area / ``orig_area`` lies within ``[low, high]``."""
    ratio = bbox_area(bbox) / orig_area
    return low <= ratio and ratio <= high
 def filter_by_ratio(
    pred: PredictOutput[T], low: float, high: float
 ) -> PredictOutput[T]:
    """Keep only boxes whose share of the preview area is within ``[low, high]``.

    ``pred`` is modified in place (boxes and masks together) and returned.
    A prediction without boxes is returned untouched.
    """
    if not pred.bboxes:
        return pred
    width, height = pred.preview.size
    total_area = width * height
    kept = [
        position
        for position, box in enumerate(pred.bboxes)
        if is_in_ratio(box, low, high, total_area)
    ]
    pred.bboxes = [pred.bboxes[position] for position in kept]
    pred.masks = [pred.masks[position] for position in kept]
    return pred
 def filter_k_largest(pred: PredictOutput[T], k: int = 0) -> PredictOutput[T]:
    """Keep the ``k`` boxes with the largest area, largest first.

    ``pred`` is modified in place (boxes and masks together) and returned.
    ``k == 0`` or a prediction without boxes leaves it untouched.
    """
    if not pred.bboxes or k == 0:
        return pred
    areas = list(map(bbox_area, pred.bboxes))
    ascending = np.argsort(areas)
    # Last k entries of the ascending order, reversed to get largest first.
    top = ascending[-k:][::-1]
    pred.bboxes = [pred.bboxes[position] for position in top]
    pred.masks = [pred.masks[position] for position in top]
    return pred
 # Merge / Invert
 def mask_merge(masks: list[Image.Image]) -> list[Image.Image]:
    """Combine all masks with a bitwise OR into a one-element list."""
    as_arrays = (np.array(item) for item in masks)
    union = reduce(cv2.bitwise_or, as_arrays)
    return [Image.fromarray(union)]
 def mask_invert(masks: list[Image.Image]) -> list[Image.Image]:
    """Return a new list holding the inverted version of every mask."""
    return list(map(ImageChops.invert, masks))
 def mask_merge_invert(
    masks: list[Image.Image], mode: int | MergeInvert | str
 ) -> list[Image.Image]:
    """Apply the requested merge / merge-and-invert mode to ``masks``.

    A string ``mode`` is translated to its index in ``MASK_MERGE_INVERT``.
    ``MergeInvert.NONE`` or an empty ``masks`` returns ``masks`` unchanged.

    Raises
    ------
    ValueError
        If a string ``mode`` is not in ``MASK_MERGE_INVERT``.
    RuntimeError
        If ``mode`` is not a known ``MergeInvert`` value.
    """
    mode_value = MASK_MERGE_INVERT.index(mode) if isinstance(mode, str) else mode
    if mode_value == MergeInvert.NONE or not masks:
        return masks
    if mode_value not in (MergeInvert.MERGE, MergeInvert.MERGE_INVERT):
        raise RuntimeError
    merged = mask_merge(masks)
    return mask_invert(merged) if mode_value == MergeInvert.MERGE_INVERT else merged
--- a/extras/adetailer/script.py
+++ b/extras/adetailer/script.py
@ -1,53 +0,0 @@
 from extras.adetailer.args import ADetailerArgs
 from extras.adetailer.common import get_models, PredictOutput
 from extras.adetailer.mask import filter_by_ratio, filter_k_largest, sort_bboxes, mask_preprocess
 from modules import config
 # Model name -> path/identifier registry, built once when this module is
 # imported. huggingface=True makes get_models attempt its downloads here.
 model_mapping = get_models(
    config.path_adetailer,
    huggingface=True,
 )
 def get_ad_model(name: str):
    """Return the ``model_mapping`` entry registered under ``name``.

    Raises
    ------
    ValueError
        If ``name`` is not registered; the message lists the known names.
    """
    if name in model_mapping:
        return model_mapping[name]
    available = list(model_mapping.keys())
    raise ValueError(f"[-] ADetailer: Model {name!r} not found. Available models: {available}")
 def pred_preprocessing(p, pred: PredictOutput, args: ADetailerArgs, inpaint_only_masked=False):
    """Filter and sort a prediction, then return its post-processed masks.

    Steps: area-ratio filter, k-largest filter, ``sort_bboxes`` with its
    default order, then ``mask_preprocess`` with the dilate/erode, offset and
    merge/invert settings from ``args``.

    NOTE: ``p`` and ``inpaint_only_masked`` are currently unused; the
    inpaint-mask filtering step they belonged to is disabled.
    """
    filtered = filter_by_ratio(pred, low=args.ad_mask_min_ratio, high=args.ad_mask_max_ratio)
    filtered = filter_k_largest(filtered, k=args.ad_mask_k_largest)
    filtered = sort_bboxes(filtered)
    return mask_preprocess(
        filtered.masks,
        kernel=args.ad_dilate_erode,
        x_offset=args.ad_x_offset,
        y_offset=args.ad_y_offset,
        merge_invert=args.ad_mask_merge_invert,
    )
    # def get_image_mask(p) -> Image.Image:
    #     mask = p.image_mask
    #     if getattr(p, "inpainting_mask_invert", False):
    #         mask = ImageChops.invert(mask)
    #     mask = create_binary_mask(mask)
    #
    #     if is_skip_img2img(p):
    #         if hasattr(p, "init_images") and p.init_images:
    #             width, height = p.init_images[0].size
    #         else:
    #             msg = "[-] ADetailer: no init_images."
    #             raise RuntimeError(msg)
    #     else:
    #         width, height = p.width, p.height
    #     return images.resize_image(p.resize_mode, mask, width, height)
--- a/extras/adetailer/ultralytics_predict.py
+++ b/extras/adetailer/ultralytics_predict.py
@ -1,67 +0,0 @@
 from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING
 import cv2
 from PIL import Image
 from torchvision.transforms.functional import to_pil_image
 from extras.adetailer.common import PredictOutput, create_mask_from_bbox
 if TYPE_CHECKING:
    import torch
    from ultralytics import YOLO, YOLOWorld
 def ultralytics_predict(
    model_path: str | Path,
    image: Image.Image,
    confidence: float = 0.3,
    device: str = "",
    classes: str = "",
 ) -> PredictOutput[float]:
    """Run a YOLO model on ``image`` and package boxes, masks and a preview.

    Returns an empty ``PredictOutput`` when nothing is detected. Masks come
    from the model when it produces them; otherwise they are rectangles
    drawn from the boxes. The preview is the model's plot converted from
    BGR to RGB.
    """
    # Imported lazily so that merely importing this module does not load ultralytics.
    from ultralytics import YOLO
    detector = YOLO(model_path)
    apply_classes(detector, model_path, classes)
    result = detector(image, conf=confidence, device=device)[0]
    boxes = result.boxes.xyxy.cpu().numpy()
    if boxes.size == 0:
        return PredictOutput()
    box_list = boxes.tolist()
    if result.masks is not None:
        masks = mask_to_pil(result.masks.data, image.size)
    else:
        masks = create_mask_from_bbox(box_list, image.size)
    rgb = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)
    return PredictOutput(bboxes=box_list, masks=masks, preview=Image.fromarray(rgb))
 def apply_classes(model: YOLO | YOLOWorld, model_path: str | Path, classes: str):
    """Set custom classes on a "-world" model from a comma-separated string.

    Does nothing when ``classes`` is empty, when the file stem of
    ``model_path`` lacks ``"-world"``, or when no non-blank class name
    remains after splitting and stripping.
    """
    if not classes:
        return
    if "-world" not in Path(model_path).stem:
        return
    wanted = list(filter(None, (part.strip() for part in classes.split(","))))
    if wanted:
        model.set_classes(wanted)
 def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
    """
    Convert a stack of masks into "L" mode PIL images resized to ``shape``.

    Parameters
    ----------
    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
        The device can be CUDA, but `to_pil_image` takes care of that.
    shape: tuple[int, int]
        (W, H) of the original image
    """
    count = masks.shape[0]
    images = []
    for index in range(count):
        single = to_pil_image(masks[index], mode="L")
        images.append(single.resize(shape))
    return images
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@ -1,4 +1,6 @@
 import threading
 from extras.inpaint_mask import generate_mask_from_image, SAMOptions
 from modules.patch import PatchSettings, patch_settings, patch_all
 patch_all()
@ -1014,51 +1016,35 @@ def worker():
                             current_task_id, denoising_strength, final_scheduler_name, goals, initial_latent,
                             switch, task, tasks, tiled, use_expansion, width, height)
-                # adetailer
+                # stage2
-                progressbar(async_task, current_progress, 'Processing adetailer ...')
+                progressbar(async_task, current_progress, 'Processing stage2 ...')
                final_unet = pipeline.final_unet.clone()
                for img in imgs:
-                    from extras.adetailer.ultralytics_predict import ultralytics_predict
+                    # TODO add stage2 check and options from inputs here
-                    predictor = ultralytics_predict
+                    mask = generate_mask_from_image(img, sam_options=SAMOptions(
-                    from extras.adetailer.script import get_ad_model
+                        dino_prompt='eye'
-                    ad_model = get_ad_model('face_yolov8n.pt')
+                    ))
-                    kwargs = {}
+                    async_task.yields.append(['preview', (current_progress, 'Loading ...', mask)])
-                    kwargs["device"] = torch.device('cpu')
+                    # TODO also show do_not_show_finished_images=len(tasks) == 1
-                    kwargs["classes"] = ""
+                    yield_result(async_task, mask, async_task.black_out_nsfw, False,
                    from PIL import Image
                    img2 = Image.fromarray(img)
                    pred = predictor(ad_model, img2, **kwargs)
                    if pred.preview is None:
                        print('[ADetailer] nothing detected on image')
                        continue
                    from extras.adetailer.args import ADetailerArgs
                    args = ADetailerArgs()
                    from extras.adetailer.script import pred_preprocessing
                    masks = pred_preprocessing(img, pred, args)
                    merged_masks = np.maximum(*[np.array(mask) for mask in masks])
                    async_task.yields.append(['preview', (current_progress, 'Loading ...', merged_masks)])
                    # TODO also show do_not_show_finished_images=len(tasks) == 1 when adetailer is on
                    yield_result(async_task, merged_masks, async_task.black_out_nsfw, False,
                                 do_not_show_finished_images=len(tasks) == 1 or async_task.disable_intermediate_results)
                    # TODO make configurable
-                    denoising_strength_adetailer = 0.3
+                    denoising_strength_stage2 = 0.3
-                    inpaint_respective_field_adetailer = 0.0
+                    inpaint_respective_field_stage2 = 0.0
-                    inpaint_head_model_path_adetailer = None
+                    inpaint_head_model_path_stage2 = None
-                    inpaint_parameterized_adetailer = False
+                    inpaint_parameterized_stage2 = False
-                    goals_adetailer = ['inpaint']
+                    goals_stage2 = ['inpaint']
-                    denoising_strength_adetailer, initial_latent_adetailer, width_adetailer, height_adetailer = apply_inpaint(
+                    denoising_strength_stage2, initial_latent_stage2, width_stage2, height_stage2 = apply_inpaint(
-                        async_task, None, inpaint_head_model_path_adetailer, img, merged_masks,
+                        async_task, None, inpaint_head_model_path_stage2, img, mask,
-                        inpaint_parameterized_adetailer, denoising_strength_adetailer,
+                        inpaint_parameterized_stage2, denoising_strength_stage2,
-                        inpaint_respective_field_adetailer, switch, current_progress, True)
+                        inpaint_respective_field_stage2, switch, current_progress, True)
                    process_task(all_steps, async_task, callback, controlnet_canny_path, controlnet_cpds_path,
-                                 current_task_id, denoising_strength_adetailer, final_scheduler_name, goals_adetailer,
+                                 current_task_id, denoising_strength_stage2, final_scheduler_name, goals_stage2,
-                                 initial_latent_adetailer, switch, task, tasks, tiled, use_expansion, width_adetailer,
+                                 initial_latent_stage2, switch, task, tasks, tiled, use_expansion, width_stage2,
-                                 height_adetailer)
+                                 height_stage2)
                    # reset unet and inpaint_worker
                    pipeline.final_unet = final_unet