diff --git a/.dockerignore b/.dockerignore index 485dee64..d1eab807 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,54 @@ -.idea +__pycache__ +*.ckpt +*.safetensors +*.pth +*.pt +*.bin +*.patch +*.backup +*.corrupted +*.partial +*.onnx +sorted_styles.json +/input +/cache +/language/default.json +/test_imgs +config.txt +config_modification_tutorial.txt +user_path_config.txt +user_path_config-deprecated.txt +/modules/*.png +/repositories +/fooocus_env +/venv +/tmp +/ui-config.json +/outputs +/config.json +/log +/webui.settings.bat +/embeddings +/styles.csv +/params.txt +/styles.csv.bak +/webui-user.bat +/webui-user.sh +/interrogate +/user.css +/.idea +/notification.ogg +/notification.mp3 +/SwinIR +/textual_inversion +.vscode +/extensions +/test/stdout.txt +/test/stderr.txt +/cache.json* +/config_states/ +/node_modules +/package-lock.json +/.coverage* +/auth.json +.DS_Store \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..ce213ceb --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Ensure that shell scripts always use lf line endings, e.g. 
entrypoint.sh for docker +* text=auto +*.sh text eol=lf \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 585eb87a..f9876685 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @lllyasviel +* @mashb1t diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..adee0ed1 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" \ No newline at end of file diff --git a/.github/workflows/build_container.yml b/.github/workflows/build_container.yml new file mode 100644 index 00000000..eb70cda3 --- /dev/null +++ b/.github/workflows/build_container.yml @@ -0,0 +1,47 @@ +name: Docker image build + +on: + push: + branches: + - main + tags: + - v* + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }} + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=edge,branch=main + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index b969cd0e..820ae94a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:12.3.1-base-ubuntu22.04 +FROM nvidia/cuda:12.4.1-base-ubuntu22.04 ENV DEBIAN_FRONTEND noninteractive ENV CMDARGS --listen @@ -23,7 +23,7 @@ RUN chown -R user:user /content WORKDIR /content USER user -RUN git clone https://github.com/lllyasviel/Fooocus /content/app +COPY --chown=user:user . /content/app RUN mv /content/app/models /content/app/models.org CMD [ "sh", "-c", "/content/entrypoint.sh ${CMDARGS}" ] diff --git a/args_manager.py b/args_manager.py index 6a3ae9dc..5a2b37c9 100644 --- a/args_manager.py +++ b/args_manager.py @@ -1,7 +1,4 @@ import ldm_patched.modules.args_parser as args_parser -import os - -from tempfile import gettempdir args_parser.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.") @@ -31,6 +28,9 @@ args_parser.parser.add_argument("--disable-metadata", action='store_true', args_parser.parser.add_argument("--disable-preset-download", action='store_true', help="Disables downloading models for presets", default=False) +args_parser.parser.add_argument("--enable-describe-uov-image", action='store_true', + help="Disables automatic description of uov images when prompt is empty", default=False) + args_parser.parser.add_argument("--always-download-new-model", action='store_true', help="Always download newer models ", default=False) diff --git a/css/style.css b/css/style.css index c702a725..6ed0f628 100644 --- a/css/style.css +++ b/css/style.css @@ -27,6 +27,7 @@ progress { border-radius: 5px; /* Round the corners of the progress bar */ background-color: #f3f3f3; /* Light grey background */ width: 100%; + vertical-align: middle !important; } /* Style the progress bar container */ @@ -69,30 +70,39 @@ progress::after { height: 30px 
!important; } -.type_row{ - height: 80px !important; +.progress-bar span { + text-align: right; + width: 215px; +} +div:has(> #positive_prompt) { + border: none; } -.type_row_half{ - height: 32px !important; +#positive_prompt { + padding: 1px; + background: var(--background-fill-primary); } -.scroll-hide{ - resize: none !important; +.type_row { + height: 84px !important; } -.refresh_button{ +.type_row_half { + height: 34px !important; +} + +.refresh_button { border: none !important; background: none !important; font-size: none !important; box-shadow: none !important; } -.advanced_check_row{ +.advanced_check_row { width: 250px !important; } -.min_check{ +.min_check { min-width: min(1px, 100%) !important; } @@ -101,10 +111,14 @@ progress::after { overflow: auto !important; } -.aspect_ratios label { +.performance_selection label { width: 140px !important; } +.aspect_ratios label { + flex: calc(50% - 5px) !important; +} + .aspect_ratios label span { white-space: nowrap !important; } @@ -391,6 +405,14 @@ progress::after { background-color: #fff8; font-family: monospace; text-align: center; - border-radius-top: 5px; + border-radius: 5px 5px 0px 0px; display: none; /* remove this to enable tooltip in preview image */ +} + +#inpaint_canvas .canvas-tooltip-info { + top: 2px; +} + +#inpaint_brush_color input[type=color]{ + background: none; } \ No newline at end of file diff --git a/development.md b/development.md new file mode 100644 index 00000000..bbb3def9 --- /dev/null +++ b/development.md @@ -0,0 +1,11 @@ +## Running unit tests + +Native python: +``` +python -m unittest tests/ +``` + +Embedded python (Windows zip file installation method): +``` +..\python_embeded\python.exe -m unittest +``` diff --git a/docker-compose.yml b/docker-compose.yml index dee7b3e7..f724964d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,12 +1,10 @@ -version: '3.9' - volumes: fooocus-data: services: app: build: . 
- image: fooocus + image: ghcr.io/lllyasviel/fooocus ports: - "7865:7865" environment: diff --git a/docker.md b/docker.md index 36cfa632..cd75d9f5 100644 --- a/docker.md +++ b/docker.md @@ -1,35 +1,99 @@ # Fooocus on Docker -The docker image is based on NVIDIA CUDA 12.3 and PyTorch 2.0, see [Dockerfile](Dockerfile) and [requirements_docker.txt](requirements_docker.txt) for details. +The docker image is based on NVIDIA CUDA 12.4 and PyTorch 2.1, see [Dockerfile](Dockerfile) and [requirements_docker.txt](requirements_docker.txt) for details. + +## Requirements + +- A computer with specs good enough to run Fooocus, and proprietary Nvidia drivers +- Docker, Docker Compose, or Podman ## Quick start -**This is just an easy way for testing. Please find more information in the [notes](#notes).** +**More information in the [notes](#notes).** + +### Running with Docker Compose 1. Clone this repository -2. Build the image with `docker compose build` -3. Run the docker container with `docker compose up`. Building the image takes some time. +2. Run the docker container with `docker compose up`. 
+ +### Running with Docker + +```sh +docker run -p 7865:7865 -v fooocus-data:/content/data -it \ +--gpus all \ +-e CMDARGS=--listen \ +-e DATADIR=/content/data \ +-e config_path=/content/data/config.txt \ +-e config_example_path=/content/data/config_modification_tutorial.txt \ +-e path_checkpoints=/content/data/models/checkpoints/ \ +-e path_loras=/content/data/models/loras/ \ +-e path_embeddings=/content/data/models/embeddings/ \ +-e path_vae_approx=/content/data/models/vae_approx/ \ +-e path_upscale_models=/content/data/models/upscale_models/ \ +-e path_inpaint=/content/data/models/inpaint/ \ +-e path_controlnet=/content/data/models/controlnet/ \ +-e path_clip_vision=/content/data/models/clip_vision/ \ +-e path_fooocus_expansion=/content/data/models/prompt_expansion/fooocus_expansion/ \ +-e path_outputs=/content/app/outputs/ \ +ghcr.io/lllyasviel/fooocus +``` +### Running with Podman + +```sh +podman run -p 7865:7865 -v fooocus-data:/content/data -it \ +--security-opt=no-new-privileges --cap-drop=ALL --security-opt label=type:nvidia_container_t --device=nvidia.com/gpu=all \ +-e CMDARGS=--listen \ +-e DATADIR=/content/data \ +-e config_path=/content/data/config.txt \ +-e config_example_path=/content/data/config_modification_tutorial.txt \ +-e path_checkpoints=/content/data/models/checkpoints/ \ +-e path_loras=/content/data/models/loras/ \ +-e path_embeddings=/content/data/models/embeddings/ \ +-e path_vae_approx=/content/data/models/vae_approx/ \ +-e path_upscale_models=/content/data/models/upscale_models/ \ +-e path_inpaint=/content/data/models/inpaint/ \ +-e path_controlnet=/content/data/models/controlnet/ \ +-e path_clip_vision=/content/data/models/clip_vision/ \ +-e path_fooocus_expansion=/content/data/models/prompt_expansion/fooocus_expansion/ \ +-e path_outputs=/content/app/outputs/ \ +ghcr.io/lllyasviel/fooocus +``` When you see the message `Use the app with http://0.0.0.0:7865/` in the console, you can access the URL in your browser. 
-Your models and outputs are stored in the `fooocus-data` volume, which, depending on OS, is stored in `/var/lib/docker/volumes`. +Your models and outputs are stored in the `fooocus-data` volume, which, depending on OS, is stored in `/var/lib/docker/volumes/` (or `~/.local/share/containers/storage/volumes/` when using `podman`). + +## Building the container locally + +Clone the repository first, and open a terminal in the folder. + +Build with `docker`: +```sh +docker build . -t fooocus +``` + +Build with `podman`: +```sh +podman build . -t fooocus +``` ## Details -### Update the container manually +### Update the container manually (`docker compose`) When you are using `docker compose up` continuously, the container is not updated to the latest version of Fooocus automatically. Run `git pull` before executing `docker compose build --no-cache` to build an image with the latest Fooocus version. You can then start it with `docker compose up` ### Import models, outputs -If you want to import files from models or the outputs folder, you can uncomment the following settings in the [docker-compose.yml](docker-compose.yml): + +If you want to import files from models or the outputs folder, you can add the following bind mounts in the [docker-compose.yml](docker-compose.yml) or your preferred method of running the container: ``` #- ./models:/import/models # Once you import files, you don't need to mount again. #- ./outputs:/import/outputs # Once you import files, you don't need to mount again. ``` -After running `docker compose up`, your files will be copied into `/content/data/models` and `/content/data/outputs` -Since `/content/data` is a persistent volume folder, your files will be persisted even when you re-run `docker compose up --build` without above volume settings. 
+After running the container, your files will be copied into `/content/data/models` and `/content/data/outputs` +Since `/content/data` is a persistent volume folder, your files will be persisted even when you re-run the container without the above mounts. ### Paths inside the container @@ -54,6 +118,7 @@ Docker specified environments are there. They are used by 'entrypoint.sh' |CMDARGS|Arguments for [entry_with_update.py](entry_with_update.py) which is called by [entrypoint.sh](entrypoint.sh)| |config_path|'config.txt' location| |config_example_path|'config_modification_tutorial.txt' location| +|HF_MIRROR| huggingface mirror site domain| You can also use the same json key names and values explained in the 'config_modification_tutorial.txt' as the environments. See examples in the [docker-compose.yml](docker-compose.yml) diff --git a/extras/censor.py b/extras/censor.py new file mode 100644 index 00000000..45617fd8 --- /dev/null +++ b/extras/censor.py @@ -0,0 +1,60 @@ +import os + +import numpy as np +import torch +from transformers import CLIPConfig, CLIPImageProcessor + +import ldm_patched.modules.model_management as model_management +import modules.config +from extras.safety_checker.models.safety_checker import StableDiffusionSafetyChecker +from ldm_patched.modules.model_patcher import ModelPatcher + +safety_checker_repo_root = os.path.join(os.path.dirname(__file__), 'safety_checker') +config_path = os.path.join(safety_checker_repo_root, "configs", "config.json") +preprocessor_config_path = os.path.join(safety_checker_repo_root, "configs", "preprocessor_config.json") + + +class Censor: + def __init__(self): + self.safety_checker_model: ModelPatcher | None = None + self.clip_image_processor: CLIPImageProcessor | None = None + self.load_device = torch.device('cpu') + self.offload_device = torch.device('cpu') + + def init(self): + if self.safety_checker_model is None and self.clip_image_processor is None: + safety_checker_model = 
modules.config.downloading_safety_checker_model() + self.clip_image_processor = CLIPImageProcessor.from_json_file(preprocessor_config_path) + clip_config = CLIPConfig.from_json_file(config_path) + model = StableDiffusionSafetyChecker.from_pretrained(safety_checker_model, config=clip_config) + model.eval() + + self.load_device = model_management.text_encoder_device() + self.offload_device = model_management.text_encoder_offload_device() + + model.to(self.offload_device) + + self.safety_checker_model = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device) + + def censor(self, images: list | np.ndarray) -> list | np.ndarray: + self.init() + model_management.load_model_gpu(self.safety_checker_model) + + single = False + if not isinstance(images, list) or isinstance(images, np.ndarray): + images = [images] + single = True + + safety_checker_input = self.clip_image_processor(images, return_tensors="pt") + safety_checker_input.to(device=self.load_device) + checked_images, has_nsfw_concept = self.safety_checker_model.model(images=images, + clip_input=safety_checker_input.pixel_values) + checked_images = [image.astype(np.uint8) for image in checked_images] + + if single: + checked_images = checked_images[0] + + return checked_images + + +default_censor = Censor().censor diff --git a/extras/safety_checker/configs/config.json b/extras/safety_checker/configs/config.json new file mode 100644 index 00000000..aa454d22 --- /dev/null +++ b/extras/safety_checker/configs/config.json @@ -0,0 +1,171 @@ +{ + "_name_or_path": "clip-vit-large-patch14/", + "architectures": [ + "SafetyChecker" + ], + "initializer_factor": 1.0, + "logit_scale_init_value": 2.6592, + "model_type": "clip", + "projection_dim": 768, + "text_config": { + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "bos_token_id": 0, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + 
"decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "dropout": 0.0, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": 2, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "quick_gelu", + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 3072, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "max_length": 20, + "max_position_embeddings": 77, + "min_length": 0, + "model_type": "clip_text_model", + "no_repeat_ngram_size": 0, + "num_attention_heads": 12, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 12, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": 1, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "task_specific_params": null, + "temperature": 1.0, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.21.0.dev0", + "typical_p": 1.0, + "use_bfloat16": false, + "vocab_size": 49408 + }, + "text_config_dict": { + "hidden_size": 768, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12 + }, + "torch_dtype": "float32", + "transformers_version": null, + "vision_config": { + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + 
"cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "dropout": 0.0, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "clip_vision_model", + "no_repeat_ngram_size": 0, + "num_attention_heads": 16, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 24, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "patch_size": 14, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "task_specific_params": null, + "temperature": 1.0, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.21.0.dev0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "vision_config_dict": { + "hidden_size": 1024, + "intermediate_size": 4096, + "num_attention_heads": 16, + "num_hidden_layers": 24, + "patch_size": 14 + } +} diff --git a/extras/safety_checker/configs/preprocessor_config.json b/extras/safety_checker/configs/preprocessor_config.json new file mode 100644 index 00000000..5294955f --- /dev/null +++ 
b/extras/safety_checker/configs/preprocessor_config.json @@ -0,0 +1,20 @@ +{ + "crop_size": 224, + "do_center_crop": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_resize": true, + "feature_extractor_type": "CLIPFeatureExtractor", + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "resample": 3, + "size": 224 +} diff --git a/extras/safety_checker/models/safety_checker.py b/extras/safety_checker/models/safety_checker.py new file mode 100644 index 00000000..ea38bf03 --- /dev/null +++ b/extras/safety_checker/models/safety_checker.py @@ -0,0 +1,126 @@ +# from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/safety_checker.py + +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np +import torch +import torch.nn as nn +from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel +from transformers.utils import logging + +logger = logging.get_logger(__name__) + + +def cosine_distance(image_embeds, text_embeds): + normalized_image_embeds = nn.functional.normalize(image_embeds) + normalized_text_embeds = nn.functional.normalize(text_embeds) + return torch.mm(normalized_image_embeds, normalized_text_embeds.t()) + + +class StableDiffusionSafetyChecker(PreTrainedModel): + config_class = CLIPConfig + main_input_name = "clip_input" + + _no_split_modules = ["CLIPEncoderLayer"] + + def __init__(self, config: CLIPConfig): + super().__init__(config) + + self.vision_model = CLIPVisionModel(config.vision_config) + self.visual_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim, bias=False) + + self.concept_embeds = nn.Parameter(torch.ones(17, config.projection_dim), requires_grad=False) + self.special_care_embeds = nn.Parameter(torch.ones(3, config.projection_dim), requires_grad=False) + + self.concept_embeds_weights = nn.Parameter(torch.ones(17), requires_grad=False) + self.special_care_embeds_weights = nn.Parameter(torch.ones(3), requires_grad=False) + + @torch.no_grad() + def forward(self, clip_input, images): + pooled_output = self.vision_model(clip_input)[1] # pooled_output + image_embeds = self.visual_projection(pooled_output) + + # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16 + special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds).cpu().float().numpy() + cos_dist = cosine_distance(image_embeds, self.concept_embeds).cpu().float().numpy() + + result = [] + batch_size = image_embeds.shape[0] + for i in range(batch_size): + result_img = {"special_scores": {}, "special_care": [], "concept_scores": {}, "bad_concepts": []} + + # increase this value to create a stronger `nfsw` filter + # at the cost of increasing the 
possibility of filtering benign images + adjustment = 0.0 + + for concept_idx in range(len(special_cos_dist[0])): + concept_cos = special_cos_dist[i][concept_idx] + concept_threshold = self.special_care_embeds_weights[concept_idx].item() + result_img["special_scores"][concept_idx] = round(concept_cos - concept_threshold + adjustment, 3) + if result_img["special_scores"][concept_idx] > 0: + result_img["special_care"].append({concept_idx, result_img["special_scores"][concept_idx]}) + adjustment = 0.01 + + for concept_idx in range(len(cos_dist[0])): + concept_cos = cos_dist[i][concept_idx] + concept_threshold = self.concept_embeds_weights[concept_idx].item() + result_img["concept_scores"][concept_idx] = round(concept_cos - concept_threshold + adjustment, 3) + if result_img["concept_scores"][concept_idx] > 0: + result_img["bad_concepts"].append(concept_idx) + + result.append(result_img) + + has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] + + for idx, has_nsfw_concept in enumerate(has_nsfw_concepts): + if has_nsfw_concept: + if torch.is_tensor(images) or torch.is_tensor(images[0]): + images[idx] = torch.zeros_like(images[idx]) # black image + else: + images[idx] = np.zeros(images[idx].shape) # black image + + if any(has_nsfw_concepts): + logger.warning( + "Potential NSFW content was detected in one or more images. A black image will be returned instead." + " Try again with a different prompt and/or seed." 
+ ) + + return images, has_nsfw_concepts + + @torch.no_grad() + def forward_onnx(self, clip_input: torch.Tensor, images: torch.Tensor): + pooled_output = self.vision_model(clip_input)[1] # pooled_output + image_embeds = self.visual_projection(pooled_output) + + special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds) + cos_dist = cosine_distance(image_embeds, self.concept_embeds) + + # increase this value to create a stronger `nsfw` filter + # at the cost of increasing the possibility of filtering benign images + adjustment = 0.0 + + special_scores = special_cos_dist - self.special_care_embeds_weights + adjustment + # special_scores = special_scores.round(decimals=3) + special_care = torch.any(special_scores > 0, dim=1) + special_adjustment = special_care * 0.01 + special_adjustment = special_adjustment.unsqueeze(1).expand(-1, cos_dist.shape[1]) + + concept_scores = (cos_dist - self.concept_embeds_weights) + special_adjustment + # concept_scores = concept_scores.round(decimals=3) + has_nsfw_concepts = torch.any(concept_scores > 0, dim=1) + + images[has_nsfw_concepts] = 0.0 # black image + + return images, has_nsfw_concepts diff --git a/extras/vae_interpose.py b/extras/vae_interpose.py index 72fb09a4..d407ca83 100644 --- a/extras/vae_interpose.py +++ b/extras/vae_interpose.py @@ -1,69 +1,85 @@ # https://github.com/city96/SD-Latent-Interposer/blob/main/interposer.py import os -import torch -import safetensors.torch as sf -import torch.nn as nn -import ldm_patched.modules.model_management +import safetensors.torch as sf +import torch +import torch.nn as nn + +import ldm_patched.modules.model_management from ldm_patched.modules.model_patcher import ModelPatcher from modules.config import path_vae_approx -class Block(nn.Module): - def __init__(self, size): +class ResBlock(nn.Module): + """Block with residuals""" + + def __init__(self, ch): super().__init__() self.join = nn.ReLU() + self.norm = nn.BatchNorm2d(ch) self.long = nn.Sequential( - 
nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(size, size, kernel_size=3, stride=1, padding=1), + nn.Conv2d(ch, ch, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch, ch, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch, ch, kernel_size=3, stride=1, padding=1), + nn.Dropout(0.1) ) def forward(self, x): - y = self.long(x) - z = self.join(y + x) - return z + x = self.norm(x) + return self.join(self.long(x) + x) -class Interposer(nn.Module): - def __init__(self): +class ExtractBlock(nn.Module): + """Increase no. of channels by [out/in]""" + + def __init__(self, ch_in, ch_out): super().__init__() - self.chan = 4 - self.hid = 128 - - self.head_join = nn.ReLU() - self.head_short = nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1) - self.head_long = nn.Sequential( - nn.Conv2d(self.chan, self.hid, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), - nn.LeakyReLU(0.1), - nn.Conv2d(self.hid, self.hid, kernel_size=3, stride=1, padding=1), - ) - self.core = nn.Sequential( - Block(self.hid), - Block(self.hid), - Block(self.hid), - ) - self.tail = nn.Sequential( - nn.ReLU(), - nn.Conv2d(self.hid, self.chan, kernel_size=3, stride=1, padding=1) + self.join = nn.ReLU() + self.short = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1) + self.long = nn.Sequential( + nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1), + nn.SiLU(), + nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1), + nn.Dropout(0.1) ) def forward(self, x): - y = self.head_join( - self.head_long(x) + - self.head_short(x) + return self.join(self.long(x) + self.short(x)) + + +class InterposerModel(nn.Module): + """Main neural network""" + + def __init__(self, 
ch_in=4, ch_out=4, ch_mid=64, scale=1.0, blocks=12): + super().__init__() + self.ch_in = ch_in + self.ch_out = ch_out + self.ch_mid = ch_mid + self.blocks = blocks + self.scale = scale + + self.head = ExtractBlock(self.ch_in, self.ch_mid) + self.core = nn.Sequential( + nn.Upsample(scale_factor=self.scale, mode="nearest"), + *[ResBlock(self.ch_mid) for _ in range(blocks)], + nn.BatchNorm2d(self.ch_mid), + nn.SiLU(), ) + self.tail = nn.Conv2d(self.ch_mid, self.ch_out, kernel_size=3, stride=1, padding=1) + + def forward(self, x): + y = self.head(x) z = self.core(y) return self.tail(z) vae_approx_model = None -vae_approx_filename = os.path.join(path_vae_approx, 'xl-to-v1_interposer-v3.1.safetensors') +vae_approx_filename = os.path.join(path_vae_approx, 'xl-to-v1_interposer-v4.0.safetensors') def parse(x): @@ -72,7 +88,7 @@ def parse(x): x_origin = x.clone() if vae_approx_model is None: - model = Interposer() + model = InterposerModel() model.eval() sd = sf.load_file(vae_approx_filename) model.load_state_dict(sd) diff --git a/fooocus_version.py b/fooocus_version.py index b2050196..84d1586b 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.3.1' +version = '2.4.3' diff --git a/javascript/localization.js b/javascript/localization.js index 0a8394ca..02e4cfba 100644 --- a/javascript/localization.js +++ b/javascript/localization.js @@ -80,6 +80,15 @@ function refresh_style_localization() { processNode(document.querySelector('.style_selections')); } +function refresh_aspect_ratios_label(value) { + label = document.querySelector('#aspect_ratios_accordion div span'); + translation = getTranslation("Aspect Ratios"); + if (typeof translation == "undefined") { + translation = "Aspect Ratios"; + } + label.textContent = translation + " " + htmlDecode(value); +} + function localizeWholePage() { processNode(gradioApp()); diff --git a/javascript/script.js b/javascript/script.js index 9aa0b5c1..21dd483d 100644 --- a/javascript/script.js +++ 
b/javascript/script.js @@ -122,6 +122,43 @@ document.addEventListener("DOMContentLoaded", function() { initStylePreviewOverlay(); }); +var onAppend = function(elem, f) { + var observer = new MutationObserver(function(mutations) { + mutations.forEach(function(m) { + if (m.addedNodes.length) { + f(m.addedNodes); + } + }); + }); + observer.observe(elem, {childList: true}); +} + +function addObserverIfDesiredNodeAvailable(querySelector, callback) { + var elem = document.querySelector(querySelector); + if (!elem) { + window.setTimeout(() => addObserverIfDesiredNodeAvailable(querySelector, callback), 1000); + return; + } + + onAppend(elem, callback); +} + +/** + * Show reset button on toast "Connection errored out." + */ +addObserverIfDesiredNodeAvailable(".toast-wrap", function(added) { + added.forEach(function(element) { + if (element.innerText.includes("Connection errored out.")) { + window.setTimeout(function() { + document.getElementById("reset_button").classList.remove("hidden"); + document.getElementById("generate_button").classList.add("hidden"); + document.getElementById("skip_button").classList.add("hidden"); + document.getElementById("stop_button").classList.add("hidden"); + }); + } + }); +}); + /** * Add a ctrl+enter as a shortcut to start a generation */ @@ -219,3 +256,8 @@ function set_theme(theme) { window.location.replace(gradioURL + '?__theme=' + theme); } } + +function htmlDecode(input) { + var doc = new DOMParser().parseFromString(input, "text/html"); + return doc.documentElement.textContent; +} \ No newline at end of file diff --git a/language/en.json b/language/en.json index fefc79c4..5819f4ee 100644 --- a/language/en.json +++ b/language/en.json @@ -4,12 +4,20 @@ "Generate": "Generate", "Skip": "Skip", "Stop": "Stop", + "Reconnect": "Reconnect", "Input Image": "Input Image", "Advanced": "Advanced", "Upscale or Variation": "Upscale or Variation", "Image Prompt": "Image Prompt", - "Inpaint or Outpaint (beta)": "Inpaint or Outpaint (beta)", - "Drag 
above image to here": "Drag above image to here", + "Inpaint or Outpaint": "Inpaint or Outpaint", + "Outpaint Direction": "Outpaint Direction", + "Method": "Method", + "Describe": "Describe", + "Content Type": "Content Type", + "Photograph": "Photograph", + "Art/Anime": "Art/Anime", + "Describe this Image into Prompt": "Describe this Image into Prompt", + "Image Size and Recommended Size": "Image Size and Recommended Size", "Upscale or Variation:": "Upscale or Variation:", "Disabled": "Disabled", "Vary (Subtle)": "Vary (Subtle)", @@ -54,9 +62,12 @@ "Disable seed increment": "Disable seed increment", "Disable automatic seed increment when image number is > 1.": "Disable automatic seed increment when image number is > 1.", "Read wildcards in order": "Read wildcards in order", + "Black Out NSFW": "Black Out NSFW", + "Use black image if NSFW is detected.": "Use black image if NSFW is detected.", "\ud83d\udcda History Log": "\uD83D\uDCDA History Log", "Image Style": "Image Style", "Fooocus V2": "Fooocus V2", + "Random Style": "Random Style", "Default (Slightly Cinematic)": "Default (Slightly Cinematic)", "Fooocus Masterpiece": "Fooocus Masterpiece", "Fooocus Photograph": "Fooocus Photograph", @@ -309,6 +320,8 @@ "vae": "vae", "CFG Mimicking from TSNR": "CFG Mimicking from TSNR", "Enabling Fooocus's implementation of CFG mimicking for TSNR (effective when real CFG > mimicked CFG).": "Enabling Fooocus's implementation of CFG mimicking for TSNR (effective when real CFG > mimicked CFG).", + "CLIP Skip": "CLIP Skip", + "Bypass CLIP layers to avoid overfitting (use 1 to not skip any layers, 2 is recommended).": "Bypass CLIP layers to avoid overfitting (use 1 to not skip any layers, 2 is recommended).", "Sampler": "Sampler", "dpmpp_2m_sde_gpu": "dpmpp_2m_sde_gpu", "Only effective in non-inpaint mode.": "Only effective in non-inpaint mode.", @@ -339,6 +352,8 @@ "sgm_uniform": "sgm_uniform", "simple": "simple", "ddim_uniform": "ddim_uniform", + "VAE": "VAE", + "Default (model)": 
"Default (model)", "Forced Overwrite of Sampling Step": "Forced Overwrite of Sampling Step", "Set as -1 to disable. For developer debugging.": "Set as -1 to disable. For developer debugging.", "Forced Overwrite of Refiner Switch Step": "Forced Overwrite of Refiner Switch Step", @@ -378,7 +393,7 @@ "Fooocus Enhance": "Fooocus Enhance", "Fooocus Cinematic": "Fooocus Cinematic", "Fooocus Sharp": "Fooocus Sharp", - "Drag any image generated by Fooocus here": "Drag any image generated by Fooocus here", + "For images created by Fooocus": "For images created by Fooocus", "Metadata": "Metadata", "Apply Metadata": "Apply Metadata", "Metadata Scheme": "Metadata Scheme", diff --git a/launch.py b/launch.py index afa66705..5d40cc5b 100644 --- a/launch.py +++ b/launch.py @@ -62,8 +62,8 @@ def prepare_environment(): vae_approx_filenames = [ ('xlvaeapp.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/xlvaeapp.pth'), ('vaeapp_sd15.pth', 'https://huggingface.co/lllyasviel/misc/resolve/main/vaeapp_sd15.pt'), - ('xl-to-v1_interposer-v3.1.safetensors', - 'https://huggingface.co/lllyasviel/misc/resolve/main/xl-to-v1_interposer-v3.1.safetensors') + ('xl-to-v1_interposer-v4.0.safetensors', + 'https://huggingface.co/mashb1t/misc/resolve/main/xl-to-v1_interposer-v4.0.safetensors') ] @@ -80,6 +80,10 @@ if args.gpu_device_id is not None: os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_device_id) print("Set device to:", args.gpu_device_id) +if args.hf_mirror is not None : + os.environ['HF_MIRROR'] = str(args.hf_mirror) + print("Set hf_mirror to:", args.hf_mirror) + from modules import config os.environ['GRADIO_TEMP_DIR'] = config.temp_path diff --git a/ldm_patched/contrib/external_align_your_steps.py b/ldm_patched/contrib/external_align_your_steps.py new file mode 100644 index 00000000..624bbce2 --- /dev/null +++ b/ldm_patched/contrib/external_align_your_steps.py @@ -0,0 +1,55 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +#from: 
https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/howto.html +import numpy as np +import torch + +def loglinear_interp(t_steps, num_steps): + """ + Performs log-linear interpolation of a given array of decreasing numbers. + """ + xs = np.linspace(0, 1, len(t_steps)) + ys = np.log(t_steps[::-1]) + + new_xs = np.linspace(0, 1, num_steps) + new_ys = np.interp(new_xs, xs, ys) + + interped_ys = np.exp(new_ys)[::-1].copy() + return interped_ys + +NOISE_LEVELS = {"SD1": [14.6146412293, 6.4745760956, 3.8636745985, 2.6946151520, 1.8841921177, 1.3943805092, 0.9642583904, 0.6523686016, 0.3977456272, 0.1515232662, 0.0291671582], + "SDXL":[14.6146412293, 6.3184485287, 3.7681790315, 2.1811480769, 1.3405244945, 0.8620721141, 0.5550693289, 0.3798540708, 0.2332364134, 0.1114188177, 0.0291671582], + "SVD": [700.00, 54.5, 15.886, 7.977, 4.248, 1.789, 0.981, 0.403, 0.173, 0.034, 0.002]} + +class AlignYourStepsScheduler: + @classmethod + def INPUT_TYPES(s): + return {"required": + {"model_type": (["SD1", "SDXL", "SVD"], ), + "steps": ("INT", {"default": 10, "min": 10, "max": 10000}), + "denoise": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + RETURN_TYPES = ("SIGMAS",) + CATEGORY = "sampling/custom_sampling/schedulers" + + FUNCTION = "get_sigmas" + + def get_sigmas(self, model_type, steps, denoise): + total_steps = steps + if denoise < 1.0: + if denoise <= 0.0: + return (torch.FloatTensor([]),) + total_steps = round(steps * denoise) + + sigmas = NOISE_LEVELS[model_type][:] + if (steps + 1) != len(sigmas): + sigmas = loglinear_interp(sigmas, steps + 1) + + sigmas = sigmas[-(total_steps + 1):] + sigmas[-1] = 0 + return (torch.FloatTensor(sigmas), ) + +NODE_CLASS_MAPPINGS = { + "AlignYourStepsScheduler": AlignYourStepsScheduler, +} \ No newline at end of file diff --git a/ldm_patched/contrib/external_custom_sampler.py b/ldm_patched/contrib/external_custom_sampler.py index 8f92e841..60d5e3bd 100644 --- a/ldm_patched/contrib/external_custom_sampler.py 
+++ b/ldm_patched/contrib/external_custom_sampler.py @@ -107,8 +107,7 @@ class SDTurboScheduler: def get_sigmas(self, model, steps, denoise): start_step = 10 - int(10 * denoise) timesteps = torch.flip(torch.arange(1, 11) * 100 - 1, (0,))[start_step:start_step + steps] - ldm_patched.modules.model_management.load_models_gpu([model]) - sigmas = model.model.model_sampling.sigma(timesteps) + sigmas = model.model_sampling.sigma(timesteps) sigmas = torch.cat([sigmas, sigmas.new_zeros([1])]) return (sigmas, ) @@ -230,6 +229,25 @@ class SamplerDPMPP_SDE: sampler = ldm_patched.modules.samplers.ksampler(sampler_name, {"eta": eta, "s_noise": s_noise, "r": r}) return (sampler, ) + +class SamplerTCD: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "eta": ("FLOAT", {"default": 0.3, "min": 0.0, "max": 1.0, "step": 0.01}), + } + } + RETURN_TYPES = ("SAMPLER",) + CATEGORY = "sampling/custom_sampling/samplers" + + FUNCTION = "get_sampler" + + def get_sampler(self, eta=0.3): + sampler = ldm_patched.modules.samplers.ksampler("tcd", {"eta": eta}) + return (sampler, ) + + class SamplerCustom: @classmethod def INPUT_TYPES(s): @@ -292,6 +310,7 @@ NODE_CLASS_MAPPINGS = { "KSamplerSelect": KSamplerSelect, "SamplerDPMPP_2M_SDE": SamplerDPMPP_2M_SDE, "SamplerDPMPP_SDE": SamplerDPMPP_SDE, + "SamplerTCD": SamplerTCD, "SplitSigmas": SplitSigmas, "FlipSigmas": FlipSigmas, } diff --git a/ldm_patched/contrib/external_model_advanced.py b/ldm_patched/contrib/external_model_advanced.py index 03a2f045..b9f0ebdc 100644 --- a/ldm_patched/contrib/external_model_advanced.py +++ b/ldm_patched/contrib/external_model_advanced.py @@ -70,7 +70,7 @@ class ModelSamplingDiscrete: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "sampling": (["eps", "v_prediction", "lcm"],), + "sampling": (["eps", "v_prediction", "lcm", "tcd"]), "zsnr": ("BOOLEAN", {"default": False}), }} @@ -90,6 +90,9 @@ class ModelSamplingDiscrete: elif sampling == "lcm": sampling_type = LCM 
sampling_base = ModelSamplingDiscreteDistilled + elif sampling == "tcd": + sampling_type = ldm_patched.modules.model_sampling.EPS + sampling_base = ModelSamplingDiscreteDistilled class ModelSamplingAdvanced(sampling_base, sampling_type): pass @@ -105,7 +108,7 @@ class ModelSamplingContinuousEDM: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "sampling": (["v_prediction", "eps"],), + "sampling": (["v_prediction", "edm_playground_v2.5", "eps"],), "sigma_max": ("FLOAT", {"default": 120.0, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), "sigma_min": ("FLOAT", {"default": 0.002, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), }} @@ -118,17 +121,25 @@ class ModelSamplingContinuousEDM: def patch(self, model, sampling, sigma_max, sigma_min): m = model.clone() + latent_format = None + sigma_data = 1.0 if sampling == "eps": sampling_type = ldm_patched.modules.model_sampling.EPS elif sampling == "v_prediction": sampling_type = ldm_patched.modules.model_sampling.V_PREDICTION + elif sampling == "edm_playground_v2.5": + sampling_type = ldm_patched.modules.model_sampling.EDM + sigma_data = 0.5 + latent_format = ldm_patched.modules.latent_formats.SDXL_Playground_2_5() class ModelSamplingAdvanced(ldm_patched.modules.model_sampling.ModelSamplingContinuousEDM, sampling_type): pass model_sampling = ModelSamplingAdvanced(model.model.model_config) - model_sampling.set_sigma_range(sigma_min, sigma_max) + model_sampling.set_parameters(sigma_min, sigma_max, sigma_data) m.add_object_patch("model_sampling", model_sampling) + if latent_format is not None: + m.add_object_patch("latent_format", latent_format) return (m, ) class RescaleCFG: diff --git a/ldm_patched/k_diffusion/sampling.py b/ldm_patched/k_diffusion/sampling.py index 761c2e0e..ea5540a4 100644 --- a/ldm_patched/k_diffusion/sampling.py +++ b/ldm_patched/k_diffusion/sampling.py @@ -752,7 +752,6 @@ def sample_lcm(model, x, sigmas, extra_args=None, callback=None, disable=None, n 
return x - @torch.no_grad() def sample_heunpp2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.): # From MIT licensed: https://github.com/Carzit/sd-webui-samplers-scheduler/ @@ -808,3 +807,32 @@ def sample_heunpp2(model, x, sigmas, extra_args=None, callback=None, disable=Non d_prime = w1 * d + w2 * d_2 + w3 * d_3 x = x + d_prime * dt return x + + +@torch.no_grad() +def sample_tcd(model, x, sigmas, extra_args=None, callback=None, disable=None, noise_sampler=None, eta=0.3): + extra_args = {} if extra_args is None else extra_args + noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler + s_in = x.new_ones([x.shape[0]]) + + model_sampling = model.inner_model.inner_model.model_sampling + timesteps_s = torch.floor((1 - eta) * model_sampling.timestep(sigmas)).to(dtype=torch.long).detach().cpu() + timesteps_s[-1] = 0 + alpha_prod_s = model_sampling.alphas_cumprod[timesteps_s] + beta_prod_s = 1 - alpha_prod_s + for i in trange(len(sigmas) - 1, disable=disable): + denoised = model(x, sigmas[i] * s_in, **extra_args) # predicted_original_sample + eps = (x - denoised) / sigmas[i] + denoised = alpha_prod_s[i + 1].sqrt() * denoised + beta_prod_s[i + 1].sqrt() * eps + + if callback is not None: + callback({"x": x, "i": i, "sigma": sigmas[i], "sigma_hat": sigmas[i], "denoised": denoised}) + + x = denoised + if eta > 0 and sigmas[i + 1] > 0: + noise = noise_sampler(sigmas[i], sigmas[i + 1]) + x = x / alpha_prod_s[i+1].sqrt() + noise * (sigmas[i+1]**2 + 1 - 1/alpha_prod_s[i+1]).sqrt() + else: + x *= torch.sqrt(1.0 + sigmas[i + 1] ** 2) + + return x \ No newline at end of file diff --git a/ldm_patched/modules/args_parser.py b/ldm_patched/modules/args_parser.py index 0c6165a7..bf873783 100644 --- a/ldm_patched/modules/args_parser.py +++ b/ldm_patched/modules/args_parser.py @@ -37,6 +37,7 @@ parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nar 
parser.add_argument("--port", type=int, default=8188) parser.add_argument("--disable-header-check", type=str, default=None, metavar="ORIGIN", nargs="?", const="*") parser.add_argument("--web-upload-size", type=float, default=100) +parser.add_argument("--hf-mirror", type=str, default=None) parser.add_argument("--external-working-path", type=str, default=None, metavar="PATH", nargs='+', action='append') parser.add_argument("--output-path", type=str, default=None) diff --git a/ldm_patched/modules/latent_formats.py b/ldm_patched/modules/latent_formats.py index 2252a075..1606793e 100644 --- a/ldm_patched/modules/latent_formats.py +++ b/ldm_patched/modules/latent_formats.py @@ -1,3 +1,4 @@ +import torch class LatentFormat: scale_factor = 1.0 @@ -34,6 +35,70 @@ class SDXL(LatentFormat): ] self.taesd_decoder_name = "taesdxl_decoder" +class SDXL_Playground_2_5(LatentFormat): + def __init__(self): + self.scale_factor = 0.5 + self.latents_mean = torch.tensor([-1.6574, 1.886, -1.383, 2.5155]).view(1, 4, 1, 1) + self.latents_std = torch.tensor([8.4927, 5.9022, 6.5498, 5.2299]).view(1, 4, 1, 1) + + self.latent_rgb_factors = [ + # R G B + [ 0.3920, 0.4054, 0.4549], + [-0.2634, -0.0196, 0.0653], + [ 0.0568, 0.1687, -0.0755], + [-0.3112, -0.2359, -0.2076] + ] + self.taesd_decoder_name = "taesdxl_decoder" + + def process_in(self, latent): + latents_mean = self.latents_mean.to(latent.device, latent.dtype) + latents_std = self.latents_std.to(latent.device, latent.dtype) + return (latent - latents_mean) * self.scale_factor / latents_std + + def process_out(self, latent): + latents_mean = self.latents_mean.to(latent.device, latent.dtype) + latents_std = self.latents_std.to(latent.device, latent.dtype) + return latent * latents_std / self.scale_factor + latents_mean + + class SD_X4(LatentFormat): def __init__(self): self.scale_factor = 0.08333 + self.latent_rgb_factors = [ + [-0.2340, -0.3863, -0.3257], + [ 0.0994, 0.0885, -0.0908], + [-0.2833, -0.2349, -0.3741], + [ 0.2523, -0.0055, 
-0.1651] + ] + +class SC_Prior(LatentFormat): + def __init__(self): + self.scale_factor = 1.0 + self.latent_rgb_factors = [ + [-0.0326, -0.0204, -0.0127], + [-0.1592, -0.0427, 0.0216], + [ 0.0873, 0.0638, -0.0020], + [-0.0602, 0.0442, 0.1304], + [ 0.0800, -0.0313, -0.1796], + [-0.0810, -0.0638, -0.1581], + [ 0.1791, 0.1180, 0.0967], + [ 0.0740, 0.1416, 0.0432], + [-0.1745, -0.1888, -0.1373], + [ 0.2412, 0.1577, 0.0928], + [ 0.1908, 0.0998, 0.0682], + [ 0.0209, 0.0365, -0.0092], + [ 0.0448, -0.0650, -0.1728], + [-0.1658, -0.1045, -0.1308], + [ 0.0542, 0.1545, 0.1325], + [-0.0352, -0.1672, -0.2541] + ] + +class SC_B(LatentFormat): + def __init__(self): + self.scale_factor = 1.0 / 0.43 + self.latent_rgb_factors = [ + [ 0.1121, 0.2006, 0.1023], + [-0.2093, -0.0222, -0.0195], + [-0.3087, -0.1535, 0.0366], + [ 0.0290, -0.1574, -0.4078] + ] \ No newline at end of file diff --git a/ldm_patched/modules/model_sampling.py b/ldm_patched/modules/model_sampling.py index f39e275d..8971b4e6 100644 --- a/ldm_patched/modules/model_sampling.py +++ b/ldm_patched/modules/model_sampling.py @@ -1,7 +1,7 @@ import torch -import numpy as np from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule import math +import numpy as np class EPS: def calculate_input(self, sigma, noise): @@ -12,12 +12,28 @@ class EPS: sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) return model_input - model_output * sigma + def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): + if max_denoise: + noise = noise * torch.sqrt(1.0 + sigma ** 2.0) + else: + noise = noise * sigma + + noise += latent_image + return noise + + def inverse_noise_scaling(self, sigma, latent): + return latent class V_PREDICTION(EPS): def calculate_denoised(self, sigma, model_output, model_input): sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) - model_output * sigma * self.sigma_data / 
(sigma ** 2 + self.sigma_data ** 2) ** 0.5 +class EDM(V_PREDICTION): + def calculate_denoised(self, sigma, model_output, model_input): + sigma = sigma.view(sigma.shape[:1] + (1,) * (model_output.ndim - 1)) + return model_input * self.sigma_data ** 2 / (sigma ** 2 + self.sigma_data ** 2) + model_output * sigma * self.sigma_data / (sigma ** 2 + self.sigma_data ** 2) ** 0.5 + class ModelSamplingDiscrete(torch.nn.Module): def __init__(self, model_config=None): @@ -42,8 +58,7 @@ class ModelSamplingDiscrete(torch.nn.Module): else: betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) alphas = 1. - betas - alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0), dtype=torch.float32) - # alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + alphas_cumprod = torch.cumprod(alphas, dim=0) timesteps, = betas.shape self.num_timesteps = int(timesteps) @@ -55,11 +70,16 @@ class ModelSamplingDiscrete(torch.nn.Module): # self.register_buffer('alphas_cumprod_prev', torch.tensor(alphas_cumprod_prev, dtype=torch.float32)) sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5 + alphas_cumprod = torch.tensor(np.cumprod(alphas, axis=0), dtype=torch.float32) self.set_sigmas(sigmas) + self.set_alphas_cumprod(alphas_cumprod.float()) def set_sigmas(self, sigmas): - self.register_buffer('sigmas', sigmas) - self.register_buffer('log_sigmas', sigmas.log()) + self.register_buffer('sigmas', sigmas.float()) + self.register_buffer('log_sigmas', sigmas.log().float()) + + def set_alphas_cumprod(self, alphas_cumprod): + self.register_buffer("alphas_cumprod", alphas_cumprod.float()) @property def sigma_min(self): @@ -94,8 +114,6 @@ class ModelSamplingDiscrete(torch.nn.Module): class ModelSamplingContinuousEDM(torch.nn.Module): def __init__(self, model_config=None): super().__init__() - self.sigma_data = 1.0 - if model_config is not None: sampling_settings = model_config.sampling_settings else: @@ -103,9 +121,11 @@ class 
ModelSamplingContinuousEDM(torch.nn.Module): sigma_min = sampling_settings.get("sigma_min", 0.002) sigma_max = sampling_settings.get("sigma_max", 120.0) - self.set_sigma_range(sigma_min, sigma_max) + sigma_data = sampling_settings.get("sigma_data", 1.0) + self.set_parameters(sigma_min, sigma_max, sigma_data) - def set_sigma_range(self, sigma_min, sigma_max): + def set_parameters(self, sigma_min, sigma_max, sigma_data): + self.sigma_data = sigma_data sigmas = torch.linspace(math.log(sigma_min), math.log(sigma_max), 1000).exp() self.register_buffer('sigmas', sigmas) #for compatibility with some schedulers @@ -134,3 +154,56 @@ class ModelSamplingContinuousEDM(torch.nn.Module): log_sigma_min = math.log(self.sigma_min) return math.exp((math.log(self.sigma_max) - log_sigma_min) * percent + log_sigma_min) + +class StableCascadeSampling(ModelSamplingDiscrete): + def __init__(self, model_config=None): + super().__init__() + + if model_config is not None: + sampling_settings = model_config.sampling_settings + else: + sampling_settings = {} + + self.set_parameters(sampling_settings.get("shift", 1.0)) + + def set_parameters(self, shift=1.0, cosine_s=8e-3): + self.shift = shift + self.cosine_s = torch.tensor(cosine_s) + self._init_alpha_cumprod = torch.cos(self.cosine_s / (1 + self.cosine_s) * torch.pi * 0.5) ** 2 + + #This part is just for compatibility with some schedulers in the codebase + self.num_timesteps = 10000 + sigmas = torch.empty((self.num_timesteps), dtype=torch.float32) + for x in range(self.num_timesteps): + t = (x + 1) / self.num_timesteps + sigmas[x] = self.sigma(t) + + self.set_sigmas(sigmas) + + def sigma(self, timestep): + alpha_cumprod = (torch.cos((timestep + self.cosine_s) / (1 + self.cosine_s) * torch.pi * 0.5) ** 2 / self._init_alpha_cumprod) + + if self.shift != 1.0: + var = alpha_cumprod + logSNR = (var/(1-var)).log() + logSNR += 2 * torch.log(1.0 / torch.tensor(self.shift)) + alpha_cumprod = logSNR.sigmoid() + + alpha_cumprod = 
alpha_cumprod.clamp(0.0001, 0.9999) + return ((1 - alpha_cumprod) / alpha_cumprod) ** 0.5 + + def timestep(self, sigma): + var = 1 / ((sigma * sigma) + 1) + var = var.clamp(0, 1.0) + s, min_var = self.cosine_s.to(var.device), self._init_alpha_cumprod.to(var.device) + t = (((var * min_var) ** 0.5).acos() / (torch.pi * 0.5)) * (1 + s) - s + return t + + def percent_to_sigma(self, percent): + if percent <= 0.0: + return 999999999.9 + if percent >= 1.0: + return 0.0 + + percent = 1.0 - percent + return self.sigma(torch.tensor(percent)) \ No newline at end of file diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py index 1f69d2b1..9ed1fcd2 100644 --- a/ldm_patched/modules/samplers.py +++ b/ldm_patched/modules/samplers.py @@ -523,7 +523,7 @@ class UNIPCBH2(Sampler): KSAMPLER_NAMES = ["euler", "euler_ancestral", "heun", "heunpp2","dpm_2", "dpm_2_ancestral", "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_sde", "dpmpp_sde_gpu", - "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm"] + "dpmpp_2m", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm", "tcd", "edm_playground_v2.5"] class KSAMPLER(Sampler): def __init__(self, sampler_function, extra_options={}, inpaint_options={}): diff --git a/ldm_patched/modules/sd.py b/ldm_patched/modules/sd.py index e197c39c..282f2559 100644 --- a/ldm_patched/modules/sd.py +++ b/ldm_patched/modules/sd.py @@ -427,12 +427,13 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl return (ldm_patched.modules.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae) -def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True): +def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, 
embedding_directory=None, output_model=True, vae_filename_param=None): sd = ldm_patched.modules.utils.load_torch_file(ckpt_path) sd_keys = sd.keys() clip = None clipvision = None vae = None + vae_filename = None model = None model_patcher = None clip_target = None @@ -462,8 +463,12 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o model.load_model_weights(sd, "model.diffusion_model.") if output_vae: - vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True) - vae_sd = model_config.process_vae_state_dict(vae_sd) + if vae_filename_param is None: + vae_sd = ldm_patched.modules.utils.state_dict_prefix_replace(sd, {"first_stage_model.": ""}, filter_keys=True) + vae_sd = model_config.process_vae_state_dict(vae_sd) + else: + vae_sd = ldm_patched.modules.utils.load_torch_file(vae_filename_param) + vae_filename = vae_filename_param vae = VAE(sd=vae_sd) if output_clip: @@ -485,7 +490,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o print("loaded straight to GPU") model_management.load_model_gpu(model_patcher) - return (model_patcher, clip, vae, clipvision) + return model_patcher, clip, vae, vae_filename, clipvision def load_unet_state_dict(sd): #load unet in diffusers format diff --git a/models/safety_checker/put_safety_checker_models_here b/models/safety_checker/put_safety_checker_models_here new file mode 100644 index 00000000..e69de29b diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/modules/async_worker.py b/modules/async_worker.py index d8a1e072..76e10f92 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -4,6 +4,7 @@ from modules.patch import PatchSettings, patch_settings, patch_all patch_all() + class AsyncTask: def __init__(self, args): self.args = args @@ -43,11 +44,13 @@ def worker(): import fooocus_version import args_manager - from modules.sdxl_styles import 
apply_style, apply_wildcards, fooocus_expansion, apply_arrays + from extras.censor import default_censor + from modules.sdxl_styles import apply_style, get_random_style, fooocus_expansion, apply_arrays, random_style_name from modules.private_logger import log from extras.expansion import safe_str - from modules.util import remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil, \ - get_shape_ceil, resample_image, erode_or_dilate, ordinal_suffix, get_enabled_loras + from modules.util import (remove_empty_str, HWC3, resize_image, get_image_shape_ceil, set_image_shape_ceil, + get_shape_ceil, resample_image, erode_or_dilate, get_enabled_loras, + parse_lora_references_from_prompt, apply_wildcards) from modules.upscaler import perform_upscale from modules.flags import Performance from modules.meta_parser import get_metadata_parser, MetadataScheme @@ -68,10 +71,15 @@ def worker(): print(f'[Fooocus] {text}') async_task.yields.append(['preview', (number, text, None)]) - def yield_result(async_task, imgs, do_not_show_finished_images=False): + def yield_result(async_task, imgs, black_out_nsfw, censor=True, do_not_show_finished_images=False, + progressbar_index=flags.preparation_step_count): if not isinstance(imgs, list): imgs = [imgs] + if censor and (modules.config.default_black_out_nsfw or black_out_nsfw): + progressbar(async_task, progressbar_index, 'Checking for NSFW content ...') + imgs = default_censor(imgs) + async_task.results = async_task.results + imgs if do_not_show_finished_images: @@ -147,7 +155,8 @@ def worker(): base_model_name = args.pop() refiner_model_name = args.pop() refiner_switch = args.pop() - loras = get_enabled_loras([[bool(args.pop()), str(args.pop()), float(args.pop())] for _ in range(modules.config.default_max_lora_number)]) + loras = get_enabled_loras([(bool(args.pop()), str(args.pop()), float(args.pop())) for _ in + range(modules.config.default_max_lora_number)]) input_image_checkbox = args.pop() current_tab = args.pop() 
uov_method = args.pop() @@ -160,12 +169,15 @@ def worker(): disable_preview = args.pop() disable_intermediate_results = args.pop() disable_seed_increment = args.pop() + black_out_nsfw = args.pop() adm_scaler_positive = args.pop() adm_scaler_negative = args.pop() adm_scaler_end = args.pop() adaptive_cfg = args.pop() + clip_skip = args.pop() sampler_name = args.pop() scheduler_name = args.pop() + vae_name = args.pop() overwrite_step = args.pop() overwrite_switch = args.pop() overwrite_width = args.pop() @@ -195,7 +207,8 @@ def worker(): inpaint_erode_or_dilate = args.pop() save_metadata_to_images = args.pop() if not args_manager.args.disable_metadata else False - metadata_scheme = MetadataScheme(args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS + metadata_scheme = MetadataScheme( + args.pop()) if not args_manager.args.disable_metadata else MetadataScheme.FOOOCUS cn_tasks = {x: [] for x in flags.ip_list} for _ in range(flags.controlnet_image_count): @@ -225,10 +238,12 @@ def worker(): steps = performance_selection.steps() + performance_loras = [] + if performance_selection == Performance.EXTREME_SPEED: print('Enter LCM mode.') progressbar(async_task, 1, 'Downloading LCM components ...') - loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)] + performance_loras += [(modules.config.downloading_sdxl_lcm_lora(), 1.0)] if refiner_model_name != 'None': print(f'Refiner disabled in LCM mode.') @@ -247,7 +262,7 @@ def worker(): elif performance_selection == Performance.LIGHTNING: print('Enter Lightning mode.') progressbar(async_task, 1, 'Downloading Lightning components ...') - loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)] + performance_loras += [(modules.config.downloading_sdxl_lightning_lora(), 1.0)] if refiner_model_name != 'None': print(f'Refiner disabled in Lightning mode.') @@ -263,7 +278,27 @@ def worker(): adm_scaler_negative = 1.0 adm_scaler_end = 0.0 + elif performance_selection == Performance.HYPER_SD: + 
print('Enter Hyper-SD mode.') + progressbar(async_task, 1, 'Downloading Hyper-SD components ...') + performance_loras += [(modules.config.downloading_sdxl_hyper_sd_lora(), 0.8)] + + if refiner_model_name != 'None': + print(f'Refiner disabled in Hyper-SD mode.') + + refiner_model_name = 'None' + sampler_name = 'dpmpp_sde_gpu' + scheduler_name = 'karras' + sharpness = 0.0 + guidance_scale = 1.0 + adaptive_cfg = 1.0 + refiner_switch = 1.0 + adm_scaler_positive = 1.0 + adm_scaler_negative = 1.0 + adm_scaler_end = 0.0 + print(f'[Parameters] Adaptive CFG = {adaptive_cfg}') + print(f'[Parameters] CLIP Skip = {clip_skip}') print(f'[Parameters] Sharpness = {sharpness}') print(f'[Parameters] ControlNet Softness = {controlnet_softness}') print(f'[Parameters] ADM Scale = ' @@ -425,14 +460,21 @@ def worker(): extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] - progressbar(async_task, 3, 'Loading models ...') + progressbar(async_task, 2, 'Loading models ...') + + lora_filenames = modules.util.remove_performance_lora(modules.config.lora_filenames, performance_selection) + loras, prompt = parse_lora_references_from_prompt(prompt, loras, modules.config.default_max_lora_number, lora_filenames=lora_filenames) + loras += performance_loras + pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name, loras=loras, base_model_additional_loras=base_model_additional_loras, - use_synthetic_refiner=use_synthetic_refiner) + use_synthetic_refiner=use_synthetic_refiner, vae_name=vae_name) + + pipeline.set_clip_skip(clip_skip) progressbar(async_task, 3, 'Processing prompts ...') tasks = [] - + for i in range(image_number): if disable_seed_increment: task_seed = seed % (constants.MAX_SEED + 1) @@ -443,14 +485,20 @@ def worker(): task_prompt = apply_wildcards(prompt, task_rng, i, read_wildcards_in_order) task_prompt = apply_arrays(task_prompt, i) 
task_negative_prompt = apply_wildcards(negative_prompt, task_rng, i, read_wildcards_in_order) - task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in extra_positive_prompts] - task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in extra_negative_prompts] + task_extra_positive_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in + extra_positive_prompts] + task_extra_negative_prompts = [apply_wildcards(pmt, task_rng, i, read_wildcards_in_order) for pmt in + extra_negative_prompts] positive_basic_workloads = [] negative_basic_workloads = [] + task_styles = style_selections.copy() if use_style: - for s in style_selections: + for i, s in enumerate(task_styles): + if s == random_style_name: + s = get_random_style(task_rng) + task_styles[i] = s p, n = apply_style(s, positive=task_prompt) positive_basic_workloads = positive_basic_workloads + p negative_basic_workloads = negative_basic_workloads + n @@ -478,29 +526,30 @@ def worker(): negative_top_k=len(negative_basic_workloads), log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts), log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts), + styles=task_styles )) if use_expansion: for i, t in enumerate(tasks): - progressbar(async_task, 5, f'Preparing Fooocus text #{i + 1} ...') + progressbar(async_task, 4, f'Preparing Fooocus text #{i + 1} ...') expansion = pipeline.final_expansion(t['task_prompt'], t['task_seed']) print(f'[Prompt Expansion] {expansion}') t['expansion'] = expansion t['positive'] = copy.deepcopy(t['positive']) + [expansion] # Deep copy. 
for i, t in enumerate(tasks): - progressbar(async_task, 7, f'Encoding positive #{i + 1} ...') + progressbar(async_task, 5, f'Encoding positive #{i + 1} ...') t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=t['positive_top_k']) for i, t in enumerate(tasks): if abs(float(cfg_scale) - 1.0) < 1e-4: t['uc'] = pipeline.clone_cond(t['c']) else: - progressbar(async_task, 10, f'Encoding negative #{i + 1} ...') + progressbar(async_task, 6, f'Encoding negative #{i + 1} ...') t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=t['negative_top_k']) if len(goals) > 0: - progressbar(async_task, 13, 'Image processing ...') + progressbar(async_task, 7, 'Image processing ...') if 'vary' in goals: if 'subtle' in uov_method: @@ -521,7 +570,7 @@ def worker(): uov_input_image = set_image_shape_ceil(uov_input_image, shape_ceil) initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 13, 'VAE encoding ...') + progressbar(async_task, 8, 'VAE encoding ...') candidate_vae, _ = pipeline.get_candidate_vae( steps=steps, @@ -538,7 +587,7 @@ def worker(): if 'upscale' in goals: H, W, C = uov_input_image.shape - progressbar(async_task, 13, f'Upscaling image from {str((H, W))} ...') + progressbar(async_task, 9, f'Upscaling image from {str((H, W))} ...') uov_input_image = perform_upscale(uov_input_image) print(f'Image upscaled.') @@ -572,8 +621,12 @@ def worker(): if direct_return: d = [('Upscale (Fast)', 'upscale_fast', '2x')] + if modules.config.default_black_out_nsfw or black_out_nsfw: + progressbar(async_task, 100, 'Checking for NSFW content ...') + uov_input_image = default_censor(uov_input_image) + progressbar(async_task, 100, 'Saving image to system ...') uov_input_image_path = log(uov_input_image, d, output_format=output_format) - yield_result(async_task, uov_input_image_path, do_not_show_finished_images=True) + yield_result(async_task, uov_input_image_path, black_out_nsfw, False, do_not_show_finished_images=True) return tiled = True @@ 
-583,7 +636,7 @@ def worker(): denoising_strength = overwrite_upscale_strength initial_pixels = core.numpy_to_pytorch(uov_input_image) - progressbar(async_task, 13, 'VAE encoding ...') + progressbar(async_task, 10, 'VAE encoding ...') candidate_vae, _ = pipeline.get_candidate_vae( steps=steps, @@ -637,11 +690,11 @@ def worker(): ) if debugging_inpaint_preprocessor: - yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), + yield_result(async_task, inpaint_worker.current_task.visualize_mask_processing(), black_out_nsfw, do_not_show_finished_images=True) return - progressbar(async_task, 13, 'VAE Inpaint encoding ...') + progressbar(async_task, 11, 'VAE Inpaint encoding ...') inpaint_pixel_fill = core.numpy_to_pytorch(inpaint_worker.current_task.interested_fill) inpaint_pixel_image = core.numpy_to_pytorch(inpaint_worker.current_task.interested_image) @@ -661,7 +714,7 @@ def worker(): latent_swap = None if candidate_vae_swap is not None: - progressbar(async_task, 13, 'VAE SD15 encoding ...') + progressbar(async_task, 12, 'VAE SD15 encoding ...') latent_swap = core.encode_vae( vae=candidate_vae_swap, pixels=inpaint_pixel_fill)['samples'] @@ -701,7 +754,7 @@ def worker(): cn_img = HWC3(cn_img) task[0] = core.numpy_to_pytorch(cn_img) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return for task in cn_tasks[flags.cn_cpds]: cn_img, cn_stop, cn_weight = task @@ -713,7 +766,7 @@ def worker(): cn_img = HWC3(cn_img) task[0] = core.numpy_to_pytorch(cn_img) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return for task in cn_tasks[flags.cn_ip]: cn_img, cn_stop, cn_weight = task @@ -724,7 +777,7 @@ def worker(): task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_path) 
if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return for task in cn_tasks[flags.cn_ip_face]: cn_img, cn_stop, cn_weight = task @@ -738,7 +791,7 @@ def worker(): task[0] = ip_adapter.preprocess(cn_img, ip_adapter_path=ip_adapter_face_path) if debugging_cn_preprocessor: - yield_result(async_task, cn_img, do_not_show_finished_images=True) + yield_result(async_task, cn_img, black_out_nsfw, do_not_show_finished_images=True) return all_ip_tasks = cn_tasks[flags.cn_ip] + cn_tasks[flags.cn_ip_face] @@ -773,29 +826,48 @@ def worker(): final_sampler_name = sampler_name final_scheduler_name = scheduler_name - if scheduler_name == 'lcm': + if scheduler_name in ['lcm', 'tcd']: final_scheduler_name = 'sgm_uniform' - if pipeline.final_unet is not None: - pipeline.final_unet = core.opModelSamplingDiscrete.patch( - pipeline.final_unet, - sampling='lcm', - zsnr=False)[0] - if pipeline.final_refiner_unet is not None: - pipeline.final_refiner_unet = core.opModelSamplingDiscrete.patch( - pipeline.final_refiner_unet, - sampling='lcm', - zsnr=False)[0] - print('Using lcm scheduler.') - async_task.yields.append(['preview', (13, 'Moving model to GPU ...', None)]) + def patch_discrete(unet): + return core.opModelSamplingDiscrete.patch( + pipeline.final_unet, + sampling=scheduler_name, + zsnr=False)[0] + + if pipeline.final_unet is not None: + pipeline.final_unet = patch_discrete(pipeline.final_unet) + if pipeline.final_refiner_unet is not None: + pipeline.final_refiner_unet = patch_discrete(pipeline.final_refiner_unet) + print(f'Using {scheduler_name} scheduler.') + elif scheduler_name == 'edm_playground_v2.5': + final_scheduler_name = 'karras' + + def patch_edm(unet): + return core.opModelSamplingContinuousEDM.patch( + unet, + sampling=scheduler_name, + sigma_max=120.0, + sigma_min=0.002)[0] + + if pipeline.final_unet is not None: + pipeline.final_unet = 
patch_edm(pipeline.final_unet) + if pipeline.final_refiner_unet is not None: + pipeline.final_refiner_unet = patch_edm(pipeline.final_refiner_unet) + + print(f'Using {scheduler_name} scheduler.') + + async_task.yields.append(['preview', (flags.preparation_step_count, 'Moving model to GPU ...', None)]) def callback(step, x0, x, total_steps, y): done_steps = current_task_id * steps + step async_task.yields.append(['preview', ( - int(15.0 + 85.0 * float(done_steps) / float(all_steps)), - f'Step {step}/{total_steps} in the {current_task_id + 1}{ordinal_suffix(current_task_id + 1)} Sampling', y)]) + int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(done_steps) / float(all_steps)), + f'Sampling step {step + 1}/{total_steps}, image {current_task_id + 1}/{image_number} ...', y)]) for current_task_id, task in enumerate(tasks): + current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float(current_task_id * steps) / float(all_steps)) + progressbar(async_task, current_progress, f'Preparing task {current_task_id + 1}/{image_number} ...') execution_start_time = time.perf_counter() try: @@ -838,11 +910,18 @@ def worker(): imgs = [inpaint_worker.current_task.post_process(x) for x in imgs] img_paths = [] + current_progress = int(flags.preparation_step_count + (100 - flags.preparation_step_count) * float((current_task_id + 1) * steps) / float(all_steps)) + if modules.config.default_black_out_nsfw or black_out_nsfw: + progressbar(async_task, current_progress, 'Checking for NSFW content ...') + imgs = default_censor(imgs) + + progressbar(async_task, current_progress, f'Saving image {current_task_id + 1}/{image_number} to system ...') for x in imgs: d = [('Prompt', 'prompt', task['log_positive_prompt']), ('Negative Prompt', 'negative_prompt', task['log_negative_prompt']), ('Fooocus V2 Expansion', 'prompt_expansion', task['expansion']), - ('Styles', 'styles', str(raw_style_selections)), + ('Styles', 'styles', + 
str(task['styles'] if not use_expansion else [fooocus_expansion] + task['styles'])), ('Performance', 'performance', performance_selection.value)] if performance_selection.steps() != steps: @@ -865,10 +944,14 @@ def worker(): if refiner_swap_method != flags.refiner_swap_method: d.append(('Refiner Swap Method', 'refiner_swap_method', refiner_swap_method)) if modules.patch.patch_settings[pid].adaptive_cfg != modules.config.default_cfg_tsnr: - d.append(('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg)) + d.append( + ('CFG Mimicking from TSNR', 'adaptive_cfg', modules.patch.patch_settings[pid].adaptive_cfg)) + if clip_skip > 1: + d.append(('CLIP Skip', 'clip_skip', clip_skip)) d.append(('Sampler', 'sampler', sampler_name)) d.append(('Scheduler', 'scheduler', scheduler_name)) + d.append(('VAE', 'vae', vae_name)) d.append(('Seed', 'seed', str(task['task_seed']))) if freeu_enabled: @@ -883,12 +966,14 @@ def worker(): metadata_parser = modules.meta_parser.get_metadata_parser(metadata_scheme) metadata_parser.set_data(task['log_positive_prompt'], task['positive'], task['log_negative_prompt'], task['negative'], - steps, base_model_name, refiner_model_name, loras) - d.append(('Metadata Scheme', 'metadata_scheme', metadata_scheme.value if save_metadata_to_images else save_metadata_to_images)) + steps, base_model_name, refiner_model_name, loras, vae_name) + d.append(('Metadata Scheme', 'metadata_scheme', + metadata_scheme.value if save_metadata_to_images else save_metadata_to_images)) d.append(('Version', 'version', 'Fooocus v' + fooocus_version.version)) - img_paths.append(log(x, d, metadata_parser, output_format)) + img_paths.append(log(x, d, metadata_parser, output_format, task)) - yield_result(async_task, img_paths, do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results) + yield_result(async_task, img_paths, black_out_nsfw, False, + do_not_show_finished_images=len(tasks) == 1 or disable_intermediate_results) 
except ldm_patched.modules.model_management.InterruptProcessingException as e: if async_task.last_stop == 'skip': print('User skipped') diff --git a/modules/config.py b/modules/config.py index b81e218a..a6767c37 100644 --- a/modules/config.py +++ b/modules/config.py @@ -2,13 +2,14 @@ import os import json import math import numbers + import args_manager import tempfile import modules.flags import modules.sdxl_styles from modules.model_loader import load_file_from_url -from modules.util import get_files_from_folder, makedirs_with_log +from modules.extra_utils import makedirs_with_log, get_files_from_folder, try_eval_env_var from modules.flags import OutputFormat, Performance, MetadataScheme @@ -20,7 +21,7 @@ def get_config_path(key, default_value): else: return os.path.abspath(default_value) - +wildcards_max_bfs_depth = 64 config_path = get_config_path('config_path', "./config.txt") config_example_path = get_config_path('config_example_path', "config_modification_tutorial.txt") config_dict = {} @@ -189,16 +190,18 @@ paths_checkpoints = get_dir_or_set_default('path_checkpoints', ['../models/check paths_loras = get_dir_or_set_default('path_loras', ['../models/loras/'], True) path_embeddings = get_dir_or_set_default('path_embeddings', '../models/embeddings/') path_vae_approx = get_dir_or_set_default('path_vae_approx', '../models/vae_approx/') +path_vae = get_dir_or_set_default('path_vae', '../models/vae/') path_upscale_models = get_dir_or_set_default('path_upscale_models', '../models/upscale_models/') path_inpaint = get_dir_or_set_default('path_inpaint', '../models/inpaint/') path_controlnet = get_dir_or_set_default('path_controlnet', '../models/controlnet/') path_clip_vision = get_dir_or_set_default('path_clip_vision', '../models/clip_vision/') path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../models/prompt_expansion/fooocus_expansion') path_wildcards = get_dir_or_set_default('path_wildcards', '../wildcards/') +path_safety_checker = 
get_dir_or_set_default('path_safety_checker', '../models/safety_checker/') path_outputs = get_path_output() -def get_config_item_or_set_default(key, default_value, validator, disable_empty_as_none=False): +def get_config_item_or_set_default(key, default_value, validator, disable_empty_as_none=False, expected_type=None): global config_dict, visited_keys if key not in visited_keys: @@ -206,6 +209,7 @@ def get_config_item_or_set_default(key, default_value, validator, disable_empty_ v = os.getenv(key) if v is not None: + v = try_eval_env_var(v, expected_type) print(f"Environment: {key} = {v}") config_dict[key] = v @@ -250,41 +254,49 @@ temp_path = init_temp_path(get_config_item_or_set_default( key='temp_path', default_value=default_temp_path, validator=lambda x: isinstance(x, str), + expected_type=str ), default_temp_path) temp_path_cleanup_on_launch = get_config_item_or_set_default( key='temp_path_cleanup_on_launch', default_value=True, - validator=lambda x: isinstance(x, bool) + validator=lambda x: isinstance(x, bool), + expected_type=bool ) default_base_model_name = default_model = get_config_item_or_set_default( key='default_model', default_value='model.safetensors', - validator=lambda x: isinstance(x, str) + validator=lambda x: isinstance(x, str), + expected_type=str ) previous_default_models = get_config_item_or_set_default( key='previous_default_models', default_value=[], - validator=lambda x: isinstance(x, list) and all(isinstance(k, str) for k in x) + validator=lambda x: isinstance(x, list) and all(isinstance(k, str) for k in x), + expected_type=list ) default_refiner_model_name = default_refiner = get_config_item_or_set_default( key='default_refiner', default_value='None', - validator=lambda x: isinstance(x, str) + validator=lambda x: isinstance(x, str), + expected_type=str ) default_refiner_switch = get_config_item_or_set_default( key='default_refiner_switch', default_value=0.8, - validator=lambda x: isinstance(x, numbers.Number) and 0 <= x <= 1 + 
validator=lambda x: isinstance(x, numbers.Number) and 0 <= x <= 1, + expected_type=numbers.Number ) default_loras_min_weight = get_config_item_or_set_default( key='default_loras_min_weight', default_value=-2, - validator=lambda x: isinstance(x, numbers.Number) and -10 <= x <= 10 + validator=lambda x: isinstance(x, numbers.Number) and -10 <= x <= 10, + expected_type=numbers.Number ) default_loras_max_weight = get_config_item_or_set_default( key='default_loras_max_weight', default_value=2, - validator=lambda x: isinstance(x, numbers.Number) and -10 <= x <= 10 + validator=lambda x: isinstance(x, numbers.Number) and -10 <= x <= 10, + expected_type=numbers.Number ) default_loras = get_config_item_or_set_default( key='default_loras', @@ -318,33 +330,45 @@ default_loras = get_config_item_or_set_default( validator=lambda x: isinstance(x, list) and all( len(y) == 3 and isinstance(y[0], bool) and isinstance(y[1], str) and isinstance(y[2], numbers.Number) or len(y) == 2 and isinstance(y[0], str) and isinstance(y[1], numbers.Number) - for y in x) + for y in x), + expected_type=list ) default_loras = [(y[0], y[1], y[2]) if len(y) == 3 else (True, y[0], y[1]) for y in default_loras] default_max_lora_number = get_config_item_or_set_default( key='default_max_lora_number', default_value=len(default_loras) if isinstance(default_loras, list) and len(default_loras) > 0 else 5, - validator=lambda x: isinstance(x, int) and x >= 1 + validator=lambda x: isinstance(x, int) and x >= 1, + expected_type=int ) default_cfg_scale = get_config_item_or_set_default( key='default_cfg_scale', default_value=7.0, - validator=lambda x: isinstance(x, numbers.Number) + validator=lambda x: isinstance(x, numbers.Number), + expected_type=numbers.Number ) default_sample_sharpness = get_config_item_or_set_default( key='default_sample_sharpness', default_value=2.0, - validator=lambda x: isinstance(x, numbers.Number) + validator=lambda x: isinstance(x, numbers.Number), + expected_type=numbers.Number ) 
default_sampler = get_config_item_or_set_default( key='default_sampler', default_value='dpmpp_2m_sde_gpu', - validator=lambda x: x in modules.flags.sampler_list + validator=lambda x: x in modules.flags.sampler_list, + expected_type=str ) default_scheduler = get_config_item_or_set_default( key='default_scheduler', default_value='karras', - validator=lambda x: x in modules.flags.scheduler_list + validator=lambda x: x in modules.flags.scheduler_list, + expected_type=str +) +default_vae = get_config_item_or_set_default( + key='default_vae', + default_value=modules.flags.default_vae, + validator=lambda x: isinstance(x, str), + expected_type=str ) default_styles = get_config_item_or_set_default( key='default_styles', @@ -353,117 +377,144 @@ default_styles = get_config_item_or_set_default( "Fooocus Enhance", "Fooocus Sharp" ], - validator=lambda x: isinstance(x, list) and all(y in modules.sdxl_styles.legal_style_names for y in x) + validator=lambda x: isinstance(x, list) and all(y in modules.sdxl_styles.legal_style_names for y in x), + expected_type=list ) default_prompt_negative = get_config_item_or_set_default( key='default_prompt_negative', default_value='', validator=lambda x: isinstance(x, str), - disable_empty_as_none=True + disable_empty_as_none=True, + expected_type=str ) default_prompt = get_config_item_or_set_default( key='default_prompt', default_value='', validator=lambda x: isinstance(x, str), - disable_empty_as_none=True + disable_empty_as_none=True, + expected_type=str ) default_performance = get_config_item_or_set_default( key='default_performance', default_value=Performance.SPEED.value, - validator=lambda x: x in Performance.list() + validator=lambda x: x in Performance.list(), + expected_type=str ) default_advanced_checkbox = get_config_item_or_set_default( key='default_advanced_checkbox', default_value=False, - validator=lambda x: isinstance(x, bool) + validator=lambda x: isinstance(x, bool), + expected_type=bool ) default_max_image_number = 
get_config_item_or_set_default( key='default_max_image_number', default_value=32, - validator=lambda x: isinstance(x, int) and x >= 1 + validator=lambda x: isinstance(x, int) and x >= 1, + expected_type=int ) default_output_format = get_config_item_or_set_default( key='default_output_format', default_value='png', - validator=lambda x: x in OutputFormat.list() + validator=lambda x: x in OutputFormat.list(), + expected_type=str ) default_image_number = get_config_item_or_set_default( key='default_image_number', default_value=2, - validator=lambda x: isinstance(x, int) and 1 <= x <= default_max_image_number + validator=lambda x: isinstance(x, int) and 1 <= x <= default_max_image_number, + expected_type=int ) checkpoint_downloads = get_config_item_or_set_default( key='checkpoint_downloads', default_value={}, - validator=lambda x: isinstance(x, dict) and all(isinstance(k, str) and isinstance(v, str) for k, v in x.items()) + validator=lambda x: isinstance(x, dict) and all(isinstance(k, str) and isinstance(v, str) for k, v in x.items()), + expected_type=dict ) lora_downloads = get_config_item_or_set_default( key='lora_downloads', default_value={}, - validator=lambda x: isinstance(x, dict) and all(isinstance(k, str) and isinstance(v, str) for k, v in x.items()) + validator=lambda x: isinstance(x, dict) and all(isinstance(k, str) and isinstance(v, str) for k, v in x.items()), + expected_type=dict ) embeddings_downloads = get_config_item_or_set_default( key='embeddings_downloads', default_value={}, - validator=lambda x: isinstance(x, dict) and all(isinstance(k, str) and isinstance(v, str) for k, v in x.items()) + validator=lambda x: isinstance(x, dict) and all(isinstance(k, str) and isinstance(v, str) for k, v in x.items()), + expected_type=dict ) available_aspect_ratios = get_config_item_or_set_default( key='available_aspect_ratios', - default_value=[ - '704*1408', '704*1344', '768*1344', '768*1280', '832*1216', '832*1152', - '896*1152', '896*1088', '960*1088', '960*1024', 
'1024*1024', '1024*960', - '1088*960', '1088*896', '1152*896', '1152*832', '1216*832', '1280*768', - '1344*768', '1344*704', '1408*704', '1472*704', '1536*640', '1600*640', - '1664*576', '1728*576' - ], - validator=lambda x: isinstance(x, list) and all('*' in v for v in x) and len(x) > 1 + default_value=modules.flags.sdxl_aspect_ratios, + validator=lambda x: isinstance(x, list) and all('*' in v for v in x) and len(x) > 1, + expected_type=list ) default_aspect_ratio = get_config_item_or_set_default( key='default_aspect_ratio', default_value='1152*896' if '1152*896' in available_aspect_ratios else available_aspect_ratios[0], - validator=lambda x: x in available_aspect_ratios + validator=lambda x: x in available_aspect_ratios, + expected_type=str ) default_inpaint_engine_version = get_config_item_or_set_default( key='default_inpaint_engine_version', default_value='v2.6', - validator=lambda x: x in modules.flags.inpaint_engine_versions + validator=lambda x: x in modules.flags.inpaint_engine_versions, + expected_type=str ) default_cfg_tsnr = get_config_item_or_set_default( key='default_cfg_tsnr', default_value=7.0, - validator=lambda x: isinstance(x, numbers.Number) + validator=lambda x: isinstance(x, numbers.Number), + expected_type=numbers.Number +) +default_clip_skip = get_config_item_or_set_default( + key='default_clip_skip', + default_value=2, + validator=lambda x: isinstance(x, int) and 1 <= x <= modules.flags.clip_skip_max, + expected_type=int ) default_overwrite_step = get_config_item_or_set_default( key='default_overwrite_step', default_value=-1, - validator=lambda x: isinstance(x, int) + validator=lambda x: isinstance(x, int), + expected_type=int ) default_overwrite_switch = get_config_item_or_set_default( key='default_overwrite_switch', default_value=-1, - validator=lambda x: isinstance(x, int) + validator=lambda x: isinstance(x, int), + expected_type=int ) example_inpaint_prompts = get_config_item_or_set_default( key='example_inpaint_prompts', 
default_value=[ 'highly detailed face', 'detailed girl face', 'detailed man face', 'detailed hand', 'beautiful eyes' ], - validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x) + validator=lambda x: isinstance(x, list) and all(isinstance(v, str) for v in x), + expected_type=list +) +default_black_out_nsfw = get_config_item_or_set_default( + key='default_black_out_nsfw', + default_value=False, + validator=lambda x: isinstance(x, bool), + expected_type=bool ) default_save_metadata_to_images = get_config_item_or_set_default( key='default_save_metadata_to_images', default_value=False, - validator=lambda x: isinstance(x, bool) + validator=lambda x: isinstance(x, bool), + expected_type=bool ) default_metadata_scheme = get_config_item_or_set_default( key='default_metadata_scheme', default_value=MetadataScheme.FOOOCUS.value, - validator=lambda x: x in [y[1] for y in modules.flags.metadata_scheme if y[1] == x] + validator=lambda x: x in [y[1] for y in modules.flags.metadata_scheme if y[1] == x], + expected_type=str ) metadata_created_by = get_config_item_or_set_default( key='metadata_created_by', default_value='', - validator=lambda x: isinstance(x, str) + validator=lambda x: isinstance(x, str), + expected_type=str ) example_inpaint_prompts = [[x] for x in example_inpaint_prompts] @@ -481,6 +532,8 @@ possible_preset_keys = { "default_loras": "", "default_cfg_scale": "guidance_scale", "default_sample_sharpness": "sharpness", + "default_cfg_tsnr": "adaptive_cfg", + "default_clip_skip": "clip_skip", "default_sampler": "sampler", "default_scheduler": "scheduler", "default_overwrite_step": "steps", @@ -493,7 +546,8 @@ possible_preset_keys = { "default_save_metadata_to_images": "default_save_metadata_to_images", "checkpoint_downloads": "checkpoint_downloads", "embeddings_downloads": "embeddings_downloads", - "lora_downloads": "lora_downloads" + "lora_downloads": "lora_downloads", + "default_vae": "vae" } REWRITE_PRESET = False @@ -514,7 +568,7 @@ def 
add_ratio(x): default_aspect_ratio = add_ratio(default_aspect_ratio) -available_aspect_ratios = [add_ratio(x) for x in available_aspect_ratios] +available_aspect_ratios_labels = [add_ratio(x) for x in available_aspect_ratios] # Only write config in the first launch. @@ -535,26 +589,28 @@ with open(config_example_path, "w", encoding="utf-8") as json_file: model_filenames = [] lora_filenames = [] +vae_filenames = [] wildcard_filenames = [] -sdxl_lcm_lora = 'sdxl_lcm_lora.safetensors' -sdxl_lightning_lora = 'sdxl_lightning_4step_lora.safetensors' -loras_metadata_remove = [sdxl_lcm_lora, sdxl_lightning_lora] - def get_model_filenames(folder_paths, extensions=None, name_filter=None): if extensions is None: extensions = ['.pth', '.ckpt', '.bin', '.safetensors', '.fooocus.patch'] files = [] + + if not isinstance(folder_paths, list): + folder_paths = [folder_paths] for folder in folder_paths: files += get_files_from_folder(folder, extensions, name_filter) + return files def update_files(): - global model_filenames, lora_filenames, wildcard_filenames, available_presets + global model_filenames, lora_filenames, vae_filenames, wildcard_filenames, available_presets model_filenames = get_model_filenames(paths_checkpoints) lora_filenames = get_model_filenames(paths_loras) + vae_filenames = get_model_filenames(path_vae) wildcard_filenames = get_files_from_folder(path_wildcards, ['.txt']) available_presets = get_presets() return @@ -602,17 +658,27 @@ def downloading_sdxl_lcm_lora(): load_file_from_url( url='https://huggingface.co/lllyasviel/misc/resolve/main/sdxl_lcm_lora.safetensors', model_dir=paths_loras[0], - file_name=sdxl_lcm_lora + file_name=modules.flags.PerformanceLoRA.EXTREME_SPEED.value ) - return sdxl_lcm_lora + return modules.flags.PerformanceLoRA.EXTREME_SPEED.value + def downloading_sdxl_lightning_lora(): load_file_from_url( - url='https://huggingface.co/ByteDance/SDXL-Lightning/resolve/main/sdxl_lightning_4step_lora.safetensors', + 
url='https://huggingface.co/mashb1t/misc/resolve/main/sdxl_lightning_4step_lora.safetensors', model_dir=paths_loras[0], - file_name=sdxl_lightning_lora + file_name=modules.flags.PerformanceLoRA.LIGHTNING.value ) - return sdxl_lightning_lora + return modules.flags.PerformanceLoRA.LIGHTNING.value + + +def downloading_sdxl_hyper_sd_lora(): + load_file_from_url( + url='https://huggingface.co/mashb1t/misc/resolve/main/sdxl_hyper_sd_4step_lora.safetensors', + model_dir=paths_loras[0], + file_name=modules.flags.PerformanceLoRA.HYPER_SD.value + ) + return modules.flags.PerformanceLoRA.HYPER_SD.value def downloading_controlnet_canny(): @@ -679,5 +745,13 @@ def downloading_upscale_model(): ) return os.path.join(path_upscale_models, 'fooocus_upscaler_s409985e5.bin') +def downloading_safety_checker_model(): + load_file_from_url( + url='https://huggingface.co/mashb1t/misc/resolve/main/stable-diffusion-safety-checker.bin', + model_dir=path_safety_checker, + file_name='stable-diffusion-safety-checker.bin' + ) + return os.path.join(path_safety_checker, 'stable-diffusion-safety-checker.bin') + update_files() diff --git a/modules/core.py b/modules/core.py index 38ee8e8d..78c89759 100644 --- a/modules/core.py +++ b/modules/core.py @@ -21,8 +21,7 @@ from modules.lora import match_lora from modules.util import get_file_from_folder_list from ldm_patched.modules.lora import model_lora_keys_unet, model_lora_keys_clip from modules.config import path_embeddings -from ldm_patched.contrib.external_model_advanced import ModelSamplingDiscrete - +from ldm_patched.contrib.external_model_advanced import ModelSamplingDiscrete, ModelSamplingContinuousEDM opEmptyLatentImage = EmptyLatentImage() opVAEDecode = VAEDecode() @@ -32,15 +31,17 @@ opVAEEncodeTiled = VAEEncodeTiled() opControlNetApplyAdvanced = ControlNetApplyAdvanced() opFreeU = FreeU_V2() opModelSamplingDiscrete = ModelSamplingDiscrete() +opModelSamplingContinuousEDM = ModelSamplingContinuousEDM() class StableDiffusionModel: - def 
__init__(self, unet=None, vae=None, clip=None, clip_vision=None, filename=None): + def __init__(self, unet=None, vae=None, clip=None, clip_vision=None, filename=None, vae_filename=None): self.unet = unet self.vae = vae self.clip = clip self.clip_vision = clip_vision self.filename = filename + self.vae_filename = vae_filename self.unet_with_lora = unet self.clip_with_lora = clip self.visited_loras = '' @@ -142,9 +143,10 @@ def apply_controlnet(positive, negative, control_net, image, strength, start_per @torch.no_grad() @torch.inference_mode() -def load_model(ckpt_filename): - unet, clip, vae, clip_vision = load_checkpoint_guess_config(ckpt_filename, embedding_directory=path_embeddings) - return StableDiffusionModel(unet=unet, clip=clip, vae=vae, clip_vision=clip_vision, filename=ckpt_filename) +def load_model(ckpt_filename, vae_filename=None): + unet, clip, vae, vae_filename, clip_vision = load_checkpoint_guess_config(ckpt_filename, embedding_directory=path_embeddings, + vae_filename_param=vae_filename) + return StableDiffusionModel(unet=unet, clip=clip, vae=vae, clip_vision=clip_vision, filename=ckpt_filename, vae_filename=vae_filename) @torch.no_grad() diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py index 190601ec..494644d6 100644 --- a/modules/default_pipeline.py +++ b/modules/default_pipeline.py @@ -3,6 +3,7 @@ import os import torch import modules.patch import modules.config +import modules.flags import ldm_patched.modules.model_management import ldm_patched.modules.latent_formats import modules.inpaint_worker @@ -58,17 +59,21 @@ def assert_model_integrity(): @torch.no_grad() @torch.inference_mode() -def refresh_base_model(name): +def refresh_base_model(name, vae_name=None): global model_base filename = get_file_from_folder_list(name, modules.config.paths_checkpoints) - if model_base.filename == filename: + vae_filename = None + if vae_name is not None and vae_name != modules.flags.default_vae: + vae_filename = 
get_file_from_folder_list(vae_name, modules.config.path_vae) + + if model_base.filename == filename and model_base.vae_filename == vae_filename: return - model_base = core.StableDiffusionModel() - model_base = core.load_model(filename) + model_base = core.load_model(filename, vae_filename) print(f'Base model loaded: {model_base.filename}') + print(f'VAE loaded: {model_base.vae_filename}') return @@ -196,6 +201,17 @@ def clip_encode(texts, pool_top_k=1): return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]] +@torch.no_grad() +@torch.inference_mode() +def set_clip_skip(clip_skip: int): + global final_clip + + if final_clip is None: + return + + final_clip.clip_layer(-abs(clip_skip)) + return + @torch.no_grad() @torch.inference_mode() def clear_all_caches(): @@ -216,7 +232,7 @@ def prepare_text_encoder(async_call=True): @torch.no_grad() @torch.inference_mode() def refresh_everything(refiner_model_name, base_model_name, loras, - base_model_additional_loras=None, use_synthetic_refiner=False): + base_model_additional_loras=None, use_synthetic_refiner=False, vae_name=None): global final_unet, final_clip, final_vae, final_refiner_unet, final_refiner_vae, final_expansion final_unet = None @@ -227,11 +243,11 @@ def refresh_everything(refiner_model_name, base_model_name, loras, if use_synthetic_refiner and refiner_model_name == 'None': print('Synthetic Refiner Activated') - refresh_base_model(base_model_name) + refresh_base_model(base_model_name, vae_name) synthesize_refiner_model() else: refresh_refiner_model(refiner_model_name) - refresh_base_model(base_model_name) + refresh_base_model(base_model_name, vae_name) refresh_loras(loras, base_model_additional_loras=base_model_additional_loras) assert_model_integrity() @@ -254,7 +270,8 @@ def refresh_everything(refiner_model_name, base_model_name, loras, refresh_everything( refiner_model_name=modules.config.default_refiner_model_name, base_model_name=modules.config.default_base_model_name, - 
loras=get_enabled_loras(modules.config.default_loras) + loras=get_enabled_loras(modules.config.default_loras), + vae_name=modules.config.default_vae, ) diff --git a/modules/extra_utils.py b/modules/extra_utils.py new file mode 100644 index 00000000..c2dfa810 --- /dev/null +++ b/modules/extra_utils.py @@ -0,0 +1,41 @@ +import os +from ast import literal_eval + + +def makedirs_with_log(path): + try: + os.makedirs(path, exist_ok=True) + except OSError as error: + print(f'Directory {path} could not be created, reason: {error}') + + +def get_files_from_folder(folder_path, extensions=None, name_filter=None): + if not os.path.isdir(folder_path): + raise ValueError("Folder path is not a valid directory.") + + filenames = [] + + for root, _, files in os.walk(folder_path, topdown=False): + relative_path = os.path.relpath(root, folder_path) + if relative_path == ".": + relative_path = "" + for filename in sorted(files, key=lambda s: s.casefold()): + _, file_extension = os.path.splitext(filename) + if (extensions is None or file_extension.lower() in extensions) and (name_filter is None or name_filter in _): + path = os.path.join(relative_path, filename) + filenames.append(path) + + return filenames + + +def try_eval_env_var(value: str, expected_type=None): + try: + value_eval = value + if expected_type is bool: + value_eval = value.title() + value_eval = literal_eval(value_eval) + if expected_type is not None and not isinstance(value_eval, expected_type): + return value + return value_eval + except: + return value diff --git a/modules/flags.py b/modules/flags.py index c9d13fd8..29ac4615 100644 --- a/modules/flags.py +++ b/modules/flags.py @@ -34,7 +34,8 @@ KSAMPLER = { "dpmpp_3m_sde": "", "dpmpp_3m_sde_gpu": "", "ddpm": "", - "lcm": "LCM" + "lcm": "LCM", + "tcd": "TCD" } SAMPLER_EXTRA = { @@ -47,12 +48,16 @@ SAMPLERS = KSAMPLER | SAMPLER_EXTRA KSAMPLER_NAMES = list(KSAMPLER.keys()) -SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", 
"lcm", "turbo"] +SCHEDULER_NAMES = ["normal", "karras", "exponential", "sgm_uniform", "simple", "ddim_uniform", "lcm", "turbo", "align_your_steps", "tcd", "edm_playground_v2.5"] SAMPLER_NAMES = KSAMPLER_NAMES + list(SAMPLER_EXTRA.keys()) sampler_list = SAMPLER_NAMES scheduler_list = SCHEDULER_NAMES +clip_skip_max = 12 + +default_vae = 'Default (model)' + refiner_swap_method = 'joint' cn_ip = "ImagePrompt" @@ -78,6 +83,14 @@ inpaint_options = [inpaint_option_default, inpaint_option_detail, inpaint_option desc_type_photo = 'Photograph' desc_type_anime = 'Art/Anime' +sdxl_aspect_ratios = [ + '704*1408', '704*1344', '768*1344', '768*1280', '832*1216', '832*1152', + '896*1152', '896*1088', '960*1088', '960*1024', '1024*1024', '1024*960', + '1088*960', '1088*896', '1152*896', '1152*832', '1216*832', '1280*768', + '1344*768', '1344*704', '1408*704', '1472*704', '1536*640', '1600*640', + '1664*576', '1728*576' +] + class MetadataScheme(Enum): FOOOCUS = 'fooocus' @@ -90,6 +103,7 @@ metadata_scheme = [ ] controlnet_image_count = 4 +preparation_step_count = 13 class OutputFormat(Enum): @@ -102,11 +116,24 @@ class OutputFormat(Enum): return list(map(lambda c: c.value, cls)) +class PerformanceLoRA(Enum): + QUALITY = None + SPEED = None + EXTREME_SPEED = 'sdxl_lcm_lora.safetensors' + LIGHTNING = 'sdxl_lightning_4step_lora.safetensors' + HYPER_SD = 'sdxl_hyper_sd_4step_lora.safetensors' + + class Steps(IntEnum): QUALITY = 60 SPEED = 30 EXTREME_SPEED = 8 LIGHTNING = 4 + HYPER_SD = 4 + + @classmethod + def keys(cls) -> list: + return list(map(lambda c: c, Steps.__members__)) class StepsUOV(IntEnum): @@ -114,6 +141,7 @@ class StepsUOV(IntEnum): SPEED = 18 EXTREME_SPEED = 8 LIGHTNING = 4 + HYPER_SD = 4 class Performance(Enum): @@ -121,19 +149,31 @@ class Performance(Enum): SPEED = 'Speed' EXTREME_SPEED = 'Extreme Speed' LIGHTNING = 'Lightning' + HYPER_SD = 'Hyper-SD' @classmethod def list(cls) -> list: return list(map(lambda c: c.value, cls)) + @classmethod + def values(cls) -> list: 
+ return list(map(lambda c: c.value, cls)) + + @classmethod + def by_steps(cls, steps: int | str): + return cls[Steps(int(steps)).name] + @classmethod def has_restricted_features(cls, x) -> bool: if isinstance(x, Performance): x = x.value - return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value] + return x in [cls.EXTREME_SPEED.value, cls.LIGHTNING.value, cls.HYPER_SD.value] def steps(self) -> int | None: - return Steps[self.name].value if Steps[self.name] else None + return Steps[self.name].value if self.name in Steps.__members__ else None def steps_uov(self) -> int | None: - return StepsUOV[self.name].value if Steps[self.name] else None + return StepsUOV[self.name].value if self.name in StepsUOV.__members__ else None + + def lora_filename(self) -> str | None: + return PerformanceLoRA[self.name].value if self.name in PerformanceLoRA.__members__ else None diff --git a/modules/meta_parser.py b/modules/meta_parser.py index 70ab8860..ff930cc0 100644 --- a/modules/meta_parser.py +++ b/modules/meta_parser.py @@ -32,20 +32,22 @@ def load_parameter_button_click(raw_metadata: dict | str, is_generating: bool): get_str('prompt', 'Prompt', loaded_parameter_dict, results) get_str('negative_prompt', 'Negative Prompt', loaded_parameter_dict, results) get_list('styles', 'Styles', loaded_parameter_dict, results) - get_str('performance', 'Performance', loaded_parameter_dict, results) + performance = get_str('performance', 'Performance', loaded_parameter_dict, results) get_steps('steps', 'Steps', loaded_parameter_dict, results) - get_float('overwrite_switch', 'Overwrite Switch', loaded_parameter_dict, results) + get_number('overwrite_switch', 'Overwrite Switch', loaded_parameter_dict, results) get_resolution('resolution', 'Resolution', loaded_parameter_dict, results) - get_float('guidance_scale', 'Guidance Scale', loaded_parameter_dict, results) - get_float('sharpness', 'Sharpness', loaded_parameter_dict, results) + get_number('guidance_scale', 'Guidance Scale', 
loaded_parameter_dict, results) + get_number('sharpness', 'Sharpness', loaded_parameter_dict, results) get_adm_guidance('adm_guidance', 'ADM Guidance', loaded_parameter_dict, results) get_str('refiner_swap_method', 'Refiner Swap Method', loaded_parameter_dict, results) - get_float('adaptive_cfg', 'CFG Mimicking from TSNR', loaded_parameter_dict, results) + get_number('adaptive_cfg', 'CFG Mimicking from TSNR', loaded_parameter_dict, results) + get_number('clip_skip', 'CLIP Skip', loaded_parameter_dict, results, cast_type=int) get_str('base_model', 'Base Model', loaded_parameter_dict, results) get_str('refiner_model', 'Refiner Model', loaded_parameter_dict, results) - get_float('refiner_switch', 'Refiner Switch', loaded_parameter_dict, results) + get_number('refiner_switch', 'Refiner Switch', loaded_parameter_dict, results) get_str('sampler', 'Sampler', loaded_parameter_dict, results) get_str('scheduler', 'Scheduler', loaded_parameter_dict, results) + get_str('vae', 'VAE', loaded_parameter_dict, results) get_seed('seed', 'Seed', loaded_parameter_dict, results) if is_generating: @@ -57,19 +59,27 @@ def load_parameter_button_click(raw_metadata: dict | str, is_generating: bool): get_freeu('freeu', 'FreeU', loaded_parameter_dict, results) + # prevent performance LoRAs to be added twice, by performance and by lora + performance_filename = None + if performance is not None and performance in Performance.values(): + performance = Performance(performance) + performance_filename = performance.lora_filename() + for i in range(modules.config.default_max_lora_number): - get_lora(f'lora_combined_{i + 1}', f'LoRA {i + 1}', loaded_parameter_dict, results) + get_lora(f'lora_combined_{i + 1}', f'LoRA {i + 1}', loaded_parameter_dict, results, performance_filename) return results -def get_str(key: str, fallback: str | None, source_dict: dict, results: list, default=None): +def get_str(key: str, fallback: str | None, source_dict: dict, results: list, default=None) -> str | None: try: h 
= source_dict.get(key, source_dict.get(fallback, default)) assert isinstance(h, str) results.append(h) + return h except: results.append(gr.update()) + return None def get_list(key: str, fallback: str | None, source_dict: dict, results: list, default=None): @@ -82,11 +92,11 @@ def get_list(key: str, fallback: str | None, source_dict: dict, results: list, d results.append(gr.update()) -def get_float(key: str, fallback: str | None, source_dict: dict, results: list, default=None): +def get_number(key: str, fallback: str | None, source_dict: dict, results: list, default=None, cast_type=float): try: h = source_dict.get(key, source_dict.get(fallback, default)) assert h is not None - h = float(h) + h = cast_type(h) results.append(h) except: results.append(gr.update()) @@ -109,8 +119,9 @@ def get_steps(key: str, fallback: str | None, source_dict: dict, results: list, assert h is not None h = int(h) # if not in steps or in steps and performance is not the same - if h not in iter(Steps) or Steps(h).name.casefold() != source_dict.get('performance', '').replace(' ', - '_').casefold(): + performance_name = source_dict.get('performance', '').replace(' ', '_').replace('-', '_').casefold() + performance_candidates = [key for key in Steps.keys() if key.casefold() == performance_name and Steps[key] == h] + if len(performance_candidates) == 0: results.append(h) return results.append(-1) @@ -123,7 +134,7 @@ def get_resolution(key: str, fallback: str | None, source_dict: dict, results: l h = source_dict.get(key, source_dict.get(fallback, default)) width, height = eval(h) formatted = modules.config.add_ratio(f'{width}*{height}') - if formatted in modules.config.available_aspect_ratios: + if formatted in modules.config.available_aspect_ratios_labels: results.append(formatted) results.append(-1) results.append(-1) @@ -179,7 +190,7 @@ def get_freeu(key: str, fallback: str | None, source_dict: dict, results: list, results.append(gr.update()) -def get_lora(key: str, fallback: str | None, 
source_dict: dict, results: list): +def get_lora(key: str, fallback: str | None, source_dict: dict, results: list, performance_filename: str | None): try: split_data = source_dict.get(key, source_dict.get(fallback)).split(' : ') enabled = True @@ -191,6 +202,9 @@ def get_lora(key: str, fallback: str | None, source_dict: dict, results: list): name = split_data[1] weight = split_data[2] + if name == performance_filename: + raise Exception + weight = float(weight) results.append(enabled) results.append(name) @@ -204,7 +218,6 @@ def get_lora(key: str, fallback: str | None, source_dict: dict, results: list): def get_sha256(filepath): global hash_cache if filepath not in hash_cache: - # is_safetensors = os.path.splitext(filepath)[1].lower() == '.safetensors' hash_cache[filepath] = sha256(filepath) return hash_cache[filepath] @@ -220,7 +233,7 @@ def parse_meta_from_preset(preset_content): loras = getattr(modules.config, settings_key) if settings_key in items: loras = items[settings_key] - for index, lora in enumerate(loras[:5]): + for index, lora in enumerate(loras[:modules.config.default_max_lora_number]): preset_prepared[f'lora_combined_{index + 1}'] = ' : '.join(map(str, lora)) elif settings_key == "default_aspect_ratio": if settings_key in items and items[settings_key] is not None: @@ -247,27 +260,28 @@ class MetadataParser(ABC): self.full_prompt: str = '' self.raw_negative_prompt: str = '' self.full_negative_prompt: str = '' - self.steps: int = 30 + self.steps: int = Steps.SPEED.value self.base_model_name: str = '' self.base_model_hash: str = '' self.refiner_model_name: str = '' self.refiner_model_hash: str = '' self.loras: list = [] + self.vae_name: str = '' @abstractmethod def get_scheme(self) -> MetadataScheme: raise NotImplementedError @abstractmethod - def parse_json(self, metadata: dict | str) -> dict: + def to_json(self, metadata: dict | str) -> dict: raise NotImplementedError @abstractmethod - def parse_string(self, metadata: dict) -> str: + def 
to_string(self, metadata: dict) -> str: raise NotImplementedError def set_data(self, raw_prompt, full_prompt, raw_negative_prompt, full_negative_prompt, steps, base_model_name, - refiner_model_name, loras): + refiner_model_name, loras, vae_name): self.raw_prompt = raw_prompt self.full_prompt = full_prompt self.raw_negative_prompt = raw_negative_prompt @@ -289,12 +303,7 @@ class MetadataParser(ABC): lora_path = get_file_from_folder_list(lora_name, modules.config.paths_loras) lora_hash = get_sha256(lora_path) self.loras.append((Path(lora_name).stem, lora_weight, lora_hash)) - - @staticmethod - def remove_special_loras(lora_filenames): - for lora_to_remove in modules.config.loras_metadata_remove: - if lora_to_remove in lora_filenames: - lora_filenames.remove(lora_to_remove) + self.vae_name = Path(vae_name).stem class A1111MetadataParser(MetadataParser): @@ -310,6 +319,7 @@ class A1111MetadataParser(MetadataParser): 'steps': 'Steps', 'sampler': 'Sampler', 'scheduler': 'Scheduler', + 'vae': 'VAE', 'guidance_scale': 'CFG scale', 'seed': 'Seed', 'resolution': 'Size', @@ -317,6 +327,7 @@ class A1111MetadataParser(MetadataParser): 'adm_guidance': 'ADM Guidance', 'refiner_swap_method': 'Refiner Swap Method', 'adaptive_cfg': 'Adaptive CFG', + 'clip_skip': 'Clip skip', 'overwrite_switch': 'Overwrite Switch', 'freeu': 'FreeU', 'base_model': 'Model', @@ -329,7 +340,7 @@ class A1111MetadataParser(MetadataParser): 'version': 'Version' } - def parse_json(self, metadata: str) -> dict: + def to_json(self, metadata: str) -> dict: metadata_prompt = '' metadata_negative_prompt = '' @@ -383,9 +394,9 @@ class A1111MetadataParser(MetadataParser): data['styles'] = str(found_styles) # try to load performance based on steps, fallback for direct A1111 imports - if 'steps' in data and 'performance' not in data: + if 'steps' in data and 'performance' in data is None: try: - data['performance'] = Performance[Steps(int(data['steps'])).name].value + data['performance'] = 
Performance.by_steps(data['steps']).value except ValueError | KeyError: pass @@ -397,13 +408,12 @@ class A1111MetadataParser(MetadataParser): data['sampler'] = k break - for key in ['base_model', 'refiner_model']: + for key in ['base_model', 'refiner_model', 'vae']: if key in data: - for filename in modules.config.model_filenames: - path = Path(filename) - if data[key] == path.stem: - data[key] = filename - break + if key == 'vae': + self.add_extension_to_filename(data, modules.config.vae_filenames, 'vae') + else: + self.add_extension_to_filename(data, modules.config.model_filenames, key) lora_data = '' if 'lora_weights' in data and data['lora_weights'] != '': @@ -412,13 +422,11 @@ class A1111MetadataParser(MetadataParser): lora_data = data['lora_hashes'] if lora_data != '': - lora_filenames = modules.config.lora_filenames.copy() - self.remove_special_loras(lora_filenames) for li, lora in enumerate(lora_data.split(', ')): lora_split = lora.split(': ') lora_name = lora_split[0] lora_weight = lora_split[2] if len(lora_split) == 3 else lora_split[1] - for filename in lora_filenames: + for filename in modules.config.lora_filenames: path = Path(filename) if lora_name == path.stem: data[f'lora_combined_{li + 1}'] = f'{filename} : {lora_weight}' @@ -426,13 +434,14 @@ class A1111MetadataParser(MetadataParser): return data - def parse_string(self, metadata: dict) -> str: + def to_string(self, metadata: dict) -> str: data = {k: v for _, k, v in metadata} width, height = eval(data['resolution']) sampler = data['sampler'] scheduler = data['scheduler'] + if sampler in SAMPLERS and SAMPLERS[sampler] != '': sampler = SAMPLERS[sampler] if sampler not in CIVITAI_NO_KARRAS and scheduler == 'karras': @@ -451,6 +460,7 @@ class A1111MetadataParser(MetadataParser): self.fooocus_to_a1111['performance']: data['performance'], self.fooocus_to_a1111['scheduler']: scheduler, + self.fooocus_to_a1111['vae']: Path(data['vae']).stem, # workaround for multiline prompts 
self.fooocus_to_a1111['raw_prompt']: self.raw_prompt, self.fooocus_to_a1111['raw_negative_prompt']: self.raw_negative_prompt, @@ -462,7 +472,7 @@ class A1111MetadataParser(MetadataParser): self.fooocus_to_a1111['refiner_model_hash']: self.refiner_model_hash } - for key in ['adaptive_cfg', 'overwrite_switch', 'refiner_swap_method', 'freeu']: + for key in ['adaptive_cfg', 'clip_skip', 'overwrite_switch', 'refiner_swap_method', 'freeu']: if key in data: generation_params[self.fooocus_to_a1111[key]] = data[key] @@ -491,28 +501,35 @@ class A1111MetadataParser(MetadataParser): negative_prompt_text = f"\nNegative prompt: {negative_prompt_resolved}" if negative_prompt_resolved else "" return f"{positive_prompt_resolved}{negative_prompt_text}\n{generation_params_text}".strip() + @staticmethod + def add_extension_to_filename(data, filenames, key): + for filename in filenames: + path = Path(filename) + if data[key] == path.stem: + data[key] = filename + break + class FooocusMetadataParser(MetadataParser): def get_scheme(self) -> MetadataScheme: return MetadataScheme.FOOOCUS - def parse_json(self, metadata: dict) -> dict: - model_filenames = modules.config.model_filenames.copy() - lora_filenames = modules.config.lora_filenames.copy() - self.remove_special_loras(lora_filenames) + def to_json(self, metadata: dict) -> dict: for key, value in metadata.items(): if value in ['', 'None']: continue if key in ['base_model', 'refiner_model']: - metadata[key] = self.replace_value_with_filename(key, value, model_filenames) + metadata[key] = self.replace_value_with_filename(key, value, modules.config.model_filenames) elif key.startswith('lora_combined_'): - metadata[key] = self.replace_value_with_filename(key, value, lora_filenames) + metadata[key] = self.replace_value_with_filename(key, value, modules.config.lora_filenames) + elif key == 'vae': + metadata[key] = self.replace_value_with_filename(key, value, modules.config.vae_filenames) else: continue return metadata - def 
parse_string(self, metadata: list) -> str: + def to_string(self, metadata: list) -> str: for li, (label, key, value) in enumerate(metadata): # remove model folder paths from metadata if key.startswith('lora_combined_'): @@ -533,6 +550,7 @@ class FooocusMetadataParser(MetadataParser): res['refiner_model'] = self.refiner_model_name res['refiner_model_hash'] = self.refiner_model_hash + res['vae'] = self.vae_name res['loras'] = self.loras if modules.config.metadata_created_by != '': @@ -551,6 +569,8 @@ class FooocusMetadataParser(MetadataParser): elif value == path.stem: return filename + return None + def get_metadata_parser(metadata_scheme: MetadataScheme) -> MetadataParser: match metadata_scheme: diff --git a/modules/model_loader.py b/modules/model_loader.py index 8ba336a9..1143f75e 100644 --- a/modules/model_loader.py +++ b/modules/model_loader.py @@ -14,6 +14,8 @@ def load_file_from_url( Returns the path to the downloaded file. """ + domain = os.environ.get("HF_MIRROR", "https://huggingface.co").rstrip('/') + url = str.replace(url, "https://huggingface.co", domain, 1) os.makedirs(model_dir, exist_ok=True) if not file_name: parts = urlparse(url) diff --git a/modules/patch_precision.py b/modules/patch_precision.py index 83569bdd..22ffda0a 100644 --- a/modules/patch_precision.py +++ b/modules/patch_precision.py @@ -51,6 +51,8 @@ def patched_register_schedule(self, given_betas=None, beta_schedule="linear", ti self.linear_end = linear_end sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32) self.set_sigmas(sigmas) + alphas_cumprod = torch.tensor(alphas_cumprod, dtype=torch.float32) + self.set_alphas_cumprod(alphas_cumprod) return diff --git a/modules/private_logger.py b/modules/private_logger.py index edd9457d..6fdb680c 100644 --- a/modules/private_logger.py +++ b/modules/private_logger.py @@ -21,13 +21,13 @@ def get_current_html_path(output_format=None): return html_name -def log(img, metadata, metadata_parser: MetadataParser | 
None = None, output_format=None) -> str: +def log(img, metadata, metadata_parser: MetadataParser | None = None, output_format=None, task=None) -> str: path_outputs = modules.config.temp_path if args_manager.args.disable_image_log else modules.config.path_outputs output_format = output_format if output_format else modules.config.default_output_format date_string, local_temp_filename, only_name = generate_temp_filename(folder=path_outputs, extension=output_format) os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True) - parsed_parameters = metadata_parser.parse_string(metadata.copy()) if metadata_parser is not None else '' + parsed_parameters = metadata_parser.to_string(metadata.copy()) if metadata_parser is not None else '' image = Image.fromarray(img) if output_format == OutputFormat.PNG.value: @@ -111,9 +111,15 @@ def log(img, metadata, metadata_parser: MetadataParser | None = None, output_for for label, key, value in metadata: value_txt = str(value).replace('\n', '
') item += f"{label}{value_txt}\n" + + if task is not None and 'positive' in task and 'negative' in task: + full_prompt_details = f"""
Positive{', '.join(task['positive'])}
+
Negative{', '.join(task['negative'])}
""" + item += f"Full raw prompt{full_prompt_details}\n" + item += "" - js_txt = urllib.parse.quote(json.dumps({k: v for _, k, v in metadata}, indent=0), safe='') + js_txt = urllib.parse.quote(json.dumps({k: v for _, k, v, in metadata}, indent=0), safe='') item += f"
" item += "" diff --git a/modules/sample_hijack.py b/modules/sample_hijack.py index 5936a096..84752ede 100644 --- a/modules/sample_hijack.py +++ b/modules/sample_hijack.py @@ -3,6 +3,7 @@ import ldm_patched.modules.samplers import ldm_patched.modules.model_management from collections import namedtuple +from ldm_patched.contrib.external_align_your_steps import AlignYourStepsScheduler from ldm_patched.contrib.external_custom_sampler import SDTurboScheduler from ldm_patched.k_diffusion import sampling as k_diffusion_sampling from ldm_patched.modules.samplers import normal_scheduler, simple_scheduler, ddim_scheduler @@ -174,7 +175,10 @@ def calculate_sigmas_scheduler_hacked(model, scheduler_name, steps): elif scheduler_name == "sgm_uniform": sigmas = normal_scheduler(model, steps, sgm=True) elif scheduler_name == "turbo": - sigmas = SDTurboScheduler().get_sigmas(namedtuple('Patcher', ['model'])(model=model), steps=steps, denoise=1.0)[0] + sigmas = SDTurboScheduler().get_sigmas(model=model, steps=steps, denoise=1.0)[0] + elif scheduler_name == "align_your_steps": + model_type = 'SDXL' if isinstance(model.latent_format, ldm_patched.modules.latent_formats.SDXL) else 'SD1' + sigmas = AlignYourStepsScheduler().get_sigmas(model_type=model_type, steps=steps, denoise=1.0)[0] else: raise TypeError("error invalid scheduler") return sigmas diff --git a/modules/sdxl_styles.py b/modules/sdxl_styles.py index 77ad6b57..12ab6c5c 100644 --- a/modules/sdxl_styles.py +++ b/modules/sdxl_styles.py @@ -2,13 +2,12 @@ import os import re import json import math -import modules.config -from modules.util import get_files_from_folder +from modules.extra_utils import get_files_from_folder +from random import Random # cannot use modules.config - validators causing circular imports styles_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../sdxl_styles/')) -wildcards_max_bfs_depth = 64 def normalize_key(k): @@ -24,7 +23,6 @@ def normalize_key(k): styles = {} - styles_files = 
get_files_from_folder(styles_path, ['.json']) for x in ['sdxl_styles_fooocus.json', @@ -50,8 +48,13 @@ for styles_file in styles_files: print(f'Failed to load style file {styles_file}') style_keys = list(styles.keys()) -fooocus_expansion = "Fooocus V2" -legal_style_names = [fooocus_expansion] + style_keys +fooocus_expansion = 'Fooocus V2' +random_style_name = 'Random Style' +legal_style_names = [fooocus_expansion, random_style_name] + style_keys + + +def get_random_style(rng: Random) -> str: + return rng.choice(list(styles.items()))[0] def apply_style(style, positive): @@ -59,34 +62,7 @@ def apply_style(style, positive): return p.replace('{prompt}', positive).splitlines(), n.splitlines() -def apply_wildcards(wildcard_text, rng, i, read_wildcards_in_order): - for _ in range(wildcards_max_bfs_depth): - placeholders = re.findall(r'__([\w-]+)__', wildcard_text) - if len(placeholders) == 0: - return wildcard_text - - print(f'[Wildcards] processing: {wildcard_text}') - for placeholder in placeholders: - try: - matches = [x for x in modules.config.wildcard_filenames if os.path.splitext(os.path.basename(x))[0] == placeholder] - words = open(os.path.join(modules.config.path_wildcards, matches[0]), encoding='utf-8').read().splitlines() - words = [x for x in words if x != ''] - assert len(words) > 0 - if read_wildcards_in_order: - wildcard_text = wildcard_text.replace(f'__{placeholder}__', words[i % len(words)], 1) - else: - wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1) - except: - print(f'[Wildcards] Warning: {placeholder}.txt missing or empty. ' - f'Using "{placeholder}" as a normal word.') - wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) - print(f'[Wildcards] {wildcard_text}') - - print(f'[Wildcards] BFS stack overflow. 
Current text: {wildcard_text}') - return wildcard_text - - -def get_words(arrays, totalMult, index): +def get_words(arrays, total_mult, index): if len(arrays) == 1: return [arrays[0].split(',')[index]] else: @@ -95,7 +71,7 @@ def get_words(arrays, totalMult, index): index -= index % len(words) index /= len(words) index = math.floor(index) - return [word] + get_words(arrays[1:], math.floor(totalMult/len(words)), index) + return [word] + get_words(arrays[1:], math.floor(total_mult / len(words)), index) def apply_arrays(text, index): diff --git a/modules/ui_gradio_extensions.py b/modules/ui_gradio_extensions.py index bebf9f8c..409c7e33 100644 --- a/modules/ui_gradio_extensions.py +++ b/modules/ui_gradio_extensions.py @@ -39,7 +39,7 @@ def javascript_html(): head += f'\n' head += f'\n' head += f'\n' - head += f'\n' + head += f'\n' if args_manager.args.theme: head += f'\n' diff --git a/modules/util.py b/modules/util.py index 9e0fb294..5003f79a 100644 --- a/modules/util.py +++ b/modules/util.py @@ -1,4 +1,4 @@ -import typing +from pathlib import Path import numpy as np import datetime @@ -6,16 +6,28 @@ import random import math import os import cv2 +import re +from typing import List, Tuple, AnyStr, NamedTuple + import json import hashlib from PIL import Image +import modules.config import modules.sdxl_styles +from modules.flags import Performance LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS) + +# Regexp compiled once. 
Matches entries with the following pattern: +# +# +LORAS_PROMPT_PATTERN = re.compile(r"()", re.X) + HASH_SHA256_LENGTH = 10 + def erode_or_dilate(x, k): k = int(k) if k > 0: @@ -163,25 +175,6 @@ def generate_temp_filename(folder='./outputs/', extension='png'): return date_string, os.path.abspath(result), filename -def get_files_from_folder(folder_path, extensions=None, name_filter=None): - if not os.path.isdir(folder_path): - raise ValueError("Folder path is not a valid directory.") - - filenames = [] - - for root, dirs, files in os.walk(folder_path, topdown=False): - relative_path = os.path.relpath(root, folder_path) - if relative_path == ".": - relative_path = "" - for filename in sorted(files, key=lambda s: s.casefold()): - _, file_extension = os.path.splitext(filename) - if (extensions is None or file_extension.lower() in extensions) and (name_filter is None or name_filter in _): - path = os.path.join(relative_path, filename) - filenames.append(path) - - return filenames - - def sha256(filename, use_addnet_hash=False, length=HASH_SHA256_LENGTH): print(f"Calculating sha256 for {filename}: ", end='') if use_addnet_hash: @@ -355,7 +348,7 @@ def extract_styles_from_prompt(prompt, negative_prompt): return list(reversed(extracted)), real_prompt, negative_prompt -class PromptStyle(typing.NamedTuple): +class PromptStyle(NamedTuple): name: str prompt: str negative_prompt: str @@ -370,7 +363,18 @@ def is_json(data: str) -> bool: return True +def get_filname_by_stem(lora_name, filenames: List[str]) -> str | None: + for filename in filenames: + path = Path(filename) + if lora_name == path.stem: + return filename + return None + + def get_file_from_folder_list(name, folders): + if not isinstance(folders, list): + folders = [folders] + for folder in folders: filename = os.path.abspath(os.path.realpath(os.path.join(folder, name))) if os.path.isfile(filename): @@ -379,10 +383,6 @@ def get_file_from_folder_list(name, folders): return 
os.path.abspath(os.path.realpath(os.path.join(folders[0], name))) -def ordinal_suffix(number: int) -> str: - return 'th' if 10 <= number % 100 <= 20 else {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th') - - def makedirs_with_log(path): try: os.makedirs(path, exist_ok=True) @@ -390,5 +390,132 @@ def makedirs_with_log(path): print(f'Directory {path} could not be created, reason: {error}') -def get_enabled_loras(loras: list) -> list: - return [[lora[1], lora[2]] for lora in loras if lora[0]] +def get_enabled_loras(loras: list, remove_none=True) -> list: + return [(lora[1], lora[2]) for lora in loras if lora[0] and (lora[1] != 'None' if remove_none else True)] + + +def parse_lora_references_from_prompt(prompt: str, loras: List[Tuple[AnyStr, float]], loras_limit: int = 5, + skip_file_check=False, prompt_cleanup=True, deduplicate_loras=True, + lora_filenames=None) -> tuple[List[Tuple[AnyStr, float]], str]: + if lora_filenames is None: + lora_filenames = [] + + found_loras = [] + prompt_without_loras = '' + cleaned_prompt = '' + + for token in prompt.split(','): + matches = LORAS_PROMPT_PATTERN.findall(token) + + if len(matches) == 0: + prompt_without_loras += token + ', ' + continue + for match in matches: + lora_name = match[1] + '.safetensors' + if not skip_file_check: + lora_name = get_filname_by_stem(match[1], lora_filenames) + if lora_name is not None: + found_loras.append((lora_name, float(match[2]))) + token = token.replace(match[0], '') + prompt_without_loras += token + ', ' + + if prompt_without_loras != '': + cleaned_prompt = prompt_without_loras[:-2] + + if prompt_cleanup: + cleaned_prompt = cleanup_prompt(prompt_without_loras) + + new_loras = [] + lora_names = [lora[0] for lora in loras] + for found_lora in found_loras: + if deduplicate_loras and (found_lora[0] in lora_names or found_lora in new_loras): + continue + new_loras.append(found_lora) + + if len(new_loras) == 0: + return loras, cleaned_prompt + + updated_loras = [] + for lora in loras + new_loras: 
+ if lora[0] != "None": + updated_loras.append(lora) + + return updated_loras[:loras_limit], cleaned_prompt + + +def remove_performance_lora(filenames: list, performance: Performance | None): + loras_without_performance = filenames.copy() + + if performance is None: + return loras_without_performance + + performance_lora = performance.lora_filename() + + for filename in filenames: + path = Path(filename) + if performance_lora == path.name: + loras_without_performance.remove(filename) + + return loras_without_performance + + +def cleanup_prompt(prompt): + prompt = re.sub(' +', ' ', prompt) + prompt = re.sub(',+', ',', prompt) + cleaned_prompt = '' + for token in prompt.split(','): + token = token.strip() + if token == '': + continue + cleaned_prompt += token + ', ' + return cleaned_prompt[:-2] + + +def apply_wildcards(wildcard_text, rng, i, read_wildcards_in_order) -> str: + for _ in range(modules.config.wildcards_max_bfs_depth): + placeholders = re.findall(r'__([\w-]+)__', wildcard_text) + if len(placeholders) == 0: + return wildcard_text + + print(f'[Wildcards] processing: {wildcard_text}') + for placeholder in placeholders: + try: + matches = [x for x in modules.config.wildcard_filenames if os.path.splitext(os.path.basename(x))[0] == placeholder] + words = open(os.path.join(modules.config.path_wildcards, matches[0]), encoding='utf-8').read().splitlines() + words = [x for x in words if x != ''] + assert len(words) > 0 + if read_wildcards_in_order: + wildcard_text = wildcard_text.replace(f'__{placeholder}__', words[i % len(words)], 1) + else: + wildcard_text = wildcard_text.replace(f'__{placeholder}__', rng.choice(words), 1) + except: + print(f'[Wildcards] Warning: {placeholder}.txt missing or empty. ' + f'Using "{placeholder}" as a normal word.') + wildcard_text = wildcard_text.replace(f'__{placeholder}__', placeholder) + print(f'[Wildcards] {wildcard_text}') + + print(f'[Wildcards] BFS stack overflow. 
Current text: {wildcard_text}') + return wildcard_text + + +def get_image_size_info(image: np.ndarray, aspect_ratios: list) -> str: + try: + image = Image.fromarray(np.uint8(image)) + width, height = image.size + ratio = round(width / height, 2) + gcd = math.gcd(width, height) + lcm_ratio = f'{width // gcd}:{height // gcd}' + size_info = f'Image Size: {width} x {height}, Ratio: {ratio}, {lcm_ratio}' + + closest_ratio = min(aspect_ratios, key=lambda x: abs(ratio - float(x.split('*')[0]) / float(x.split('*')[1]))) + recommended_width, recommended_height = map(int, closest_ratio.split('*')) + recommended_ratio = round(recommended_width / recommended_height, 2) + recommended_gcd = math.gcd(recommended_width, recommended_height) + recommended_lcm_ratio = f'{recommended_width // recommended_gcd}:{recommended_height // recommended_gcd}' + + size_info = f'{width} x {height}, {ratio}, {lcm_ratio}' + size_info += f'\n{recommended_width} x {recommended_height}, {recommended_ratio}, {recommended_lcm_ratio}' + + return size_info + except Exception as e: + return f'Error reading image: {e}' diff --git a/presets/.gitignore b/presets/.gitignore index 481930c5..27e74136 100644 --- a/presets/.gitignore +++ b/presets/.gitignore @@ -2,5 +2,6 @@ !anime.json !default.json !lcm.json +!playground_v2.5.json !realistic.json !sai.json \ No newline at end of file diff --git a/presets/playground_v2.5.json b/presets/playground_v2.5.json new file mode 100644 index 00000000..5bc6059e --- /dev/null +++ b/presets/playground_v2.5.json @@ -0,0 +1,49 @@ +{ + "default_model": "playground-v2.5-1024px-aesthetic.fp16.safetensors", + "default_refiner": "None", + "default_refiner_switch": 0.5, + "default_loras": [ + [ + true, + "None", + 1.0 + ], + [ + true, + "None", + 1.0 + ], + [ + true, + "None", + 1.0 + ], + [ + true, + "None", + 1.0 + ], + [ + true, + "None", + 1.0 + ] + ], + "default_cfg_scale": 2.0, + "default_sample_sharpness": 2.0, + "default_sampler": "dpmpp_2m", + "default_scheduler": 
"edm_playground_v2.5", + "default_performance": "Speed", + "default_prompt": "", + "default_prompt_negative": "", + "default_styles": [ + "Fooocus V2" + ], + "default_aspect_ratio": "1024*1024", + "checkpoint_downloads": { + "playground-v2.5-1024px-aesthetic.fp16.safetensors": "https://huggingface.co/mashb1t/fav_models/resolve/main/fav/playground-v2.5-1024px-aesthetic.fp16.safetensors" + }, + "embeddings_downloads": {}, + "lora_downloads": {}, + "previous_default_models": [] +} \ No newline at end of file diff --git a/readme.md b/readme.md index 5f66e02a..e79b72aa 100644 --- a/readme.md +++ b/readme.md @@ -368,26 +368,38 @@ A safer way is just to try "run_anime.bat" or "run_realistic.bat" - they should entry_with_update.py [-h] [--listen [IP]] [--port PORT] [--disable-header-check [ORIGIN]] [--web-upload-size WEB_UPLOAD_SIZE] + [--hf-mirror HF_MIRROR] [--external-working-path PATH [PATH ...]] - [--output-path OUTPUT_PATH] [--temp-path TEMP_PATH] + [--output-path OUTPUT_PATH] + [--temp-path TEMP_PATH] [--cache-path CACHE_PATH] [--in-browser] - [--disable-in-browser] [--gpu-device-id DEVICE_ID] + [--disable-in-browser] + [--gpu-device-id DEVICE_ID] [--async-cuda-allocation | --disable-async-cuda-allocation] - [--disable-attention-upcast] [--all-in-fp32 | --all-in-fp16] + [--disable-attention-upcast] + [--all-in-fp32 | --all-in-fp16] [--unet-in-bf16 | --unet-in-fp16 | --unet-in-fp8-e4m3fn | --unet-in-fp8-e5m2] - [--vae-in-fp16 | --vae-in-fp32 | --vae-in-bf16] + [--vae-in-fp16 | --vae-in-fp32 | --vae-in-bf16] + [--vae-in-cpu] [--clip-in-fp8-e4m3fn | --clip-in-fp8-e5m2 | --clip-in-fp16 | --clip-in-fp32] - [--directml [DIRECTML_DEVICE]] [--disable-ipex-hijack] + [--directml [DIRECTML_DEVICE]] + [--disable-ipex-hijack] [--preview-option [none,auto,fast,taesd]] [--attention-split | --attention-quad | --attention-pytorch] [--disable-xformers] - [--always-gpu | --always-high-vram | --always-normal-vram | - --always-low-vram | --always-no-vram | --always-cpu 
import sys
import pathlib

# Extend the import path with a directory derived from this file's location.
# The entry must be a plain string: the import system ignores sys.path items
# of any other type, so appending the Path object itself would have no effect.
sys.path.append(str(pathlib.Path(f'{__file__}/../modules').parent.resolve()))
test_try_eval_env_var(self): + test_cases = [ + { + "input": ("foo", str), + "output": "foo" + }, + { + "input": ("1", int), + "output": 1 + }, + { + "input": ("1.0", float), + "output": 1.0 + }, + { + "input": ("1", numbers.Number), + "output": 1 + }, + { + "input": ("1.0", numbers.Number), + "output": 1.0 + }, + { + "input": ("true", bool), + "output": True + }, + { + "input": ("True", bool), + "output": True + }, + { + "input": ("false", bool), + "output": False + }, + { + "input": ("False", bool), + "output": False + }, + { + "input": ("True", str), + "output": "True" + }, + { + "input": ("False", str), + "output": "False" + }, + { + "input": ("['a', 'b', 'c']", list), + "output": ['a', 'b', 'c'] + }, + { + "input": ("{'a':1}", dict), + "output": {'a': 1} + }, + { + "input": ("('foo', 1)", tuple), + "output": ('foo', 1) + } + ] + + for test in test_cases: + value, expected_type = test["input"] + expected = test["output"] + actual = extra_utils.try_eval_env_var(value, expected_type) + self.assertEqual(expected, actual) diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..c1f49c13 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,137 @@ +import os +import unittest + +import modules.flags +from modules import util + + +class TestUtils(unittest.TestCase): + def test_can_parse_tokens_with_lora(self): + test_cases = [ + { + "input": ("some prompt, very cool, , cool ", [], 5, True), + "output": ( + [('hey-lora.safetensors', 0.4), ('you-lora.safetensors', 0.2)], 'some prompt, very cool, cool'), + }, + # Test can not exceed limit + { + "input": ("some prompt, very cool, , cool ", [], 1, True), + "output": ( + [('hey-lora.safetensors', 0.4)], + 'some prompt, very cool, cool' + ), + }, + # test Loras from UI take precedence over prompt + { + "input": ( + "some prompt, very cool, , , , , , ", + [("hey-lora.safetensors", 0.4)], + 5, + True + ), + "output": ( + [ + ('hey-lora.safetensors', 0.4), + ('l1.safetensors', 0.4), + 
('l2.safetensors', -0.2), + ('l3.safetensors', 0.3), + ('l4.safetensors', 0.5) + ], + 'some prompt, very cool' + ) + }, + # test correct matching even if there is no space separating loras in the same token + { + "input": ("some prompt, very cool, ", [], 3, True), + "output": ( + [ + ('hey-lora.safetensors', 0.4), + ('you-lora.safetensors', 0.2) + ], + 'some prompt, very cool' + ), + }, + # test deduplication, also selected loras are never overridden with loras in prompt + { + "input": ( + "some prompt, very cool, ", + [('you-lora.safetensors', 0.3)], + 3, + True + ), + "output": ( + [ + ('you-lora.safetensors', 0.3), + ('hey-lora.safetensors', 0.4) + ], + 'some prompt, very cool' + ), + }, + { + "input": (", , , and ", [], 6, True), + "output": ( + [], + ', , , and ' + ) + } + ] + + for test in test_cases: + prompt, loras, loras_limit, skip_file_check = test["input"] + expected = test["output"] + actual = util.parse_lora_references_from_prompt(prompt, loras, loras_limit=loras_limit, + skip_file_check=skip_file_check) + self.assertEqual(expected, actual) + + def test_can_parse_tokens_and_strip_performance_lora(self): + lora_filenames = [ + 'hey-lora.safetensors', + modules.flags.PerformanceLoRA.EXTREME_SPEED.value, + modules.flags.PerformanceLoRA.LIGHTNING.value, + os.path.join('subfolder', modules.flags.PerformanceLoRA.HYPER_SD.value) + ] + + test_cases = [ + { + "input": ("some prompt, ", [], 5, True, modules.flags.Performance.QUALITY), + "output": ( + [('hey-lora.safetensors', 0.4)], + 'some prompt' + ), + }, + { + "input": ("some prompt, ", [], 5, True, modules.flags.Performance.SPEED), + "output": ( + [('hey-lora.safetensors', 0.4)], + 'some prompt' + ), + }, + { + "input": ("some prompt, , ", [], 5, True, modules.flags.Performance.EXTREME_SPEED), + "output": ( + [('hey-lora.safetensors', 0.4)], + 'some prompt' + ), + }, + { + "input": ("some prompt, , ", [], 5, True, modules.flags.Performance.LIGHTNING), + "output": ( + [('hey-lora.safetensors', 0.4)], + 
'some prompt' + ), + }, + { + "input": ("some prompt, , ", [], 5, True, modules.flags.Performance.HYPER_SD), + "output": ( + [('hey-lora.safetensors', 0.4)], + 'some prompt' + ), + } + ] + + for test in test_cases: + prompt, loras, loras_limit, skip_file_check, performance = test["input"] + lora_filenames = modules.util.remove_performance_lora(lora_filenames, performance) + expected = test["output"] + actual = util.parse_lora_references_from_prompt(prompt, loras, loras_limit=loras_limit, lora_filenames=lora_filenames) + self.assertEqual(expected, actual) diff --git a/update_log.md b/update_log.md index 62c4882b..8aa43647 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,41 @@ +# [2.4.3](https://github.com/lllyasviel/Fooocus/releases/tag/v2.4.3) + +* Fix alphas_cumprod setter for TCD sampler +* Add parser for env var strings to expected config value types to allow override of all non-path config keys + +# [2.4.2](https://github.com/lllyasviel/Fooocus/releases/tag/v2.4.2) + +* Fix some small bugs (tcd scheduler when gamma is 0, chown in Dockerfile, update cmd args in readme, translation for aspect ratios, vae default after file reload) +* Fix performance LoRA replacement when data is loaded from history log and inline prompt +* Add support and preset for playground v2.5 (only works with performance Quality or Speed, use with scheduler edm_playground_v2) +* Make textboxes (incl. positive prompt) resizable +* Hide intermediate images when performance of Gradio would bottleneck the generation process (Extreme Speed, Lightning, Hyper-SD) + +# [2.4.1](https://github.com/lllyasviel/Fooocus/releases/tag/v2.4.1) + +* Fix some small bugs (e.g. adjust clip skip default value from 1 to 2, add type check to aspect ratios js update function) +* Add automated docker build on push to main, tagged with `edge`. See [available docker images](https://github.com/lllyasviel/Fooocus/pkgs/container/fooocus). 
+ +# [2.4.0](https://github.com/lllyasviel/Fooocus/releases/tag/v2.4.0) + +* Change settings tab elements to be more compact +* Add clip skip slider +* Add select for custom VAE +* Add new style "Random Style" +* Update default anime model to animaPencilXL_v310 +* Add button to reconnect the UI after Fooocus crashed without having to configure everything again (no page reload required) +* Add performance "hyper-sd" (based on [Hyper-SDXL 4 step LoRA](https://huggingface.co/ByteDance/Hyper-SD/blob/main/Hyper-SDXL-4steps-lora.safetensors)) +* Add [AlignYourSteps](https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/) scheduler by Nvidia, see <https://research.nvidia.com/labs/toronto-ai/AlignYourSteps/> +* Add [TCD](https://github.com/jabir-zheng/TCD) sampler and scheduler (based on sgm_uniform) +* Add NSFW image censoring (disables intermediate image preview while generating). Set config value `default_black_out_nsfw` to True to always enable. +* Add argument `--enable-describe-uov-image` to automatically describe uploaded images for upscaling +* Add inline LoRA prompt references with subfolder support, example prompt: `colorful bird <lora:toucan:1.2>` +* Add size and aspect ratio recommendation on image describe +* Add inpaint brush color picker, helpful when image and mask brush have the same color +* Add automated Docker image build using GitHub Actions on each release. 
+* Add full raw prompts to history logs +* Change code ownership from @lllyasviel to @mashb1t for automated issue / MR notification + # [2.3.1](https://github.com/lllyasviel/Fooocus/releases/tag/2.3.1) * Remove positive prompt from anime prefix to not reset prompt after switching presets diff --git a/webui.py b/webui.py index 98780bff..a0982cae 100644 --- a/webui.py +++ b/webui.py @@ -112,10 +112,10 @@ with shared.gradio_root: gallery = gr.Gallery(label='Gallery', show_label=False, object_fit='contain', visible=True, height=768, elem_classes=['resizable_area', 'main_view', 'final_gallery', 'image_gallery'], elem_id='final_gallery') - with gr.Row(elem_classes='type_row'): + with gr.Row(): with gr.Column(scale=17): prompt = gr.Textbox(show_label=False, placeholder="Type prompt here or paste parameters.", elem_id='positive_prompt', - container=False, autofocus=True, elem_classes='type_row', lines=1024) + autofocus=True, lines=3) default_prompt = modules.config.default_prompt if isinstance(default_prompt, str) and default_prompt != '': @@ -123,8 +123,9 @@ with shared.gradio_root: with gr.Column(scale=3, min_width=0): generate_button = gr.Button(label="Generate", value="Generate", elem_classes='type_row', elem_id='generate_button', visible=True) + reset_button = gr.Button(label="Reconnect", value="Reconnect", elem_classes='type_row', elem_id='reset_button', visible=False) load_parameter_button = gr.Button(label="Load Parameters", value="Load Parameters", elem_classes='type_row', elem_id='load_parameter_button', visible=False) - skip_button = gr.Button(label="Skip", value="Skip", elem_classes='type_row_half', visible=False) + skip_button = gr.Button(label="Skip", value="Skip", elem_classes='type_row_half', elem_id='skip_button', visible=False) stop_button = gr.Button(label="Stop", value="Stop", elem_classes='type_row_half', elem_id='stop_button', visible=False) def stop_clicked(currentTask): @@ -151,7 +152,7 @@ with shared.gradio_root: with gr.TabItem(label='Upscale or 
def trigger_show_image_properties(image):
    """Return an update that shows the size summary for the given image.

    The text comes from ``modules.util.get_image_size_info`` evaluated against
    ``modules.flags.sdxl_aspect_ratios``; the target component is made visible.
    """
    return gr.update(
        value=modules.util.get_image_size_info(image, modules.flags.sdxl_aspect_ratios),
        visible=True,
    )
inputs=desc_input_image, + outputs=desc_image_size, show_progress=False, queue=False) + + with gr.TabItem(label='Metadata') as metadata_tab: with gr.Column(): - metadata_input_image = grh.Image(label='Drag any image generated by Fooocus here', source='upload', type='filepath') + metadata_input_image = grh.Image(label='For images created by Fooocus', source='upload', type='filepath') metadata_json = gr.JSON(label='Metadata') metadata_import_button = gr.Button(value='Apply Metadata') @@ -254,25 +264,34 @@ with shared.gradio_root: inpaint_tab.select(lambda: 'inpaint', outputs=current_tab, queue=False, _js=down_js, show_progress=False) ip_tab.select(lambda: 'ip', outputs=current_tab, queue=False, _js=down_js, show_progress=False) desc_tab.select(lambda: 'desc', outputs=current_tab, queue=False, _js=down_js, show_progress=False) + metadata_tab.select(lambda: 'metadata', outputs=current_tab, queue=False, _js=down_js, show_progress=False) with gr.Column(scale=1, visible=modules.config.default_advanced_checkbox) as advanced_column: with gr.Tab(label='Setting'): if not args_manager.args.disable_preset_selection: - preset_selection = gr.Radio(label='Preset', - choices=modules.config.available_presets, - value=args_manager.args.preset if args_manager.args.preset else "initial", - interactive=True) + preset_selection = gr.Dropdown(label='Preset', + choices=modules.config.available_presets, + value=args_manager.args.preset if args_manager.args.preset else "initial", + interactive=True) performance_selection = gr.Radio(label='Performance', choices=flags.Performance.list(), - value=modules.config.default_performance) - aspect_ratios_selection = gr.Radio(label='Aspect Ratios', choices=modules.config.available_aspect_ratios, - value=modules.config.default_aspect_ratio, info='width × height', - elem_classes='aspect_ratios') + value=modules.config.default_performance, + elem_classes=['performance_selection']) + with gr.Accordion(label='Aspect Ratios', open=False, 
elem_id='aspect_ratios_accordion') as aspect_ratios_accordion: + aspect_ratios_selection = gr.Radio(label='Aspect Ratios', show_label=False, + choices=modules.config.available_aspect_ratios_labels, + value=modules.config.default_aspect_ratio, + info='width × height', + elem_classes='aspect_ratios') + + aspect_ratios_selection.change(lambda x: None, inputs=aspect_ratios_selection, queue=False, show_progress=False, _js='(x)=>{refresh_aspect_ratios_label(x);}') + shared.gradio_root.load(lambda x: None, inputs=aspect_ratios_selection, queue=False, show_progress=False, _js='(x)=>{refresh_aspect_ratios_label(x);}') + image_number = gr.Slider(label='Image Number', minimum=1, maximum=modules.config.default_max_image_number, step=1, value=modules.config.default_image_number) output_format = gr.Radio(label='Output Format', - choices=flags.OutputFormat.list(), - value=modules.config.default_output_format) + choices=flags.OutputFormat.list(), + value=modules.config.default_output_format) negative_prompt = gr.Textbox(label='Negative Prompt', show_label=True, placeholder="Type prompt here.", info='Describing what you do not want to see.', lines=2, @@ -402,10 +421,15 @@ with shared.gradio_root: value=modules.config.default_cfg_tsnr, info='Enabling Fooocus\'s implementation of CFG mimicking for TSNR ' '(effective when real CFG > mimicked CFG).') + clip_skip = gr.Slider(label='CLIP Skip', minimum=1, maximum=flags.clip_skip_max, step=1, + value=modules.config.default_clip_skip, + info='Bypass CLIP layers to avoid overfitting (use 1 to not skip any layers, 2 is recommended).') sampler_name = gr.Dropdown(label='Sampler', choices=flags.sampler_list, value=modules.config.default_sampler) scheduler_name = gr.Dropdown(label='Scheduler', choices=flags.scheduler_list, value=modules.config.default_scheduler) + vae_name = gr.Dropdown(label='VAE', choices=[modules.flags.default_vae] + modules.config.vae_filenames, + value=modules.config.default_vae, show_label=True) generate_image_grid = 
gr.Checkbox(label='Generate Image Grid for Each Batch', info='(Experimental) This may cause performance problems on some computers and certain internet conditions.', @@ -433,17 +457,26 @@ with shared.gradio_root: overwrite_upscale_strength = gr.Slider(label='Forced Overwrite of Denoising Strength of "Upscale"', minimum=-1, maximum=1.0, step=0.001, value=-1, info='Set as negative number to disable. For developer debugging.') - disable_preview = gr.Checkbox(label='Disable Preview', value=False, + disable_preview = gr.Checkbox(label='Disable Preview', value=modules.config.default_black_out_nsfw, + interactive=not modules.config.default_black_out_nsfw, info='Disable preview during generation.') - disable_intermediate_results = gr.Checkbox(label='Disable Intermediate Results', - value=modules.config.default_performance == flags.Performance.EXTREME_SPEED.value, - interactive=modules.config.default_performance != flags.Performance.EXTREME_SPEED.value, + disable_intermediate_results = gr.Checkbox(label='Disable Intermediate Results', + value=flags.Performance.has_restricted_features(modules.config.default_performance), info='Disable intermediate results during generation, only show final gallery.') disable_seed_increment = gr.Checkbox(label='Disable seed increment', info='Disable automatic seed increment when image number is > 1.', value=False) read_wildcards_in_order = gr.Checkbox(label="Read wildcards in order", value=False) + black_out_nsfw = gr.Checkbox(label='Black Out NSFW', + value=modules.config.default_black_out_nsfw, + interactive=not modules.config.default_black_out_nsfw, + info='Use black image if NSFW is detected.') + + black_out_nsfw.change(lambda x: gr.update(value=x, interactive=not x), + inputs=black_out_nsfw, outputs=disable_preview, queue=False, + show_progress=False) + if not args_manager.args.disable_metadata: save_metadata_to_images = gr.Checkbox(label='Save Metadata to Images', value=modules.config.default_save_metadata_to_images, info='Adds 
parameters to generated images allowing manual regeneration.') @@ -502,13 +535,20 @@ with shared.gradio_root: inpaint_mask_upload_checkbox = gr.Checkbox(label='Enable Mask Upload', value=False) invert_mask_checkbox = gr.Checkbox(label='Invert Mask', value=False) + inpaint_mask_color = gr.ColorPicker(label='Inpaint brush color', value='#FFFFFF', elem_id='inpaint_brush_color') + inpaint_ctrls = [debugging_inpaint_preprocessor, inpaint_disable_initial_latent, inpaint_engine, inpaint_strength, inpaint_respective_field, inpaint_mask_upload_checkbox, invert_mask_checkbox, inpaint_erode_or_dilate] inpaint_mask_upload_checkbox.change(lambda x: gr.update(visible=x), - inputs=inpaint_mask_upload_checkbox, - outputs=inpaint_mask_image, queue=False, show_progress=False) + inputs=inpaint_mask_upload_checkbox, + outputs=inpaint_mask_image, queue=False, + show_progress=False) + + inpaint_mask_color.change(lambda x: gr.update(brush_color=x), inputs=inpaint_mask_color, + outputs=inpaint_input_image, + queue=False, show_progress=False) with gr.Tab(label='FreeU'): freeu_enabled = gr.Checkbox(label='Enabled', value=False) @@ -528,6 +568,7 @@ with shared.gradio_root: modules.config.update_files() results = [gr.update(choices=modules.config.model_filenames)] results += [gr.update(choices=['None'] + modules.config.model_filenames)] + results += [gr.update(choices=[flags.default_vae] + modules.config.vae_filenames)] if not args_manager.args.disable_preset_selection: results += [gr.update(choices=modules.config.available_presets)] for i in range(modules.config.default_max_lora_number): @@ -535,7 +576,7 @@ with shared.gradio_root: gr.update(choices=['None'] + modules.config.lora_filenames), gr.update()] return results - refresh_files_output = [base_model, refiner_model] + refresh_files_output = [base_model, refiner_model, vae_name] if not args_manager.args.disable_preset_selection: refresh_files_output += [preset_selection] refresh_files.click(refresh_files_clicked, [], refresh_files_output 
+ lora_ctrls, @@ -546,9 +587,9 @@ with shared.gradio_root: load_data_outputs = [advanced_checkbox, image_number, prompt, negative_prompt, style_selections, performance_selection, overwrite_step, overwrite_switch, aspect_ratios_selection, overwrite_width, overwrite_height, guidance_scale, sharpness, adm_scaler_positive, - adm_scaler_negative, adm_scaler_end, refiner_swap_method, adaptive_cfg, base_model, - refiner_model, refiner_switch, sampler_name, scheduler_name, seed_random, image_seed, - generate_button, load_parameter_button] + freeu_ctrls + lora_ctrls + adm_scaler_negative, adm_scaler_end, refiner_swap_method, adaptive_cfg, clip_skip, + base_model, refiner_model, refiner_switch, sampler_name, scheduler_name, vae_name, + seed_random, image_seed, generate_button, load_parameter_button] + freeu_ctrls + lora_ctrls if not args_manager.args.disable_preset_selection: def preset_selection_change(preset, is_generating): @@ -570,11 +611,11 @@ with shared.gradio_root: return modules.meta_parser.load_parameter_button_click(json.dumps(preset_prepared), is_generating) preset_selection.change(preset_selection_change, inputs=[preset_selection, state_is_generating], outputs=load_data_outputs, queue=False, show_progress=True) \ - .then(fn=style_sorter.sort_styles, inputs=style_selections, outputs=style_selections, queue=False, show_progress=False) \ + .then(fn=style_sorter.sort_styles, inputs=style_selections, outputs=style_selections, queue=False, show_progress=False) performance_selection.change(lambda x: [gr.update(interactive=not flags.Performance.has_restricted_features(x))] * 11 + [gr.update(visible=not flags.Performance.has_restricted_features(x))] * 1 + - [gr.update(interactive=not flags.Performance.has_restricted_features(x), value=flags.Performance.has_restricted_features(x))] * 1, + [gr.update(value=flags.Performance.has_restricted_features(x))] * 1, inputs=performance_selection, outputs=[ guidance_scale, sharpness, adm_scaler_end, adm_scaler_positive, @@ -632,9 
+673,9 @@ with shared.gradio_root: ctrls += [input_image_checkbox, current_tab] ctrls += [uov_method, uov_input_image] ctrls += [outpaint_selections, inpaint_input_image, inpaint_additional_prompt, inpaint_mask_image] - ctrls += [disable_preview, disable_intermediate_results, disable_seed_increment] - ctrls += [adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg] - ctrls += [sampler_name, scheduler_name] + ctrls += [disable_preview, disable_intermediate_results, disable_seed_increment, black_out_nsfw] + ctrls += [adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, clip_skip] + ctrls += [sampler_name, scheduler_name, vae_name] ctrls += [overwrite_step, overwrite_switch, overwrite_width, overwrite_height, overwrite_vary_strength] ctrls += [overwrite_upscale_strength, mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint] ctrls += [debugging_cn_preprocessor, skipping_cn_preprocessor, canny_low_threshold, canny_high_threshold] @@ -671,7 +712,7 @@ with shared.gradio_root: parsed_parameters = {} else: metadata_parser = modules.meta_parser.get_metadata_parser(metadata_scheme) - parsed_parameters = metadata_parser.parse_json(parameters) + parsed_parameters = metadata_parser.to_json(parameters) return modules.meta_parser.load_parameter_button_click(parsed_parameters, state_is_generating) @@ -688,6 +729,14 @@ with shared.gradio_root: .then(fn=update_history_link, outputs=history_link) \ .then(fn=lambda: None, _js='playNotification').then(fn=lambda: None, _js='refresh_grid_delayed') + reset_button.click(lambda: [worker.AsyncTask(args=[]), False, gr.update(visible=True, interactive=True)] + + [gr.update(visible=False)] * 6 + + [gr.update(visible=True, value=[])], + outputs=[currentTask, state_is_generating, generate_button, + reset_button, stop_button, skip_button, + progress_html, progress_window, progress_gallery, gallery], + queue=False) + for notification_file in ['notification.ogg', 'notification.mp3']: if 
def trigger_uov_describe(mode, img, prompt):
    """Describe an uploaded image only when no prompt has been typed yet.

    Returns the result of ``trigger_describe(mode, img)`` for an empty prompt;
    otherwise returns two no-op updates so both outputs stay unchanged.
    """
    if prompt != '':
        # keep prompt if not empty
        return gr.update(), gr.update()
    return trigger_describe(mode, img)