From 7568b72d9b8dd285238bb0c680b84d6a80fe7932 Mon Sep 17 00:00:00 2001 From: Manuel Schmid Date: Sat, 18 May 2024 01:59:15 +0200 Subject: [PATCH] feat: move censor to extras, optimize safety checker file handling --- .gitignore | 1 - {modules => extras}/censor.py | 16 +- extras/safety_checker/configs/config.json | 171 ++++++++++++++++++ .../configs/preprocessor_config.json | 20 ++ .../models}/safety_checker.py | 0 modules/async_worker.py | 2 +- modules/config.py | 8 + 7 files changed, 211 insertions(+), 7 deletions(-) rename {modules => extras}/censor.py (65%) create mode 100644 extras/safety_checker/configs/config.json create mode 100644 extras/safety_checker/configs/preprocessor_config.json rename extras/{diffusers/pipelines/stable_diffusion => safety_checker/models}/safety_checker.py (100%) diff --git a/.gitignore b/.gitignore index e423ef81..85914986 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,6 @@ config.txt config_modification_tutorial.txt user_path_config.txt user_path_config-deprecated.txt -/models/safety_checker_models /modules/*.png /repositories /fooocus_env diff --git a/modules/censor.py b/extras/censor.py similarity index 65% rename from modules/censor.py rename to extras/censor.py index ca47693a..2047db24 100644 --- a/modules/censor.py +++ b/extras/censor.py @@ -1,12 +1,16 @@ # modified version of https://github.com/AUTOMATIC1111/stable-diffusion-webui-nsfw-censor/blob/master/scripts/censor.py import numpy as np +import os -from extras.diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker -from transformers import AutoFeatureExtractor +from extras.safety_checker.models.safety_checker import StableDiffusionSafetyChecker +from transformers import CLIPFeatureExtractor, CLIPConfig from PIL import Image import modules.config -safety_model_id = "CompVis/stable-diffusion-safety-checker" +safety_checker_repo_root = os.path.join(os.path.dirname(__file__), 'safety_checker') +config_path = os.path.join(safety_checker_repo_root, "configs", "config.json") +preprocessor_config_path = os.path.join(safety_checker_repo_root, "configs", "preprocessor_config.json") + safety_feature_extractor = None safety_checker = None @@ -23,8 +27,10 @@ def check_safety(x_image): global safety_feature_extractor, safety_checker if safety_feature_extractor is None or safety_checker is None: - safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id, cache_dir=modules.config.path_safety_checker_models) - safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id, cache_dir=modules.config.path_safety_checker_models) + safety_checker_model = modules.config.downloading_safety_checker_model() + safety_feature_extractor = CLIPFeatureExtractor.from_json_file(preprocessor_config_path) + clip_config = CLIPConfig.from_json_file(config_path) + safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_checker_model, config=clip_config) safety_checker_input = safety_feature_extractor(numpy_to_pil(x_image), return_tensors="pt") x_checked_image, has_nsfw_concept = safety_checker(images=x_image, clip_input=safety_checker_input.pixel_values) diff --git a/extras/safety_checker/configs/config.json b/extras/safety_checker/configs/config.json new file mode 100644 index 00000000..aa454d22 --- /dev/null +++ b/extras/safety_checker/configs/config.json @@ -0,0 +1,171 @@ +{ + "_name_or_path": "clip-vit-large-patch14/", + "architectures": [ + "SafetyChecker" + ], + "initializer_factor": 1.0, + "logit_scale_init_value": 2.6592, + "model_type": "clip", + "projection_dim": 768, + "text_config": { + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "bos_token_id": 0, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "dropout": 0.0, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": 2, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "quick_gelu", + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 3072, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "max_length": 20, + "max_position_embeddings": 77, + "min_length": 0, + "model_type": "clip_text_model", + "no_repeat_ngram_size": 0, + "num_attention_heads": 12, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 12, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": 1, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "task_specific_params": null, + "temperature": 1.0, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.21.0.dev0", + "typical_p": 1.0, + "use_bfloat16": false, + "vocab_size": 49408 + }, + "text_config_dict": { + "hidden_size": 768, + "intermediate_size": 3072, + "num_attention_heads": 12, + "num_hidden_layers": 12 + }, + "torch_dtype": "float32", + "transformers_version": null, + "vision_config": { + "_name_or_path": "", + "add_cross_attention": false, + "architectures": null, + "attention_dropout": 0.0, + "bad_words_ids": null, + "bos_token_id": null, + "chunk_size_feed_forward": 0, + "cross_attention_hidden_size": null, + "decoder_start_token_id": null, + "diversity_penalty": 0.0, + "do_sample": false, + "dropout": 0.0, + "early_stopping": false, + "encoder_no_repeat_ngram_size": 0, + "eos_token_id": null, + "exponential_decay_length_penalty": null, + "finetuning_task": null, + "forced_bos_token_id": null, + "forced_eos_token_id": null, + "hidden_act": "quick_gelu", + "hidden_size": 1024, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "image_size": 224, + "initializer_factor": 1.0, + "initializer_range": 0.02, + "intermediate_size": 4096, + "is_decoder": false, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "layer_norm_eps": 1e-05, + "length_penalty": 1.0, + "max_length": 20, + "min_length": 0, + "model_type": "clip_vision_model", + "no_repeat_ngram_size": 0, + "num_attention_heads": 16, + "num_beam_groups": 1, + "num_beams": 1, + "num_hidden_layers": 24, + "num_return_sequences": 1, + "output_attentions": false, + "output_hidden_states": false, + "output_scores": false, + "pad_token_id": null, + "patch_size": 14, + "prefix": null, + "problem_type": null, + "pruned_heads": {}, + "remove_invalid_values": false, + "repetition_penalty": 1.0, + "return_dict": true, + "return_dict_in_generate": false, + "sep_token_id": null, + "task_specific_params": null, + "temperature": 1.0, + "tie_encoder_decoder": false, + "tie_word_embeddings": true, + "tokenizer_class": null, + "top_k": 50, + "top_p": 1.0, + "torch_dtype": null, + "torchscript": false, + "transformers_version": "4.21.0.dev0", + "typical_p": 1.0, + "use_bfloat16": false + }, + "vision_config_dict": { + "hidden_size": 1024, + "intermediate_size": 4096, + "num_attention_heads": 16, + "num_hidden_layers": 24, + "patch_size": 14 + } +} diff --git a/extras/safety_checker/configs/preprocessor_config.json b/extras/safety_checker/configs/preprocessor_config.json new file mode 100644 index 00000000..5294955f --- /dev/null +++ b/extras/safety_checker/configs/preprocessor_config.json @@ -0,0 +1,20 @@ +{ + "crop_size": 224, + "do_center_crop": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_resize": true, + "feature_extractor_type": "CLIPFeatureExtractor", + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "resample": 3, + "size": 224 +} diff --git a/extras/diffusers/pipelines/stable_diffusion/safety_checker.py b/extras/safety_checker/models/safety_checker.py similarity index 100% rename from extras/diffusers/pipelines/stable_diffusion/safety_checker.py rename to extras/safety_checker/models/safety_checker.py diff --git a/modules/async_worker.py b/modules/async_worker.py index 0d95725c..fa10ff8a 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -43,7 +43,7 @@ def worker(): import fooocus_version import args_manager - from modules.censor import censor_batch, censor_single + from extras.censor import censor_batch, censor_single from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion, apply_arrays from modules.private_logger import log from extras.expansion import safe_str diff --git a/modules/config.py b/modules/config.py index 5a18e963..8b277242 100644 --- a/modules/config.py +++ b/modules/config.py @@ -685,5 +685,13 @@ def downloading_upscale_model(): ) return os.path.join(path_upscale_models, 'fooocus_upscaler_s409985e5.bin') +def downloading_safety_checker_model(): + load_file_from_url( + url='https://huggingface.co/mashb1t/misc/resolve/main/stable-diffusion-safety-checker.bin', + model_dir=path_safety_checker_models, + file_name='stable-diffusion-safety-checker.bin' + ) + return os.path.join(path_safety_checker_models, 'stable-diffusion-safety-checker.bin') + update_files()