feat: move censor to extras, optimize safety checker file handling

2024-05-18 01:59:15 +02:00 · 2024-05-18 01:59:15 +02:00 · 7568b72d9b
parent 0f78f8d8cc
commit 7568b72d9b
7 changed files with 211 additions and 7 deletions
--- a/.gitignore
+++ b/.gitignore
@ -18,7 +18,6 @@ config.txt
 config_modification_tutorial.txt
 user_path_config.txt
 user_path_config-deprecated.txt
-/models/safety_checker_models
 /modules/*.png
 /repositories
 /fooocus_env
--- a/modules/censor.py
+++ b/modules/censor.py
@ -1,12 +1,16 @@
 # modified version of https://github.com/AUTOMATIC1111/stable-diffusion-webui-nsfw-censor/blob/master/scripts/censor.py
 import numpy as np
+import os

-from extras.diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
-from transformers import AutoFeatureExtractor
+from extras.safety_checker.models.safety_checker import StableDiffusionSafetyChecker
+from transformers import CLIPFeatureExtractor, CLIPConfig
 from PIL import Image
 import modules.config

-safety_model_id = "CompVis/stable-diffusion-safety-checker"
+safety_checker_repo_root = os.path.join(os.path.dirname(__file__), 'safety_checker')
+config_path = os.path.join(safety_checker_repo_root, "configs", "config.json")
+preprocessor_config_path = os.path.join(safety_checker_repo_root, "configs", "preprocessor_config.json")
+
 safety_feature_extractor = None
 safety_checker = None

@ -23,8 +27,10 @@ def check_safety(x_image):
    global safety_feature_extractor, safety_checker

    if safety_feature_extractor is None or safety_checker is None:
-        safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id, cache_dir=modules.config.path_safety_checker_models)
-        safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id, cache_dir=modules.config.path_safety_checker_models)
+        safety_checker_model = modules.config.downloading_safety_checker_model()
+        safety_feature_extractor = CLIPFeatureExtractor.from_json_file(preprocessor_config_path)
+        clip_config = CLIPConfig.from_json_file(config_path)
+        safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_checker_model, config=clip_config)

    safety_checker_input = safety_feature_extractor(numpy_to_pil(x_image), return_tensors="pt")
    x_checked_image, has_nsfw_concept = safety_checker(images=x_image, clip_input=safety_checker_input.pixel_values)
--- a/extras/safety_checker/configs/config.json
+++ b/extras/safety_checker/configs/config.json
@ -0,0 +1,171 @@
+{
+  "_name_or_path": "clip-vit-large-patch14/",
+  "architectures": [
+    "SafetyChecker"
+  ],
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 2.6592,
+  "model_type": "clip",
+  "projection_dim": 768,
+  "text_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 77,
+    "min_length": 0,
+    "model_type": "clip_text_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.21.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vocab_size": 49408
+  },
+  "text_config_dict": {
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12
+  },
+  "torch_dtype": "float32",
+  "transformers_version": null,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 14,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.21.0.dev0",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "vision_config_dict": {
+    "hidden_size": 1024,
+    "intermediate_size": 4096,
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "patch_size": 14
+  }
+}
--- a/extras/safety_checker/configs/preprocessor_config.json
+++ b/extras/safety_checker/configs/preprocessor_config.json
@ -0,0 +1,20 @@
+{
+  "crop_size": 224,
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "size": 224
+}
--- a/extras/diffusers/pipelines/stable_diffusion/safety_checker.py
+++ b/extras/diffusers/pipelines/stable_diffusion/safety_checker.py
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@ -43,7 +43,7 @@ def worker():
    import fooocus_version
    import args_manager

-    from modules.censor import censor_batch, censor_single
+    from extras.censor import censor_batch, censor_single
    from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion, apply_arrays
    from modules.private_logger import log
    from extras.expansion import safe_str
--- a/modules/config.py
+++ b/modules/config.py
@ -685,5 +685,13 @@ def downloading_upscale_model():
    )
    return os.path.join(path_upscale_models, 'fooocus_upscaler_s409985e5.bin')

+def downloading_safety_checker_model():
+    load_file_from_url(
+        url='https://huggingface.co/mashb1t/misc/resolve/main/stable-diffusion-safety-checker.bin',
+        model_dir=path_safety_checker_models,
+        file_name='stable-diffusion-safety-checker.bin'
+    )
+    return os.path.join(path_safety_checker_models, 'stable-diffusion-safety-checker.bin')
+

 update_files()