diff --git a/language/en.json b/language/en.json
index 90eaf2ee..5819f4ee 100644
--- a/language/en.json
+++ b/language/en.json
@@ -320,6 +320,8 @@
   "vae": "vae",
   "CFG Mimicking from TSNR": "CFG Mimicking from TSNR",
   "Enabling Fooocus's implementation of CFG mimicking for TSNR (effective when real CFG > mimicked CFG).": "Enabling Fooocus's implementation of CFG mimicking for TSNR (effective when real CFG > mimicked CFG).",
+  "CLIP Skip": "CLIP Skip",
+  "Bypass CLIP layers to avoid overfitting (use 1 to not skip any layers, 2 is recommended).": "Bypass CLIP layers to avoid overfitting (use 1 to not skip any layers, 2 is recommended).",
   "Sampler": "Sampler",
   "dpmpp_2m_sde_gpu": "dpmpp_2m_sde_gpu",
   "Only effective in non-inpaint mode.": "Only effective in non-inpaint mode.",
diff --git a/modules/async_worker.py b/modules/async_worker.py
index 594886d2..d7d9b9fd 100644
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -174,6 +174,7 @@ def worker():
         adm_scaler_negative = args.pop()
         adm_scaler_end = args.pop()
         adaptive_cfg = args.pop()
+        clip_skip = args.pop()
         sampler_name = args.pop()
         scheduler_name = args.pop()
         vae_name = args.pop()
@@ -297,6 +298,7 @@ def worker():
             adm_scaler_end = 0.0
 
         print(f'[Parameters] Adaptive CFG = {adaptive_cfg}')
+        print(f'[Parameters] CLIP Skip = {clip_skip}')
         print(f'[Parameters] Sharpness = {sharpness}')
         print(f'[Parameters] ControlNet Softness = {controlnet_softness}')
         print(f'[Parameters] ADM Scale = '
@@ -466,6 +468,8 @@ def worker():
                                     loras=loras, base_model_additional_loras=base_model_additional_loras,
                                     use_synthetic_refiner=use_synthetic_refiner, vae_name=vae_name)
 
+        pipeline.set_clip_skip(clip_skip)
+
         progressbar(async_task, 3, 'Processing prompts ...')
         tasks = []
 
@@ -924,6 +928,8 @@ def worker():
             d.append(
                 ('CFG Mimicking from TSNR', 'adaptive_cfg',
                  modules.patch.patch_settings[pid].adaptive_cfg))
+            if clip_skip > 1:
+                d.append(('CLIP Skip', 'clip_skip', clip_skip))
             d.append(('Sampler', 'sampler', sampler_name))
             d.append(('Scheduler', 'scheduler', scheduler_name))
             d.append(('VAE', 'vae', vae_name))
diff --git a/modules/config.py b/modules/config.py
index 08ed99d7..cb651c5b 100644
--- a/modules/config.py
+++ b/modules/config.py
@@ -434,6 +434,11 @@ default_cfg_tsnr = get_config_item_or_set_default(
     default_value=7.0,
     validator=lambda x: isinstance(x, numbers.Number)
 )
+default_clip_skip = get_config_item_or_set_default(
+    key='default_clip_skip',
+    default_value=2,
+    validator=lambda x: isinstance(x, int) and 1 <= x <= modules.flags.clip_skip_max
+)
 default_overwrite_step = get_config_item_or_set_default(
     key='default_overwrite_step',
     default_value=-1,
@@ -488,6 +493,7 @@ possible_preset_keys = {
     "default_cfg_scale": "guidance_scale",
     "default_sample_sharpness": "sharpness",
     "default_cfg_tsnr": "adaptive_cfg",
+    "default_clip_skip": "clip_skip",
     "default_sampler": "sampler",
     "default_scheduler": "scheduler",
     "default_overwrite_step": "steps",
diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py
index 38f914c5..494644d6 100644
--- a/modules/default_pipeline.py
+++ b/modules/default_pipeline.py
@@ -201,6 +201,17 @@ def clip_encode(texts, pool_top_k=1):
     return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]]
 
 
+@torch.no_grad()
+@torch.inference_mode()
+def set_clip_skip(clip_skip: int):
+    global final_clip
+
+    if final_clip is None:
+        return
+
+    final_clip.clip_layer(-abs(clip_skip))
+    return
+
 @torch.no_grad()
 @torch.inference_mode()
 def clear_all_caches():
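A note on the `default_pipeline.py` hunk above: `final_clip.clip_layer(-abs(clip_skip))` relies on the convention of the ComfyUI CLIP wrapper that Fooocus builds on, where a negative index counts transformer layers back from the output. A minimal sketch of that mapping, assuming only that convention (the helper name is hypothetical, not part of this patch):

```python
# Sketch of the CLIP Skip convention assumed by set_clip_skip() above:
# a user-facing value of 1 keeps the last CLIP layer, 2 uses the
# second-to-last layer, and so on. The helper name is illustrative.

def clip_skip_to_layer_index(clip_skip: int) -> int:
    """Map a user-facing CLIP Skip value to a negative layer index."""
    return -abs(clip_skip)

assert clip_skip_to_layer_index(1) == -1   # no layers skipped
assert clip_skip_to_layer_index(2) == -2   # the commonly recommended value
assert clip_skip_to_layer_index(-2) == -2  # abs() tolerates an already-negative input
```
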
diff --git a/modules/flags.py b/modules/flags.py
index 89e1ea0f..e48052e1 100644
--- a/modules/flags.py
+++ b/modules/flags.py
@@ -54,6 +54,8 @@ SAMPLER_NAMES = KSAMPLER_NAMES + list(SAMPLER_EXTRA.keys())
 sampler_list = SAMPLER_NAMES
 scheduler_list = SCHEDULER_NAMES
 
+clip_skip_max = 12
+
 default_vae = 'Default (model)'
 
 refiner_swap_method = 'joint'
diff --git a/modules/meta_parser.py b/modules/meta_parser.py
index 4ce12435..586e62da 100644
--- a/modules/meta_parser.py
+++ b/modules/meta_parser.py
@@ -34,16 +34,17 @@ def load_parameter_button_click(raw_metadata: dict | str, is_generating: bool):
     get_list('styles', 'Styles', loaded_parameter_dict, results)
     get_str('performance', 'Performance', loaded_parameter_dict, results)
     get_steps('steps', 'Steps', loaded_parameter_dict, results)
-    get_float('overwrite_switch', 'Overwrite Switch', loaded_parameter_dict, results)
+    get_number('overwrite_switch', 'Overwrite Switch', loaded_parameter_dict, results)
     get_resolution('resolution', 'Resolution', loaded_parameter_dict, results)
-    get_float('guidance_scale', 'Guidance Scale', loaded_parameter_dict, results)
-    get_float('sharpness', 'Sharpness', loaded_parameter_dict, results)
+    get_number('guidance_scale', 'Guidance Scale', loaded_parameter_dict, results)
+    get_number('sharpness', 'Sharpness', loaded_parameter_dict, results)
     get_adm_guidance('adm_guidance', 'ADM Guidance', loaded_parameter_dict, results)
     get_str('refiner_swap_method', 'Refiner Swap Method', loaded_parameter_dict, results)
-    get_float('adaptive_cfg', 'CFG Mimicking from TSNR', loaded_parameter_dict, results)
+    get_number('adaptive_cfg', 'CFG Mimicking from TSNR', loaded_parameter_dict, results)
+    get_number('clip_skip', 'CLIP Skip', loaded_parameter_dict, results, cast_type=int)
     get_str('base_model', 'Base Model', loaded_parameter_dict, results)
     get_str('refiner_model', 'Refiner Model', loaded_parameter_dict, results)
-    get_float('refiner_switch', 'Refiner Switch', loaded_parameter_dict, results)
+    get_number('refiner_switch', 'Refiner Switch', loaded_parameter_dict, results)
     get_str('sampler', 'Sampler', loaded_parameter_dict, results)
     get_str('scheduler', 'Scheduler', loaded_parameter_dict, results)
     get_str('vae', 'VAE', loaded_parameter_dict, results)
@@ -83,11 +84,11 @@ def get_list(key: str, fallback: str | None, source_dict: dict, results: list, default=None):
         results.append(gr.update())
 
 
-def get_float(key: str, fallback: str | None, source_dict: dict, results: list, default=None):
+def get_number(key: str, fallback: str | None, source_dict: dict, results: list, default=None, cast_type=float):
     try:
         h = source_dict.get(key, source_dict.get(fallback, default))
         assert h is not None
-        h = float(h)
+        h = cast_type(h)
         results.append(h)
     except:
         results.append(gr.update())
@@ -314,6 +315,7 @@ class A1111MetadataParser(MetadataParser):
         'adm_guidance': 'ADM Guidance',
         'refiner_swap_method': 'Refiner Swap Method',
         'adaptive_cfg': 'Adaptive CFG',
+        'clip_skip': 'Clip skip',
         'overwrite_switch': 'Overwrite Switch',
         'freeu': 'FreeU',
         'base_model': 'Model',
@@ -458,7 +460,7 @@ class A1111MetadataParser(MetadataParser):
             self.fooocus_to_a1111['refiner_model_hash']: self.refiner_model_hash
         }
 
-        for key in ['adaptive_cfg', 'overwrite_switch', 'refiner_swap_method', 'freeu']:
+        for key in ['adaptive_cfg', 'clip_skip', 'overwrite_switch', 'refiner_swap_method', 'freeu']:
             if key in data:
                 generation_params[self.fooocus_to_a1111[key]] = data[key]
 
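The `get_float` → `get_number` generalization in the `meta_parser.py` hunks above is what lets CLIP Skip round-trip through metadata as an `int` while the existing parameters keep their `float` behavior. A self-contained sketch of the same pattern, with `gr.update()` replaced by `None` so the snippet runs without gradio:

```python
# Standalone sketch of the get_number() pattern introduced above.
# None stands in for gr.update() ("leave the widget unchanged").

def get_number(key, fallback, source_dict, results, default=None, cast_type=float):
    try:
        h = source_dict.get(key, source_dict.get(fallback, default))
        assert h is not None
        results.append(cast_type(h))
    except (AssertionError, TypeError, ValueError):
        results.append(None)

metadata = {'clip_skip': '2', 'guidance_scale': '7.0'}
results = []
get_number('clip_skip', 'CLIP Skip', metadata, results, cast_type=int)
get_number('guidance_scale', 'Guidance Scale', metadata, results)
get_number('sharpness', 'Sharpness', metadata, results)  # absent -> placeholder

assert results == [2, 7.0, None]
```
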
diff --git a/webui.py b/webui.py
index 090604a0..49f00aab 100644
--- a/webui.py
+++ b/webui.py
@@ -421,6 +421,9 @@ with shared.gradio_root:
                                          value=modules.config.default_cfg_tsnr,
                                          info='Enabling Fooocus\'s implementation of CFG mimicking for TSNR '
                                               '(effective when real CFG > mimicked CFG).')
+                clip_skip = gr.Slider(label='CLIP Skip', minimum=1, maximum=flags.clip_skip_max, step=1,
+                                      value=modules.config.default_clip_skip,
+                                      info='Bypass CLIP layers to avoid overfitting (use 1 to not skip any layers, 2 is recommended).')
                 sampler_name = gr.Dropdown(label='Sampler', choices=flags.sampler_list,
                                            value=modules.config.default_sampler)
                 scheduler_name = gr.Dropdown(label='Scheduler', choices=flags.scheduler_list,
@@ -585,9 +588,9 @@ with shared.gradio_root:
        load_data_outputs = [advanced_checkbox, image_number, prompt, negative_prompt, style_selections,
                             performance_selection, overwrite_step, overwrite_switch, aspect_ratios_selection,
                             overwrite_width, overwrite_height, guidance_scale, sharpness, adm_scaler_positive,
-                            adm_scaler_negative, adm_scaler_end, refiner_swap_method, adaptive_cfg, base_model,
-                            refiner_model, refiner_switch, sampler_name, scheduler_name, vae_name, seed_random,
-                            image_seed, generate_button, load_parameter_button] + freeu_ctrls + lora_ctrls
+                            adm_scaler_negative, adm_scaler_end, refiner_swap_method, adaptive_cfg, clip_skip,
+                            base_model, refiner_model, refiner_switch, sampler_name, scheduler_name, vae_name,
+                            seed_random, image_seed, generate_button, load_parameter_button] + freeu_ctrls + lora_ctrls
 
        if not args_manager.args.disable_preset_selection:
            def preset_selection_change(preset, is_generating):
@@ -672,7 +675,7 @@ with shared.gradio_root:
    ctrls += [uov_method, uov_input_image]
    ctrls += [outpaint_selections, inpaint_input_image, inpaint_additional_prompt, inpaint_mask_image]
    ctrls += [disable_preview, disable_intermediate_results, disable_seed_increment, black_out_nsfw]
-   ctrls += [adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg]
+   ctrls += [adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, clip_skip]
    ctrls += [sampler_name, scheduler_name, vae_name]
    ctrls += [overwrite_step, overwrite_switch, overwrite_width, overwrite_height, overwrite_vary_strength]
    ctrls += [overwrite_upscale_strength, mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint]
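End to end: the new slider value travels through `ctrls` to `args.pop()` in `async_worker.py`, is applied via `pipeline.set_clip_skip()` before prompt encoding, and is recorded in image metadata only when it differs from the no-op value of 1. A compressed sketch of that metadata gating (function name and values are illustrative):

```python
# Sketch of the clip_skip > 1 gating used in async_worker.py above:
# the default value of 1 skips nothing, so it is kept out of the log.

def build_metadata(clip_skip: int, sampler_name: str) -> list:
    d = []
    if clip_skip > 1:
        d.append(('CLIP Skip', 'clip_skip', clip_skip))
    d.append(('Sampler', 'sampler', sampler_name))
    return d

assert build_metadata(1, 'dpmpp_2m_sde_gpu') == [('Sampler', 'sampler', 'dpmpp_2m_sde_gpu')]
assert build_metadata(2, 'dpmpp_2m_sde_gpu')[0] == ('CLIP Skip', 'clip_skip', 2)
```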