diff --git a/fooocus_version.py b/fooocus_version.py index e6638147..37962c77 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '1.0.67' +version = '2.0.0' diff --git a/modules/async_worker.py b/modules/async_worker.py index 9fe4cd4f..57818a3e 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -11,14 +11,15 @@ def worker(): import time import shared import random + import copy import modules.default_pipeline as pipeline import modules.path import modules.patch - from modules.sdxl_styles import apply_style_negative, apply_style_positive, aspect_ratios + from modules.sdxl_styles import apply_style, aspect_ratios, fooocus_expansion from modules.private_logger import log from modules.expansion import safe_str - from modules.util import join_prompts + from modules.util import join_prompts, remove_empty_str try: async_gradio_app = shared.gradio_root @@ -29,20 +30,42 @@ def worker(): except Exception as e: print(e) + def progressbar(number, text): + outputs.append(['preview', (number, text, None)]) + def handler(task): - prompt, negative_prompt, style_selction, performance_selction, \ - aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode, \ + prompt, negative_prompt, style_selections, performance_selction, \ + aspect_ratios_selction, image_number, image_seed, sharpness, \ base_model_name, refiner_model_name, \ l1, w1, l2, w2, l3, w3, l4, w4, l5, w5 = task loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)] + raw_style_selections = copy.deepcopy(style_selections) + + if fooocus_expansion in style_selections: + use_expansion = True + style_selections.remove(fooocus_expansion) + else: + use_expansion = False + + use_style = len(style_selections) > 0 + modules.patch.sharpness = sharpness - outputs.append(['preview', (1, 'Initializing ...', None)]) + progressbar(1, 'Initializing ...') - prompt = safe_str(prompt) - negative_prompt = safe_str(negative_prompt) + raw_prompt = prompt + raw_negative_prompt = 
negative_prompt + + prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='') + negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='') + + prompt = prompts[0] + negative_prompt = negative_prompts[0] + + extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] + extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] seed = image_seed max_seed = int(1024 * 1024 * 1024) @@ -52,63 +75,74 @@ def worker(): seed = - seed seed = seed % max_seed - outputs.append(['preview', (3, 'Load models ...', None)]) + progressbar(3, 'Loading models ...') pipeline.refresh_base_model(base_model_name) pipeline.refresh_refiner_model(refiner_model_name) pipeline.refresh_loras(loras) + pipeline.clear_all_caches() - tasks = [] - if raw_mode: - outputs.append(['preview', (5, 'Encoding negative text ...', None)]) - n_txt = apply_style_negative(style_selction, negative_prompt) - n_cond = pipeline.process_prompt(n_txt) - outputs.append(['preview', (9, 'Encoding positive text ...', None)]) - p_txt = apply_style_positive(style_selction, prompt) - p_cond = pipeline.process_prompt(p_txt) + progressbar(3, 'Processing prompts ...') - for i in range(image_number): - tasks.append(dict( - prompt=prompt, - negative_prompt=negative_prompt, - seed=seed + i, - n_cond=n_cond, - p_cond=p_cond, - real_positive_prompt=p_txt, - real_negative_prompt=n_txt - )) + positive_basic_workloads = [] + negative_basic_workloads = [] + + if use_style: + for s in style_selections: + p, n = apply_style(s, positive=prompt) + positive_basic_workloads.append(p) + negative_basic_workloads.append(n) else: - for i in range(image_number): - outputs.append(['preview', (5, f'Preparing positive text #{i + 1} ...', None)]) - current_seed = seed + i + positive_basic_workloads.append(prompt) - expansion_weight = 0.1 + negative_basic_workloads.append(negative_prompt) # Always use independent workload for negative. 
- suffix = pipeline.expansion(prompt, current_seed) - suffix = f'({suffix}:{expansion_weight})' - print(f'[Prompt Expansion] New suffix: {suffix}') + positive_basic_workloads = positive_basic_workloads + extra_positive_prompts + negative_basic_workloads = negative_basic_workloads + extra_negative_prompts - p_txt = apply_style_positive(style_selction, prompt) - p_txt = safe_str(p_txt) + positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt) + negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt) - p_txt = join_prompts(p_txt, suffix) + positive_top_k = len(positive_basic_workloads) + negative_top_k = len(negative_basic_workloads) - tasks.append(dict( - prompt=prompt, - negative_prompt=negative_prompt, - seed=current_seed, - real_positive_prompt=p_txt, - )) + tasks = [dict( + task_seed=seed + i, + positive=positive_basic_workloads, + negative=negative_basic_workloads, + expansion='', + c=[None, None], + uc=[None, None], + ) for i in range(image_number)] - outputs.append(['preview', (9, 'Encoding negative text ...', None)]) - n_txt = apply_style_negative(style_selction, negative_prompt) - n_cond = pipeline.process_prompt(n_txt) + if use_expansion: + for i, t in enumerate(tasks): + progressbar(5, f'Preparing Fooocus text #{i + 1} ...') + expansion = pipeline.expansion(prompt, t['task_seed']) + print(f'[Prompt Expansion] New suffix: {expansion}') + t['expansion'] = expansion + t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)] # Deep copy. 
+ + for i, t in enumerate(tasks): + progressbar(7, f'Encoding base positive #{i + 1} ...') + t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'], + pool_top_k=positive_top_k) + + for i, t in enumerate(tasks): + progressbar(9, f'Encoding base negative #{i + 1} ...') + t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'], + pool_top_k=negative_top_k) + + if pipeline.xl_refiner is not None: + for i, t in enumerate(tasks): + progressbar(11, f'Encoding refiner positive #{i + 1} ...') + t['c'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['positive'], + pool_top_k=positive_top_k) for i, t in enumerate(tasks): - outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)]) - t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt']) - t['real_negative_prompt'] = n_txt - t['n_cond'] = n_cond + progressbar(13, f'Encoding refiner negative #{i + 1} ...') + t['uc'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['negative'], + pool_top_k=negative_top_k) if performance_selction == 'Speed': steps = 30 @@ -117,6 +151,7 @@ def worker(): steps = 60 switch = 40 + pipeline.clear_all_caches() # save memory width, height = aspect_ratios[aspect_ratios_selction] results = [] @@ -132,34 +167,32 @@ def worker(): outputs.append(['preview', (13, 'Starting tasks ...', None)]) for current_task_id, task in enumerate(tasks): imgs = pipeline.process_diffusion( - positive_cond=task['p_cond'], - negative_cond=task['n_cond'], + positive_cond=task['c'], + negative_cond=task['uc'], steps=steps, switch=switch, width=width, height=height, - image_seed=task['seed'], + image_seed=task['task_seed'], callback=callback) for x in imgs: d = [ - ('Prompt', task['prompt']), - ('Negative Prompt', task['negative_prompt']), - ('Real Positive Prompt', task['real_positive_prompt']), - ('Real Negative Prompt', task['real_negative_prompt']), - ('Raw Mode', str(raw_mode)), - ('Style', style_selction), + ('Prompt', 
raw_prompt), + ('Negative Prompt', raw_negative_prompt), + ('Fooocus V2 Expansion', task['expansion']), + ('Styles', str(raw_style_selections)), ('Performance', performance_selction), ('Resolution', str((width, height))), ('Sharpness', sharpness), ('Base Model', base_model_name), ('Refiner Model', refiner_model_name), - ('Seed', task['seed']) + ('Seed', task['task_seed']) ] for n, w in loras: if n != 'None': d.append((f'LoRA [{n}] weight', w)) - log(x, d) + log(x, d, single_line_number=3) results += imgs diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py index 22333215..52d81fe2 100644 --- a/modules/default_pipeline.py +++ b/modules/default_pipeline.py @@ -106,13 +106,55 @@ refresh_loras([(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0. expansion = FooocusExpansion() -def process_prompt(text): - base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text) - if xl_refiner is not None: - refiner_cond = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text) - else: - refiner_cond = None - return base_cond, refiner_cond +def clip_encode_single(clip, text, verbose=False): + cached = clip.fcs_cond_cache.get(text, None) + if cached is not None: + if verbose: + print(f'[CLIP Cached] {text}') + return cached + tokens = clip.tokenize(text) + result = clip.encode_from_tokens(tokens, return_pooled=True) + clip.fcs_cond_cache[text] = result + if verbose: + print(f'[CLIP Encoded] {text}') + return result + + +def clip_encode(sd, texts, pool_top_k=1): + if sd is None: + return None + if sd.clip is None: + return None + if not isinstance(texts, list): + return None + if len(texts) == 0: + return None + + clip = sd.clip + cond_list = [] + pooled_acc = 0 + + for i, text in enumerate(texts): + cond, pooled = clip_encode_single(clip, text) + cond_list.append(cond) + if i < pool_top_k: + pooled_acc += pooled + + return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]] + + +def clear_sd_cond_cache(sd): + if 
sd is None: + return None + if sd.clip is None: + return None + sd.clip.fcs_cond_cache = {} + return + + +def clear_all_caches(): + clear_sd_cond_cache(xl_base_patched) + clear_sd_cond_cache(xl_refiner) @torch.no_grad() diff --git a/modules/expansion.py b/modules/expansion.py index de361e31..e424994a 100644 --- a/modules/expansion.py +++ b/modules/expansion.py @@ -5,7 +5,6 @@ from modules.path import fooocus_expansion_path fooocus_magic_split = [ ', extremely', - ', trending', ', intricate,', ] dangrous_patterns = '[]【】()()|::' diff --git a/modules/private_logger.py b/modules/private_logger.py index 200d2fa0..25fc34b7 100644 --- a/modules/private_logger.py +++ b/modules/private_logger.py @@ -5,7 +5,7 @@ from PIL import Image from modules.util import generate_temp_filename -def log(img, dic): +def log(img, dic, single_line_number=3): date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, extension='png') os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True) Image.fromarray(img).save(local_temp_filename) @@ -22,10 +22,10 @@ def log(img, dic): f.write(f"
{only_name}
\n") i = 0 for k, v in dic: - if i < 4: + if i < single_line_number: f.write(f"{k}: {v}
\n") else: - if i % 2 == 0: + if (i - single_line_number) % 2 == 0: f.write(f"{k}: {v}, ") else: f.write(f"{k}: {v}
\n") diff --git a/modules/sdxl_styles.py b/modules/sdxl_styles.py index a2e439eb..614f9856 100644 --- a/modules/sdxl_styles.py +++ b/modules/sdxl_styles.py @@ -1,16 +1,14 @@ from modules.util import join_prompts +fooocus_expansion = "Fooocus V2" +default_styles = ["Default (Slightly Cinematic)"] + # https://github.com/twri/sdxl_prompt_styler/blob/main/sdxl_styles.json styles = [ { - "name": "None", - "prompt": "{prompt}", - "negative_prompt": "" - }, - { - "name": "cinematic-default", + "name": "Default (Slightly Cinematic)", "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured" }, @@ -926,8 +924,20 @@ styles = [ } ] -styles = {k['name']: (k['prompt'], k['negative_prompt']) for k in styles} -default_style = styles['None'] + +def normalize_key(k): + k = k.replace('-', ' ') + words = k.split(' ') + words = [w[:1].upper() + w[1:].lower() for w in words] + k = ' '.join(words) + k = k.replace('3d', '3D') + k = k.replace('Sai', 'SAI') + k = k.replace('(s', '(S') + return k + + +default_styles = [normalize_key(x) for x in default_styles] +styles = {normalize_key(k['name']): (k['prompt'], k['negative_prompt']) for k in styles} style_keys = list(styles.keys()) SD_XL_BASE_RATIOS = { @@ -962,11 +972,6 @@ SD_XL_BASE_RATIOS = { aspect_ratios = {str(v[0]) + '×' + str(v[1]): v for k, v in SD_XL_BASE_RATIOS.items()} -def apply_style_positive(style, txt): - p, n = styles.get(style, default_style) - return p.replace('{prompt}', txt) - - -def apply_style_negative(style, txt): - p, n = styles.get(style, default_style) - return join_prompts(n, txt) +def apply_style(style, positive): + p, n = styles[style] + return p.replace('{prompt}', positive), n diff --git a/modules/util.py b/modules/util.py index 9fd56667..a173cf21 100644 --- 
a/modules/util.py +++ b/modules/util.py @@ -3,6 +3,13 @@ import random import os +def remove_empty_str(items, default=None): + items = [x for x in items if x != ""] + if len(items) == 0 and default is not None: + return [default] + return items + + def join_prompts(*args, **kwargs): prompts = [str(x) for x in args if str(x) != ""] if len(prompts) == 0: diff --git a/update_log.md b/update_log.md index e171856d..1c96796f 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,13 @@ +### 2.0.0 + +* V2 released. +* completely rewrote the text processing pipeline (higher image quality and prompt understanding). +* support multi-style. +* In 100 tests (prompts written by ChatGPT), V2 default results outperform V1 default results in 87 cases, evaluated by two humans. +* In 100 tests (prompts written by ChatGPT), V2 prompt understanding outperforms V1 prompt understanding in 81 cases, evaluated by two humans, in both default setting and multi/single style mode. +* Because the above number is above 80%, we view this as a major update and directly jump to 2.0.0. +* Some other things are renamed. + ### 1.0.67 * Use dynamic weighting and lower weights for prompt expansion. 
diff --git a/webui.py b/webui.py index b7dd9b46..a4eb0424 100644 --- a/webui.py +++ b/webui.py @@ -8,7 +8,7 @@ import fooocus_version import modules.html import modules.async_worker as worker -from modules.sdxl_styles import style_keys, aspect_ratios +from modules.sdxl_styles import style_keys, aspect_ratios, fooocus_expansion, default_styles def generate_clicked(*args): @@ -76,11 +76,10 @@ with shared.gradio_root: seed_random.change(random_checked, inputs=[seed_random], outputs=[image_seed]) with gr.Tab(label='Style'): - raw_mode_check = gr.Checkbox(label='Raw Mode', value=False, - info='Similar to Midjourney\'s \"raw\" mode.') - style_selction = gr.Radio(show_label=True, container=True, - choices=style_keys, value='cinematic-default', label='Image Style', - info='Similar to Midjourney\'s \"--style\".') + style_selections = gr.CheckboxGroup(show_label=False, container=False, + choices=[fooocus_expansion] + style_keys, + value=[fooocus_expansion] + default_styles, + label='Image Style') with gr.Tab(label='Advanced'): with gr.Row(): base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True) @@ -110,8 +109,8 @@ with shared.gradio_root: advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col) ctrls = [ - prompt, negative_prompt, style_selction, - performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode_check + prompt, negative_prompt, style_selections, + performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness ] ctrls += [base_model, refiner_model] + lora_ctrls run_button.click(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\