[Major Update] Fooocus 2.0.0 (#346)

lllyasviel 2023-09-11 23:10:45 -07:00 committed by GitHub
parent 25fed6a4fe
commit 47876aaf99
9 changed files with 191 additions and 96 deletions

View File: fooocus_version.py

@@ -1 +1 @@
-version = '1.0.67'
+version = '2.0.0'

View File: modules/async_worker.py

@@ -11,14 +11,15 @@ def worker():
     import time
     import shared
     import random
+    import copy
     import modules.default_pipeline as pipeline
     import modules.path
     import modules.patch
-    from modules.sdxl_styles import apply_style_negative, apply_style_positive, aspect_ratios
+    from modules.sdxl_styles import apply_style, aspect_ratios, fooocus_expansion
     from modules.private_logger import log
     from modules.expansion import safe_str
-    from modules.util import join_prompts
+    from modules.util import join_prompts, remove_empty_str
 
     try:
         async_gradio_app = shared.gradio_root
@@ -29,20 +30,42 @@ def worker():
     except Exception as e:
         print(e)
 
+    def progressbar(number, text):
+        outputs.append(['preview', (number, text, None)])
+
     def handler(task):
-        prompt, negative_prompt, style_selction, performance_selction, \
-            aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode, \
+        prompt, negative_prompt, style_selections, performance_selction, \
+            aspect_ratios_selction, image_number, image_seed, sharpness, \
             base_model_name, refiner_model_name, \
             l1, w1, l2, w2, l3, w3, l4, w4, l5, w5 = task
 
         loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)]
 
+        raw_style_selections = copy.deepcopy(style_selections)
+
+        if fooocus_expansion in style_selections:
+            use_expansion = True
+            style_selections.remove(fooocus_expansion)
+        else:
+            use_expansion = False
+
+        use_style = len(style_selections) > 0
+
         modules.patch.sharpness = sharpness
 
-        outputs.append(['preview', (1, 'Initializing ...', None)])
+        progressbar(1, 'Initializing ...')
 
-        prompt = safe_str(prompt)
-        negative_prompt = safe_str(negative_prompt)
+        raw_prompt = prompt
+        raw_negative_prompt = negative_prompt
+
+        prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
+        negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
+
+        prompt = prompts[0]
+        negative_prompt = negative_prompts[0]
+
+        extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
+        extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
 
         seed = image_seed
         max_seed = int(1024 * 1024 * 1024)
@@ -52,63 +75,74 @@ def worker():
             seed = - seed
         seed = seed % max_seed
 
-        outputs.append(['preview', (3, 'Load models ...', None)])
+        progressbar(3, 'Loading models ...')
 
         pipeline.refresh_base_model(base_model_name)
         pipeline.refresh_refiner_model(refiner_model_name)
         pipeline.refresh_loras(loras)
-        pipeline.clear_all_caches()
-
-        tasks = []
-        if raw_mode:
-            outputs.append(['preview', (5, 'Encoding negative text ...', None)])
-            n_txt = apply_style_negative(style_selction, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt)
-
-            outputs.append(['preview', (9, 'Encoding positive text ...', None)])
-            p_txt = apply_style_positive(style_selction, prompt)
-            p_cond = pipeline.process_prompt(p_txt)
-
-            for i in range(image_number):
-                tasks.append(dict(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    seed=seed + i,
-                    n_cond=n_cond,
-                    p_cond=p_cond,
-                    real_positive_prompt=p_txt,
-                    real_negative_prompt=n_txt
-                ))
-        else:
-            for i in range(image_number):
-                outputs.append(['preview', (5, f'Preparing positive text #{i + 1} ...', None)])
-                current_seed = seed + i
-                expansion_weight = 0.1
-                suffix = pipeline.expansion(prompt, current_seed)
-                suffix = f'({suffix}:{expansion_weight})'
-                print(f'[Prompt Expansion] New suffix: {suffix}')
-
-                p_txt = apply_style_positive(style_selction, prompt)
-                p_txt = safe_str(p_txt)
-                p_txt = join_prompts(p_txt, suffix)
-
-                tasks.append(dict(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    seed=current_seed,
-                    real_positive_prompt=p_txt,
-                ))
-
-            outputs.append(['preview', (9, 'Encoding negative text ...', None)])
-            n_txt = apply_style_negative(style_selction, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt)
-
-            for i, t in enumerate(tasks):
-                outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)])
-                t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt'])
-                t['real_negative_prompt'] = n_txt
-                t['n_cond'] = n_cond
+
+        progressbar(3, 'Processing prompts ...')
+
+        positive_basic_workloads = []
+        negative_basic_workloads = []
+
+        if use_style:
+            for s in style_selections:
+                p, n = apply_style(s, positive=prompt)
+                positive_basic_workloads.append(p)
+                negative_basic_workloads.append(n)
+        else:
+            positive_basic_workloads.append(prompt)
+
+        negative_basic_workloads.append(negative_prompt)  # Always use independent workload for negative.
+
+        positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
+        negative_basic_workloads = negative_basic_workloads + extra_negative_prompts
+
+        positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
+        negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)
+
+        positive_top_k = len(positive_basic_workloads)
+        negative_top_k = len(negative_basic_workloads)
+
+        tasks = [dict(
+            task_seed=seed + i,
+            positive=positive_basic_workloads,
+            negative=negative_basic_workloads,
+            expansion='',
+            c=[None, None],
+            uc=[None, None],
+        ) for i in range(image_number)]
+
+        if use_expansion:
+            for i, t in enumerate(tasks):
+                progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
+                expansion = pipeline.expansion(prompt, t['task_seed'])
+                print(f'[Prompt Expansion] New suffix: {expansion}')
+                t['expansion'] = expansion
+                t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)]  # Deep copy.
+
+        for i, t in enumerate(tasks):
+            progressbar(7, f'Encoding base positive #{i + 1} ...')
+            t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'],
+                                             pool_top_k=positive_top_k)
+
+        for i, t in enumerate(tasks):
+            progressbar(9, f'Encoding base negative #{i + 1} ...')
+            t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'],
+                                              pool_top_k=negative_top_k)
+
+        if pipeline.xl_refiner is not None:
+            for i, t in enumerate(tasks):
+                progressbar(11, f'Encoding refiner positive #{i + 1} ...')
+                t['c'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['positive'],
+                                                 pool_top_k=positive_top_k)
+
+            for i, t in enumerate(tasks):
+                progressbar(13, f'Encoding refiner negative #{i + 1} ...')
+                t['uc'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['negative'],
+                                                  pool_top_k=negative_top_k)
 
         if performance_selction == 'Speed':
             steps = 30
@@ -117,6 +151,7 @@ def worker():
             steps = 60
             switch = 40
 
+        pipeline.clear_all_caches()  # save memory
 
         width, height = aspect_ratios[aspect_ratios_selction]
 
         results = []
@@ -132,34 +167,32 @@
         outputs.append(['preview', (13, 'Starting tasks ...', None)])
         for current_task_id, task in enumerate(tasks):
             imgs = pipeline.process_diffusion(
-                positive_cond=task['p_cond'],
-                negative_cond=task['n_cond'],
+                positive_cond=task['c'],
+                negative_cond=task['uc'],
                 steps=steps,
                 switch=switch,
                 width=width,
                 height=height,
-                image_seed=task['seed'],
+                image_seed=task['task_seed'],
                 callback=callback)
 
             for x in imgs:
                 d = [
-                    ('Prompt', task['prompt']),
-                    ('Negative Prompt', task['negative_prompt']),
-                    ('Real Positive Prompt', task['real_positive_prompt']),
-                    ('Real Negative Prompt', task['real_negative_prompt']),
-                    ('Raw Mode', str(raw_mode)),
-                    ('Style', style_selction),
+                    ('Prompt', raw_prompt),
+                    ('Negative Prompt', raw_negative_prompt),
+                    ('Fooocus V2 Expansion', task['expansion']),
+                    ('Styles', str(raw_style_selections)),
                     ('Performance', performance_selction),
                     ('Resolution', str((width, height))),
                     ('Sharpness', sharpness),
                     ('Base Model', base_model_name),
                     ('Refiner Model', refiner_model_name),
-                    ('Seed', task['seed'])
+                    ('Seed', task['task_seed'])
                 ]
                 for n, w in loras:
                     if n != 'None':
                         d.append((f'LoRA [{n}] weight', w))
-                log(x, d)
+                log(x, d, single_line_number=3)
             results += imgs
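The rewritten handler above replaces the old raw-mode branch with a list of prompt "workloads": every selected style contributes one positive and one negative workload, extra prompt lines are appended, and the optional Fooocus V2 expansion adds one more positive workload per image seed. The following is a minimal, self-contained sketch of that flow; the style table, fake_expansion, and build_tasks below are illustrative stand-ins, not the repository modules.

import copy

styles = {
    'Default (Slightly Cinematic)': ('cinematic still {prompt} . emotional, harmonious', 'anime, cartoon'),
}

def apply_style(style, positive):
    p, n = styles[style]
    return p.replace('{prompt}', positive), n

def fake_expansion(prompt, seed):
    # Placeholder for pipeline.expansion (the Fooocus prompt-expansion model).
    return f'{prompt}, highly detailed, sharp focus (seed {seed})'

def build_tasks(prompt, negative_prompt, style_selections, use_expansion, image_number, seed):
    positive = []
    negative = []
    if style_selections:
        for s in style_selections:
            p, n = apply_style(s, positive=prompt)
            positive.append(p)
            negative.append(n)
    else:
        positive.append(prompt)
    negative.append(negative_prompt)  # the negative prompt always gets its own workload

    tasks = [dict(task_seed=seed + i, positive=positive, negative=negative, expansion='')
             for i in range(image_number)]
    if use_expansion:
        for t in tasks:
            t['expansion'] = fake_expansion(prompt, t['task_seed'])
            t['positive'] = copy.deepcopy(t['positive']) + [t['expansion']]  # deep copy per task
    return tasks

if __name__ == '__main__':
    for t in build_tasks('a cat', 'blurry', ['Default (Slightly Cinematic)'], True, 2, 1234):
        print(t['task_seed'], len(t['positive']), 'positive workloads')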

View File: modules/default_pipeline.py

@@ -106,13 +106,55 @@ refresh_loras([(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0.
 expansion = FooocusExpansion()
 
-def process_prompt(text):
-    base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text)
-    if xl_refiner is not None:
-        refiner_cond = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text)
-    else:
-        refiner_cond = None
-    return base_cond, refiner_cond
+def clip_encode_single(clip, text, verbose=False):
+    cached = clip.fcs_cond_cache.get(text, None)
+    if cached is not None:
+        if verbose:
+            print(f'[CLIP Cached] {text}')
+        return cached
+    tokens = clip.tokenize(text)
+    result = clip.encode_from_tokens(tokens, return_pooled=True)
+    clip.fcs_cond_cache[text] = result
+    if verbose:
+        print(f'[CLIP Encoded] {text}')
+    return result
+
+
+def clip_encode(sd, texts, pool_top_k=1):
+    if sd is None:
+        return None
+    if sd.clip is None:
+        return None
+    if not isinstance(texts, list):
+        return None
+    if len(texts) == 0:
+        return None
+
+    clip = sd.clip
+    cond_list = []
+    pooled_acc = 0
+
+    for i, text in enumerate(texts):
+        cond, pooled = clip_encode_single(clip, text)
+        cond_list.append(cond)
+        if i < pool_top_k:
+            pooled_acc += pooled
+
+    return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]]
+
+
+def clear_sd_cond_cache(sd):
+    if sd is None:
+        return None
+    if sd.clip is None:
+        return None
+    sd.clip.fcs_cond_cache = {}
+    return
+
+
+def clear_all_caches():
+    clear_sd_cond_cache(xl_base_patched)
+    clear_sd_cond_cache(xl_refiner)
+
 
 @torch.no_grad()
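clip_encode above concatenates the token embeddings of all prompt workloads along the token axis and sums the pooled outputs of only the first pool_top_k prompts. A stubbed sketch of that shape logic, with random tensors standing in for a real CLIP and purely illustrative dimensions:

import torch

def fake_encode_single(text, token_dim=77, width=2048):
    cond = torch.randn(1, token_dim, width)      # per-prompt token embeddings
    pooled = torch.randn(1, 1280)                # per-prompt pooled embedding
    return cond, pooled

def fake_clip_encode(texts, pool_top_k=1):
    cond_list, pooled_acc = [], 0
    for i, text in enumerate(texts):
        cond, pooled = fake_encode_single(text)
        cond_list.append(cond)
        if i < pool_top_k:
            pooled_acc = pooled_acc + pooled     # only the first pool_top_k prompts feed the pooled output
    return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]]

result = fake_clip_encode(['a cat', 'cinematic still a cat', 'extra prompt'], pool_top_k=2)
print(result[0][0].shape)   # torch.Size([1, 231, 2048]) -- three 77-token prompts concatenated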

View File: modules/expansion.py

@@ -5,7 +5,6 @@ from modules.path import fooocus_expansion_path
 fooocus_magic_split = [
     ', extremely',
-    ', trending',
     ', intricate,',
 ]
 
 dangrous_patterns = '[]【】()|:'

View File: modules/private_logger.py

@@ -5,7 +5,7 @@ from PIL import Image
 from modules.util import generate_temp_filename
 
-def log(img, dic):
+def log(img, dic, single_line_number=3):
     date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, extension='png')
     os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True)
     Image.fromarray(img).save(local_temp_filename)
@@ -22,10 +22,10 @@ def log(img, dic):
         f.write(f"<p>{only_name}</p>\n")
         i = 0
         for k, v in dic:
-            if i < 4:
+            if i < single_line_number:
                 f.write(f"<p>{k}: <b>{v}</b> </p>\n")
             else:
-                if i % 2 == 0:
+                if (i - single_line_number) % 2 == 0:
                     f.write(f"<p>{k}: <b>{v}</b>, ")
                 else:
                     f.write(f"{k}: <b>{v}</b></p>\n")
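The new single_line_number parameter changes the metadata layout in the HTML log: the first three entries each get their own line, and the remaining entries are paired two per line. A stand-alone illustration of that layout rule (not the logger itself):

def layout(dic, single_line_number=3):
    lines = []
    for i, (k, v) in enumerate(dic):
        if i < single_line_number:
            lines.append(f'{k}: {v}')
        elif (i - single_line_number) % 2 == 0:
            lines.append(f'{k}: {v}, ')          # first half of a pair
        else:
            lines[-1] += f'{k}: {v}'             # second half closes the line
    return lines

meta = [('Prompt', 'a cat'), ('Negative Prompt', 'blurry'), ('Fooocus V2 Expansion', '...'),
        ('Styles', "['Default (Slightly Cinematic)']"), ('Performance', 'Speed'),
        ('Resolution', '(1024, 1024)'), ('Sharpness', 2.0)]
print('\n'.join(layout(meta)))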

View File: modules/sdxl_styles.py

@@ -1,16 +1,14 @@
 from modules.util import join_prompts
 
+fooocus_expansion = "Fooocus V2"
+default_styles = ["Default (Slightly Cinematic)"]
+
 # https://github.com/twri/sdxl_prompt_styler/blob/main/sdxl_styles.json
 styles = [
     {
-        "name": "None",
-        "prompt": "{prompt}",
-        "negative_prompt": ""
-    },
-    {
-        "name": "cinematic-default",
+        "name": "Default (Slightly Cinematic)",
         "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
         "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"
     },
@@ -926,8 +924,20 @@ styles = [
     }
 ]
 
-styles = {k['name']: (k['prompt'], k['negative_prompt']) for k in styles}
-default_style = styles['None']
+
+def normalize_key(k):
+    k = k.replace('-', ' ')
+    words = k.split(' ')
+    words = [w[:1].upper() + w[1:].lower() for w in words]
+    k = ' '.join(words)
+    k = k.replace('3d', '3D')
+    k = k.replace('Sai', 'SAI')
+    k = k.replace('(s', '(S')
+    return k
+
+
+default_styles = [normalize_key(x) for x in default_styles]
+styles = {normalize_key(k['name']): (k['prompt'], k['negative_prompt']) for k in styles}
+
 style_keys = list(styles.keys())
 
 SD_XL_BASE_RATIOS = {
@@ -962,11 +972,6 @@ SD_XL_BASE_RATIOS = {
 aspect_ratios = {str(v[0]) + '×' + str(v[1]): v for k, v in SD_XL_BASE_RATIOS.items()}
 
-def apply_style_positive(style, txt):
-    p, n = styles.get(style, default_style)
-    return p.replace('{prompt}', txt)
-
-def apply_style_negative(style, txt):
-    p, n = styles.get(style, default_style)
-    return join_prompts(n, txt)
+
+def apply_style(style, positive):
+    p, n = styles[style]
+    return p.replace('{prompt}', positive), n
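normalize_key turns the old hyphenated style ids (for example 'cinematic-default' or 'sai-photographic') into title-cased display names, and apply_style now returns both the styled positive prompt and the style's own negative prompt instead of joining the latter into the user's negative prompt. A condensed illustration; the one-entry style table here is a placeholder:

def normalize_key(k):
    k = k.replace('-', ' ')
    words = [w[:1].upper() + w[1:].lower() for w in k.split(' ')]
    k = ' '.join(words)
    return k.replace('3d', '3D').replace('Sai', 'SAI').replace('(s', '(S')

styles = {'Default (Slightly Cinematic)': ('cinematic still {prompt} . emotional, harmonious', 'anime, cartoon')}

def apply_style(style, positive):
    p, n = styles[style]
    return p.replace('{prompt}', positive), n

print(normalize_key('sai-photographic'))                   # 'SAI Photographic'
print(apply_style('Default (Slightly Cinematic)', 'a cat'))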

View File: modules/util.py

@@ -3,6 +3,13 @@ import random
 import os
 
+
+def remove_empty_str(items, default=None):
+    items = [x for x in items if x != ""]
+    if len(items) == 0 and default is not None:
+        return [default]
+    return items
+
 
 def join_prompts(*args, **kwargs):
     prompts = [str(x) for x in args if str(x) != ""]
     if len(prompts) == 0:
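remove_empty_str supports the new multi-line prompt handling in async_worker: each non-empty line of the prompt box becomes its own workload, with the first line treated as the main prompt. A simplified example, with a plain strip() standing in for safe_str:

def remove_empty_str(items, default=None):
    items = [x for x in items if x != ""]
    if len(items) == 0 and default is not None:
        return [default]
    return items

raw = "a cat\n\nwearing a hat\n"
prompts = remove_empty_str([p.strip() for p in raw.split('\n')], default='')
print(prompts)          # ['a cat', 'wearing a hat']
print(prompts[0])       # main prompt
print(prompts[1:])      # extra per-line prompts appended as additional workloads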

View File: update_log.md

@@ -1,3 +1,13 @@
+### 2.0.0
+
+* V2 released.
+* Completely rewrote the text processing pipeline (higher image quality and better prompt understanding).
+* Support multi-style.
+* In 100 tests (prompts written by ChatGPT), V2 default results outperform V1 default results in 87 cases, as evaluated by two humans.
+* In 100 tests (prompts written by ChatGPT), V2 prompt understanding outperforms V1 prompt understanding in 81 cases, as evaluated by two humans, in both the default setting and multi/single-style mode.
+* Because the above numbers are above 80%, we view this as a major update and jump directly to 2.0.0.
+* Some other things are renamed.
+
 ### 1.0.67
 
 * Use dynamic weighting and lower weights for prompt expansion.

View File: webui.py

@@ -8,7 +8,7 @@ import fooocus_version
 import modules.html
 import modules.async_worker as worker
 
-from modules.sdxl_styles import style_keys, aspect_ratios
+from modules.sdxl_styles import style_keys, aspect_ratios, fooocus_expansion, default_styles
 
 
 def generate_clicked(*args):
@@ -76,11 +76,10 @@ with shared.gradio_root:
                     seed_random.change(random_checked, inputs=[seed_random], outputs=[image_seed])
                 with gr.Tab(label='Style'):
-                    raw_mode_check = gr.Checkbox(label='Raw Mode', value=False,
-                                                 info='Similar to Midjourney\'s \"raw\" mode.')
-                    style_selction = gr.Radio(show_label=True, container=True,
-                                              choices=style_keys, value='cinematic-default', label='Image Style',
-                                              info='Similar to Midjourney\'s \"--style\".')
+                    style_selections = gr.CheckboxGroup(show_label=False, container=False,
+                                                        choices=[fooocus_expansion] + style_keys,
+                                                        value=[fooocus_expansion] + default_styles,
+                                                        label='Image Style')
                 with gr.Tab(label='Advanced'):
                     with gr.Row():
                         base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True)
@@ -110,8 +109,8 @@ with shared.gradio_root:
     advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col)
 
     ctrls = [
-        prompt, negative_prompt, style_selction,
-        performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode_check
+        prompt, negative_prompt, style_selections,
+        performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness
     ]
     ctrls += [base_model, refiner_model] + lora_ctrls
 
     run_button.click(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\
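The Radio plus Raw Mode checkbox is replaced by a single multi-select CheckboxGroup whose value is a list of style names, with "Fooocus V2" acting as the switch for prompt expansion. A minimal Gradio sketch of such a control, assuming a local gradio install; the style names here are placeholders:

import gradio as gr

fooocus_expansion = "Fooocus V2"
style_keys = ["Default (Slightly Cinematic)", "SAI Photographic"]

with gr.Blocks() as demo:
    style_selections = gr.CheckboxGroup(choices=[fooocus_expansion] + style_keys,
                                        value=[fooocus_expansion, "Default (Slightly Cinematic)"],
                                        label='Image Style')
    out = gr.Textbox(label='Selected')
    style_selections.change(lambda xs: ', '.join(xs), style_selections, out)

# demo.launch()  # uncomment to try it locally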