[Major Update] Fooocus 2.0.0 (#346)

2023-09-11 23:10:45 -07:00 · 2023-09-11 23:10:45 -07:00 · 47876aaf99
parent 25fed6a4fe
commit 47876aaf99
9 changed files with 191 additions and 96 deletions
--- a/fooocus_version.py
+++ b/fooocus_version.py
@@ -1 +1 @@
-version = '1.0.67'
+version = '2.0.0'
--- a/modules/async_worker.py
+++ b/modules/async_worker.py
@@ -11,14 +11,15 @@ def worker():
    import time
    import shared
    import random
+    import copy
    import modules.default_pipeline as pipeline
    import modules.path
    import modules.patch

-    from modules.sdxl_styles import apply_style_negative, apply_style_positive, aspect_ratios
+    from modules.sdxl_styles import apply_style, aspect_ratios, fooocus_expansion
    from modules.private_logger import log
    from modules.expansion import safe_str
-    from modules.util import join_prompts
+    from modules.util import join_prompts, remove_empty_str

    try:
        async_gradio_app = shared.gradio_root
@@ -29,20 +30,42 @@ def worker():
    except Exception as e:
        print(e)

+    def progressbar(number, text):
+        outputs.append(['preview', (number, text, None)])
+
    def handler(task):
-        prompt, negative_prompt, style_selction, performance_selction, \
-        aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode, \
+        prompt, negative_prompt, style_selections, performance_selction, \
+        aspect_ratios_selction, image_number, image_seed, sharpness, \
        base_model_name, refiner_model_name, \
        l1, w1, l2, w2, l3, w3, l4, w4, l5, w5 = task

        loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)]

+        raw_style_selections = copy.deepcopy(style_selections)
+
+        if fooocus_expansion in style_selections:
+            use_expansion = True
+            style_selections.remove(fooocus_expansion)
+        else:
+            use_expansion = False
+
+        use_style = len(style_selections) > 0
+
        modules.patch.sharpness = sharpness

-        outputs.append(['preview', (1, 'Initializing ...', None)])
+        progressbar(1, 'Initializing ...')

-        prompt = safe_str(prompt)
-        negative_prompt = safe_str(negative_prompt)
+        raw_prompt = prompt
+        raw_negative_prompt = negative_prompt
+
+        prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
+        negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
+
+        prompt = prompts[0]
+        negative_prompt = negative_prompts[0]
+
+        extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
+        extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []

        seed = image_seed
        max_seed = int(1024 * 1024 * 1024)
@@ -52,63 +75,74 @@ def worker():
            seed = - seed
        seed = seed % max_seed

-        outputs.append(['preview', (3, 'Load models ...', None)])
+        progressbar(3, 'Loading models ...')

        pipeline.refresh_base_model(base_model_name)
        pipeline.refresh_refiner_model(refiner_model_name)
        pipeline.refresh_loras(loras)
+        pipeline.clear_all_caches()

-        tasks = []
-        if raw_mode:
-            outputs.append(['preview', (5, 'Encoding negative text ...', None)])
-            n_txt = apply_style_negative(style_selction, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt)
-            outputs.append(['preview', (9, 'Encoding positive text ...', None)])
-            p_txt = apply_style_positive(style_selction, prompt)
-            p_cond = pipeline.process_prompt(p_txt)
+        progressbar(3, 'Processing prompts ...')

-            for i in range(image_number):
-                tasks.append(dict(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    seed=seed + i,
-                    n_cond=n_cond,
-                    p_cond=p_cond,
-                    real_positive_prompt=p_txt,
-                    real_negative_prompt=n_txt
-                ))
+        positive_basic_workloads = []
+        negative_basic_workloads = []
+
+        if use_style:
+            for s in style_selections:
+                p, n = apply_style(s, positive=prompt)
+                positive_basic_workloads.append(p)
+                negative_basic_workloads.append(n)
        else:
-            for i in range(image_number):
-                outputs.append(['preview', (5, f'Preparing positive text #{i + 1} ...', None)])
-                current_seed = seed + i
+            positive_basic_workloads.append(prompt)

-                expansion_weight = 0.1
+        negative_basic_workloads.append(negative_prompt)  # Always use independent workload for negative.

-                suffix = pipeline.expansion(prompt, current_seed)
-                suffix = f'({suffix}:{expansion_weight})'
-                print(f'[Prompt Expansion] New suffix: {suffix}')
+        positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
+        negative_basic_workloads = negative_basic_workloads + extra_negative_prompts

-                p_txt = apply_style_positive(style_selction, prompt)
-                p_txt = safe_str(p_txt)
+        positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
+        negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)

-                p_txt = join_prompts(p_txt, suffix)
+        positive_top_k = len(positive_basic_workloads)
+        negative_top_k = len(negative_basic_workloads)

-                tasks.append(dict(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    seed=current_seed,
-                    real_positive_prompt=p_txt,
-                ))
+        tasks = [dict(
+            task_seed=seed + i,
+            positive=positive_basic_workloads,
+            negative=negative_basic_workloads,
+            expansion='',
+            c=[None, None],
+            uc=[None, None],
+        ) for i in range(image_number)]

-            outputs.append(['preview', (9, 'Encoding negative text ...', None)])
-            n_txt = apply_style_negative(style_selction, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt)
+        if use_expansion:
+            for i, t in enumerate(tasks):
+                progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
+                expansion = pipeline.expansion(prompt, t['task_seed'])
+                print(f'[Prompt Expansion] New suffix: {expansion}')
+                t['expansion'] = expansion
+                t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)]  # Deep copy.
+
+        for i, t in enumerate(tasks):
+            progressbar(7, f'Encoding base positive #{i + 1} ...')
+            t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'],
+                                             pool_top_k=positive_top_k)
+
+        for i, t in enumerate(tasks):
+            progressbar(9, f'Encoding base negative #{i + 1} ...')
+            t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'],
+                                              pool_top_k=negative_top_k)
+
+        if pipeline.xl_refiner is not None:
+            for i, t in enumerate(tasks):
+                progressbar(11, f'Encoding refiner positive #{i + 1} ...')
+                t['c'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['positive'],
+                                                 pool_top_k=positive_top_k)

            for i, t in enumerate(tasks):
-                outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)])
-                t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt'])
-                t['real_negative_prompt'] = n_txt
-                t['n_cond'] = n_cond
+                progressbar(13, f'Encoding refiner negative #{i + 1} ...')
+                t['uc'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['negative'],
+                                                  pool_top_k=negative_top_k)

        if performance_selction == 'Speed':
            steps = 30
@@ -117,6 +151,7 @@ def worker():
            steps = 60
            switch = 40

+        pipeline.clear_all_caches()  # save memory
        width, height = aspect_ratios[aspect_ratios_selction]

        results = []
@@ -132,34 +167,32 @@ def worker():
        outputs.append(['preview', (13, 'Starting tasks ...', None)])
        for current_task_id, task in enumerate(tasks):
            imgs = pipeline.process_diffusion(
-                positive_cond=task['p_cond'],
-                negative_cond=task['n_cond'],
+                positive_cond=task['c'],
+                negative_cond=task['uc'],
                steps=steps,
                switch=switch,
                width=width,
                height=height,
-                image_seed=task['seed'],
+                image_seed=task['task_seed'],
                callback=callback)

            for x in imgs:
                d = [
-                    ('Prompt', task['prompt']),
-                    ('Negative Prompt', task['negative_prompt']),
-                    ('Real Positive Prompt', task['real_positive_prompt']),
-                    ('Real Negative Prompt', task['real_negative_prompt']),
-                    ('Raw Mode', str(raw_mode)),
-                    ('Style', style_selction),
+                    ('Prompt', raw_prompt),
+                    ('Negative Prompt', raw_negative_prompt),
+                    ('Fooocus V2 Expansion', task['expansion']),
+                    ('Styles', str(raw_style_selections)),
                    ('Performance', performance_selction),
                    ('Resolution', str((width, height))),
                    ('Sharpness', sharpness),
                    ('Base Model', base_model_name),
                    ('Refiner Model', refiner_model_name),
-                    ('Seed', task['seed'])
+                    ('Seed', task['task_seed'])
                ]
                for n, w in loras:
                    if n != 'None':
                        d.append((f'LoRA [{n}] weight', w))
-                log(x, d)
+                log(x, d, single_line_number=3)

            results += imgs

--- a/modules/default_pipeline.py
+++ b/modules/default_pipeline.py
@@ -106,13 +106,55 @@ refresh_loras([(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0.
 expansion = FooocusExpansion()


-def process_prompt(text):
-    base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text)
-    if xl_refiner is not None:
-        refiner_cond = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text)
-    else:
-        refiner_cond = None
-    return base_cond, refiner_cond
+def clip_encode_single(clip, text, verbose=False):
+    cached = clip.fcs_cond_cache.get(text, None)
+    if cached is not None:
+        if verbose:
+            print(f'[CLIP Cached] {text}')
+        return cached
+    tokens = clip.tokenize(text)
+    result = clip.encode_from_tokens(tokens, return_pooled=True)
+    clip.fcs_cond_cache[text] = result
+    if verbose:
+        print(f'[CLIP Encoded] {text}')
+    return result
+
+
+def clip_encode(sd, texts, pool_top_k=1):
+    if sd is None:
+        return None
+    if sd.clip is None:
+        return None
+    if not isinstance(texts, list):
+        return None
+    if len(texts) == 0:
+        return None
+
+    clip = sd.clip
+    cond_list = []
+    pooled_acc = 0
+
+    for i, text in enumerate(texts):
+        cond, pooled = clip_encode_single(clip, text)
+        cond_list.append(cond)
+        if i < pool_top_k:
+            pooled_acc += pooled
+
+    return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]]
+
+
+def clear_sd_cond_cache(sd):
+    if sd is None:
+        return None
+    if sd.clip is None:
+        return None
+    sd.clip.fcs_cond_cache = {}
+    return
+
+
+def clear_all_caches():
+    clear_sd_cond_cache(xl_base_patched)
+    clear_sd_cond_cache(xl_refiner)


@torch.no_grad()
--- a/modules/expansion.py
+++ b/modules/expansion.py
@@ -5,7 +5,6 @@ from modules.path import fooocus_expansion_path

 fooocus_magic_split = [
    ', extremely',
-    ', trending',
    ', intricate,',
 ]
 dangrous_patterns = '[]【】()（）|:：'
--- a/modules/private_logger.py
+++ b/modules/private_logger.py
@@ -5,7 +5,7 @@ from PIL import Image
 from modules.util import generate_temp_filename


-def log(img, dic):
+def log(img, dic, single_line_number=3):
    date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, extension='png')
    os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True)
    Image.fromarray(img).save(local_temp_filename)
@@ -22,10 +22,10 @@ def log(img, dic):
        f.write(f"<p>{only_name}</p>\n")
        i = 0
        for k, v in dic:
-            if i < 4:
+            if i < single_line_number:
                f.write(f"<p>{k}: <b>{v}</b> </p>\n")
            else:
-                if i % 2 == 0:
+                if (i - single_line_number) % 2 == 0:
                    f.write(f"<p>{k}: <b>{v}</b>, ")
                else:
                    f.write(f"{k}: <b>{v}</b></p>\n")
--- a/modules/sdxl_styles.py
+++ b/modules/sdxl_styles.py
@@ -1,16 +1,14 @@
 from modules.util import join_prompts


+fooocus_expansion = "Fooocus V2"
+default_styles = ["Default (Slightly Cinematic)"]
+
 # https://github.com/twri/sdxl_prompt_styler/blob/main/sdxl_styles.json

 styles = [
    {
-        "name": "None",
-        "prompt": "{prompt}",
-        "negative_prompt": ""
-    },
-    {
-        "name": "cinematic-default",
+        "name": "Default (Slightly Cinematic)",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"
    },
@@ -926,8 +924,20 @@ styles = [
    }
 ]

-styles = {k['name']: (k['prompt'], k['negative_prompt']) for k in styles}
-default_style = styles['None']
+
+def normalize_key(k):
+    k = k.replace('-', ' ')
+    words = k.split(' ')
+    words = [w[:1].upper() + w[1:].lower() for w in words]
+    k = ' '.join(words)
+    k = k.replace('3d', '3D')
+    k = k.replace('Sai', 'SAI')
+    k = k.replace('(s', '(S')
+    return k
+
+
+default_styles = [normalize_key(x) for x in default_styles]
+styles = {normalize_key(k['name']): (k['prompt'], k['negative_prompt']) for k in styles}
 style_keys = list(styles.keys())

 SD_XL_BASE_RATIOS = {
@@ -962,11 +972,6 @@ SD_XL_BASE_RATIOS = {
 aspect_ratios = {str(v[0]) + '×' + str(v[1]): v for k, v in SD_XL_BASE_RATIOS.items()}


-def apply_style_positive(style, txt):
-    p, n = styles.get(style, default_style)
-    return p.replace('{prompt}', txt)
-
-
-def apply_style_negative(style, txt):
-    p, n = styles.get(style, default_style)
-    return join_prompts(n, txt)
+def apply_style(style, positive):
+    p, n = styles[style]
+    return p.replace('{prompt}', positive), n
--- a/modules/util.py
+++ b/modules/util.py
@@ -3,6 +3,13 @@ import random
 import os


+def remove_empty_str(items, default=None):
+    items = [x for x in items if x != ""]
+    if len(items) == 0 and default is not None:
+        return [default]
+    return items
+
+
 def join_prompts(*args, **kwargs):
    prompts = [str(x) for x in args if str(x) != ""]
    if len(prompts) == 0:
--- a/update_log.md
+++ b/update_log.md
@@ -1,3 +1,13 @@
+### 2.0.0
+
+* V2 released.
+* Completely rewrote the text processing pipeline (higher image quality and better prompt understanding).
+* Support multiple styles at once.
+* In 100 tests (prompts written by ChatGPT), V2 default results outperform V1 default results in 87 cases, as evaluated by two humans.
+* In 100 tests (prompts written by ChatGPT), V2 prompt understanding outperforms V1 prompt understanding in 81 cases, as evaluated by two humans, in both the default setting and multi/single-style mode.
+* Because both of the above numbers are above 80%, we view this as a major update and jump directly to 2.0.0.
+* Some other things are renamed.
+
 ### 1.0.67

 * Use dynamic weighting and lower weights for prompt expansion.
--- a/webui.py
+++ b/webui.py
@@ -8,7 +8,7 @@ import fooocus_version
 import modules.html
 import modules.async_worker as worker

-from modules.sdxl_styles import style_keys, aspect_ratios
+from modules.sdxl_styles import style_keys, aspect_ratios, fooocus_expansion, default_styles


 def generate_clicked(*args):
@@ -76,11 +76,10 @@ with shared.gradio_root:
                seed_random.change(random_checked, inputs=[seed_random], outputs=[image_seed])

            with gr.Tab(label='Style'):
-                raw_mode_check = gr.Checkbox(label='Raw Mode', value=False,
-                                             info='Similar to Midjourney\'s \"raw\" mode.')
-                style_selction = gr.Radio(show_label=True, container=True,
-                                          choices=style_keys, value='cinematic-default', label='Image Style',
-                                          info='Similar to Midjourney\'s \"--style\".')
+                style_selections = gr.CheckboxGroup(show_label=False, container=False,
+                                                    choices=[fooocus_expansion] + style_keys,
+                                                    value=[fooocus_expansion] + default_styles,
+                                                    label='Image Style')
            with gr.Tab(label='Advanced'):
                with gr.Row():
                    base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True)
@@ -110,8 +109,8 @@ with shared.gradio_root:

        advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col)
        ctrls = [
-            prompt, negative_prompt, style_selction,
-            performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode_check
+            prompt, negative_prompt, style_selections,
+            performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness
        ]
        ctrls += [base_model, refiner_model] + lora_ctrls
        run_button.click(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\