[Major Update] Fooocus 2.0.0 (#346)

lllyasviel 2023-09-11 23:10:45 -07:00 committed by GitHub
parent 25fed6a4fe
commit 47876aaf99
9 changed files with 191 additions and 96 deletions

View File: fooocus_version.py

@@ -1 +1 @@
-version = '1.0.67'
+version = '2.0.0'

View File: modules/async_worker.py

@@ -11,14 +11,15 @@ def worker():
     import time
     import shared
     import random
+    import copy
     import modules.default_pipeline as pipeline
     import modules.path
     import modules.patch
-    from modules.sdxl_styles import apply_style_negative, apply_style_positive, aspect_ratios
+    from modules.sdxl_styles import apply_style, aspect_ratios, fooocus_expansion
     from modules.private_logger import log
     from modules.expansion import safe_str
-    from modules.util import join_prompts
+    from modules.util import join_prompts, remove_empty_str
 
     try:
         async_gradio_app = shared.gradio_root
@@ -29,20 +30,42 @@ def worker():
     except Exception as e:
         print(e)
 
+    def progressbar(number, text):
+        outputs.append(['preview', (number, text, None)])
+
     def handler(task):
-        prompt, negative_prompt, style_selction, performance_selction, \
-            aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode, \
+        prompt, negative_prompt, style_selections, performance_selction, \
+            aspect_ratios_selction, image_number, image_seed, sharpness, \
             base_model_name, refiner_model_name, \
             l1, w1, l2, w2, l3, w3, l4, w4, l5, w5 = task
 
         loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)]
 
+        raw_style_selections = copy.deepcopy(style_selections)
+
+        if fooocus_expansion in style_selections:
+            use_expansion = True
+            style_selections.remove(fooocus_expansion)
+        else:
+            use_expansion = False
+
+        use_style = len(style_selections) > 0
+
         modules.patch.sharpness = sharpness
 
-        outputs.append(['preview', (1, 'Initializing ...', None)])
+        progressbar(1, 'Initializing ...')
 
-        prompt = safe_str(prompt)
-        negative_prompt = safe_str(negative_prompt)
+        raw_prompt = prompt
+        raw_negative_prompt = negative_prompt
+
+        prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
+        negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
+
+        prompt = prompts[0]
+        negative_prompt = negative_prompts[0]
+
+        extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
+        extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
 
         seed = image_seed
         max_seed = int(1024 * 1024 * 1024)
@@ -52,63 +75,74 @@ def worker():
             seed = - seed
         seed = seed % max_seed
 
-        outputs.append(['preview', (3, 'Load models ...', None)])
+        progressbar(3, 'Loading models ...')
 
         pipeline.refresh_base_model(base_model_name)
         pipeline.refresh_refiner_model(refiner_model_name)
         pipeline.refresh_loras(loras)
-        pipeline.clear_all_caches()
-
-        tasks = []
-        if raw_mode:
-            outputs.append(['preview', (5, 'Encoding negative text ...', None)])
-            n_txt = apply_style_negative(style_selction, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt)
-
-            outputs.append(['preview', (9, 'Encoding positive text ...', None)])
-            p_txt = apply_style_positive(style_selction, prompt)
-            p_cond = pipeline.process_prompt(p_txt)
-
-            for i in range(image_number):
-                tasks.append(dict(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    seed=seed + i,
-                    n_cond=n_cond,
-                    p_cond=p_cond,
-                    real_positive_prompt=p_txt,
-                    real_negative_prompt=n_txt
-                ))
-        else:
-            for i in range(image_number):
-                outputs.append(['preview', (5, f'Preparing positive text #{i + 1} ...', None)])
-                current_seed = seed + i
-                expansion_weight = 0.1
-                suffix = pipeline.expansion(prompt, current_seed)
-                suffix = f'({suffix}:{expansion_weight})'
-                print(f'[Prompt Expansion] New suffix: {suffix}')
-
-                p_txt = apply_style_positive(style_selction, prompt)
-                p_txt = safe_str(p_txt)
-                p_txt = join_prompts(p_txt, suffix)
-
-                tasks.append(dict(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt,
-                    seed=current_seed,
-                    real_positive_prompt=p_txt,
-                ))
-
-            outputs.append(['preview', (9, 'Encoding negative text ...', None)])
-            n_txt = apply_style_negative(style_selction, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt)
-
-            for i, t in enumerate(tasks):
-                outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)])
-                t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt'])
-                t['real_negative_prompt'] = n_txt
-                t['n_cond'] = n_cond
+
+        progressbar(3, 'Processing prompts ...')
+
+        positive_basic_workloads = []
+        negative_basic_workloads = []
+
+        if use_style:
+            for s in style_selections:
+                p, n = apply_style(s, positive=prompt)
+                positive_basic_workloads.append(p)
+                negative_basic_workloads.append(n)
+        else:
+            positive_basic_workloads.append(prompt)
+
+        negative_basic_workloads.append(negative_prompt)  # Always use independent workload for negative.
+
+        positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
+        negative_basic_workloads = negative_basic_workloads + extra_negative_prompts
+
+        positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
+        negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)
+
+        positive_top_k = len(positive_basic_workloads)
+        negative_top_k = len(negative_basic_workloads)
+
+        tasks = [dict(
+            task_seed=seed + i,
+            positive=positive_basic_workloads,
+            negative=negative_basic_workloads,
+            expansion='',
+            c=[None, None],
+            uc=[None, None],
+        ) for i in range(image_number)]
+
+        if use_expansion:
+            for i, t in enumerate(tasks):
+                progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
+                expansion = pipeline.expansion(prompt, t['task_seed'])
+                print(f'[Prompt Expansion] New suffix: {expansion}')
+                t['expansion'] = expansion
+                t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)]  # Deep copy.
+
+        for i, t in enumerate(tasks):
+            progressbar(7, f'Encoding base positive #{i + 1} ...')
+            t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'],
+                                             pool_top_k=positive_top_k)
+
+        for i, t in enumerate(tasks):
+            progressbar(9, f'Encoding base negative #{i + 1} ...')
+            t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'],
+                                              pool_top_k=negative_top_k)
+
+        if pipeline.xl_refiner is not None:
+            for i, t in enumerate(tasks):
+                progressbar(11, f'Encoding refiner positive #{i + 1} ...')
+                t['c'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['positive'],
+                                                 pool_top_k=positive_top_k)
+
+            for i, t in enumerate(tasks):
+                progressbar(13, f'Encoding refiner negative #{i + 1} ...')
+                t['uc'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['negative'],
+                                                  pool_top_k=negative_top_k)
 
         if performance_selction == 'Speed':
             steps = 30
@@ -117,6 +151,7 @@ def worker():
             steps = 60
             switch = 40
 
+        pipeline.clear_all_caches()  # save memory
 
         width, height = aspect_ratios[aspect_ratios_selction]
 
         results = []
@@ -132,34 +167,32 @@
         outputs.append(['preview', (13, 'Starting tasks ...', None)])
         for current_task_id, task in enumerate(tasks):
             imgs = pipeline.process_diffusion(
-                positive_cond=task['p_cond'],
-                negative_cond=task['n_cond'],
+                positive_cond=task['c'],
+                negative_cond=task['uc'],
                 steps=steps,
                 switch=switch,
                 width=width,
                 height=height,
-                image_seed=task['seed'],
+                image_seed=task['task_seed'],
                 callback=callback)
 
             for x in imgs:
                 d = [
-                    ('Prompt', task['prompt']),
-                    ('Negative Prompt', task['negative_prompt']),
-                    ('Real Positive Prompt', task['real_positive_prompt']),
-                    ('Real Negative Prompt', task['real_negative_prompt']),
-                    ('Raw Mode', str(raw_mode)),
-                    ('Style', style_selction),
+                    ('Prompt', raw_prompt),
+                    ('Negative Prompt', raw_negative_prompt),
+                    ('Fooocus V2 Expansion', task['expansion']),
+                    ('Styles', str(raw_style_selections)),
                     ('Performance', performance_selction),
                     ('Resolution', str((width, height))),
                     ('Sharpness', sharpness),
                     ('Base Model', base_model_name),
                     ('Refiner Model', refiner_model_name),
-                    ('Seed', task['seed'])
+                    ('Seed', task['task_seed'])
                 ]
                 for n, w in loras:
                     if n != 'None':
                         d.append((f'LoRA [{n}] weight', w))
-                log(x, d)
+                log(x, d, single_line_number=3)
             results += imgs
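The rewritten handler above replaces the old raw-mode branch with a list of prompt "workloads": every selected style contributes one positive and one negative workload, extra prompt lines are appended, and the optional Fooocus V2 expansion adds one more positive workload per image seed. The following is a minimal, self-contained sketch of that flow; the style table, fake_expansion, and build_tasks below are illustrative stand-ins, not the repository modules.

import copy

styles = {
    'Default (Slightly Cinematic)': ('cinematic still {prompt} . emotional, harmonious', 'anime, cartoon'),
}

def apply_style(style, positive):
    p, n = styles[style]
    return p.replace('{prompt}', positive), n

def fake_expansion(prompt, seed):
    # Placeholder for pipeline.expansion (the Fooocus prompt-expansion model).
    return f'{prompt}, highly detailed, sharp focus (seed {seed})'

def build_tasks(prompt, negative_prompt, style_selections, use_expansion, image_number, seed):
    positive = []
    negative = []
    if style_selections:
        for s in style_selections:
            p, n = apply_style(s, positive=prompt)
            positive.append(p)
            negative.append(n)
    else:
        positive.append(prompt)
    negative.append(negative_prompt)  # the negative prompt always gets its own workload

    tasks = [dict(task_seed=seed + i, positive=positive, negative=negative, expansion='')
             for i in range(image_number)]
    if use_expansion:
        for t in tasks:
            t['expansion'] = fake_expansion(prompt, t['task_seed'])
            t['positive'] = copy.deepcopy(t['positive']) + [t['expansion']]  # deep copy per task
    return tasks

if __name__ == '__main__':
    for t in build_tasks('a cat', 'blurry', ['Default (Slightly Cinematic)'], True, 2, 1234):
        print(t['task_seed'], len(t['positive']), 'positive workloads')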

View File: modules/default_pipeline.py

@@ -106,13 +106,55 @@ refresh_loras([(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0.
 expansion = FooocusExpansion()
 
-def process_prompt(text):
-    base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text)
-    if xl_refiner is not None:
-        refiner_cond = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text)
-    else:
-        refiner_cond = None
-    return base_cond, refiner_cond
+def clip_encode_single(clip, text, verbose=False):
+    cached = clip.fcs_cond_cache.get(text, None)
+    if cached is not None:
+        if verbose:
+            print(f'[CLIP Cached] {text}')
+        return cached
+    tokens = clip.tokenize(text)
+    result = clip.encode_from_tokens(tokens, return_pooled=True)
+    clip.fcs_cond_cache[text] = result
+    if verbose:
+        print(f'[CLIP Encoded] {text}')
+    return result
+
+
+def clip_encode(sd, texts, pool_top_k=1):
+    if sd is None:
+        return None
+    if sd.clip is None:
+        return None
+    if not isinstance(texts, list):
+        return None
+    if len(texts) == 0:
+        return None
+
+    clip = sd.clip
+    cond_list = []
+    pooled_acc = 0
+
+    for i, text in enumerate(texts):
+        cond, pooled = clip_encode_single(clip, text)
+        cond_list.append(cond)
+        if i < pool_top_k:
+            pooled_acc += pooled
+
+    return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]]
+
+
+def clear_sd_cond_cache(sd):
+    if sd is None:
+        return None
+    if sd.clip is None:
+        return None
+    sd.clip.fcs_cond_cache = {}
+    return
+
+
+def clear_all_caches():
+    clear_sd_cond_cache(xl_base_patched)
+    clear_sd_cond_cache(xl_refiner)
+
 
 @torch.no_grad()
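clip_encode above concatenates the token embeddings of all prompt workloads along the token axis and sums the pooled outputs of only the first pool_top_k prompts. A stubbed sketch of that shape logic, with random tensors standing in for a real CLIP and purely illustrative dimensions:

import torch

def fake_encode_single(text, token_dim=77, width=2048):
    cond = torch.randn(1, token_dim, width)      # per-prompt token embeddings
    pooled = torch.randn(1, 1280)                # per-prompt pooled embedding
    return cond, pooled

def fake_clip_encode(texts, pool_top_k=1):
    cond_list, pooled_acc = [], 0
    for i, text in enumerate(texts):
        cond, pooled = fake_encode_single(text)
        cond_list.append(cond)
        if i < pool_top_k:
            pooled_acc = pooled_acc + pooled     # only the first pool_top_k prompts feed the pooled output
    return [[torch.cat(cond_list, dim=1), {"pooled_output": pooled_acc}]]

result = fake_clip_encode(['a cat', 'cinematic still a cat', 'extra prompt'], pool_top_k=2)
print(result[0][0].shape)   # torch.Size([1, 231, 2048]) -- three 77-token prompts concatenated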

View File: modules/expansion.py

@@ -5,7 +5,6 @@ from modules.path import fooocus_expansion_path
 fooocus_magic_split = [
     ', extremely',
-    ', trending',
     ', intricate,',
 ]
 
 dangrous_patterns = '[]【】()|:'

View File: modules/private_logger.py

@@ -5,7 +5,7 @@ from PIL import Image
 from modules.util import generate_temp_filename
 
-def log(img, dic):
+def log(img, dic, single_line_number=3):
     date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, extension='png')
     os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True)
     Image.fromarray(img).save(local_temp_filename)
@@ -22,10 +22,10 @@ def log(img, dic):
         f.write(f"<p>{only_name}</p>\n")
         i = 0
         for k, v in dic:
-            if i < 4:
+            if i < single_line_number:
                 f.write(f"<p>{k}: <b>{v}</b> </p>\n")
             else:
-                if i % 2 == 0:
+                if (i - single_line_number) % 2 == 0:
                     f.write(f"<p>{k}: <b>{v}</b>, ")
                 else:
                     f.write(f"{k}: <b>{v}</b></p>\n")
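The new single_line_number parameter changes the metadata layout in the HTML log: the first three entries each get their own line, and the remaining entries are paired two per line. A stand-alone illustration of that layout rule (not the logger itself):

def layout(dic, single_line_number=3):
    lines = []
    for i, (k, v) in enumerate(dic):
        if i < single_line_number:
            lines.append(f'{k}: {v}')
        elif (i - single_line_number) % 2 == 0:
            lines.append(f'{k}: {v}, ')          # first half of a pair
        else:
            lines[-1] += f'{k}: {v}'             # second half closes the line
    return lines

meta = [('Prompt', 'a cat'), ('Negative Prompt', 'blurry'), ('Fooocus V2 Expansion', '...'),
        ('Styles', "['Default (Slightly Cinematic)']"), ('Performance', 'Speed'),
        ('Resolution', '(1024, 1024)'), ('Sharpness', 2.0)]
print('\n'.join(layout(meta)))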

View File: modules/sdxl_styles.py

@@ -1,16 +1,14 @@
 from modules.util import join_prompts
 
+fooocus_expansion = "Fooocus V2"
+default_styles = ["Default (Slightly Cinematic)"]
+
 # https://github.com/twri/sdxl_prompt_styler/blob/main/sdxl_styles.json
 styles = [
     {
-        "name": "None",
-        "prompt": "{prompt}",
-        "negative_prompt": ""
-    },
-    {
-        "name": "cinematic-default",
+        "name": "Default (Slightly Cinematic)",
         "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
         "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"
     },
@@ -926,8 +924,20 @@ styles = [
     }
 ]
 
-styles = {k['name']: (k['prompt'], k['negative_prompt']) for k in styles}
-default_style = styles['None']
+
+def normalize_key(k):
+    k = k.replace('-', ' ')
+    words = k.split(' ')
+    words = [w[:1].upper() + w[1:].lower() for w in words]
+    k = ' '.join(words)
+    k = k.replace('3d', '3D')
+    k = k.replace('Sai', 'SAI')
+    k = k.replace('(s', '(S')
+    return k
+
+
+default_styles = [normalize_key(x) for x in default_styles]
+styles = {normalize_key(k['name']): (k['prompt'], k['negative_prompt']) for k in styles}
+
 style_keys = list(styles.keys())
 
 SD_XL_BASE_RATIOS = {
@@ -962,11 +972,6 @@ SD_XL_BASE_RATIOS = {
 aspect_ratios = {str(v[0]) + '×' + str(v[1]): v for k, v in SD_XL_BASE_RATIOS.items()}
 
-def apply_style_positive(style, txt):
-    p, n = styles.get(style, default_style)
-    return p.replace('{prompt}', txt)
-
-def apply_style_negative(style, txt):
-    p, n = styles.get(style, default_style)
-    return join_prompts(n, txt)
+
+def apply_style(style, positive):
+    p, n = styles[style]
+    return p.replace('{prompt}', positive), n
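normalize_key turns the old hyphenated style ids (for example 'cinematic-default' or 'sai-photographic') into title-cased display names, and apply_style now returns both the styled positive prompt and the style's own negative prompt instead of joining the latter into the user's negative prompt. A condensed illustration; the one-entry style table here is a placeholder:

def normalize_key(k):
    k = k.replace('-', ' ')
    words = [w[:1].upper() + w[1:].lower() for w in k.split(' ')]
    k = ' '.join(words)
    return k.replace('3d', '3D').replace('Sai', 'SAI').replace('(s', '(S')

styles = {'Default (Slightly Cinematic)': ('cinematic still {prompt} . emotional, harmonious', 'anime, cartoon')}

def apply_style(style, positive):
    p, n = styles[style]
    return p.replace('{prompt}', positive), n

print(normalize_key('sai-photographic'))                   # 'SAI Photographic'
print(apply_style('Default (Slightly Cinematic)', 'a cat'))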

View File: modules/util.py

@@ -3,6 +3,13 @@ import random
 import os
 
+
+def remove_empty_str(items, default=None):
+    items = [x for x in items if x != ""]
+    if len(items) == 0 and default is not None:
+        return [default]
+    return items
+
 
 def join_prompts(*args, **kwargs):
     prompts = [str(x) for x in args if str(x) != ""]
     if len(prompts) == 0:
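remove_empty_str supports the new multi-line prompt handling in async_worker: each non-empty line of the prompt box becomes its own workload, with the first line treated as the main prompt. A simplified example, with a plain strip() standing in for safe_str:

def remove_empty_str(items, default=None):
    items = [x for x in items if x != ""]
    if len(items) == 0 and default is not None:
        return [default]
    return items

raw = "a cat\n\nwearing a hat\n"
prompts = remove_empty_str([p.strip() for p in raw.split('\n')], default='')
print(prompts)          # ['a cat', 'wearing a hat']
print(prompts[0])       # main prompt
print(prompts[1:])      # extra per-line prompts appended as additional workloads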

View File: update_log.md

@@ -1,3 +1,13 @@
+### 2.0.0
+
+* V2 released.
+* Completely rewrote the text processing pipeline (higher image quality and better prompt understanding).
+* Support multi-style.
+* In 100 tests (prompts written by ChatGPT), V2 default results outperform V1 default results in 87 cases, as evaluated by two humans.
+* In 100 tests (prompts written by ChatGPT), V2 prompt understanding outperforms V1 prompt understanding in 81 cases, as evaluated by two humans, in both the default setting and multi/single-style mode.
+* Because the above numbers are above 80%, we view this as a major update and jump directly to 2.0.0.
+* Some other things are renamed.
+
 ### 1.0.67
 
 * Use dynamic weighting and lower weights for prompt expansion.

View File: webui.py

@@ -8,7 +8,7 @@ import fooocus_version
 import modules.html
 import modules.async_worker as worker
 
-from modules.sdxl_styles import style_keys, aspect_ratios
+from modules.sdxl_styles import style_keys, aspect_ratios, fooocus_expansion, default_styles
 
 
 def generate_clicked(*args):
@@ -76,11 +76,10 @@ with shared.gradio_root:
                     seed_random.change(random_checked, inputs=[seed_random], outputs=[image_seed])
                 with gr.Tab(label='Style'):
-                    raw_mode_check = gr.Checkbox(label='Raw Mode', value=False,
-                                                 info='Similar to Midjourney\'s \"raw\" mode.')
-                    style_selction = gr.Radio(show_label=True, container=True,
-                                              choices=style_keys, value='cinematic-default', label='Image Style',
-                                              info='Similar to Midjourney\'s \"--style\".')
+                    style_selections = gr.CheckboxGroup(show_label=False, container=False,
+                                                        choices=[fooocus_expansion] + style_keys,
+                                                        value=[fooocus_expansion] + default_styles,
+                                                        label='Image Style')
                 with gr.Tab(label='Advanced'):
                     with gr.Row():
                         base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True)
@@ -110,8 +109,8 @@ with shared.gradio_root:
     advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col)
 
     ctrls = [
-        prompt, negative_prompt, style_selction,
-        performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode_check
+        prompt, negative_prompt, style_selections,
+        performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness
     ]
     ctrls += [base_model, refiner_model] + lora_ctrls
 
     run_button.click(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\
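The Radio plus Raw Mode checkbox is replaced by a single multi-select CheckboxGroup whose value is a list of style names, with "Fooocus V2" acting as the switch for prompt expansion. A minimal Gradio sketch of such a control, assuming a local gradio install; the style names here are placeholders:

import gradio as gr

fooocus_expansion = "Fooocus V2"
style_keys = ["Default (Slightly Cinematic)", "SAI Photographic"]

with gr.Blocks() as demo:
    style_selections = gr.CheckboxGroup(choices=[fooocus_expansion] + style_keys,
                                        value=[fooocus_expansion, "Default (Slightly Cinematic)"],
                                        label='Image Style')
    out = gr.Textbox(label='Selected')
    style_selections.change(lambda xs: ', '.join(xs), style_selections, out)

# demo.launch()  # uncomment to try it locally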