[Major Update] Fooocus 2.0.0 (#346)

[Major Update] Fooocus 2.0.0 (#346)
This commit is contained in:
lllyasviel 2023-09-11 23:10:45 -07:00 committed by GitHub
parent 25fed6a4fe
commit 47876aaf99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 191 additions and 96 deletions

View File

@ -1 +1 @@
version = '1.0.67'
version = '2.0.0'

View File

@ -11,14 +11,15 @@ def worker():
import time
import shared
import random
import copy
import modules.default_pipeline as pipeline
import modules.path
import modules.patch
from modules.sdxl_styles import apply_style_negative, apply_style_positive, aspect_ratios
from modules.sdxl_styles import apply_style, aspect_ratios, fooocus_expansion
from modules.private_logger import log
from modules.expansion import safe_str
from modules.util import join_prompts
from modules.util import join_prompts, remove_empty_str
try:
async_gradio_app = shared.gradio_root
@ -29,20 +30,42 @@ def worker():
except Exception as e:
print(e)
def progressbar(number, text):
outputs.append(['preview', (number, text, None)])
def handler(task):
prompt, negative_prompt, style_selction, performance_selction, \
aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode, \
prompt, negative_prompt, style_selections, performance_selction, \
aspect_ratios_selction, image_number, image_seed, sharpness, \
base_model_name, refiner_model_name, \
l1, w1, l2, w2, l3, w3, l4, w4, l5, w5 = task
loras = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)]
raw_style_selections = copy.deepcopy(style_selections)
if fooocus_expansion in style_selections:
use_expansion = True
style_selections.remove(fooocus_expansion)
else:
use_expansion = False
use_style = len(style_selections) > 0
modules.patch.sharpness = sharpness
outputs.append(['preview', (1, 'Initializing ...', None)])
progressbar(1, 'Initializing ...')
prompt = safe_str(prompt)
negative_prompt = safe_str(negative_prompt)
raw_prompt = prompt
raw_negative_prompt = negative_prompt
prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='')
negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='')
prompt = prompts[0]
negative_prompt = negative_prompts[0]
extra_positive_prompts = prompts[1:] if len(prompts) > 1 else []
extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else []
seed = image_seed
max_seed = int(1024 * 1024 * 1024)
@ -52,63 +75,74 @@ def worker():
seed = - seed
seed = seed % max_seed
outputs.append(['preview', (3, 'Load models ...', None)])
progressbar(3, 'Loading models ...')
pipeline.refresh_base_model(base_model_name)
pipeline.refresh_refiner_model(refiner_model_name)
pipeline.refresh_loras(loras)
pipeline.clear_all_caches()
tasks = []
if raw_mode:
outputs.append(['preview', (5, 'Encoding negative text ...', None)])
n_txt = apply_style_negative(style_selction, negative_prompt)
n_cond = pipeline.process_prompt(n_txt)
outputs.append(['preview', (9, 'Encoding positive text ...', None)])
p_txt = apply_style_positive(style_selction, prompt)
p_cond = pipeline.process_prompt(p_txt)
progressbar(3, 'Processing prompts ...')
for i in range(image_number):
tasks.append(dict(
prompt=prompt,
negative_prompt=negative_prompt,
seed=seed + i,
n_cond=n_cond,
p_cond=p_cond,
real_positive_prompt=p_txt,
real_negative_prompt=n_txt
))
positive_basic_workloads = []
negative_basic_workloads = []
if use_style:
for s in style_selections:
p, n = apply_style(s, positive=prompt)
positive_basic_workloads.append(p)
negative_basic_workloads.append(n)
else:
for i in range(image_number):
outputs.append(['preview', (5, f'Preparing positive text #{i + 1} ...', None)])
current_seed = seed + i
positive_basic_workloads.append(prompt)
expansion_weight = 0.1
negative_basic_workloads.append(negative_prompt) # Always use independent workload for negative.
suffix = pipeline.expansion(prompt, current_seed)
suffix = f'({suffix}:{expansion_weight})'
print(f'[Prompt Expansion] New suffix: {suffix}')
positive_basic_workloads = positive_basic_workloads + extra_positive_prompts
negative_basic_workloads = negative_basic_workloads + extra_negative_prompts
p_txt = apply_style_positive(style_selction, prompt)
p_txt = safe_str(p_txt)
positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt)
negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt)
p_txt = join_prompts(p_txt, suffix)
positive_top_k = len(positive_basic_workloads)
negative_top_k = len(negative_basic_workloads)
tasks.append(dict(
prompt=prompt,
negative_prompt=negative_prompt,
seed=current_seed,
real_positive_prompt=p_txt,
))
tasks = [dict(
task_seed=seed + i,
positive=positive_basic_workloads,
negative=negative_basic_workloads,
expansion='',
c=[None, None],
uc=[None, None],
) for i in range(image_number)]
outputs.append(['preview', (9, 'Encoding negative text ...', None)])
n_txt = apply_style_negative(style_selction, negative_prompt)
n_cond = pipeline.process_prompt(n_txt)
if use_expansion:
for i, t in enumerate(tasks):
progressbar(5, f'Preparing Fooocus text #{i + 1} ...')
expansion = pipeline.expansion(prompt, t['task_seed'])
print(f'[Prompt Expansion] New suffix: {expansion}')
t['expansion'] = expansion
t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)] # Deep copy.
for i, t in enumerate(tasks):
progressbar(7, f'Encoding base positive #{i + 1} ...')
t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'],
pool_top_k=positive_top_k)
for i, t in enumerate(tasks):
progressbar(9, f'Encoding base negative #{i + 1} ...')
t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'],
pool_top_k=negative_top_k)
if pipeline.xl_refiner is not None:
for i, t in enumerate(tasks):
progressbar(11, f'Encoding refiner positive #{i + 1} ...')
t['c'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['positive'],
pool_top_k=positive_top_k)
for i, t in enumerate(tasks):
outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)])
t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt'])
t['real_negative_prompt'] = n_txt
t['n_cond'] = n_cond
progressbar(13, f'Encoding refiner negative #{i + 1} ...')
t['uc'][1] = pipeline.clip_encode(sd=pipeline.xl_refiner, texts=t['negative'],
pool_top_k=negative_top_k)
if performance_selction == 'Speed':
steps = 30
@ -117,6 +151,7 @@ def worker():
steps = 60
switch = 40
pipeline.clear_all_caches() # save memory
width, height = aspect_ratios[aspect_ratios_selction]
results = []
@ -132,34 +167,32 @@ def worker():
outputs.append(['preview', (13, 'Starting tasks ...', None)])
for current_task_id, task in enumerate(tasks):
imgs = pipeline.process_diffusion(
positive_cond=task['p_cond'],
negative_cond=task['n_cond'],
positive_cond=task['c'],
negative_cond=task['uc'],
steps=steps,
switch=switch,
width=width,
height=height,
image_seed=task['seed'],
image_seed=task['task_seed'],
callback=callback)
for x in imgs:
d = [
('Prompt', task['prompt']),
('Negative Prompt', task['negative_prompt']),
('Real Positive Prompt', task['real_positive_prompt']),
('Real Negative Prompt', task['real_negative_prompt']),
('Raw Mode', str(raw_mode)),
('Style', style_selction),
('Prompt', raw_prompt),
('Negative Prompt', raw_negative_prompt),
('Fooocus V2 Expansion', task['expansion']),
('Styles', str(raw_style_selections)),
('Performance', performance_selction),
('Resolution', str((width, height))),
('Sharpness', sharpness),
('Base Model', base_model_name),
('Refiner Model', refiner_model_name),
('Seed', task['seed'])
('Seed', task['task_seed'])
]
for n, w in loras:
if n != 'None':
d.append((f'LoRA [{n}] weight', w))
log(x, d)
log(x, d, single_line_number=3)
results += imgs

View File

@ -106,13 +106,55 @@ refresh_loras([(modules.path.default_lora_name, 0.5), ('None', 0.5), ('None', 0.
expansion = FooocusExpansion()
def process_prompt(text):
    """Encode ``text`` with the base model's CLIP and, if present, the refiner's.

    Returns a ``(base_cond, refiner_cond)`` pair. ``refiner_cond`` is ``None``
    when no refiner model is loaded (``xl_refiner is None``).
    """
    base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text)
    refiner_cond = (
        core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text)
        if xl_refiner is not None
        else None
    )
    return base_cond, refiner_cond
def clip_encode_single(clip, text, verbose=False):
    """Return the CLIP encoding of ``text``, memoised on ``clip.fcs_cond_cache``.

    On a cache miss the text is tokenized, encoded with
    ``return_pooled=True`` and the result is stored under ``text`` before
    being returned. On a hit the stored result is returned as-is.

    When ``verbose`` is true, a line is printed saying whether the value came
    from the cache or was freshly encoded.
    """
    hit = clip.fcs_cond_cache.get(text)
    if hit is None:
        # Miss: run the encoder once and remember the result for this text.
        encoded = clip.encode_from_tokens(clip.tokenize(text), return_pooled=True)
        clip.fcs_cond_cache[text] = encoded
        if verbose:
            print(f'[CLIP Encoded] {text}')
        return encoded

    if verbose:
        print(f'[CLIP Cached] {text}')
    return hit
def clip_encode(sd, texts, pool_top_k=1):
    """Encode a list of prompts with ``sd.clip`` and merge them into one cond.

    Each text is encoded through ``clip_encode_single`` (so results are
    cached per text). The per-text conds are concatenated with
    ``torch.cat(..., dim=1)``; the pooled outputs of the first ``pool_top_k``
    texts are summed into ``"pooled_output"``.

    Returns ``None`` when ``sd`` or ``sd.clip`` is ``None``, when ``texts`` is
    not a ``list``, or when it is empty. Otherwise returns
    ``[[cond, {"pooled_output": pooled}]]``.
    """
    if sd is None or sd.clip is None:
        return None
    if not isinstance(texts, list) or not texts:
        return None

    clip = sd.clip
    conds = []
    # Starts as the integer 0; the first addition swaps it for the pooled value.
    pooled_sum = 0
    for index, text in enumerate(texts):
        cond, pooled = clip_encode_single(clip, text)
        conds.append(cond)
        if index < pool_top_k:
            pooled_sum += pooled

    return [[torch.cat(conds, dim=1), {"pooled_output": pooled_sum}]]
def clear_sd_cond_cache(sd):
    """Reset the text-encoding cache on ``sd.clip`` to an empty dict.

    Does nothing when ``sd`` or ``sd.clip`` is ``None``. Always returns
    ``None``.
    """
    if sd is not None and sd.clip is not None:
        sd.clip.fcs_cond_cache = {}
    return None
def clear_all_caches():
    """Clear the cached text encodings of the base model and the refiner."""
    for model in (xl_base_patched, xl_refiner):
        clear_sd_cond_cache(model)
@torch.no_grad()

View File

@ -5,7 +5,6 @@ from modules.path import fooocus_expansion_path
fooocus_magic_split = [
', extremely',
', trending',
', intricate,',
]
dangrous_patterns = '[]【】()|:'

View File

@ -5,7 +5,7 @@ from PIL import Image
from modules.util import generate_temp_filename
def log(img, dic):
def log(img, dic, single_line_number=3):
date_string, local_temp_filename, only_name = generate_temp_filename(folder=modules.path.temp_outputs_path, extension='png')
os.makedirs(os.path.dirname(local_temp_filename), exist_ok=True)
Image.fromarray(img).save(local_temp_filename)
@ -22,10 +22,10 @@ def log(img, dic):
f.write(f"<p>{only_name}</p>\n")
i = 0
for k, v in dic:
if i < 4:
if i < single_line_number:
f.write(f"<p>{k}: <b>{v}</b> </p>\n")
else:
if i % 2 == 0:
if (i - single_line_number) % 2 == 0:
f.write(f"<p>{k}: <b>{v}</b>, ")
else:
f.write(f"{k}: <b>{v}</b></p>\n")

View File

@ -1,16 +1,14 @@
from modules.util import join_prompts
fooocus_expansion = "Fooocus V2"
default_styles = ["Default (Slightly Cinematic)"]
# https://github.com/twri/sdxl_prompt_styler/blob/main/sdxl_styles.json
styles = [
{
"name": "None",
"prompt": "{prompt}",
"negative_prompt": ""
},
{
"name": "cinematic-default",
"name": "Default (Slightly Cinematic)",
"prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
"negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"
},
@ -926,8 +924,20 @@ styles = [
}
]
styles = {k['name']: (k['prompt'], k['negative_prompt']) for k in styles}
default_style = styles['None']
def normalize_key(k):
    """Convert a raw style name into its display form.

    Hyphens become spaces and every space-separated word is capitalised
    (first character upper-cased, the rest lower-cased). A few substrings are
    then fixed up: ``3d`` -> ``3D``, ``Sai`` -> ``SAI`` and ``(s`` -> ``(S``.
    """
    capitalised = ' '.join(
        word[:1].upper() + word[1:].lower()
        for word in k.replace('-', ' ').split(' ')
    )
    # Applied in this exact order, after capitalisation.
    for old, new in (('3d', '3D'), ('Sai', 'SAI'), ('(s', '(S')):
        capitalised = capitalised.replace(old, new)
    return capitalised
default_styles = [normalize_key(x) for x in default_styles]
styles = {normalize_key(k['name']): (k['prompt'], k['negative_prompt']) for k in styles}
style_keys = list(styles.keys())
SD_XL_BASE_RATIOS = {
@ -962,11 +972,6 @@ SD_XL_BASE_RATIOS = {
aspect_ratios = {str(v[0]) + '×' + str(v[1]): v for k, v in SD_XL_BASE_RATIOS.items()}
def apply_style_positive(style, txt):
    """Fill ``txt`` into the positive template of ``style``.

    Unknown style names fall back to ``default_style``.
    """
    positive_template, _ = styles.get(style, default_style)
    return positive_template.replace('{prompt}', txt)
def apply_style_negative(style, txt):
    """Join the negative prompt of ``style`` with ``txt`` via ``join_prompts``.

    Unknown style names fall back to ``default_style``.
    """
    _, negative_text = styles.get(style, default_style)
    return join_prompts(negative_text, txt)
def apply_style(style, positive):
    """Return ``(styled_positive, style_negative)`` for the named style.

    The ``{prompt}`` placeholder in the style's positive template is replaced
    by ``positive``; the style's negative prompt is returned unchanged.
    ``style`` is looked up directly in ``styles``, so there is no fallback for
    unknown names.
    """
    positive_template, negative_text = styles[style]
    return positive_template.replace('{prompt}', positive), negative_text

View File

@ -3,6 +3,13 @@ import random
import os
def remove_empty_str(items, default=None):
    """Return ``items`` without empty strings.

    If nothing is left and ``default`` is not ``None``, the single-element
    list ``[default]`` is returned instead of an empty list.
    """
    kept = [entry for entry in items if entry != ""]
    if kept or default is None:
        return kept
    return [default]
def join_prompts(*args, **kwargs):
prompts = [str(x) for x in args if str(x) != ""]
if len(prompts) == 0:

View File

@ -1,3 +1,13 @@
### 2.0.0
* V2 released.
* Completely rewrote the text processing pipeline (higher image quality and better prompt understanding).
* Support multi-style.
* In 100 tests (prompts written by ChatGPT), V2 default results outperform V1 default results in 87 cases, as evaluated by two humans.
* In 100 tests (prompts written by ChatGPT), V2 prompt understanding outperforms V1 prompt understanding in 81 cases, as evaluated by two humans, in both the default setting and multi/single-style mode.
* Because both of the above numbers are above 80%, we view this as a major update and jump directly to 2.0.0.
* Some other things are renamed.
### 1.0.67
* Use dynamic weighting and lower weights for prompt expansion.

View File

@ -8,7 +8,7 @@ import fooocus_version
import modules.html
import modules.async_worker as worker
from modules.sdxl_styles import style_keys, aspect_ratios
from modules.sdxl_styles import style_keys, aspect_ratios, fooocus_expansion, default_styles
def generate_clicked(*args):
@ -76,11 +76,10 @@ with shared.gradio_root:
seed_random.change(random_checked, inputs=[seed_random], outputs=[image_seed])
with gr.Tab(label='Style'):
raw_mode_check = gr.Checkbox(label='Raw Mode', value=False,
info='Similar to Midjourney\'s \"raw\" mode.')
style_selction = gr.Radio(show_label=True, container=True,
choices=style_keys, value='cinematic-default', label='Image Style',
info='Similar to Midjourney\'s \"--style\".')
style_selections = gr.CheckboxGroup(show_label=False, container=False,
choices=[fooocus_expansion] + style_keys,
value=[fooocus_expansion] + default_styles,
label='Image Style')
with gr.Tab(label='Advanced'):
with gr.Row():
base_model = gr.Dropdown(label='SDXL Base Model', choices=modules.path.model_filenames, value=modules.path.default_base_model_name, show_label=True)
@ -110,8 +109,8 @@ with shared.gradio_root:
advanced_checkbox.change(lambda x: gr.update(visible=x), advanced_checkbox, right_col)
ctrls = [
prompt, negative_prompt, style_selction,
performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness, raw_mode_check
prompt, negative_prompt, style_selections,
performance_selction, aspect_ratios_selction, image_number, image_seed, sharpness
]
ctrls += [base_model, refiner_model] + lora_ctrls
run_button.click(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed)\