diff --git a/fooocus_version.py b/fooocus_version.py index ff5a8830..e6525665 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.864' +version = '2.1.865 (mashb1t)' diff --git a/javascript/contextMenus.js b/javascript/contextMenus.js index 2f32af1b..7494674d 100644 --- a/javascript/contextMenus.js +++ b/javascript/contextMenus.js @@ -154,12 +154,8 @@ let cancelGenerateForever = function() { let generateOnRepeatForButtons = function() { generateOnRepeat('#generate_button', '#stop_button'); }; - appendContextMenuOption('#generate_button', 'Generate forever', generateOnRepeatForButtons); -// appendContextMenuOption('#stop_button', 'Generate forever', generateOnRepeatForButtons); -// appendContextMenuOption('#stop_button', 'Cancel generate forever', cancelGenerateForever); -// appendContextMenuOption('#generate_button', 'Cancel generate forever', cancelGenerateForever); })(); //End example Context Menu Items diff --git a/launch.py b/launch.py index fb7df30b..b0ff7f1a 100644 --- a/launch.py +++ b/launch.py @@ -10,7 +10,8 @@ os.chdir(root) os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" -os.environ["GRADIO_SERVER_PORT"] = "7865" +if "GRADIO_SERVER_PORT" not in os.environ: + os.environ["GRADIO_SERVER_PORT"] = "7865" ssl._create_default_https_context = ssl._create_unverified_context @@ -21,9 +22,6 @@ import fooocus_version from build_launcher import build_launcher from modules.launch_util import is_installed, run, python, run_pip, requirements_met from modules.model_loader import load_file_from_url -from modules import config - -os.environ["U2NET_HOME"] = config.path_inpaint REINSTALL_ALL = False TRY_INSTALL_XFORMERS = False @@ -84,6 +82,9 @@ if args.gpu_device_id is not None: os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu_device_id) print("Set device to:", args.gpu_device_id) +from modules import config +os.environ["U2NET_HOME"] = config.path_inpaint + def 
download_models(default_model, previous_default_models, checkpoint_downloads, embeddings_downloads, lora_downloads): for file_name, url in vae_approx_filenames: diff --git a/ldm_patched/contrib/external_canny.py b/ldm_patched/contrib/external_canny.py index 42c22210..7347ba1e 100644 --- a/ldm_patched/contrib/external_canny.py +++ b/ldm_patched/contrib/external_canny.py @@ -78,7 +78,7 @@ def spatial_gradient(input, normalized: bool = True): Return: the derivatives of the input feature map. with shape :math:`(B, C, 2, H, W)`. .. note:: - See a working example `here `__. Examples: >>> input = torch.rand(1, 3, 4, 4) @@ -120,7 +120,7 @@ def rgb_to_grayscale(image, rgb_weights = None): grayscale version of the image with shape :math:`(*,1,H,W)`. .. note:: - See a working example `here `__. Example: @@ -176,7 +176,7 @@ def canny( - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`. - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`. .. note:: - See a working example `here `__. 
Example: >>> input = torch.rand(5, 3, 4, 4) diff --git a/ldm_patched/modules/conds.py b/ldm_patched/modules/conds.py index ed03bd64..0ee184bc 100644 --- a/ldm_patched/modules/conds.py +++ b/ldm_patched/modules/conds.py @@ -3,8 +3,6 @@ import math import ldm_patched.modules.utils -def lcm(a, b): #TODO: eventually replace by math.lcm (added in python3.9) - return abs(a*b) // math.gcd(a, b) class CONDRegular: def __init__(self, cond): @@ -41,7 +39,7 @@ class CONDCrossAttn(CONDRegular): if s1[0] != s2[0] or s1[2] != s2[2]: #these 2 cases should not happen return False - mult_min = lcm(s1[1], s2[1]) + mult_min = math.lcm(s1[1], s2[1]) diff = mult_min // min(s1[1], s2[1]) if diff > 4: #arbitrary limit on the padding because it's probably going to impact performance negatively if it's too much return False @@ -52,7 +50,7 @@ class CONDCrossAttn(CONDRegular): crossattn_max_len = self.cond.shape[1] for x in others: c = x.cond - crossattn_max_len = lcm(crossattn_max_len, c.shape[1]) + crossattn_max_len = math.lcm(crossattn_max_len, c.shape[1]) conds.append(c) out = [] diff --git a/ldm_patched/pfn/architecture/HAT.py b/ldm_patched/pfn/architecture/HAT.py index 66947421..7e12ad0f 100644 --- a/ldm_patched/pfn/architecture/HAT.py +++ b/ldm_patched/pfn/architecture/HAT.py @@ -14,7 +14,7 @@ from .timm.weight_init import trunc_normal_ def drop_path(x, drop_prob: float = 0.0, training: bool = False): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py + From: https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py """ if drop_prob == 0.0 or not training: return x @@ -30,7 +30,7 @@ def drop_path(x, drop_prob: float = 0.0, training: bool = False): class DropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
- From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py + From: https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py """ def __init__(self, drop_prob=None): diff --git a/ldm_patched/pfn/architecture/RRDB.py b/ldm_patched/pfn/architecture/RRDB.py index b50db7c2..8d318b90 100644 --- a/ldm_patched/pfn/architecture/RRDB.py +++ b/ldm_patched/pfn/architecture/RRDB.py @@ -13,7 +13,7 @@ import torch.nn.functional as F from . import block as B -# Borrowed from https://github.com/rlaphoenix/VSGAN/blob/master/vsgan/archs/ESRGAN.py +# Borrowed from https://github.com/rlaphoenix/VSGAN/blob/master/vsgan/archs/esrgan.py # Which enhanced stuff that was already here class RRDBNet(nn.Module): def __init__( diff --git a/ldm_patched/pfn/architecture/face/codeformer.py b/ldm_patched/pfn/architecture/face/codeformer.py index 06614007..a0e2e985 100644 --- a/ldm_patched/pfn/architecture/face/codeformer.py +++ b/ldm_patched/pfn/architecture/face/codeformer.py @@ -2,7 +2,7 @@ Modified from https://github.com/sczhou/CodeFormer VQGAN code, adapted from the original created by the Unleashing Transformers authors: https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py -This verison of the arch specifically was gathered from an old version of GFPGAN. If this is a problem, please contact me. +This version of the arch specifically was gathered from an old version of GFPGAN. If this is a problem, please contact me. 
""" import math from typing import Optional diff --git a/modules/async_worker.py b/modules/async_worker.py index 33a19cf5..119f1f27 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -375,11 +375,11 @@ def worker(): ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path) ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_face_path) - switch = int(round(steps * refiner_switch)) - if overwrite_step > 0: steps = overwrite_step + switch = int(round(steps * refiner_switch)) + if overwrite_switch > 0: switch = overwrite_switch @@ -773,8 +773,7 @@ def worker(): done_steps = current_task_id * steps + step async_task.yields.append(['preview', ( int(15.0 + 85.0 * float(done_steps) / float(all_steps)), - f'Sampling Image {current_task_id + 1}/{image_number}, Step {step + 1}/{total_steps} ...', - y)]) + f'Sampling Image {current_task_id + 1}/{image_number}, Step {step + 1}/{total_steps} ...', y)]) for current_task_id, task in enumerate(tasks): execution_start_time = time.perf_counter() diff --git a/modules/config.py b/modules/config.py index 7574dee2..1cec6fa5 100644 --- a/modules/config.py +++ b/modules/config.py @@ -137,6 +137,18 @@ def try_load_preset_global(preset): preset = args_manager.args.preset try_load_preset_global(preset) +def get_path_output(make_directory=False) -> str: + """ + Checking output path argument and overriding default path. 
+ """ + global config_dict + path_output = get_dir_or_set_default('path_outputs', '../outputs/', make_directory) + if args_manager.args.output_path: + print(f'[CONFIG] Overriding config value path_outputs with {args_manager.args.output_path}') + config_dict['path_outputs'] = path_output = args_manager.args.output_path + return path_output + + def get_dir_or_set_default(key, default_value, make_directory=False): global config_dict, visited_keys, always_save_keys @@ -173,8 +185,8 @@ path_inpaint = get_dir_or_set_default('path_inpaint', '../models/inpaint/') path_controlnet = get_dir_or_set_default('path_controlnet', '../models/controlnet/') path_clip_vision = get_dir_or_set_default('path_clip_vision', '../models/clip_vision/') path_fooocus_expansion = get_dir_or_set_default('path_fooocus_expansion', '../models/prompt_expansion/fooocus_expansion') -path_outputs = get_dir_or_set_default('path_outputs', '../outputs/', True) path_safety_checker_models = get_dir_or_set_default('path_safety_checker_models', '../models/safety_checker_models/') +path_outputs = get_path_output(True) def get_config_item_or_set_default(key, default_value, validator, disable_empty_as_none=False): diff --git a/modules/launch_util.py b/modules/launch_util.py index 8d92fad0..b483d515 100644 --- a/modules/launch_util.py +++ b/modules/launch_util.py @@ -15,7 +15,7 @@ from packaging.requirements import Requirement logging.getLogger("torch.distributed.nn").setLevel(logging.ERROR) # sshh... 
logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage()) -re_requirement = re.compile(r"\s*([-_a-zA-Z0-9]+)\s*(?:==\s*([-+_.a-zA-Z0-9]+))?\s*") +re_requirement = re.compile(r"\s*([-\w]+)\s*(?:==\s*([-+.\w]+))?\s*") python = sys.executable default_command_live = (os.environ.get('LAUNCH_LIVE_OUTPUT') == "1") diff --git a/modules/private_logger.py b/modules/private_logger.py index e63f4ba0..8fa5f73c 100644 --- a/modules/private_logger.py +++ b/modules/private_logger.py @@ -90,7 +90,7 @@ def log(img, metadata, metadata_parser: MetadataParser | None = None, output_for """ ) - begin_part = f"Fooocus Log {date_string}{css_styles}{js}

Fooocus Log {date_string} (private)

\n

All images are clean, without any hidden data/meta, and safe to share with others.

\n\n" + begin_part = f"Fooocus Log {date_string}{css_styles}{js}

Fooocus Log {date_string} (private)

\n

All images are clean, without any hidden data/meta, and safe to share with others.

\n\n" end_part = f'\n' middle_part = log_cache.get(html_name, "") @@ -105,7 +105,7 @@ def log(img, metadata, metadata_parser: MetadataParser | None = None, output_for div_name = only_name.replace('.', '_') item = f"

\n" - item += f"" + item += f"" item += "
{only_name}
{only_name}
" for label, key, value in metadata: value_txt = str(value).replace('\n', '
') diff --git a/modules/util.py b/modules/util.py index de4a7fcc..4a919a1a 100644 --- a/modules/util.py +++ b/modules/util.py @@ -173,13 +173,13 @@ def get_files_from_folder(folder_path, exensions=None, name_filter=None): relative_path = os.path.relpath(root, folder_path) if relative_path == ".": relative_path = "" - for filename in sorted(files): + for filename in sorted(files, key=lambda s: s.casefold()): _, file_extension = os.path.splitext(filename) if (exensions is None or file_extension.lower() in exensions) and (name_filter is None or name_filter in _): path = os.path.join(relative_path, filename) filenames.append(path) - return sorted(filenames, key=lambda x: -1 if os.sep in x else 1) + return filenames def calculate_sha256(filename, length=HASH_SHA256_LENGTH) -> str: @@ -340,3 +340,6 @@ def is_json(data: str) -> bool: except (ValueError, AssertionError): return False return True + +def ordinal_suffix(number: int) -> str: + return 'th' if 10 <= number % 100 <= 20 else {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th') diff --git a/readme.md b/readme.md index c1abaed7..cf19e5f0 100644 --- a/readme.md +++ b/readme.md @@ -26,14 +26,14 @@ Sadly the creator of Fooocus has gone dark multiple times for an extended amount * ✨ show more details for each performance setting, e.g. 
steps * ✨ add default_overwrite_step handling for meta data and gradio (allows turbo preset switching to set default_overwrite_step correctly) * ✨ ~~https://github.com/lllyasviel/Fooocus/pull/1762 - add style preview on mouseover~~ -* 🐛 https://github.com/lllyasviel/Fooocus/pull/1784 - correctly sort files, display deepest directory level first -* ✨ https://github.com/lllyasviel/Fooocus/pull/1785 - update model Juggernaut XL v6 to v8 +* 🐛 ~~https://github.com/lllyasviel/Fooocus/pull/1784 - correctly sort files, display deepest directory level first~~ +* ✨ ~~https://github.com/lllyasviel/Fooocus/pull/1785 - update model Juggernaut XL v6 to v8~~ * ✨ https://github.com/lllyasviel/Fooocus/pull/1809 - reduce file size of preview images * ✨ https://github.com/lllyasviel/Fooocus/pull/1932 - use consistent file name in gradio * ✨ https://github.com/lllyasviel/Fooocus/pull/1863 - image extension support (png, jpg, webp) * ✨ https://github.com/lllyasviel/Fooocus/pull/1938 - automatically describe image on uov image upload if prompt is empty * ✨ https://github.com/lllyasviel/Fooocus/pull/1940 - meta data handling, schemes: Fooocus (json) and A1111 (plain text). Compatible with Civitai. -* ✨ https://github.com/lllyasviel/Fooocus/pull/1979 - prevent outdated history log link after midnight +* ✨ ~~https://github.com/lllyasviel/Fooocus/pull/1979 - prevent outdated history log link after midnight~~ * ✨ https://github.com/lllyasviel/Fooocus/pull/2032 - add inpaint mask generation functionality using rembg, incl. segmentation support ✨ = new feature
@@ -397,7 +397,7 @@ AMD is not intensively tested, however. The AMD support is in beta. Use `python entry_with_update.py --preset anime` or `python entry_with_update.py --preset realistic` for Fooocus Anime/Realistic Edition. -### Windows(AMD GPUs) +### Windows (AMD GPUs) Note that the [minimal requirement](#minimal-requirement) for different platforms is different. @@ -476,6 +476,13 @@ Given different goals, the default models and configs of Fooocus are different: Note that the download is **automatic** - you do not need to do anything if the internet connection is okay. However, you can download them manually if you (or move them from somewhere else) have your own preparation. +## UI Access and Authentication +In addition to running on localhost, Fooocus can also expose its UI in two ways: +* Local UI listener: use `--listen` (specify the port with e.g. `--port 8888`). +* API access: use `--share` (registers an endpoint at `.gradio.live`). + +In both cases, access is unauthenticated by default. You can add basic authentication by creating a file called `auth.json` in the main directory, which contains a list of JSON objects with the keys `user` and `pass` (see example in [auth-example.json](./auth-example.json)). + ## List of "Hidden" Tricks @@ -483,7 +490,7 @@ The below things are already inside the software, and **users do not need to do 1. GPT2-based [prompt expansion as a dynamic style "Fooocus V2".](https://github.com/lllyasviel/Fooocus/discussions/117#raw) (similar to Midjourney's hidden pre-processsing and "raw" mode, or the LeonardoAI's Prompt Magic). 2. Native refiner swap inside one single k-sampler. The advantage is that the refiner model can now reuse the base model's momentum (or ODE's history parameters) collected from k-sampling to achieve more coherent sampling. 
In Automatic1111's high-res fix and ComfyUI's node system, the base model and refiner use two independent k-samplers, which means the momentum is largely wasted, and the sampling continuity is broken. Fooocus uses its own advanced k-diffusion sampling that ensures seamless, native, and continuous swap in a refiner setup. (Update Aug 13: Actually, I discussed this with Automatic1111 several days ago, and it seems that the “native refiner swap inside one single k-sampler” is [merged]( https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/12371) into the dev branch of webui. Great!) -3. Negative ADM guidance. Because the highest resolution level of XL Base does not have cross attentions, the positive and negative signals for XL's highest resolution level cannot receive enough contrasts during the CFG sampling, causing the results to look a bit plastic or overly smooth in certain cases. Fortunately, since the XL's highest resolution level is still conditioned on image aspect ratios (ADM), we can modify the adm on the positive/negative side to compensate for the lack of CFG contrast in the highest resolution level. (Update Aug 16, the IOS App [Drawing Things](https://apps.apple.com/us/app/draw-things-ai-generation/id6444050820) will support Negative ADM Guidance. Great!) +3. Negative ADM guidance. Because the highest resolution level of XL Base does not have cross attentions, the positive and negative signals for XL's highest resolution level cannot receive enough contrasts during the CFG sampling, causing the results to look a bit plastic or overly smooth in certain cases. Fortunately, since the XL's highest resolution level is still conditioned on image aspect ratios (ADM), we can modify the adm on the positive/negative side to compensate for the lack of CFG contrast in the highest resolution level. (Update Aug 16, the iOS app [Draw Things](https://apps.apple.com/us/app/draw-things-ai-generation/id6444050820) will support Negative ADM Guidance. Great!) 4. 
We implemented a carefully tuned variation of Section 5.1 of ["Improving Sample Quality of Diffusion Models Using Self-Attention Guidance"](https://arxiv.org/pdf/2210.00939.pdf). The weight is set to very low, but this is Fooocus's final guarantee to make sure that the XL will never yield an overly smooth or plastic appearance (examples [here](https://github.com/lllyasviel/Fooocus/discussions/117#sharpness)). This can almost eliminate all cases for which XL still occasionally produces overly smooth results, even with negative ADM guidance. (Update 2023 Aug 18, the Gaussian kernel of SAG is changed to an anisotropic kernel for better structure preservation and fewer artifacts.) 5. We modified the style templates a bit and added the "cinematic-default". 6. We tested the "sd_xl_offset_example-lora_1.0.safetensors" and it seems that when the lora weight is below 0.5, the results are always better than XL without lora. diff --git a/webui.py b/webui.py index f33b09ff..75b6104e 100644 --- a/webui.py +++ b/webui.py @@ -360,7 +360,7 @@ with shared.gradio_root: return gr.update(value=f'\U0001F4DA History Log') history_link = gr.HTML() - shared.gradio_root.load(update_history_link,outputs=history_link) + shared.gradio_root.load(update_history_link, outputs=history_link, queue=False, show_progress=False) with gr.Tab(label='Styles'): style_sorter.try_load_sorted_styles( @@ -796,6 +796,6 @@ shared.gradio_root.launch( server_name=args_manager.args.listen, server_port=args_manager.args.port, share=args_manager.args.share, - auth=check_auth if args_manager.args.share and auth_enabled else None, + auth=check_auth if (args_manager.args.share or args_manager.args.listen) and auth_enabled else None, blocked_paths=[constants.AUTH_FILENAME] )