From 323af5667a4f1a168cc31864aa9883b52dfb38bf Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Thu, 14 Dec 2023 13:55:49 -0800 Subject: [PATCH 01/23] 2.1.840 * Fix some potential frozen after model mismatch * Fix crash when cfg=1 when using anime preset * Added some guidelines for troubleshoot the "CUDA kernel errors asynchronously" problem --- fooocus_version.py | 2 +- modules/async_worker.py | 6 +++--- modules/patch.py | 14 +++++++++----- troubleshoot.md | 10 +++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index efcfe020..1118b486 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.839' +version = '2.1.840' diff --git a/modules/async_worker.py b/modules/async_worker.py index 4ffd4f51..93a76579 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -801,12 +801,12 @@ def worker(): task = async_tasks.pop(0) try: handler(task) - except: - traceback.print_exc() - finally: build_image_wall(task) task.yields.append(['finish', task.results]) pipeline.prepare_text_encoder(async_call=True) + except: + traceback.print_exc() + task.yields.append(['finish', task.results]) pass diff --git a/modules/patch.py b/modules/patch.py index c6012dfd..da678d00 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -214,16 +214,20 @@ def compute_cfg(uncond, cond, cfg_scale, t): def patched_sampling_function(model, x, timestep, uncond, cond, cond_scale, model_options=None, seed=None): - if math.isclose(cond_scale, 1.0): - return calc_cond_uncond_batch(model, cond, None, x, timestep, model_options)[0] - global eps_record + if math.isclose(cond_scale, 1.0): + final_x0 = calc_cond_uncond_batch(model, cond, None, x, timestep, model_options)[0] + + if eps_record is not None: + eps_record = ((x - final_x0) / timestep).cpu() + + return final_x0 + positive_x0, negative_x0 = calc_cond_uncond_batch(model, cond, uncond, x, timestep, model_options) positive_eps = x - positive_x0 negative_eps = x - negative_x0 - sigma = timestep alpha = 0.001 * sharpness * global_diffusion_progress @@ -234,7 +238,7 @@ def patched_sampling_function(model, x, timestep, uncond, cond, cond_scale, mode cfg_scale=cond_scale, t=global_diffusion_progress) if eps_record is not None: - eps_record = (final_eps / sigma).cpu() + eps_record = (final_eps / timestep).cpu() return x - final_eps diff --git a/troubleshoot.md b/troubleshoot.md index 0d4fbefa..7be743d5 100644 --- a/troubleshoot.md +++ b/troubleshoot.md @@ -118,12 +118,12 @@ If you get this error elsewhere in the world, then you may need to look at [this ### CUDA kernel errors might be asynchronously reported at some other API call -This problem is fixed two months ago. Please make sure that you are using the latest version of Fooocus (try fresh install). - -If it still does not work, try to upgrade your Nvidia driver. - -If it still does not work, open an issue with full log, and we will take a look. +A very small amount of devices does have this problem. The cause can be complicated but usually can be resolved after following these steps: +1. Make sure that you are using official version and latest version installed from [here](https://github.com/lllyasviel/Fooocus#download). (Some forks and other versions are more likely to cause this problem.) +2. Upgrade your Nvidia driver to the latest version. (Usually the version of your Nvidia driver should be 53X, not 3XX or 4XX.) +3. If things still do not work, then perhaps it is a problem with CUDA 12. 
You can use CUDA 11 and Xformers to try to solve this problem. We have prepared all files for you, and please do NOT install any CUDA or other environment on your own. The only official way to do this is: (1) Back up and delete your `python_embeded` folder (near the `run.bat`); (2) Download the "previous_old_xformers_env.7z" from the [release page](https://github.com/lllyasviel/Fooocus/releases/tag/release), decompress it, and put the newly extracted `python_embeded` folder near your `run.bat`; (3) run Fooocus.
+4. If it still does not work, please open an issue for us to take a look.
 
 ### Found no NVIDIA driver on your system
 

From a39c1d134967724f2d0e475df62776c621bf1303 Mon Sep 17 00:00:00 2001
From: lllyasviel
Date: Thu, 14 Dec 2023 14:50:34 -0800
Subject: [PATCH 02/23] Update readme.md (#1416)

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 8a91c320..feb8e362 100644
--- a/readme.md
+++ b/readme.md
@@ -121,7 +121,7 @@ See also the common problems and troubleshoots [here](troubleshoot.md).
 
 In Colab, you can modify the last line to `!python entry_with_update.py --share` or `!python entry_with_update.py --preset anime --share` or `!python entry_with_update.py --preset realistic --share` for Fooocus Default/Anime/Realistic Edition.
 
-Note that this Colab will disable refiner by default because Colab free's resource is relatively limited.
+Note that this Colab will disable refiner by default because Colab free's resource is relatively limited (and some "big" features like image prompt may cause free-tier Colab to disconnect). We make sure that basic text-to-image is always working on free-tier Colab.
 
 Thanks to [camenduru](https://github.com/camenduru)!
 

From 179bcb2c4e6e6b9574c5a38e28e3c9813ed95bd7 Mon Sep 17 00:00:00 2001
From: lllyasviel
Date: Thu, 14 Dec 2023 19:15:16 -0800
Subject: [PATCH 03/23] Fix inpaint device problem in `--always-gpu` mode.
(#1420) 2.1.841 --- fooocus_version.py | 2 +- ldm_patched/modules/samplers.py | 46 ++++++++++++++++----------------- modules/core.py | 2 +- modules/patch.py | 7 ++--- update_log.md | 8 ++++++ 5 files changed, 37 insertions(+), 28 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 1118b486..5ab49105 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.840' +version = '2.1.841' diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py index 9996e74c..4e13d727 100644 --- a/ldm_patched/modules/samplers.py +++ b/ldm_patched/modules/samplers.py @@ -2,6 +2,7 @@ from ldm_patched.k_diffusion import sampling as k_diffusion_sampling from ldm_patched.unipc import uni_pc import torch import enum +import collections from ldm_patched.modules import model_management import math from ldm_patched.modules import model_base @@ -61,9 +62,7 @@ def get_area_and_mult(conds, x_in, timestep_in): for c in model_conds: conditioning[c] = model_conds[c].process_cond(batch_size=x_in.shape[0], device=x_in.device, area=area) - control = None - if 'control' in conds: - control = conds['control'] + control = conds.get('control', None) patches = None if 'gligen' in conds: @@ -78,7 +77,8 @@ def get_area_and_mult(conds, x_in, timestep_in): patches['middle_patch'] = [gligen_patch] - return (input_x, mult, conditioning, area, control, patches) + cond_obj = collections.namedtuple('cond_obj', ['input_x', 'mult', 'conditioning', 'area', 'control', 'patches']) + return cond_obj(input_x, mult, conditioning, area, control, patches) def cond_equal_size(c1, c2): if c1 is c2: @@ -91,24 +91,24 @@ def cond_equal_size(c1, c2): return True def can_concat_cond(c1, c2): - if c1[0].shape != c2[0].shape: + if c1.input_x.shape != c2.input_x.shape: return False - #control - if (c1[4] is None) != (c2[4] is None): - return False - if c1[4] is not None: - if c1[4] is not c2[4]: + def objects_concatable(obj1, obj2): + if (obj1 is None) != (obj2 is None): return False + if obj1 is not None: + if obj1 is not obj2: + return False + return True - #patches - if (c1[5] is None) != (c2[5] is None): + if not objects_concatable(c1.control, c2.control): return False - if (c1[5] is not None): - if c1[5] is not c2[5]: - return False - return cond_equal_size(c1[2], c2[2]) + if not objects_concatable(c1.patches, c2.patches): + return False + + return cond_equal_size(c1.conditioning, c2.conditioning) def cond_cat(c_list): c_crossattn = [] @@ -184,13 +184,13 @@ def calc_cond_uncond_batch(model, cond, uncond, x_in, timestep, model_options): for x in to_batch: o = to_run.pop(x) p = o[0] - input_x += [p[0]] - mult += [p[1]] - c += [p[2]] - area += [p[3]] - cond_or_uncond += [o[1]] - control = p[4] - patches = p[5] + input_x.append(p.input_x) + mult.append(p.mult) + c.append(p.conditioning) + area.append(p.area) + cond_or_uncond.append(o[1]) + control = p.control + patches = p.patches batch_chunks = len(cond_or_uncond) input_x = torch.cat(input_x) diff --git a/modules/core.py b/modules/core.py index 86c56b5c..989b8e32 100644 --- a/modules/core.py +++ b/modules/core.py @@ -191,7 +191,7 @@ def encode_vae_inpaint(vae, pixels, mask): latent_mask = mask[:, None, :, :] latent_mask = torch.nn.functional.interpolate(latent_mask, size=(H * 8, W * 8), mode="bilinear").round() - latent_mask = torch.nn.functional.max_pool2d(latent_mask, (8, 8)).round() + latent_mask = torch.nn.functional.max_pool2d(latent_mask, (8, 8)).round().to(latent) return latent, latent_mask diff --git a/modules/patch.py 
b/modules/patch.py index da678d00..0a04bafb 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -269,9 +269,10 @@ def sdxl_encode_adm_patched(self, **kwargs): height = float(height) * positive_adm_scale def embedder(number_list): - h = [self.embedder(torch.Tensor([number])) for number in number_list] - y = torch.flatten(torch.cat(h)).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) - return y + h = torch.tensor(number_list, dtype=torch.float32) + h = self.embedder(h) + h = torch.flatten(h).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) + return h width, height = round_to_64(width), round_to_64(height) target_width, target_height = round_to_64(target_width), round_to_64(target_height) diff --git a/update_log.md b/update_log.md index f889070b..82698557 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,11 @@ +# 2.1.841 + +* Backend maintain. +* Fix some potential frozen after model mismatch. +* Fix crash when cfg=1 when using anime preset. +* Added some guidelines for troubleshoot the "CUDA kernel errors asynchronously" problem. +* Fix inpaint device problem in `--always-gpu` mode. + # 2.1.839 * Maintained some computation codes in backend for efficiency. From 059037eeb27cea6a729b11cd010ecf84a613a072 Mon Sep 17 00:00:00 2001 From: Danil Boldyrev Date: Fri, 15 Dec 2023 22:37:11 +0300 Subject: [PATCH 04/23] fix zooming on large-width images (#1428) --- css/style.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/style.css b/css/style.css index 09ce6cf0..b87b20a7 100644 --- a/css/style.css +++ b/css/style.css @@ -94,6 +94,10 @@ overflow:inherit !important; } +.gradio-container{ + overflow: visible; +} + /* fullpage image viewer */ #lightboxModal{ From 26ea5085887c90a5a5b1766cec1672f90e86fa2f Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Fri, 15 Dec 2023 11:37:45 -0800 Subject: [PATCH 05/23] backend maintain (#1429) --- ldm_patched/contrib/external_sag.py | 6 +----- ldm_patched/ldm/modules/attention.py | 4 +--- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/ldm_patched/contrib/external_sag.py b/ldm_patched/contrib/external_sag.py index 59d1890c..3505b44e 100644 --- a/ldm_patched/contrib/external_sag.py +++ b/ldm_patched/contrib/external_sag.py @@ -29,9 +29,7 @@ def attention_basic_with_sim(q, k, v, heads, mask=None): # force cast to fp32 to avoid overflowing if _ATTN_PRECISION =="fp32": - with torch.autocast(enabled=False, device_type = 'cuda'): - q, k = q.float(), k.float() - sim = einsum('b i d, b j d -> b i j', q, k) * scale + sim = einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale else: sim = einsum('b i d, b j d -> b i j', q, k) * scale @@ -113,7 +111,6 @@ class SelfAttentionGuidance: m = model.clone() attn_scores = None - mid_block_shape = None # TODO: make this work properly with chunked batches # currently, we can only save the attn from one UNet call @@ -136,7 +133,6 @@ class SelfAttentionGuidance: def post_cfg_function(args): nonlocal attn_scores - nonlocal mid_block_shape uncond_attn = attn_scores sag_scale = scale diff --git a/ldm_patched/ldm/modules/attention.py b/ldm_patched/ldm/modules/attention.py index f4579bac..49e502ed 100644 --- a/ldm_patched/ldm/modules/attention.py +++ b/ldm_patched/ldm/modules/attention.py @@ -104,9 +104,7 @@ def attention_basic(q, k, v, heads, mask=None): # force cast to fp32 to avoid overflowing if _ATTN_PRECISION =="fp32": - with torch.autocast(enabled=False, device_type = 'cuda'): - q, k = q.float(), k.float() - sim = einsum('b i d, b j d -> b i j', q, k) * scale + sim = einsum('b i d, b j d -> b i 
j', q.float(), k.float()) * scale else: sim = einsum('b i d, b j d -> b i j', q, k) * scale From a7f82b2545a5959d00edfaf5e92a7f2cdabd9223 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Fri, 15 Dec 2023 11:40:01 -0800 Subject: [PATCH 06/23] Update fooocus_version.py (#1430) --- fooocus_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fooocus_version.py b/fooocus_version.py index 5ab49105..e3f05a10 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.841' +version = '2.1.842' From 05ef3e68619114492c11d4c4547df7b245520040 Mon Sep 17 00:00:00 2001 From: Danil Boldyrev Date: Sat, 16 Dec 2023 00:27:14 +0300 Subject: [PATCH 07/23] clean up, add undo and some small changes (#1432) --- javascript/zoom.js | 291 +++++++++++++-------------------------------- 1 file changed, 85 insertions(+), 206 deletions(-) diff --git a/javascript/zoom.js b/javascript/zoom.js index e3fdcfb7..450a0347 100644 --- a/javascript/zoom.js +++ b/javascript/zoom.js @@ -1,18 +1,5 @@ onUiLoaded(async() => { // Helper functions - // Get active tab - - /** - * Waits for an element to be present in the DOM. - */ - const waitForElement = (id) => new Promise(resolve => { - const checkForElement = () => { - const element = document.querySelector(id); - if (element) return resolve(element); - setTimeout(checkForElement, 100); - }; - checkForElement(); - }); // Detect whether the element has a horizontal scroll bar function hasHorizontalScrollbar(element) { @@ -33,140 +20,40 @@ onUiLoaded(async() => { } } - // Check if hotkey is valid - function isValidHotkey(value) { - const specialKeys = ["Ctrl", "Alt", "Shift", "Disable"]; - return ( - (typeof value === "string" && - value.length === 1 && - /[a-z]/i.test(value)) || - specialKeys.includes(value) - ); - } - - // Normalize hotkey - function normalizeHotkey(hotkey) { - return hotkey.length === 1 ? "Key" + hotkey.toUpperCase() : hotkey; - } - - // Format hotkey for display - function formatHotkeyForDisplay(hotkey) { - return hotkey.startsWith("Key") ? hotkey.slice(3) : hotkey; - } - // Create hotkey configuration with the provided options function createHotkeyConfig(defaultHotkeysConfig) { const result = {}; // Resulting hotkey configuration - for (const key in defaultHotkeysConfig) { result[key] = defaultHotkeysConfig[key]; } - return result; } - // Disables functions in the config object based on the provided list of function names - function disableFunctions(config, disabledFunctions) { - // Bind the hasOwnProperty method to the functionMap object to avoid errors - const hasOwnProperty = - Object.prototype.hasOwnProperty.bind(functionMap); - - // Loop through the disabledFunctions array and disable the corresponding functions in the config object - disabledFunctions.forEach(funcName => { - if (hasOwnProperty(funcName)) { - const key = functionMap[funcName]; - config[key] = "disable"; - } - }); - - // Return the updated config object - return config; - } - - /** - * The restoreImgRedMask function displays a red mask around an image to indicate the aspect ratio. - * If the image display property is set to 'none', the mask breaks. To fix this, the function - * temporarily sets the display property to 'block' and then hides the mask again after 300 milliseconds - * to avoid breaking the canvas. Additionally, the function adjusts the mask to work correctly on - * very long images. 
- */ - function restoreImgRedMask(elements) { - const mainTabId = getTabId(elements); - - if (!mainTabId) return; - - const mainTab = gradioApp().querySelector(mainTabId); - const img = mainTab.querySelector("img"); - const imageARPreview = gradioApp().querySelector("#imageARPreview"); - - if (!img || !imageARPreview) return; - - imageARPreview.style.transform = ""; - if (parseFloat(mainTab.style.width) > 865) { - const transformString = mainTab.style.transform; - const scaleMatch = transformString.match( - /scale\(([-+]?[0-9]*\.?[0-9]+)\)/ - ); - let zoom = 1; // default zoom - - if (scaleMatch && scaleMatch[1]) { - zoom = Number(scaleMatch[1]); - } - - imageARPreview.style.transformOrigin = "0 0"; - imageARPreview.style.transform = `scale(${zoom})`; - } - - if (img.style.display !== "none") return; - - img.style.display = "block"; - - setTimeout(() => { - img.style.display = "none"; - }, 400); - } - // Default config const defaultHotkeysConfig = { - canvas_hotkey_zoom: "Alt", + canvas_hotkey_zoom: "Shift", canvas_hotkey_adjust: "Ctrl", + canvas_zoom_undo_extra_key: "Ctrl", + canvas_zoom_hotkey_undo: "KeyZ", canvas_hotkey_reset: "KeyR", canvas_hotkey_fullscreen: "KeyS", canvas_hotkey_move: "KeyF", - canvas_hotkey_overlap: "KeyO", - canvas_disabled_functions: [], canvas_show_tooltip: true, canvas_auto_expand: true, - canvas_blur_prompt: false, - }; - - const functionMap = { - "Zoom": "canvas_hotkey_zoom", - "Adjust brush size": "canvas_hotkey_adjust", - "Moving canvas": "canvas_hotkey_move", - "Fullscreen": "canvas_hotkey_fullscreen", - "Reset Zoom": "canvas_hotkey_reset", - "Overlap": "canvas_hotkey_overlap" + canvas_blur_prompt: true, }; // Loading the configuration from opts - const preHotkeysConfig = createHotkeyConfig( + const hotkeysConfig = createHotkeyConfig( defaultHotkeysConfig ); - // Disable functions that are not needed by the user - const hotkeysConfig = disableFunctions( - preHotkeysConfig, - preHotkeysConfig.canvas_disabled_functions - ); - let isMoving = false; - let mouseX, mouseY; let activeElement; const elemData = {}; - function applyZoomAndPan(elemId, isExtension = true) { + function applyZoomAndPan(elemId) { const targetElement = gradioApp().querySelector(elemId); if (!targetElement) { @@ -181,6 +68,7 @@ onUiLoaded(async() => { panX: 0, panY: 0 }; + let fullScreenMode = false; // Create tooltip @@ -211,44 +99,46 @@ onUiLoaded(async() => { action: "Adjust brush size", keySuffix: " + wheel" }, + {configKey: "canvas_zoom_hotkey_undo", action: "Undo last action", keyPrefix: `${hotkeysConfig.canvas_zoom_undo_extra_key} + ` }, {configKey: "canvas_hotkey_reset", action: "Reset zoom"}, { configKey: "canvas_hotkey_fullscreen", action: "Fullscreen mode" }, - {configKey: "canvas_hotkey_move", action: "Move canvas"}, - {configKey: "canvas_hotkey_overlap", action: "Overlap"} + {configKey: "canvas_hotkey_move", action: "Move canvas"} ]; - // Create hotkeys array with disabled property based on the config values - const hotkeys = hotkeysInfo.map(info => { + // Create hotkeys array based on the config values + const hotkeys = hotkeysInfo.map((info) => { const configValue = hotkeysConfig[info.configKey]; - const key = info.keySuffix ? 
- `${configValue}${info.keySuffix}` : - configValue.charAt(configValue.length - 1); - return { - key, - action: info.action, - disabled: configValue === "disable" - }; - }); - - for (const hotkey of hotkeys) { - if (hotkey.disabled) { - continue; + + let key = configValue.slice(-1); + + if (info.keySuffix) { + key = `${configValue}${info.keySuffix}`; } + + if (info.keyPrefix && info.keyPrefix !== "None + ") { + key = `${info.keyPrefix}${configValue[3]}`; + } + + return { + key, + action: info.action, + }; + }); + + hotkeys + .forEach(hotkey => { + const p = document.createElement("p"); + p.innerHTML = `${hotkey.key} - ${hotkey.action}`; + tooltipContent.appendChild(p); + }); + + tooltip.append(info, tooltipContent); - const p = document.createElement("p"); - p.innerHTML = `${hotkey.key} - ${hotkey.action}`; - tooltipContent.appendChild(p); - } - - // Add information and content elements to the tooltip element - tooltip.appendChild(info); - tooltip.appendChild(tooltipContent); - - // Add a hint element to the target element - toolTipElemnt.appendChild(tooltip); + // Add a hint element to the target element + toolTipElemnt.appendChild(tooltip); } //Show tool tip if setting enable @@ -264,9 +154,7 @@ onUiLoaded(async() => { panY: 0 }; - if (isExtension) { - targetElement.style.overflow = "hidden"; - } + targetElement.style.overflow = "hidden"; targetElement.isZoomed = false; @@ -284,7 +172,7 @@ onUiLoaded(async() => { closeBtn.addEventListener("click", resetZoom); } - if (canvas && isExtension) { + if (canvas) { const parentElement = targetElement.closest('[id^="component-"]'); if ( canvas && @@ -297,16 +185,6 @@ onUiLoaded(async() => { } - if ( - canvas && - !isExtension && - parseFloat(canvas.style.width) > 865 && - parseFloat(targetElement.style.width) > 865 - ) { - fitToElement(); - return; - } - targetElement.style.width = ""; } @@ -372,12 +250,10 @@ onUiLoaded(async() => { targetElement.style.transformOrigin = "0 0"; targetElement.style.transform = `translate(${elemData[elemId].panX}px, ${elemData[elemId].panY}px) scale(${newZoomLevel})`; + targetElement.style.overflow = "visible"; toggleOverlap("on"); - if (isExtension) { - targetElement.style.overflow = "visible"; - } - + return newZoomLevel; } @@ -388,6 +264,7 @@ onUiLoaded(async() => { let zoomPosX, zoomPosY; let delta = 0.2; + if (elemData[elemId].zoomLevel > 7) { delta = 0.9; } else if (elemData[elemId].zoomLevel > 2) { @@ -421,12 +298,7 @@ onUiLoaded(async() => { let parentElement; - if (isExtension) { - parentElement = targetElement.closest('[id^="component-"]'); - } else { - parentElement = targetElement.parentElement; - } - + parentElement = targetElement.closest('[id^="component-"]'); // Get element and screen dimensions const elementWidth = targetElement.offsetWidth; @@ -455,6 +327,26 @@ onUiLoaded(async() => { toggleOverlap("off"); } + // Undo last action + function undoLastAction(e) { + let isCtrlPressed = isModifierKey(e, hotkeysConfig.canvas_zoom_undo_extra_key) + const isAuxButton = e.button >= 3; + + if (isAuxButton) { + isCtrlPressed = true + } else { + if (!isModifierKey(e, hotkeysConfig.canvas_zoom_undo_extra_key)) return; + } + + // Move undoBtn query outside the if statement to avoid unnecessary queries + const undoBtn = document.querySelector(`${activeElement} button[aria-label="Undo"]`); + + if ((isCtrlPressed) && undoBtn ) { + e.preventDefault(); + undoBtn.click(); + } + } + /** * This function fits the target element to the screen by calculating * the required scale and offsets. 
It also updates the global variables @@ -469,13 +361,8 @@ onUiLoaded(async() => { if (!canvas) return; - if (canvas.offsetWidth > 862 || isExtension) { - targetElement.style.width = (canvas.offsetWidth + 2) + "px"; - } - - if (isExtension) { - targetElement.style.overflow = "visible"; - } + targetElement.style.width = (canvas.offsetWidth + 2) + "px"; + targetElement.style.overflow = "visible"; if (fullScreenMode) { resetZoom(); @@ -549,11 +436,11 @@ onUiLoaded(async() => { } } - const hotkeyActions = { [hotkeysConfig.canvas_hotkey_reset]: resetZoom, [hotkeysConfig.canvas_hotkey_overlap]: toggleOverlap, - [hotkeysConfig.canvas_hotkey_fullscreen]: fitToScreen + [hotkeysConfig.canvas_hotkey_fullscreen]: fitToScreen, + [hotkeysConfig.canvas_zoom_hotkey_undo]: undoLastAction, }; const action = hotkeyActions[event.code]; @@ -597,26 +484,27 @@ onUiLoaded(async() => { } targetElement.addEventListener("mousemove", getMousePosition); + targetElement.addEventListener("auxclick", undoLastAction); //observers // Creating an observer with a callback function to handle DOM changes const observer = new MutationObserver((mutationsList, observer) => { for (let mutation of mutationsList) { - // If the style attribute of the canvas has changed, by observation it happens only when the picture changes - if (mutation.type === 'attributes' && mutation.attributeName === 'style' && - mutation.target.tagName.toLowerCase() === 'canvas') { - targetElement.isExpanded = false; - setTimeout(resetZoom, 10); - } + // If the style attribute of the canvas has changed, by observation it happens only when the picture changes + if (mutation.type === 'attributes' && mutation.attributeName === 'style' && + mutation.target.tagName.toLowerCase() === 'canvas') { + targetElement.isExpanded = false; + setTimeout(resetZoom, 10); + } } - }); - - // Apply auto expand if enabled - if (hotkeysConfig.canvas_auto_expand) { + }); + + // Apply auto expand if enabled + if (hotkeysConfig.canvas_auto_expand) { targetElement.addEventListener("mousemove", autoExpand); // Set up an observer to track attribute changes - observer.observe(targetElement, {attributes: true, childList: true, subtree: true}); - } + observer.observe(targetElement, { attributes: true, childList: true, subtree: true }); + } // Handle events only inside the targetElement let isKeyDownHandlerAttached = false; @@ -661,7 +549,7 @@ onUiLoaded(async() => { function handleMoveKeyDown(e) { // Disable key locks to make pasting from the buffer work correctly - if ((e.ctrlKey && e.code === 'KeyV') || (e.ctrlKey && event.code === 'KeyC') || e.code === "F5") { + if ((e.ctrlKey && e.code === 'KeyV') || (e.ctrlKey && e.code === 'KeyC') || e.code === "F5") { return; } @@ -713,11 +601,7 @@ onUiLoaded(async() => { if (isMoving && elemId === activeElement) { updatePanPosition(e.movementX, e.movementY); targetElement.style.pointerEvents = "none"; - - if (isExtension) { - targetElement.style.overflow = "visible"; - } - + targetElement.style.overflow = "visible"; } else { targetElement.style.pointerEvents = "auto"; } @@ -745,18 +629,13 @@ onUiLoaded(async() => { } } - if (isExtension) { - targetElement.addEventListener("mousemove", checkForOutBox); - } - + targetElement.addEventListener("mousemove", checkForOutBox); window.addEventListener('resize', (e) => { resetZoom(); - if (isExtension) { - targetElement.isExpanded = false; - targetElement.isZoomed = false; - } + targetElement.isExpanded = false; + targetElement.isZoomed = false; }); gradioApp().addEventListener("mousemove", handleMoveByKey); 
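The attention change in [PATCH 05/23] above simplifies the fp32 score path: rather than wrapping the computation in a disabled-autocast block, it casts `q` and `k` to fp32 directly for the einsum, keeping the "force cast to fp32 to avoid overflowing" behavior with less ceremony. A minimal standalone sketch of that pattern (the tensor shapes, the `scale` definition, and the function name here are illustrative assumptions, not code from the patch):

```python
import torch
from torch import einsum

def attention_scores_fp32(q, k):
    # Cast q/k to fp32 only for the score computation, mirroring the
    # einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale pattern above.
    scale = q.shape[-1] ** -0.5
    return einsum('b i d, b j d -> b i j', q.float(), k.float()) * scale

# Example with half-precision inputs shaped (batch*heads, tokens, head_dim)
q = torch.randn(2, 77, 64, dtype=torch.float16)
k = torch.randn(2, 77, 64, dtype=torch.float16)
sim = attention_scores_fp32(q, k)   # scores come out in fp32
attn = sim.softmax(dim=-1)          # softmax over fp32 scores stays numerically stable
```

The downstream softmax and value multiplication can then proceed in whichever precision the caller prefers; only the score matrix is forced to fp32.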
From 776e080b3f5f1d716208d07ce69ca20ac78cdf7c Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Fri, 15 Dec 2023 13:29:53 -0800 Subject: [PATCH 08/23] add thanks (#1433) --- fooocus_version.py | 2 +- readme.md | 2 +- update_log.md | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index e3f05a10..1a708c50 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.842' +version = '2.1.843' diff --git a/readme.md b/readme.md index feb8e362..78682fc0 100644 --- a/readme.md +++ b/readme.md @@ -384,7 +384,7 @@ See also [About Forking and Promotion of Forks](https://github.com/lllyasviel/Fo ## Thanks -Special thanks to [twri](https://github.com/twri) and [3Diva](https://github.com/3Diva) for creating additional SDXL styles available in Fooocus. +Special thanks to [twri](https://github.com/twri) and [3Diva](https://github.com/3Diva) for creating additional SDXL styles available in Fooocus. Thanks [daswer123](https://github.com/daswer123) for contributing the Canvas Zoom! ## Update Log diff --git a/update_log.md b/update_log.md index 82698557..640b5665 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.843 + +* Many improvements to Canvas. Thanks CanvasZoom author! + # 2.1.841 * Backend maintain. From 3a727fd240151ab247fb554d47ff587a79e3bdb0 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 10:34:47 -0800 Subject: [PATCH 09/23] 2.1.844 * maintain clip vision device * update links in troubleshoot --- extras/ip_adapter.py | 9 +-- fooocus_version.py | 2 +- ldm_patched/contrib/external.py | 1 + ldm_patched/contrib/external_latent.py | 25 +++++++- .../contrib/external_model_advanced.py | 48 +++------------- ldm_patched/contrib/external_perpneg.py | 57 +++++++++++++++++++ ldm_patched/contrib/external_sag.py | 2 +- ldm_patched/modules/clip_vision.py | 31 ++++------ ldm_patched/modules/samplers.py | 3 +- troubleshoot.md | 12 ++-- 10 files changed, 113 insertions(+), 77 deletions(-) create mode 100644 ldm_patched/contrib/external_perpneg.py diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py index a145b68d..b18f0dfc 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -167,14 +167,7 @@ def preprocess(img, ip_adapter_path): ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device)) - - if clip_vision.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32): - outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2) + outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2) ip_adapter = entry['ip_adapter'] ip_layers = entry['ip_layers'] diff --git a/fooocus_version.py b/fooocus_version.py index 1a708c50..a7dac990 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.843' +version = '2.1.844' diff --git a/ldm_patched/contrib/external.py b/ldm_patched/contrib/external.py index e20b08c5..7f95f084 100644 --- a/ldm_patched/contrib/external.py +++ b/ldm_patched/contrib/external.py @@ -1870,6 +1870,7 @@ def init_custom_nodes(): "nodes_images.py", "nodes_video_model.py", "nodes_sag.py", + "nodes_perpneg.py", ] for node_file in extras_files: diff --git a/ldm_patched/contrib/external_latent.py 
b/ldm_patched/contrib/external_latent.py index e2364b88..c6f874e1 100644 --- a/ldm_patched/contrib/external_latent.py +++ b/ldm_patched/contrib/external_latent.py @@ -5,9 +5,7 @@ import torch def reshape_latent_to(target_shape, latent): if latent.shape[1:] != target_shape[1:]: - latent.movedim(1, -1) latent = ldm_patched.modules.utils.common_upscale(latent, target_shape[3], target_shape[2], "bilinear", "center") - latent.movedim(-1, 1) return ldm_patched.modules.utils.repeat_to_batch_size(latent, target_shape[0]) @@ -104,9 +102,32 @@ class LatentInterpolate: samples_out["samples"] = st * (m1 * ratio + m2 * (1.0 - ratio)) return (samples_out,) +class LatentBatch: + @classmethod + def INPUT_TYPES(s): + return {"required": { "samples1": ("LATENT",), "samples2": ("LATENT",)}} + + RETURN_TYPES = ("LATENT",) + FUNCTION = "batch" + + CATEGORY = "latent/batch" + + def batch(self, samples1, samples2): + samples_out = samples1.copy() + s1 = samples1["samples"] + s2 = samples2["samples"] + + if s1.shape[1:] != s2.shape[1:]: + s2 = ldm_patched.modules.utils.common_upscale(s2, s1.shape[3], s1.shape[2], "bilinear", "center") + s = torch.cat((s1, s2), dim=0) + samples_out["samples"] = s + samples_out["batch_index"] = samples1.get("batch_index", [x for x in range(0, s1.shape[0])]) + samples2.get("batch_index", [x for x in range(0, s2.shape[0])]) + return (samples_out,) + NODE_CLASS_MAPPINGS = { "LatentAdd": LatentAdd, "LatentSubtract": LatentSubtract, "LatentMultiply": LatentMultiply, "LatentInterpolate": LatentInterpolate, + "LatentBatch": LatentBatch, } diff --git a/ldm_patched/contrib/external_model_advanced.py b/ldm_patched/contrib/external_model_advanced.py index 4ebd9dbf..03a2f045 100644 --- a/ldm_patched/contrib/external_model_advanced.py +++ b/ldm_patched/contrib/external_model_advanced.py @@ -19,41 +19,19 @@ class LCM(ldm_patched.modules.model_sampling.EPS): return c_out * x0 + c_skip * model_input -class ModelSamplingDiscreteDistilled(torch.nn.Module): +class ModelSamplingDiscreteDistilled(ldm_patched.modules.model_sampling.ModelSamplingDiscrete): original_timesteps = 50 - def __init__(self): - super().__init__() - self.sigma_data = 1.0 - timesteps = 1000 - beta_start = 0.00085 - beta_end = 0.012 + def __init__(self, model_config=None): + super().__init__(model_config) - betas = torch.linspace(beta_start**0.5, beta_end**0.5, timesteps, dtype=torch.float32) ** 2 - alphas = 1.0 - betas - alphas_cumprod = torch.cumprod(alphas, dim=0) + self.skip_steps = self.num_timesteps // self.original_timesteps - self.skip_steps = timesteps // self.original_timesteps - - - alphas_cumprod_valid = torch.zeros((self.original_timesteps), dtype=torch.float32) + sigmas_valid = torch.zeros((self.original_timesteps), dtype=torch.float32) for x in range(self.original_timesteps): - alphas_cumprod_valid[self.original_timesteps - 1 - x] = alphas_cumprod[timesteps - 1 - x * self.skip_steps] + sigmas_valid[self.original_timesteps - 1 - x] = self.sigmas[self.num_timesteps - 1 - x * self.skip_steps] - sigmas = ((1 - alphas_cumprod_valid) / alphas_cumprod_valid) ** 0.5 - self.set_sigmas(sigmas) - - def set_sigmas(self, sigmas): - self.register_buffer('sigmas', sigmas) - self.register_buffer('log_sigmas', sigmas.log()) - - @property - def sigma_min(self): - return self.sigmas[0] - - @property - def sigma_max(self): - return self.sigmas[-1] + self.set_sigmas(sigmas_valid) def timestep(self, sigma): log_sigma = sigma.log() @@ -68,14 +46,6 @@ class ModelSamplingDiscreteDistilled(torch.nn.Module): log_sigma = (1 - w) * 
self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] return log_sigma.exp().to(timestep.device) - def percent_to_sigma(self, percent): - if percent <= 0.0: - return 999999999.9 - if percent >= 1.0: - return 0.0 - percent = 1.0 - percent - return self.sigma(torch.tensor(percent * 999.0)).item() - def rescale_zero_terminal_snr_sigmas(sigmas): alphas_cumprod = 1 / ((sigmas * sigmas) + 1) @@ -124,7 +94,7 @@ class ModelSamplingDiscrete: class ModelSamplingAdvanced(sampling_base, sampling_type): pass - model_sampling = ModelSamplingAdvanced() + model_sampling = ModelSamplingAdvanced(model.model.model_config) if zsnr: model_sampling.set_sigmas(rescale_zero_terminal_snr_sigmas(model_sampling.sigmas)) @@ -156,7 +126,7 @@ class ModelSamplingContinuousEDM: class ModelSamplingAdvanced(ldm_patched.modules.model_sampling.ModelSamplingContinuousEDM, sampling_type): pass - model_sampling = ModelSamplingAdvanced() + model_sampling = ModelSamplingAdvanced(model.model.model_config) model_sampling.set_sigma_range(sigma_min, sigma_max) m.add_object_patch("model_sampling", model_sampling) return (m, ) diff --git a/ldm_patched/contrib/external_perpneg.py b/ldm_patched/contrib/external_perpneg.py new file mode 100644 index 00000000..ec91681f --- /dev/null +++ b/ldm_patched/contrib/external_perpneg.py @@ -0,0 +1,57 @@ +# https://github.com/comfyanonymous/ComfyUI/blob/master/nodes.py + +import torch +import ldm_patched.modules.model_management +import ldm_patched.modules.sample +import ldm_patched.modules.samplers +import ldm_patched.modules.utils + + +class PerpNeg: + @classmethod + def INPUT_TYPES(s): + return {"required": {"model": ("MODEL", ), + "empty_conditioning": ("CONDITIONING", ), + "neg_scale": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 100.0}), + }} + RETURN_TYPES = ("MODEL",) + FUNCTION = "patch" + + CATEGORY = "_for_testing" + + def patch(self, model, empty_conditioning, neg_scale): + m = model.clone() + nocond = ldm_patched.modules.sample.convert_cond(empty_conditioning) + + def cfg_function(args): + model = args["model"] + noise_pred_pos = args["cond_denoised"] + noise_pred_neg = args["uncond_denoised"] + cond_scale = args["cond_scale"] + x = args["input"] + sigma = args["sigma"] + model_options = args["model_options"] + nocond_processed = ldm_patched.modules.samplers.encode_model_conds(model.extra_conds, nocond, x, x.device, "negative") + + (noise_pred_nocond, _) = ldm_patched.modules.samplers.calc_cond_uncond_batch(model, nocond_processed, None, x, sigma, model_options) + + pos = noise_pred_pos - noise_pred_nocond + neg = noise_pred_neg - noise_pred_nocond + perp = ((torch.mul(pos, neg).sum())/(torch.norm(neg)**2)) * neg + perp_neg = perp * neg_scale + cfg_result = noise_pred_nocond + cond_scale*(pos - perp_neg) + cfg_result = x - cfg_result + return cfg_result + + m.set_model_sampler_cfg_function(cfg_function) + + return (m, ) + + +NODE_CLASS_MAPPINGS = { + "PerpNeg": PerpNeg, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "PerpNeg": "Perp-Neg", +} diff --git a/ldm_patched/contrib/external_sag.py b/ldm_patched/contrib/external_sag.py index 3505b44e..06ca67fa 100644 --- a/ldm_patched/contrib/external_sag.py +++ b/ldm_patched/contrib/external_sag.py @@ -60,7 +60,7 @@ def create_blur_map(x0, attn, sigma=3.0, threshold=1.0): attn = attn.reshape(b, -1, hw1, hw2) # Global Average Pool mask = attn.mean(1, keepdim=False).sum(1, keepdim=False) > threshold - ratio = round(math.sqrt(lh * lw / hw1)) + ratio = math.ceil(math.sqrt(lh * lw / hw1)) mid_shape = [math.ceil(lh / ratio), math.ceil(lw / ratio)] # 
Reshape diff --git a/ldm_patched/modules/clip_vision.py b/ldm_patched/modules/clip_vision.py index eda441af..9699210d 100644 --- a/ldm_patched/modules/clip_vision.py +++ b/ldm_patched/modules/clip_vision.py @@ -19,11 +19,13 @@ class Output: def clip_preprocess(image, size=224): mean = torch.tensor([ 0.48145466,0.4578275,0.40821073], device=image.device, dtype=image.dtype) std = torch.tensor([0.26862954,0.26130258,0.27577711], device=image.device, dtype=image.dtype) - scale = (size / min(image.shape[1], image.shape[2])) - image = torch.nn.functional.interpolate(image.movedim(-1, 1), size=(round(scale * image.shape[1]), round(scale * image.shape[2])), mode="bicubic", antialias=True) - h = (image.shape[2] - size)//2 - w = (image.shape[3] - size)//2 - image = image[:,:,h:h+size,w:w+size] + image = image.movedim(-1, 1) + if not (image.shape[2] == size and image.shape[3] == size): + scale = (size / min(image.shape[2], image.shape[3])) + image = torch.nn.functional.interpolate(image, size=(round(scale * image.shape[2]), round(scale * image.shape[3])), mode="bicubic", antialias=True) + h = (image.shape[2] - size)//2 + w = (image.shape[3] - size)//2 + image = image[:,:,h:h+size,w:w+size] image = torch.clip((255. * image), 0, 255).round() / 255.0 return (image - mean.view([3,1,1])) / std.view([3,1,1]) @@ -34,11 +36,9 @@ class ClipVisionModel(): self.load_device = ldm_patched.modules.model_management.text_encoder_device() offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() - self.dtype = torch.float32 - if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): - self.dtype = torch.float16 - - self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.disable_weight_init) + self.dtype = ldm_patched.modules.model_management.text_encoder_dtype(self.load_device) + self.model = ldm_patched.modules.clip_model.CLIPVisionModelProjection(config, self.dtype, offload_device, ldm_patched.modules.ops.manual_cast) + self.model.eval() self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) def load_sd(self, sd): @@ -46,15 +46,8 @@ class ClipVisionModel(): def encode_image(self, image): ldm_patched.modules.model_management.load_model_gpu(self.patcher) - pixel_values = clip_preprocess(image.to(self.load_device)) - - if self.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32): - out = self.model(pixel_values=pixel_values, intermediate_output=-2) + pixel_values = clip_preprocess(image.to(self.load_device)).float() + out = self.model(pixel_values=pixel_values, intermediate_output=-2) outputs = Output() outputs["last_hidden_state"] = out[0].to(ldm_patched.modules.model_management.intermediate_device()) diff --git a/ldm_patched/modules/samplers.py b/ldm_patched/modules/samplers.py index 4e13d727..bfcb3f56 100644 --- a/ldm_patched/modules/samplers.py +++ b/ldm_patched/modules/samplers.py @@ -251,7 +251,8 @@ def sampling_function(model, x, timestep, uncond, cond, cond_scale, model_option cond_pred, uncond_pred = calc_cond_uncond_batch(model, cond, uncond_, x, timestep, model_options) if "sampler_cfg_function" in model_options: - args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, 
"timestep": timestep, "input": x, "sigma": timestep} + args = {"cond": x - cond_pred, "uncond": x - uncond_pred, "cond_scale": cond_scale, "timestep": timestep, "input": x, "sigma": timestep, + "cond_denoised": cond_pred, "uncond_denoised": uncond_pred, "model": model, "model_options": model_options} cfg_result = x - model_options["sampler_cfg_function"](args) else: cfg_result = uncond_pred + (cond_pred - uncond_pred) * cond_scale diff --git a/troubleshoot.md b/troubleshoot.md index 7be743d5..7e079742 100644 --- a/troubleshoot.md +++ b/troubleshoot.md @@ -143,19 +143,19 @@ Besides, the current support for MAC is very experimental, and we encourage user ### I am using Nvidia with 8GB VRAM, I get CUDA Out Of Memory -It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement). +It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using Nvidia with 6GB VRAM, I get CUDA Out Of Memory -It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement). +It is very likely a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using Nvidia with 4GB VRAM with Float16 support, like RTX 3050, I get CUDA Out Of Memory -It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](readme.md#minimal-requirement). +It is a BUG. Please let us know as soon as possible. Please make an issue. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using Nvidia with 4GB VRAM without Float16 support, like GTX 960, I get CUDA Out Of Memory -Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](readme.md#minimal-requirement). +Supporting GPU with 4GB VRAM without fp16 is extremely difficult, and you may not be able to use SDXL. However, you may still make an issue and let us know. You may try SD1.5 in Automatic1111 or other software for your device. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I am using AMD GPU on Windows, I get CUDA Out Of Memory @@ -163,11 +163,11 @@ Current AMD support is very experimental for Windows. If you see this, then perh However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. -Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](readme.md#minimal-requirement). +Besides, the AMD support on Linux is slightly better because it will use ROCM. You may also try it if you are willing to change OS to linux. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). 
### I am using AMD GPU on Linux, I get CUDA Out Of Memory -Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](readme.md#minimal-requirement). +Current AMD support for Linux is better than that for Windows, but still, very experimental. However, if you re able to run SDXL on this same device on any other software, please let us know immediately, and we will support it as soon as possible. If no other software can enable your device to run SDXL on Windows, then we also do not have much to help. See also [minimal requirements](https://github.com/lllyasviel/Fooocus/tree/main?tab=readme-ov-file#minimal-requirement). ### I tried flags like --lowvram or --gpu-only or --bf16 or so on, and things are not getting any better? From ec5dd950a25cbbc2c6a9462101712402bf0920c7 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 15:55:53 -0800 Subject: [PATCH 10/23] Fix many precision problems Many users reported that image quality is different from 2.1.824. We reviewed all codes and fixed several precision problems in 2.1.846. --- extras/ip_adapter.py | 16 ++- fooocus_version.py | 2 +- modules/patch.py | 46 +----- modules/patch_clip.py | 279 +++++++++++++++++++++++++++++++++++++ modules/patch_precision.py | 60 ++++++++ update_log.md | 4 + 6 files changed, 363 insertions(+), 44 deletions(-) create mode 100644 modules/patch_clip.py create mode 100644 modules/patch_precision.py diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py index b18f0dfc..ac64ef1d 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -167,14 +167,26 @@ def preprocess(img, ip_adapter_path): ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device)) - outputs = clip_vision.model(pixel_values=pixel_values, intermediate_output=-2) + + if clip_vision.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, b: contextlib.nullcontext(a) + + with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32): + outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) ip_adapter = entry['ip_adapter'] ip_layers = entry['ip_layers'] image_proj_model = entry['image_proj_model'] ip_unconds = entry['ip_unconds'] - cond = outputs[1].to(device=ip_adapter.load_device, dtype=ip_adapter.dtype) + if ip_adapter.plus: + cond = outputs.hidden_states[-2] + else: + cond = outputs.image_embeds + + cond = cond.to(device=ip_adapter.load_device, dtype=ip_adapter.dtype) ldm_patched.modules.model_management.load_model_gpu(image_proj_model) cond = image_proj_model.model(cond).to(device=ip_adapter.load_device, dtype=ip_adapter.dtype) diff --git a/fooocus_version.py b/fooocus_version.py index a7dac990..26b2cf04 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.844' +version = '2.1.846' diff --git a/modules/patch.py b/modules/patch.py index 0a04bafb..0ae53585 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -25,6 +25,8 @@ import modules.constants as constants from ldm_patched.modules.samplers import calc_cond_uncond_batch from ldm_patched.k_diffusion.sampling 
import BatchedBrownianTree from ldm_patched.ldm.modules.diffusionmodules.openaimodel import forward_timestep_embed, apply_control +from modules.patch_precision import patch_all_precision +from modules.patch_clip import patch_all_clip sharpness = 2.0 @@ -286,46 +288,6 @@ def sdxl_encode_adm_patched(self, **kwargs): return final_adm -def encode_token_weights_patched_with_a1111_method(self, token_weight_pairs): - to_encode = list() - max_token_len = 0 - has_weights = False - for x in token_weight_pairs: - tokens = list(map(lambda a: a[0], x)) - max_token_len = max(len(tokens), max_token_len) - has_weights = has_weights or not all(map(lambda a: a[1] == 1.0, x)) - to_encode.append(tokens) - - sections = len(to_encode) - if has_weights or sections == 0: - to_encode.append(ldm_patched.modules.sd1_clip.gen_empty_tokens(self.special_tokens, max_token_len)) - - out, pooled = self.encode(to_encode) - if pooled is not None: - first_pooled = pooled[0:1].to(ldm_patched.modules.model_management.intermediate_device()) - else: - first_pooled = pooled - - output = [] - for k in range(0, sections): - z = out[k:k + 1] - if has_weights: - original_mean = z.mean() - z_empty = out[-1] - for i in range(len(z)): - for j in range(len(z[i])): - weight = token_weight_pairs[k][j][1] - if weight != 1.0: - z[i][j] = (z[i][j] - z_empty[j]) * weight + z_empty[j] - new_mean = z.mean() - z = z * (original_mean / new_mean) - output.append(z) - - if len(output) == 0: - return out[-1:].to(ldm_patched.modules.model_management.intermediate_device()), first_pooled - return torch.cat(output, dim=-2).to(ldm_patched.modules.model_management.intermediate_device()), first_pooled - - def patched_KSamplerX0Inpaint_forward(self, x, sigma, uncond, cond, cond_scale, denoise_mask, model_options={}, seed=None): if inpaint_worker.current_task is not None: latent_processor = self.inner_model.inner_model.process_latent_in @@ -519,6 +481,9 @@ def build_loaded(module, loader_name): def patch_all(): + patch_all_precision() + patch_all_clip() + if not hasattr(ldm_patched.modules.model_management, 'load_models_gpu_origin'): ldm_patched.modules.model_management.load_models_gpu_origin = ldm_patched.modules.model_management.load_models_gpu @@ -527,7 +492,6 @@ def patch_all(): ldm_patched.controlnet.cldm.ControlNet.forward = patched_cldm_forward ldm_patched.ldm.modules.diffusionmodules.openaimodel.UNetModel.forward = patched_unet_forward ldm_patched.modules.model_base.SDXL.encode_adm = sdxl_encode_adm_patched - ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_patched_with_a1111_method ldm_patched.modules.samplers.KSamplerX0Inpaint.forward = patched_KSamplerX0Inpaint_forward ldm_patched.k_diffusion.sampling.BrownianTreeNoiseSampler = BrownianTreeNoiseSamplerPatched ldm_patched.modules.samplers.sampling_function = patched_sampling_function diff --git a/modules/patch_clip.py b/modules/patch_clip.py new file mode 100644 index 00000000..157f051b --- /dev/null +++ b/modules/patch_clip.py @@ -0,0 +1,279 @@ +# Consistent with Kohya/A1111 to reduce differences between model training and inference. 
+ +import os +import torch +import ldm_patched.controlnet.cldm +import ldm_patched.k_diffusion.sampling +import ldm_patched.ldm.modules.attention +import ldm_patched.ldm.modules.diffusionmodules.model +import ldm_patched.ldm.modules.diffusionmodules.openaimodel +import ldm_patched.ldm.modules.diffusionmodules.openaimodel +import ldm_patched.modules.args_parser +import ldm_patched.modules.model_base +import ldm_patched.modules.model_management +import ldm_patched.modules.model_patcher +import ldm_patched.modules.ops +import ldm_patched.modules.samplers +import ldm_patched.modules.sd +import ldm_patched.modules.sd1_clip +import ldm_patched.modules.clip_vision +import ldm_patched.modules.model_management as model_management +import contextlib + +from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection + + +@contextlib.contextmanager +def use_disable_weight_init_linear_ops(device=None, dtype=None): + old_torch_nn_linear = torch.nn.Linear + force_device = device + force_dtype = dtype + + def linear_with_dtype(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): + if force_device is not None: + device = force_device + if force_dtype is not None: + dtype = force_dtype + return ldm_patched.modules.ops.disable_weight_init.Linear(in_features, out_features, bias=bias, device=device, + dtype=dtype) + + torch.nn.Linear = linear_with_dtype + try: + yield + finally: + torch.nn.Linear = old_torch_nn_linear + return + + +def encode_token_weights_fooocus(self, token_weight_pairs): + to_encode = list() + max_token_len = 0 + has_weights = False + for x in token_weight_pairs: + tokens = list(map(lambda a: a[0], x)) + max_token_len = max(len(tokens), max_token_len) + has_weights = has_weights or not all(map(lambda a: a[1] == 1.0, x)) + to_encode.append(tokens) + + sections = len(to_encode) + if has_weights or sections == 0: + to_encode.append(ldm_patched.modules.sd1_clip.gen_empty_tokens(self.special_tokens, max_token_len)) + + out, pooled = self.encode(to_encode) + if pooled is not None: + first_pooled = pooled[0:1].to(ldm_patched.modules.model_management.intermediate_device()) + else: + first_pooled = pooled + + output = [] + for k in range(0, sections): + z = out[k:k + 1] + if has_weights: + original_mean = z.mean() + z_empty = out[-1] + for i in range(len(z)): + for j in range(len(z[i])): + weight = token_weight_pairs[k][j][1] + if weight != 1.0: + z[i][j] = (z[i][j] - z_empty[j]) * weight + z_empty[j] + new_mean = z.mean() + z = z * (original_mean / new_mean) + output.append(z) + + if len(output) == 0: + return out[-1:].to(ldm_patched.modules.model_management.intermediate_device()), first_pooled + return torch.cat(output, dim=-2).to(ldm_patched.modules.model_management.intermediate_device()), first_pooled + + +class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder): + """Uses the CLIP transformer encoder for text (from huggingface)""" + LAYERS = [ + "last", + "pooled", + "hidden" + ] + + def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77, + freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=ldm_patched.modules.clip_model.CLIPTextModel, + special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True): # clip-vit-base-patch32 + super().__init__() + assert layer in self.LAYERS + + if textmodel_json_config is None: + textmodel_json_config = 
os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), "sd1_clip_config.json") + + config = CLIPTextConfig.from_json_file(textmodel_json_config) + + self.num_layers = config.num_hidden_layers + with use_disable_weight_init_linear_ops(device, dtype): + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) + + self.inner_name = "text_model" + if dtype is not None: + self.transformer.to(dtype) + inner_model = getattr(self.transformer, self.inner_name) + if hasattr(inner_model, "embeddings"): + inner_model.embeddings.to(torch.float32) + else: + self.transformer.set_input_embeddings(self.transformer.get_input_embeddings().to(torch.float32)) + + self.max_length = max_length + if freeze: + self.freeze() + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.enable_attention_masks = False + + self.layer_norm_hidden_state = layer_norm_hidden_state + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.clip_layer(layer_idx) + self.layer_default = (self.layer, self.layer_idx) + + def freeze(self): + self.transformer = self.transformer.eval() + # self.train = disabled_train + for param in self.parameters(): + param.requires_grad = False + + def clip_layer(self, layer_idx): + if abs(layer_idx) > self.num_layers: + self.layer = "last" + else: + self.layer = "hidden" + self.layer_idx = layer_idx + + def reset_clip_layer(self): + self.layer = self.layer_default[0] + self.layer_idx = self.layer_default[1] + + def set_up_textual_embeddings(self, tokens, current_embeds): + out_tokens = [] + next_new_token = token_dict_size = current_embeds.weight.shape[0] - 1 + embedding_weights = [] + + for x in tokens: + tokens_temp = [] + for y in x: + if isinstance(y, int): + if y == token_dict_size: # EOS token + y = -1 + tokens_temp += [y] + else: + if y.shape[0] == current_embeds.weight.shape[1]: + embedding_weights += [y] + tokens_temp += [next_new_token] + next_new_token += 1 + else: + print("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored", + y.shape[0], current_embeds.weight.shape[1]) + while len(tokens_temp) < len(x): + tokens_temp += [self.special_tokens["pad"]] + out_tokens += [tokens_temp] + + n = token_dict_size + if len(embedding_weights) > 0: + new_embedding = torch.nn.Embedding(next_new_token + 1, current_embeds.weight.shape[1], + device=current_embeds.weight.device, dtype=current_embeds.weight.dtype) + new_embedding.weight[:token_dict_size] = current_embeds.weight[:-1] + for x in embedding_weights: + new_embedding.weight[n] = x + n += 1 + new_embedding.weight[n] = current_embeds.weight[-1] # EOS embedding + self.transformer.set_input_embeddings(new_embedding) + + processed_tokens = [] + for x in out_tokens: + processed_tokens += [ + list(map(lambda a: n if a == -1 else a, x))] # The EOS token should always be the largest one + + return processed_tokens + + def forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + device = backup_embeds.weight.device + tokens = self.set_up_textual_embeddings(tokens, backup_embeds) + tokens = torch.LongTensor(tokens).to(device) + + if getattr(self.transformer, self.inner_name).final_layer_norm.weight.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, 
dtype: contextlib.nullcontext(a) + + with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break + + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) + + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] + else: + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = getattr(self.transformer, self.inner_name).final_layer_norm(z) + + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None + + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() + return z.float(), pooled_output + + def encode(self, tokens): + return self(tokens) + + def load_sd(self, sd): + if "text_projection" in sd: + self.text_projection[:] = sd.pop("text_projection") + if "text_projection.weight" in sd: + self.text_projection[:] = sd.pop("text_projection.weight").transpose(0, 1) + return self.transformer.load_state_dict(sd, strict=False) + + +class ClipVisionModelFooocus: + def __init__(self, json_config): + config = CLIPVisionConfig.from_json_file(json_config) + self.load_device = ldm_patched.modules.model_management.text_encoder_device() + offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + self.dtype = torch.float32 + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): + self.dtype = torch.float16 + + with use_disable_weight_init_linear_ops(offload_device, self.dtype): + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) + self.model.to(self.dtype) + + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + + def load_sd(self, sd): + return self.model.load_state_dict(sd, strict=False) + + def encode_image(self, image): + raise NotImplementedError('wrong clip vision call!') + + +def patch_all_clip(): + ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_fooocus + ldm_patched.modules.sd1_clip.SDClipModel = SDClipModelFooocus + ldm_patched.modules.clip_vision.ClipVisionModel = ClipVisionModelFooocus + return diff --git a/modules/patch_precision.py b/modules/patch_precision.py new file mode 100644 index 00000000..83569bdd --- /dev/null +++ b/modules/patch_precision.py @@ -0,0 +1,60 @@ +# Consistent with Kohya to reduce differences between model training and inference. 
+ +import torch +import math +import einops +import numpy as np + +import ldm_patched.ldm.modules.diffusionmodules.openaimodel +import ldm_patched.modules.model_sampling +import ldm_patched.modules.sd1_clip + +from ldm_patched.ldm.modules.diffusionmodules.util import make_beta_schedule + + +def patched_timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False): + # Consistent with Kohya to reduce differences between model training and inference. + + if not repeat_only: + half = dim // 2 + freqs = torch.exp( + -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half + ).to(device=timesteps.device) + args = timesteps[:, None].float() * freqs[None] + embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) + if dim % 2: + embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) + else: + embedding = einops.repeat(timesteps, 'b -> b d', d=dim) + return embedding + + +def patched_register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000, + linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + # Consistent with Kohya to reduce differences between model training and inference. + + if given_betas is not None: + betas = given_betas + else: + betas = make_beta_schedule( + beta_schedule, + timesteps, + linear_start=linear_start, + linear_end=linear_end, + cosine_s=cosine_s) + + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + self.linear_start = linear_start + self.linear_end = linear_end + sigmas = torch.tensor(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5, dtype=torch.float32) + self.set_sigmas(sigmas) + return + + +def patch_all_precision(): + ldm_patched.ldm.modules.diffusionmodules.openaimodel.timestep_embedding = patched_timestep_embedding + ldm_patched.modules.model_sampling.ModelSamplingDiscrete._register_schedule = patched_register_schedule + return diff --git a/update_log.md b/update_log.md index 640b5665..39e4c9e5 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.846 + +* Many users reported that image quality is different from 2.1.824. We reviewed all codes and fixed several precision problems in 2.1.846. + # 2.1.843 * Many improvements to Canvas. Thanks CanvasZoom author! 
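For reference, the Kohya-consistent timestep embedding introduced in modules/patch_precision.py above can be exercised on its own. The sketch below is illustrative rather than part of any patch: demo_timestep_embedding and demo are made-up names, and only the repeat_only=False branch is reproduced. It mirrors the patched logic, namely frequencies computed in float32, cosine components concatenated before sine components, and zero padding for odd dimensions.

import math
import torch

def demo_timestep_embedding(timesteps, dim, max_period=10000):
    # Mirrors patched_timestep_embedding (repeat_only=False branch):
    # float32 frequencies, cos concatenated before sin, zero-pad for odd dims.
    half = dim // 2
    freqs = torch.exp(
        -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
    ).to(device=timesteps.device)
    args = timesteps[:, None].float() * freqs[None]
    embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
    if dim % 2:
        embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
    return embedding

demo = demo_timestep_embedding(torch.tensor([0, 500, 999]), dim=320)
print(demo.shape)  # torch.Size([3, 320])

patched_register_schedule applies the same idea to the noise schedule, building the sigmas tensor in float32 from the cumulative alphas before handing it to set_sigmas.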
From 085bc3aeaafee4b2310bc31be9536f869581120a Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 16:58:46 -0800 Subject: [PATCH 11/23] revise caster --- fooocus_version.py | 2 +- modules/patch.py | 4 +- modules/patch_clip.py | 91 ++++++++++++++++++------------------------- 3 files changed, 40 insertions(+), 57 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 26b2cf04..0404a937 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.846' +version = '2.1.847' diff --git a/modules/patch.py b/modules/patch.py index 0ae53585..6a7111a6 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -271,8 +271,8 @@ def sdxl_encode_adm_patched(self, **kwargs): height = float(height) * positive_adm_scale def embedder(number_list): - h = torch.tensor(number_list, dtype=torch.float32) - h = self.embedder(h) + h = [self.embedder(torch.tensor([x], dtype=torch.float32)) for x in number_list] + h = torch.cat(h) h = torch.flatten(h).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) return h diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 157f051b..8aa7468f 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -23,28 +23,6 @@ import contextlib from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection -@contextlib.contextmanager -def use_disable_weight_init_linear_ops(device=None, dtype=None): - old_torch_nn_linear = torch.nn.Linear - force_device = device - force_dtype = dtype - - def linear_with_dtype(in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): - if force_device is not None: - device = force_device - if force_dtype is not None: - dtype = force_dtype - return ldm_patched.modules.ops.disable_weight_init.Linear(in_features, out_features, bias=bias, device=device, - dtype=dtype) - - torch.nn.Linear = linear_with_dtype - try: - yield - finally: - torch.nn.Linear = old_torch_nn_linear - return - - def encode_token_weights_fooocus(self, token_weight_pairs): to_encode = list() max_token_len = 0 @@ -93,34 +71,40 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken "hidden" ] - def __init__(self, version="openai/clip-vit-large-patch14", device="cpu", max_length=77, - freeze=True, layer="last", layer_idx=None, textmodel_json_config=None, dtype=None, model_class=ldm_patched.modules.clip_model.CLIPTextModel, - special_tokens={"start": 49406, "end": 49407, "pad": 49407}, layer_norm_hidden_state=True): # clip-vit-base-patch32 + def __init__(self, + device="cpu", + max_length=77, + freeze=True, + layer="last", + layer_idx=None, + textmodel_json_config=None, + dtype=None, + special_tokens=None, + layer_norm_hidden_state=True, + **kwargs): super().__init__() assert layer in self.LAYERS + if special_tokens is None: + special_tokens = {"start": 49406, "end": 49407, "pad": 49407} + if textmodel_json_config is None: textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), "sd1_clip_config.json") config = CLIPTextConfig.from_json_file(textmodel_json_config) - self.num_layers = config.num_hidden_layers - with use_disable_weight_init_linear_ops(device, dtype): - with modeling_utils.no_init_weights(): - self.transformer = CLIPTextModel(config) - self.inner_name = "text_model" + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) + if dtype is not None: self.transformer.to(dtype) - inner_model = getattr(self.transformer, 
self.inner_name) - if hasattr(inner_model, "embeddings"): - inner_model.embeddings.to(torch.float32) - else: - self.transformer.set_input_embeddings(self.transformer.get_input_embeddings().to(torch.float32)) + self.transformer.text_model.embeddings.to(torch.float32) - self.max_length = max_length if freeze: self.freeze() + + self.max_length = max_length self.layer = layer self.layer_idx = None self.special_tokens = special_tokens @@ -131,7 +115,6 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken self.layer_norm_hidden_state = layer_norm_hidden_state if layer == "hidden": assert layer_idx is not None - assert abs(layer_idx) < self.num_layers self.clip_layer(layer_idx) self.layer_default = (self.layer, self.layer_idx) @@ -142,11 +125,8 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken param.requires_grad = False def clip_layer(self, layer_idx): - if abs(layer_idx) > self.num_layers: - self.layer = "last" - else: - self.layer = "hidden" - self.layer_idx = layer_idx + self.layer = "hidden" + self.layer_idx = layer_idx def reset_clip_layer(self): self.layer = self.layer_default[0] @@ -200,7 +180,7 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken tokens = self.set_up_textual_embeddings(tokens, backup_embeds) tokens = torch.LongTensor(tokens).to(device) - if getattr(self.transformer, self.inner_name).final_layer_norm.weight.dtype != torch.float32: + if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: precision_scope = torch.autocast else: precision_scope = lambda a, dtype: contextlib.nullcontext(a) @@ -227,7 +207,7 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken else: z = outputs.hidden_states[self.layer_idx] if self.layer_norm_hidden_state: - z = getattr(self.transformer, self.inner_name).final_layer_norm(z) + z = self.transformer.text_model.final_layer_norm(z) if hasattr(outputs, "pooler_output"): pooled_output = outputs.pooler_output.float() @@ -252,25 +232,28 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken class ClipVisionModelFooocus: def __init__(self, json_config): config = CLIPVisionConfig.from_json_file(json_config) + self.load_device = ldm_patched.modules.model_management.text_encoder_device() - offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() - self.dtype = torch.float32 + self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): self.dtype = torch.float16 + else: + self.dtype = torch.float32 + + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) - with use_disable_weight_init_linear_ops(offload_device, self.dtype): - with modeling_utils.no_init_weights(): - self.model = CLIPVisionModelWithProjection(config) self.model.to(self.dtype) - - self.patcher = ldm_patched.modules.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( + self.model, + load_device=self.load_device, + offload_device=self.offload_device + ) def load_sd(self, sd): return self.model.load_state_dict(sd, strict=False) - def encode_image(self, image): - raise NotImplementedError('wrong clip vision call!') - def patch_all_clip(): ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = 
encode_token_weights_fooocus From efb312d4951f228158f1a47ffd2c04f76a664557 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 17:16:24 -0800 Subject: [PATCH 12/23] fix autocast (#1459) --- fooocus_version.py | 2 +- modules/patch_clip.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 0404a937..709af32d 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.847' +version = '2.1.848' diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 8aa7468f..4a1e0307 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -72,7 +72,6 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken ] def __init__(self, - device="cpu", max_length=77, freeze=True, layer="last", @@ -97,6 +96,9 @@ class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipToken with modeling_utils.no_init_weights(): self.transformer = CLIPTextModel(config) + if 'cuda' not in model_management.text_encoder_device().type: + dtype = torch.float32 + if dtype is not None: self.transformer.to(dtype) self.transformer.text_model.embeddings.to(torch.float32) @@ -241,6 +243,9 @@ class ClipVisionModelFooocus: else: self.dtype = torch.float32 + if 'cuda' not in self.load_device.type: + self.dtype = torch.float32 + with modeling_utils.no_init_weights(): self.model = CLIPVisionModelWithProjection(config) From 67808d5ee541d3fff350980bf36d38c8e5943029 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sat, 16 Dec 2023 19:54:05 -0800 Subject: [PATCH 13/23] fix all precision issues We fixed number precision issues again. Now 2.1.849 will give 100% exactly same results as 2.1.824. --- fooocus_version.py | 2 +- modules/patch.py | 5 +- modules/patch_clip.py | 221 ++++++++++++++---------------------------- 3 files changed, 75 insertions(+), 153 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 709af32d..e1578ebb 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.848' +version = '2.1.849' diff --git a/modules/patch.py b/modules/patch.py index 6a7111a6..66b243cb 100644 --- a/modules/patch.py +++ b/modules/patch.py @@ -271,12 +271,11 @@ def sdxl_encode_adm_patched(self, **kwargs): height = float(height) * positive_adm_scale def embedder(number_list): - h = [self.embedder(torch.tensor([x], dtype=torch.float32)) for x in number_list] - h = torch.cat(h) + h = self.embedder(torch.tensor(number_list, dtype=torch.float32)) h = torch.flatten(h).unsqueeze(dim=0).repeat(clip_pooled.shape[0], 1) return h - width, height = round_to_64(width), round_to_64(height) + width, height = int(width), int(height) target_width, target_height = round_to_64(target_width), round_to_64(target_height) adm_emphasized = embedder([height, width, 0, 0, target_height, target_width]) diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 4a1e0307..0ef22e8b 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -63,172 +63,94 @@ def encode_token_weights_fooocus(self, token_weight_pairs): return torch.cat(output, dim=-2).to(ldm_patched.modules.model_management.intermediate_device()), first_pooled -class SDClipModelFooocus(torch.nn.Module, ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder): - """Uses the CLIP transformer encoder for text (from huggingface)""" - LAYERS = [ - "last", - "pooled", - "hidden" - ] +def patched_SDClipModel__init__(self, max_length=77, freeze=True, layer="last", layer_idx=None, + textmodel_json_config=None, 
dtype=None, special_tokens=None, + layer_norm_hidden_state=True, **kwargs): + torch.nn.Module.__init__(self) + assert layer in self.LAYERS - def __init__(self, - max_length=77, - freeze=True, - layer="last", - layer_idx=None, - textmodel_json_config=None, - dtype=None, - special_tokens=None, - layer_norm_hidden_state=True, - **kwargs): - super().__init__() - assert layer in self.LAYERS + if special_tokens is None: + special_tokens = {"start": 49406, "end": 49407, "pad": 49407} - if special_tokens is None: - special_tokens = {"start": 49406, "end": 49407, "pad": 49407} + if textmodel_json_config is None: + textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), + "sd1_clip_config.json") - if textmodel_json_config is None: - textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(ldm_patched.modules.sd1_clip.__file__)), "sd1_clip_config.json") + config = CLIPTextConfig.from_json_file(textmodel_json_config) + self.num_layers = config.num_hidden_layers - config = CLIPTextConfig.from_json_file(textmodel_json_config) - self.num_layers = config.num_hidden_layers + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) - with modeling_utils.no_init_weights(): - self.transformer = CLIPTextModel(config) + if 'cuda' not in model_management.text_encoder_device().type: + dtype = torch.float32 - if 'cuda' not in model_management.text_encoder_device().type: - dtype = torch.float32 + if dtype is not None: + self.transformer.to(dtype) + self.transformer.text_model.embeddings.to(torch.float32) - if dtype is not None: - self.transformer.to(dtype) - self.transformer.text_model.embeddings.to(torch.float32) + if freeze: + self.freeze() - if freeze: - self.freeze() + self.max_length = max_length + self.layer = layer + self.layer_idx = None + self.special_tokens = special_tokens + self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) + self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) + self.enable_attention_masks = False - self.max_length = max_length - self.layer = layer - self.layer_idx = None - self.special_tokens = special_tokens - self.text_projection = torch.nn.Parameter(torch.eye(self.transformer.get_input_embeddings().weight.shape[1])) - self.logit_scale = torch.nn.Parameter(torch.tensor(4.6055)) - self.enable_attention_masks = False + self.layer_norm_hidden_state = layer_norm_hidden_state + if layer == "hidden": + assert layer_idx is not None + assert abs(layer_idx) < self.num_layers + self.clip_layer(layer_idx) + self.layer_default = (self.layer, self.layer_idx) - self.layer_norm_hidden_state = layer_norm_hidden_state - if layer == "hidden": - assert layer_idx is not None - self.clip_layer(layer_idx) - self.layer_default = (self.layer, self.layer_idx) - def freeze(self): - self.transformer = self.transformer.eval() - # self.train = disabled_train - for param in self.parameters(): - param.requires_grad = False +def patched_SDClipModel_forward(self, tokens): + backup_embeds = self.transformer.get_input_embeddings() + device = backup_embeds.weight.device + tokens = self.set_up_textual_embeddings(tokens, backup_embeds) + tokens = torch.LongTensor(tokens).to(device) - def clip_layer(self, layer_idx): - self.layer = "hidden" - self.layer_idx = layer_idx + if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, dtype: contextlib.nullcontext(a) - def 
reset_clip_layer(self): - self.layer = self.layer_default[0] - self.layer_idx = self.layer_default[1] + with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break - def set_up_textual_embeddings(self, tokens, current_embeds): - out_tokens = [] - next_new_token = token_dict_size = current_embeds.weight.shape[0] - 1 - embedding_weights = [] + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) - for x in tokens: - tokens_temp = [] - for y in x: - if isinstance(y, int): - if y == token_dict_size: # EOS token - y = -1 - tokens_temp += [y] - else: - if y.shape[0] == current_embeds.weight.shape[1]: - embedding_weights += [y] - tokens_temp += [next_new_token] - next_new_token += 1 - else: - print("WARNING: shape mismatch when trying to apply embedding, embedding will be ignored", - y.shape[0], current_embeds.weight.shape[1]) - while len(tokens_temp) < len(x): - tokens_temp += [self.special_tokens["pad"]] - out_tokens += [tokens_temp] - - n = token_dict_size - if len(embedding_weights) > 0: - new_embedding = torch.nn.Embedding(next_new_token + 1, current_embeds.weight.shape[1], - device=current_embeds.weight.device, dtype=current_embeds.weight.dtype) - new_embedding.weight[:token_dict_size] = current_embeds.weight[:-1] - for x in embedding_weights: - new_embedding.weight[n] = x - n += 1 - new_embedding.weight[n] = current_embeds.weight[-1] # EOS embedding - self.transformer.set_input_embeddings(new_embedding) - - processed_tokens = [] - for x in out_tokens: - processed_tokens += [ - list(map(lambda a: n if a == -1 else a, x))] # The EOS token should always be the largest one - - return processed_tokens - - def forward(self, tokens): - backup_embeds = self.transformer.get_input_embeddings() - device = backup_embeds.weight.device - tokens = self.set_up_textual_embeddings(tokens, backup_embeds) - tokens = torch.LongTensor(tokens).to(device) - - if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: - precision_scope = torch.autocast + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] else: - precision_scope = lambda a, dtype: contextlib.nullcontext(a) + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = self.transformer.text_model.final_layer_norm(z) - with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): - attention_mask = None - if self.enable_attention_masks: - attention_mask = torch.zeros_like(tokens) - max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 - for x in range(attention_mask.shape[0]): - for y in range(attention_mask.shape[1]): - attention_mask[x, y] = 1 - if tokens[x, y] == max_token: - break + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None - outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, - output_hidden_states=self.layer == "hidden") - self.transformer.set_input_embeddings(backup_embeds) - - if self.layer == "last": - z = 
outputs.last_hidden_state - elif self.layer == "pooled": - z = outputs.pooler_output[:, None, :] - else: - z = outputs.hidden_states[self.layer_idx] - if self.layer_norm_hidden_state: - z = self.transformer.text_model.final_layer_norm(z) - - if hasattr(outputs, "pooler_output"): - pooled_output = outputs.pooler_output.float() - else: - pooled_output = None - - if self.text_projection is not None and pooled_output is not None: - pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() - return z.float(), pooled_output - - def encode(self, tokens): - return self(tokens) - - def load_sd(self, sd): - if "text_projection" in sd: - self.text_projection[:] = sd.pop("text_projection") - if "text_projection.weight" in sd: - self.text_projection[:] = sd.pop("text_projection.weight").transpose(0, 1) - return self.transformer.load_state_dict(sd, strict=False) + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() + return z.float(), pooled_output class ClipVisionModelFooocus: @@ -262,6 +184,7 @@ class ClipVisionModelFooocus: def patch_all_clip(): ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_fooocus - ldm_patched.modules.sd1_clip.SDClipModel = SDClipModelFooocus + ldm_patched.modules.sd1_clip.SDClipModel.__init__ = patched_SDClipModel__init__ + ldm_patched.modules.sd1_clip.SDClipModel.forward = patched_SDClipModel_forward ldm_patched.modules.clip_vision.ClipVisionModel = ClipVisionModelFooocus return From 1669370d2e44b789a4167b9a546a94abcedbb726 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sun, 17 Dec 2023 00:13:51 -0800 Subject: [PATCH 14/23] Refactor CLIP Vision --- fooocus_version.py | 2 +- modules/patch_clip.py | 69 ++++++++++++++++++++++++++++--------------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index e1578ebb..70a5e92a 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.849' +version = '2.1.850' diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 0ef22e8b..5a3e85df 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -23,7 +23,7 @@ import contextlib from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, CLIPVisionModelWithProjection -def encode_token_weights_fooocus(self, token_weight_pairs): +def patched_encode_token_weights(self, token_weight_pairs): to_encode = list() max_token_len = 0 has_weights = False @@ -153,38 +153,59 @@ def patched_SDClipModel_forward(self, tokens): return z.float(), pooled_output -class ClipVisionModelFooocus: - def __init__(self, json_config): - config = CLIPVisionConfig.from_json_file(json_config) +def patched_ClipVisionModel__init__(self, json_config): + config = CLIPVisionConfig.from_json_file(json_config) - self.load_device = ldm_patched.modules.model_management.text_encoder_device() - self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() + self.load_device = ldm_patched.modules.model_management.text_encoder_device() + self.offload_device = ldm_patched.modules.model_management.text_encoder_offload_device() - if ldm_patched.modules.model_management.should_use_fp16(self.load_device, prioritize_performance=False): - self.dtype = torch.float16 - else: - self.dtype = torch.float32 + if ldm_patched.modules.model_management.should_use_fp16(self.load_device, 
prioritize_performance=False): + self.dtype = torch.float16 + else: + self.dtype = torch.float32 - if 'cuda' not in self.load_device.type: - self.dtype = torch.float32 + if 'cuda' not in self.load_device.type: + self.dtype = torch.float32 - with modeling_utils.no_init_weights(): - self.model = CLIPVisionModelWithProjection(config) + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) - self.model.to(self.dtype) - self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( - self.model, - load_device=self.load_device, - offload_device=self.offload_device - ) + self.model.to(self.dtype) + self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( + self.model, + load_device=self.load_device, + offload_device=self.offload_device + ) - def load_sd(self, sd): - return self.model.load_state_dict(sd, strict=False) + +def patched_ClipVisionModel_encode_image(self, image): + ldm_patched.modules.model_management.load_model_gpu(self.patcher) + pixel_values = ldm_patched.modules.clip_vision.clip_preprocess(image.to(self.load_device)) + + if self.dtype != torch.float32: + precision_scope = torch.autocast + else: + precision_scope = lambda a, b: contextlib.nullcontext(a) + + with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32): + outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) + + for k in outputs: + t = outputs[k] + if t is not None: + if k == 'hidden_states': + outputs["penultimate_hidden_states"] = t[-2].to(ldm_patched.modules.model_management.intermediate_device()) + outputs["hidden_states"] = None + else: + outputs[k] = t.to(ldm_patched.modules.model_management.intermediate_device()) + + return outputs def patch_all_clip(): - ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = encode_token_weights_fooocus + ldm_patched.modules.sd1_clip.ClipTokenWeightEncoder.encode_token_weights = patched_encode_token_weights ldm_patched.modules.sd1_clip.SDClipModel.__init__ = patched_SDClipModel__init__ ldm_patched.modules.sd1_clip.SDClipModel.forward = patched_SDClipModel_forward - ldm_patched.modules.clip_vision.ClipVisionModel = ClipVisionModelFooocus + ldm_patched.modules.clip_vision.ClipVisionModel.__init__ = patched_ClipVisionModel__init__ + ldm_patched.modules.clip_vision.ClipVisionModel.encode_image = patched_ClipVisionModel_encode_image return From 69a23c4d60c9e627409d0cb0f8862cdb015488eb Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sun, 17 Dec 2023 00:43:00 -0800 Subject: [PATCH 15/23] python 3.11 need this (#1465) --- requirements_versions.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements_versions.txt b/requirements_versions.txt index 16050a5e..b2111c1f 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -8,7 +8,6 @@ Pillow==9.2.0 scipy==1.9.3 tqdm==4.64.1 psutil==5.9.5 -numpy==1.23.5 pytorch_lightning==1.9.4 omegaconf==2.2.3 gradio==3.41.2 From 0e1aa8d084b8a6c0bd7734f7a77eef80814cbb47 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Sun, 17 Dec 2023 17:09:15 -0800 Subject: [PATCH 16/23] better caster (#1480) related to mps/rocm/cpu casting for fp16 and etc on clip --- extras/ip_adapter.py | 9 +- fooocus_version.py | 2 +- ldm_patched/contrib/external_images.py | 4 +- ldm_patched/modules/args_parser.py | 2 +- ldm_patched/modules/model_management.py | 4 + modules/patch_clip.py | 106 ++++++++++++------------ 6 files changed, 63 insertions(+), 64 deletions(-) diff --git a/extras/ip_adapter.py b/extras/ip_adapter.py 
index ac64ef1d..cb1d366f 100644 --- a/extras/ip_adapter.py +++ b/extras/ip_adapter.py @@ -167,14 +167,7 @@ def preprocess(img, ip_adapter_path): ldm_patched.modules.model_management.load_model_gpu(clip_vision.patcher) pixel_values = clip_preprocess(numpy_to_pytorch(img).to(clip_vision.load_device)) - - if clip_vision.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(clip_vision.load_device), torch.float32): - outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) + outputs = clip_vision.model(pixel_values=pixel_values, output_hidden_states=True) ip_adapter = entry['ip_adapter'] ip_layers = entry['ip_layers'] diff --git a/fooocus_version.py b/fooocus_version.py index 70a5e92a..b588f46f 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.850' +version = '2.1.851' diff --git a/ldm_patched/contrib/external_images.py b/ldm_patched/contrib/external_images.py index 3dbb3e3b..17e9c497 100644 --- a/ldm_patched/contrib/external_images.py +++ b/ldm_patched/contrib/external_images.py @@ -76,7 +76,7 @@ class SaveAnimatedWEBP: OUTPUT_NODE = True - CATEGORY = "_for_testing" + CATEGORY = "image/animation" def save_images(self, images, fps, filename_prefix, lossless, quality, method, num_frames=0, prompt=None, extra_pnginfo=None): method = self.methods.get(method) @@ -138,7 +138,7 @@ class SaveAnimatedPNG: OUTPUT_NODE = True - CATEGORY = "_for_testing" + CATEGORY = "image/animation" def save_images(self, images, fps, compress_level, filename_prefix="ldm_patched", prompt=None, extra_pnginfo=None): filename_prefix += self.prefix_append diff --git a/ldm_patched/modules/args_parser.py b/ldm_patched/modules/args_parser.py index 3931997d..7957783e 100644 --- a/ldm_patched/modules/args_parser.py +++ b/ldm_patched/modules/args_parser.py @@ -102,7 +102,7 @@ vram_group.add_argument("--always-cpu", action="store_true") parser.add_argument("--always-offload-from-vram", action="store_true") - +parser.add_argument("--pytorch-deterministic", action="store_true") parser.add_argument("--disable-server-log", action="store_true") parser.add_argument("--debug-mode", action="store_true") diff --git a/ldm_patched/modules/model_management.py b/ldm_patched/modules/model_management.py index 0e783b36..31cf95da 100644 --- a/ldm_patched/modules/model_management.py +++ b/ldm_patched/modules/model_management.py @@ -28,6 +28,10 @@ total_vram = 0 lowvram_available = True xpu_available = False +if args.pytorch_deterministic: + print("Using deterministic algorithms for pytorch") + torch.use_deterministic_algorithms(True, warn_only=True) + directml_enabled = False if args.directml is not None: import torch_directml diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 5a3e85df..74ee436a 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -12,17 +12,34 @@ import ldm_patched.modules.args_parser import ldm_patched.modules.model_base import ldm_patched.modules.model_management import ldm_patched.modules.model_patcher -import ldm_patched.modules.ops import ldm_patched.modules.samplers import ldm_patched.modules.sd import ldm_patched.modules.sd1_clip import ldm_patched.modules.clip_vision import ldm_patched.modules.model_management as model_management +import ldm_patched.modules.ops as ops import contextlib from transformers import CLIPTextModel, CLIPTextConfig, modeling_utils, CLIPVisionConfig, 
CLIPVisionModelWithProjection +@contextlib.contextmanager +def use_patched_ops(operations): + op_names = ['Linear', 'Conv2d', 'Conv3d', 'GroupNorm', 'LayerNorm'] + backups = {op_name: getattr(torch.nn, op_name) for op_name in op_names} + + try: + for op_name in op_names: + setattr(torch.nn, op_name, getattr(operations, op_name)) + + yield + + finally: + for op_name in op_names: + setattr(torch.nn, op_name, backups[op_name]) + return + + def patched_encode_token_weights(self, token_weight_pairs): to_encode = list() max_token_len = 0 @@ -79,15 +96,14 @@ def patched_SDClipModel__init__(self, max_length=77, freeze=True, layer="last", config = CLIPTextConfig.from_json_file(textmodel_json_config) self.num_layers = config.num_hidden_layers - with modeling_utils.no_init_weights(): - self.transformer = CLIPTextModel(config) - - if 'cuda' not in model_management.text_encoder_device().type: - dtype = torch.float32 + with use_patched_ops(ops.manual_cast): + with modeling_utils.no_init_weights(): + self.transformer = CLIPTextModel(config) if dtype is not None: self.transformer.to(dtype) - self.transformer.text_model.embeddings.to(torch.float32) + + self.transformer.text_model.embeddings.to(torch.float32) if freeze: self.freeze() @@ -114,42 +130,37 @@ def patched_SDClipModel_forward(self, tokens): tokens = self.set_up_textual_embeddings(tokens, backup_embeds) tokens = torch.LongTensor(tokens).to(device) - if self.transformer.text_model.final_layer_norm.weight.dtype != torch.float32: - precision_scope = torch.autocast + attention_mask = None + if self.enable_attention_masks: + attention_mask = torch.zeros_like(tokens) + max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 + for x in range(attention_mask.shape[0]): + for y in range(attention_mask.shape[1]): + attention_mask[x, y] = 1 + if tokens[x, y] == max_token: + break + + outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + self.transformer.set_input_embeddings(backup_embeds) + + if self.layer == "last": + z = outputs.last_hidden_state + elif self.layer == "pooled": + z = outputs.pooler_output[:, None, :] else: - precision_scope = lambda a, dtype: contextlib.nullcontext(a) + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: + z = self.transformer.text_model.final_layer_norm(z) - with precision_scope(model_management.get_autocast_device(device), dtype=torch.float32): - attention_mask = None - if self.enable_attention_masks: - attention_mask = torch.zeros_like(tokens) - max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1 - for x in range(attention_mask.shape[0]): - for y in range(attention_mask.shape[1]): - attention_mask[x, y] = 1 - if tokens[x, y] == max_token: - break + if hasattr(outputs, "pooler_output"): + pooled_output = outputs.pooler_output.float() + else: + pooled_output = None - outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, - output_hidden_states=self.layer == "hidden") - self.transformer.set_input_embeddings(backup_embeds) + if self.text_projection is not None and pooled_output is not None: + pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() - if self.layer == "last": - z = outputs.last_hidden_state - elif self.layer == "pooled": - z = outputs.pooler_output[:, None, :] - else: - z = outputs.hidden_states[self.layer_idx] - if self.layer_norm_hidden_state: - z = self.transformer.text_model.final_layer_norm(z) - - if 
hasattr(outputs, "pooler_output"): - pooled_output = outputs.pooler_output.float() - else: - pooled_output = None - - if self.text_projection is not None and pooled_output is not None: - pooled_output = pooled_output.float().to(self.text_projection.device) @ self.text_projection.float() return z.float(), pooled_output @@ -164,11 +175,9 @@ def patched_ClipVisionModel__init__(self, json_config): else: self.dtype = torch.float32 - if 'cuda' not in self.load_device.type: - self.dtype = torch.float32 - - with modeling_utils.no_init_weights(): - self.model = CLIPVisionModelWithProjection(config) + with use_patched_ops(ops.manual_cast): + with modeling_utils.no_init_weights(): + self.model = CLIPVisionModelWithProjection(config) self.model.to(self.dtype) self.patcher = ldm_patched.modules.model_patcher.ModelPatcher( @@ -181,14 +190,7 @@ def patched_ClipVisionModel__init__(self, json_config): def patched_ClipVisionModel_encode_image(self, image): ldm_patched.modules.model_management.load_model_gpu(self.patcher) pixel_values = ldm_patched.modules.clip_vision.clip_preprocess(image.to(self.load_device)) - - if self.dtype != torch.float32: - precision_scope = torch.autocast - else: - precision_scope = lambda a, b: contextlib.nullcontext(a) - - with precision_scope(ldm_patched.modules.model_management.get_autocast_device(self.load_device), torch.float32): - outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) + outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) for k in outputs: t = outputs[k] From 7a6b46f363d0f3797d263bfdf00bd16e315da02a Mon Sep 17 00:00:00 2001 From: Huy Nguyen <111590292+peterng1618@users.noreply.github.com> Date: Mon, 18 Dec 2023 11:20:02 +0700 Subject: [PATCH 17/23] New Log System --- fooocus_version.py | 2 +- modules/async_worker.py | 10 ++++--- modules/private_logger.py | 58 ++++++++++++++++++++++++--------------- update_log.md | 4 +++ 4 files changed, 47 insertions(+), 27 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index b588f46f..71457946 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.851' +version = '2.1.852' diff --git a/modules/async_worker.py b/modules/async_worker.py index 93a76579..c2c8632d 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -34,6 +34,7 @@ def worker(): import modules.advanced_parameters as advanced_parameters import extras.ip_adapter as ip_adapter import extras.face_crop + import fooocus_version from modules.sdxl_styles import apply_style, apply_wildcards, fooocus_expansion from modules.private_logger import log @@ -492,7 +493,7 @@ def worker(): if direct_return: d = [('Upscale (Fast)', '2x')] - log(uov_input_image, d, single_line_number=1) + log(uov_input_image, d) yield_result(async_task, uov_input_image, do_not_show_finished_images=True) return @@ -774,12 +775,13 @@ def worker(): ('Refiner Switch', refiner_switch), ('Sampler', sampler_name), ('Scheduler', scheduler_name), - ('Seed', task['task_seed']) + ('Seed', task['task_seed']), ] for n, w in loras: if n != 'None': - d.append((f'LoRA [{n}] weight', w)) - log(x, d, single_line_number=3) + d.append((f'LoRA', f'{n} : {w}')) + d.append(('Version', 'v' + fooocus_version.version)) + log(x, d) yield_result(async_task, imgs, do_not_show_finished_images=len(tasks) == 1) except ldm_patched.modules.model_management.InterruptProcessingException as e: diff --git a/modules/private_logger.py b/modules/private_logger.py index 3a992cf6..b91f68d3 100644 --- a/modules/private_logger.py +++ 
b/modules/private_logger.py @@ -16,7 +16,7 @@ def get_current_html_path(): return html_name -def log(img, dic, single_line_number=3): +def log(img, dic): if args_manager.args.disable_image_log: return @@ -25,36 +25,50 @@ def log(img, dic, single_line_number=3): Image.fromarray(img).save(local_temp_filename) html_name = os.path.join(os.path.dirname(local_temp_filename), 'log.html') - existing_log = log_cache.get(html_name, None) + css_styles = ( + "" + ) - if existing_log is None: + begin_part = f"Fooocus Log {date_string}{css_styles}

Fooocus Log {date_string} (private)

\n

All images are clean, without any hidden data/meta, and safe to share with others.

\n\n" + end_part = f'\n' + + middle_part = log_cache.get(html_name, "") + + if middle_part == "": if os.path.exists(html_name): - existing_log = open(html_name, encoding='utf-8').read() - else: - existing_log = f'

Fooocus Log {date_string} (private)

\n

All images do not contain any hidden data.

' + existing_split = open(html_name, 'r', encoding='utf-8').read().split('') + if len(existing_split) == 3: + middle_part = existing_split[1] + else: + middle_part = existing_split[0] div_name = only_name.replace('.', '_') - item = f'
\n' - item += "" - item += f"" - item += f"

{only_name}

\n" - for i, (k, v) in enumerate(dic): - if i < single_line_number: - item += f"

{k}: {v}

\n" - else: - if (i - single_line_number) % 2 == 0: - item += f"

{k}: {v}, " - else: - item += f"{k}: {v}

\n" + item = f"

\n" + item += f"" + item += "" - item += "
{only_name}
" + for key, value in dic: + item += f"\n" + item += "" item += "

\n" - existing_log = item + existing_log + item += "
\n\n" + + middle_part = item + middle_part with open(html_name, 'w', encoding='utf-8') as f: - f.write(existing_log) + f.write(begin_part + middle_part + end_part) print(f'Image generated with private log at: {html_name}') - log_cache[html_name] = existing_log + log_cache[html_name] = middle_part return diff --git a/update_log.md b/update_log.md index 39e4c9e5..7ac777de 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.852 + +* New Log System: Log system now uses tables. If this is breaking some other browser extension or javascript developments, see also [use previous version](https://github.com/lllyasviel/Fooocus/discussions/1405). + # 2.1.846 * Many users reported that image quality is different from 2.1.824. We reviewed all codes and fixed several precision problems in 2.1.846. From f3084894402a4c0b7ed9e7164466bcedd5f5428d Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Mon, 18 Dec 2023 19:01:52 -0800 Subject: [PATCH 18/23] Add Marc K3nt3L's styles Thanks Marc K3nt3L ! --- fooocus_version.py | 2 +- modules/sdxl_styles.py | 3 +- modules/style_sorter.py | 13 +- readme.md | 2 +- sdxl_styles/sdxl_styles_marc_k3nt3l.json | 312 +++++++++++++++++++++++ update_log.md | 4 + 6 files changed, 328 insertions(+), 8 deletions(-) create mode 100644 sdxl_styles/sdxl_styles_marc_k3nt3l.json diff --git a/fooocus_version.py b/fooocus_version.py index 71457946..8d6fe946 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.852' +version = '2.1.853' diff --git a/modules/sdxl_styles.py b/modules/sdxl_styles.py index d7489455..f5bb6276 100644 --- a/modules/sdxl_styles.py +++ b/modules/sdxl_styles.py @@ -31,7 +31,8 @@ for x in ['sdxl_styles_fooocus.json', 'sdxl_styles_sai.json', 'sdxl_styles_mre.json', 'sdxl_styles_twri.json', - 'sdxl_styles_diva.json']: + 'sdxl_styles_diva.json', + 'sdxl_styles_marc_k3nt3l.json']: if x in styles_files: styles_files.remove(x) styles_files.append(x) diff --git a/modules/style_sorter.py b/modules/style_sorter.py index 393e441d..49142bc7 100644 --- a/modules/style_sorter.py +++ b/modules/style_sorter.py @@ -15,11 +15,14 @@ def try_load_sorted_styles(style_names, default_selected): try: if os.path.exists('sorted_styles.json'): with open('sorted_styles.json', 'rt', encoding='utf-8') as fp: - sorted_styles = json.load(fp) - if len(sorted_styles) == len(all_styles): - if all(x in all_styles for x in sorted_styles): - if all(x in sorted_styles for x in all_styles): - all_styles = sorted_styles + sorted_styles = [] + for x in json.load(fp): + if x in all_styles: + sorted_styles.append(x) + for x in all_styles: + if x not in sorted_styles: + sorted_styles.append(x) + all_styles = sorted_styles except Exception as e: print('Load style sorting failed.') print(e) diff --git a/readme.md b/readme.md index 78682fc0..b970ee12 100644 --- a/readme.md +++ b/readme.md @@ -384,7 +384,7 @@ See also [About Forking and Promotion of Forks](https://github.com/lllyasviel/Fo ## Thanks -Special thanks to [twri](https://github.com/twri) and [3Diva](https://github.com/3Diva) for creating additional SDXL styles available in Fooocus. Thanks [daswer123](https://github.com/daswer123) for contributing the Canvas Zoom! +Special thanks to [twri](https://github.com/twri) and [3Diva](https://github.com/3Diva) and [Marc K3nt3L](https://github.com/K3nt3L) for creating additional SDXL styles available in Fooocus. Thanks [daswer123](https://github.com/daswer123) for contributing the Canvas Zoom! 
## Update Log diff --git a/sdxl_styles/sdxl_styles_marc_k3nt3l.json b/sdxl_styles/sdxl_styles_marc_k3nt3l.json new file mode 100644 index 00000000..fbbe1a24 --- /dev/null +++ b/sdxl_styles/sdxl_styles_marc_k3nt3l.json @@ -0,0 +1,312 @@ +[ + { + "name": "MK Chromolithography", + "prompt": "Chromolithograph {prompt}. Vibrant colors, intricate details, rich color saturation, meticulous registration, multi-layered printing, decorative elements, historical charm, artistic reproductions, commercial posters, nostalgic, ornate compositions.", + "negative_prompt": "monochromatic, simple designs, limited color palette, imprecise registration, minimalistic, modern aesthetic, digital appearance." + }, + { + "name": "MK Cross Processing Print", + "prompt": "Cross processing print {prompt}. Experimental color shifts, unconventional tonalities, vibrant and surreal hues, heightened contrasts, unpredictable results, artistic unpredictability, retro and vintage feel, dynamic color interplay, abstract and dreamlike.", + "negative_prompt": "predictable color tones, traditional processing, realistic color representation, subdued contrasts, standard photographic aesthetics." + }, + { + "name": "MK Dufaycolor Photograph", + "prompt": "Dufaycolor photograph {prompt}. Vintage color palette, distinctive color rendering, soft and dreamy atmosphere, historical charm, unique color process, grainy texture, evocative mood, nostalgic aesthetic, hand-tinted appearance, artistic patina.", + "negative_prompt": "modern color reproduction, hyperrealistic tones, sharp and clear details, digital precision, contemporary aesthetic." + }, + { + "name": "MK Herbarium", + "prompt": "Herbarium drawing{prompt}. Botanical accuracy, old botanical book illustration, detailed illustrations, pressed plants, delicate and precise linework, scientific documentation, meticulous presentation, educational purpose, organic compositions, timeless aesthetic, naturalistic beauty.", + "negative_prompt": "abstract representation, vibrant colors, artistic interpretation, chaotic compositions, fantastical elements, digital appearance." + }, + { + "name": "MK Punk Collage", + "prompt": "punk collage style {prompt} . mixed media, papercut,textured paper, overlapping, ripped posters, safety pins, chaotic layers, graffiti-style elements, anarchy symbols, vintage photos, cut-and-paste aesthetic, bold typography, distorted images, political messages, urban decay, distressed textures, newspaper clippings, spray paint, rebellious icons, DIY spirit, vivid colors, punk band logos, edgy and raw compositions, ", + "negative_prompt": "conventional,blurry, noisy, low contrast" + }, + { + "name": "MK mosaic", + "prompt": "mosaic style {prompt} . fragmented, assembled, colorful, highly detailed", + "negative_prompt": "whole, unbroken, monochrome" + }, + { + "name": "MK Van Gogh", + "prompt": "Oil painting by Van Gogh {prompt} . Expressive, impasto, swirling brushwork, vibrant, brush strokes, Brushstroke-heavy, Textured, Impasto, Colorful, Dynamic, Bold, Distinctive, Vibrant, Whirling, Expressive, Dramatic, Swirling, Layered, Intense, Contrastive, Atmospheric, Luminous, Textural, Evocative, SpiraledVan Gogh style", + "negative_prompt": "realistic, photorealistic, calm, straight lines, signature, frame, text, watermark" + }, + { + "name": "MK Coloring Book", + "prompt": "centered black and white high contrast line drawing, coloring book style,{prompt} . 
monochrome, blank white background", + "negative_prompt": "greyscale, gradients,shadows,shadow, colored, Red, Blue, Yellow, Green, Orange, Purple, Pink, Brown, Gray, Beige, Turquoise, Lavender, Cyan, Magenta, Olive, Indigo, black background" + }, + { + "name": "MK Singer Sargent", + "prompt": "Oil painting by John Singer Sargent {prompt}. Elegant, refined, masterful technique,realistic portrayal, subtle play of light, captivating expression, rich details, harmonious colors, skillful composition, brush strokes, chiaroscuro.", + "negative_prompt": "realistic, photorealistic, abstract, overly stylized, excessive contrasts, distorted,bright colors,disorder." + }, + { + "name": "MK Pollock", + "prompt": "Oil painting by Jackson Pollock {prompt}. Abstract expressionism, drip painting, chaotic composition, energetic, spontaneous, unconventional technique, dynamic, bold, distinctive, vibrant, intense, expressive, energetic, layered, non-representational, gestural.", + "negative_prompt": "(realistic:1.5), (photorealistic:1.5), representational, calm, ordered composition, precise lines, detailed forms, subdued colors, quiet, static, traditional, figurative." + }, + { + "name": "MK Basquiat", + "prompt": "Artwork by Jean-Michel Basquiat {prompt}. Neo-expressionism, street art influence, graffiti-inspired, raw, energetic, bold colors, dynamic composition, chaotic, layered, textural, expressive, spontaneous, distinctive, symbolic,energetic brushstrokes.", + "negative_prompt": "(realistic:1.5), (photorealistic:1.5), calm, precise lines, conventional composition, subdued" + }, + { + "name": "MK Andy Warhol", + "prompt": "Artwork in the style of Andy Warhol {prompt}. Pop art, vibrant colors, bold compositions, repetition of iconic imagery, celebrity culture, commercial aesthetics, mass production influence, stylized simplicity, cultural commentary, graphical elements, distinctive portraits.", + "negative_prompt": "subdued colors, realistic, lack of repetition, minimalistic." + }, + { + "name": "MK Halftone print", + "prompt": "Halftone print of {prompt}. Dot matrix pattern, grayscale tones, vintage aesthetic, newspaper print vibe, stylized dots, visual texture, black and white contrasts, retro appearance, artistic pointillism,pop culture, (Roy Lichtenstein style:1.5).", + "negative_prompt": "smooth gradients, continuous tones, vibrant colors." + }, + { + "name": "MK Gond Painting", + "prompt": "Gond painting {prompt}. Intricate patterns, vibrant colors, detailed motifs, nature-inspired themes, tribal folklore, fine lines, intricate detailing, storytelling compositions, mystical and folkloric, cultural richness.", + "negative_prompt": "monochromatic, abstract shapes, minimalistic." + }, + { + "name": "MK Albumen Print", + "prompt": "Albumen print {prompt}. Sepia tones, fine details, subtle tonal gradations, delicate highlights, vintage aesthetic, soft and muted atmosphere, historical charm, rich textures, meticulous craftsmanship, classic photographic technique, vignetting.", + "negative_prompt": "vibrant colors, high contrast, modern, digital appearance, sharp details, contemporary style." + }, + { + "name": "MK Aquatint Print", + "prompt": "Aquatint print {prompt}. Soft tonal gradations, atmospheric effects, velvety textures, rich contrasts, fine details, etching process, delicate lines, nuanced shading, expressive and moody atmosphere, artistic depth.", + "negative_prompt": "sharp contrasts, bold lines, minimalistic." + }, + { + "name": "MK Anthotype Print", + "prompt": "Anthotype print {prompt}. 
Monochrome dye, soft and muted colors, organic textures, ephemeral and delicate appearance, low details, watercolor canvas, low contrast, overexposed, silhouette, textured paper.", + "negative_prompt": "vibrant synthetic dyes, bold and saturated colors." + }, + { + "name": "MK Inuit Carving", + "prompt": "A sculpture made of ivory, {prompt} made of . Sculptures, Inuit art style, intricate carvings, natural materials, storytelling motifs, arctic wildlife themes, symbolic representations, cultural traditions, earthy tones, harmonious compositions, spiritual and mythological elements.", + "negative_prompt": "abstract, vibrant colors." + }, + { + "name": "MK Bromoil Print", + "prompt": "Bromoil print {prompt}. Painterly effects, sepia tones, textured surfaces, rich contrasts, expressive brushwork, tonal variations, vintage aesthetic, atmospheric mood, handmade quality, artistic experimentation, darkroom craftsmanship, vignetting.", + "negative_prompt": "smooth surfaces, minimal brushwork, contemporary digital appearance." + }, + { + "name": "MK Calotype Print", + "prompt": "Calotype print {prompt}. Soft focus, subtle tonal range, paper negative process, fine details, vintage aesthetic, artistic experimentation, atmospheric mood, early photographic charm, handmade quality, vignetting.", + "negative_prompt": "sharp focus, bold contrasts, modern aesthetic, digital photography." + }, + { + "name": "MK Color Sketchnote", + "prompt": "Color sketchnote {prompt}. Hand-drawn elements, vibrant colors, visual hierarchy, playful illustrations, varied typography, graphic icons, organic and dynamic layout, personalized touches, creative expression, engaging storytelling.", + "negative_prompt": "monochromatic, geometric layout." + }, + { + "name": "MK Cibulak Porcelain", + "prompt": "A sculpture made of blue pattern porcelain of {prompt}. Classic design, blue and white color scheme, intricate detailing, floral motifs, onion-shaped elements, historical charm, rococo, white ware, cobalt blue, underglaze pattern, fine craftsmanship, traditional elegance, delicate patterns, vintage aesthetic, Meissen, Blue Onion pattern, Cibulak.", + "negative_prompt": "tea, teapot, cup, teacup,bright colors, bold and modern design, absence of intricate detailing, lack of floral motifs, non-traditional shapes." + }, + { + "name": "MK Alcohol Ink Art", + "prompt": "Alcohol ink art {prompt}. Fluid and vibrant colors, unpredictable patterns, organic textures, translucent layers, abstract compositions, ethereal and dreamy effects, free-flowing movement, expressive brushstrokes, contemporary aesthetic, wet textured paper.", + "negative_prompt": "monochromatic, controlled patterns." + }, + { + "name": "MK One Line Art", + "prompt": "One line art {prompt}. Continuous and unbroken black line, minimalistic, simplicity, economical use of space, flowing and dynamic, symbolic representations, contemporary aesthetic, evocative and abstract, white background.", + "negative_prompt": "disjointed lines, complexity, complex detailing." + }, + { + "name": "MK Blacklight Paint", + "prompt": "Blacklight paint {prompt}. Fluorescent pigments, vibrant and surreal colors, ethereal glow, otherworldly effects, dynamic and psychedelic compositions, neon aesthetics, transformative in ultraviolet light, contemporary and experimental.", + "negative_prompt": "muted colors, traditional and realistic compositions." + }, + { + "name": "MK Carnival Glass", + "prompt": "A sculpture made of Carnival glass, {prompt}. 
Iridescent surfaces, vibrant colors, intricate patterns, opalescent hues, reflective and prismatic effects, Art Nouveau and Art Deco influences, vintage charm, intricate detailing, lustrous and luminous appearance, Carnival Glass style.", + "negative_prompt": "non-iridescent surfaces, muted colors, absence of intricate patterns, lack of opalescent hues, modern and minimalist aesthetic." + }, + { + "name": "MK Cyanotype Print", + "prompt": "Cyanotype print {prompt}. Prussian blue tones, distinctive coloration, high contrast, blueprint aesthetics, atmospheric mood, sun-exposed paper, silhouette effects, delicate details, historical charm, handmade and experimental quality.", + "negative_prompt": "vibrant colors, low contrast, modern and polished appearance." + }, + { + "name": "MK Cross-Stitching", + "prompt": "Cross-stitching {prompt}. Intricate patterns, embroidery thread, sewing, fine details, precise stitches, textile artistry, symmetrical designs, varied color palette, traditional and contemporary motifs, handmade and crafted,canvas, nostalgic charm.", + "negative_prompt": "paper, paint, ink, photography." + }, + { + "name": "MK Encaustic Paint", + "prompt": "Encaustic paint {prompt}. Textured surfaces, translucent layers, luminous quality, wax medium, rich color saturation, fluid and organic shapes, contemporary and historical influences, mixed media elements, atmospheric depth.", + "negative_prompt": "flat surfaces, opaque layers, lack of wax medium, muted color palette, absence of textured surfaces, non-mixed media." + }, + { + "name": "MK Embroidery", + "prompt": "Embroidery {prompt}. Intricate stitching, embroidery thread, fine details, varied thread textures, textile artistry, embellished surfaces, diverse color palette, traditional and contemporary motifs, handmade and crafted, tactile and ornate.", + "negative_prompt": "minimalist, monochromatic." + }, + { + "name": "MK Gyotaku", + "prompt": "Gyotaku {prompt}. Fish impressions, realistic details, ink rubbings, textured surfaces, traditional Japanese art form, nature-inspired compositions, artistic representation of marine life, black and white contrasts, cultural significance.", + "negative_prompt": "photography." + }, + { + "name": "MK Luminogram", + "prompt": "Luminogram {prompt}. Photogram technique, ethereal and abstract effects, light and shadow interplay, luminous quality, experimental process, direct light exposure, unique and unpredictable results, artistic experimentation.", + "negative_prompt": "" + }, + { + "name": "MK Lite Brite Art", + "prompt": "Lite Brite art {prompt}. Luminous and colorful designs, pixelated compositions, retro aesthetic, glowing effects, creative patterns, interactive and playful, nostalgic charm, vibrant and dynamic arrangements.", + "negative_prompt": "monochromatic." + }, + { + "name": "MK Mokume-gane", + "prompt": "Mokume-gane {prompt}. Wood-grain patterns, mixed metal layers, intricate and organic designs, traditional Japanese metalwork, harmonious color combinations, artisanal craftsmanship, unique and layered textures, cultural and historical significance.", + "negative_prompt": "uniform metal surfaces." + }, + { + "name": "Pebble Art", + "prompt": "a sculpture made of peebles, {prompt}. 
Pebble art style,natural materials, textured surfaces, balanced compositions, organic forms, harmonious arrangements, tactile and 3D effects, beach-inspired aesthetic, creative storytelling, artisanal craftsmanship.", + "negative_prompt": "non-natural materials, lack of textured surfaces, imbalanced compositions, absence of organic forms, non-tactile appearance." + }, + { + "name": "MK Palekh", + "prompt": "Palekh art {prompt}. Miniature paintings, intricate details, vivid colors, folkloric themes, lacquer finish, storytelling compositions, symbolic elements, Russian folklore influence, cultural and historical significance.", + "negative_prompt": "large-scale paintings." + }, + { + "name": "MK Suminagashi", + "prompt": "Suminagashi {prompt}. Floating ink patterns, marbled effects, delicate and ethereal designs, water-based ink, fluid and unpredictable compositions, meditative process, monochromatic or subtle color palette, Japanese artistic tradition.", + "negative_prompt": "vibrant and bold color palette." + }, + { + "name": "MK Scrimshaw", + "prompt": "A Scrimshaw engraving of {prompt}. Intricate engravings on a spermwhale's teeth, marine motifs, detailed scenes, nautical themes, black and white contrasts, historical craftsmanship, artisanal carving, storytelling compositions, maritime heritage.", + "negative_prompt": "colorful, modern." + }, + { + "name": "MK Shibori", + "prompt": "Shibori {prompt}. Textured fabric, intricate patterns, resist-dyeing technique, indigo or vibrant colors, organic and flowing designs, Japanese textile art, cultural tradition, tactile and visual interest.", + "negative_prompt": "monochromatic." + }, + { + "name": "MK Vitreous Enamel", + "prompt": "A sculpture made of Vitreous enamel {prompt}. Smooth and glossy surfaces, vibrant colors, glass-like finish, durable and resilient, intricate detailing, traditional and contemporary applications, artistic craftsmanship, jewelry and decorative objects, , Vitreous enamel, colored glass.", + "negative_prompt": "rough surfaces, muted colors." + }, + { + "name": "MK Ukiyo-e", + "prompt": "Ukiyo-e {prompt}. Woodblock prints, vibrant colors, intricate details, depictions of landscapes, kabuki actors, beautiful women, cultural scenes, traditional Japanese art, artistic craftsmanship, historical significance.", + "negative_prompt": "absence of woodblock prints, muted colors, lack of intricate details, non-traditional Japanese themes, absence of cultural scenes." + }, + { + "name": "MK vintage-airline-poster", + "prompt": "vintage airline poster {prompt} . classic aviation fonts, pastel colors, elegant aircraft illustrations, scenic destinations, distressed textures, retro travel allure", + "negative_prompt": "modern fonts, bold colors, hyper-realistic, sleek design" + }, + { + "name": "MK vintage-travel-poster", + "prompt": "vintage travel poster {prompt} . retro fonts, muted colors, scenic illustrations, iconic landmarks, distressed textures, nostalgic vibes", + "negative_prompt": "modern fonts, vibrant colors, hyper-realistic, sleek design" + }, + { + "name": "MK bauhaus-style", + "prompt": "Bauhaus-inspired {prompt} . minimalism, geometric precision, primary colors, sans-serif typography, asymmetry, functional design", + "negative_prompt": "ornate, intricate, excessive detail, complex patterns, serif typography" + }, + { + "name": "MK afrofuturism", + "prompt": "Afrofuturism illustration {prompt} . 
vibrant colors, futuristic elements, cultural symbolism, cosmic imagery, dynamic patterns, empowering narratives", + "negative_prompt": "monochromatic" + }, + { + "name": "MK atompunk", + "prompt": "Atompunk illustation, {prompt} . retro-futuristic, atomic age aesthetics, sleek lines, metallic textures, futuristic technology, optimism, energy", + "negative_prompt": "organic, natural textures, rustic, dystopian" + }, + { + "name": "MK constructivism", + "prompt": "Constructivism {prompt} . geometric abstraction, bold colors, industrial aesthetics, dynamic compositions, utilitarian design, revolutionary spirit", + "negative_prompt": "organic shapes, muted colors, ornate elements, traditional" + }, + { + "name": "MK chicano-art", + "prompt": "Chicano art {prompt} . bold colors, cultural symbolism, muralism, lowrider aesthetics, barrio life, political messages, social activism, Mexico", + "negative_prompt": "monochromatic, minimalist, mainstream aesthetics" + }, + { + "name": "MK de-stijl", + "prompt": "De Stijl Art {prompt} . neoplasticism, primary colors, geometric abstraction, horizontal and vertical lines, simplicity, harmony, utopian ideals", + "negative_prompt": "complex patterns, muted colors, ornate elements, asymmetry" + }, + { + "name": "MK dayak-art", + "prompt": "Dayak art sculpture of {prompt} . intricate patterns, nature-inspired motifs, vibrant colors, traditional craftsmanship, cultural symbolism, storytelling", + "negative_prompt": "minimalist, monochromatic, modern" + }, + { + "name": "MK fayum-portrait", + "prompt": "Fayum portrait {prompt} . encaustic painting, realistic facial features, warm earth tones, serene expressions, ancient Egyptian influences", + "negative_prompt": "abstract, vibrant colors, exaggerated features, modern" + }, + { + "name": "MK illuminated-manuscript", + "prompt": "Illuminated manuscript {prompt} . intricate calligraphy, rich colors, detailed illustrations, gold leaf accents, ornate borders, religious, historical, medieval", + "negative_prompt": "modern typography, minimalist design, monochromatic, abstract themes" + }, + { + "name": "MK kalighat-painting", + "prompt": "Kalighat painting {prompt} . bold lines, vibrant colors, narrative storytelling, cultural motifs, flat compositions, expressive characters", + "negative_prompt": "subdued colors, intricate details, realistic portrayal, modern aesthetics" + }, + { + "name": "MK madhubani-painting", + "prompt": "Madhubani painting {prompt} . intricate patterns, vibrant colors, nature-inspired motifs, cultural storytelling, symmetry, folk art aesthetics", + "negative_prompt": "abstract, muted colors, minimalistic design, modern aesthetics" + }, + { + "name": "MK pictorialism", + "prompt": "Pictorialism illustration{prompt} . soft focus, atmospheric effects, artistic interpretation, tonality, muted colors, evocative storytelling", + "negative_prompt": "sharp focus, high contrast, realistic depiction, vivid colors" + }, + { + "name": "MK pichwai-painting", + "prompt": "Pichwai painting {prompt} . intricate detailing, vibrant colors, religious themes, nature motifs, devotional storytelling, gold leaf accents", + "negative_prompt": "minimalist, subdued colors, abstract design" + }, + { + "name": "MK patachitra-painting", + "prompt": "Patachitra painting {prompt} . 
bold outlines, vibrant colors, intricate detailing, mythological themes, storytelling, traditional craftsmanship", + "negative_prompt": "subdued colors, minimalistic, abstract, modern aesthetics" + }, + { + "name": "MK samoan-art-inspired", + "prompt": "Samoan art-inspired wooden sculpture {prompt} . traditional motifs, natural elements, bold colors, cultural symbolism, storytelling, craftsmanship", + "negative_prompt": "modern aesthetics, minimalist, abstract" + }, + { + "name": "MK tlingit-art", + "prompt": "Tlingit art {prompt} . formline design, natural elements, animal motifs, bold colors, cultural storytelling, traditional craftsmanship, Alaska traditional art, (totem:1.5)", + "negative_prompt": "" + }, + { + "name": "MK adnate-style", + "prompt": "Painting by Adnate {prompt} . realistic portraits, street art, large-scale murals, subdued color palette, social narratives", + "negative_prompt": "abstract, vibrant colors, small-scale art" + }, + { + "name": "MK ron-english-style", + "prompt": "Painting by Ron English {prompt} . pop-surrealism, cultural subversion, iconic mash-ups, vibrant and bold colors, satirical commentary", + "negative_prompt": "traditional, monochromatic" + }, + { + "name": "MK shepard-fairey-style", + "prompt": "Painting by Shepard Fairey {prompt} . street art, political activism, iconic stencils, bold typography, high contrast, red, black, and white color palette", + "negative_prompt": "traditional, muted colors" + } +] diff --git a/update_log.md b/update_log.md index 7ac777de..5c2efd87 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,7 @@ +# 2.1.853 + +* Add Marc K3nt3L's styles. Thanks [Marc K3nt3L](https://github.com/K3nt3L)! + # 2.1.852 * New Log System: Log system now uses tables. If this is breaking some other browser extension or javascript developments, see also [use previous version](https://github.com/lllyasviel/Fooocus/discussions/1405). From f7bb578a1409b1f96aff534ff5ed2bd10502296f Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Wed, 20 Dec 2023 19:52:38 -0800 Subject: [PATCH 19/23] 2.1.854 * Add a button to copy parameters to clipboard in log. * Allow users to load parameters directly by pasting parameters to prompt. 
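The two features above form a round trip: the log page stores each image's parameters as URL-quoted JSON behind a copy button, and the prompt box recognizes pasted JSON and offers to load it. A minimal sketch of that round trip follows; the parameter names and values are illustrative, and the URL-decoding step is assumed to happen in the page's copy-to-clipboard handler rather than in Python.

```python
import json
import urllib.parse

# Example parameter list of the kind async_worker.py logs for each image
# (names and values illustrative).
dic = [('Prompt', 'a cat'), ('Resolution', '(1152, 896)'), ('Sharpness', 2.0), ('Seed', 123456)]

# private_logger.py embeds the parameters as URL-quoted JSON for the copy button ...
js_txt = urllib.parse.quote(json.dumps({k: v for k, v in dic}, indent=0), safe='')

# ... and the prompt box receives plain JSON after pasting (the un-quoting is
# assumed to happen in the in-page copy handler).
pasted = urllib.parse.unquote(js_txt)
params = json.loads(pasted)

# Tuple-like fields such as Resolution are stored as strings and evaluated back.
width, height = eval(params['Resolution'])
assert (width, height) == (1152, 896)
```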
--- fooocus_version.py | 2 +- modules/async_worker.py | 8 +-- modules/meta_parser.py | 144 ++++++++++++++++++++++++++++++++++++++ modules/private_logger.py | 23 +++++- update_log.md | 5 ++ webui.py | 48 ++++++++++++- 6 files changed, 222 insertions(+), 8 deletions(-) create mode 100644 modules/meta_parser.py diff --git a/fooocus_version.py b/fooocus_version.py index 8d6fe946..43705b4a 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.853' +version = '2.1.854' diff --git a/modules/async_worker.py b/modules/async_worker.py index c2c8632d..81b942e9 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -397,8 +397,8 @@ def worker(): uc=None, positive_top_k=len(positive_basic_workloads), negative_top_k=len(negative_basic_workloads), - log_positive_prompt='; '.join([task_prompt] + task_extra_positive_prompts), - log_negative_prompt='; '.join([task_negative_prompt] + task_extra_negative_prompts), + log_positive_prompt='\n'.join([task_prompt] + task_extra_positive_prompts), + log_negative_prompt='\n'.join([task_negative_prompt] + task_extra_negative_prompts), )) if use_expansion: @@ -777,9 +777,9 @@ def worker(): ('Scheduler', scheduler_name), ('Seed', task['task_seed']), ] - for n, w in loras: + for li, (n, w) in enumerate(loras): if n != 'None': - d.append((f'LoRA', f'{n} : {w}')) + d.append((f'LoRA {li + 1}', f'{n} : {w}')) d.append(('Version', 'v' + fooocus_version.version)) log(x, d) diff --git a/modules/meta_parser.py b/modules/meta_parser.py new file mode 100644 index 00000000..6898133d --- /dev/null +++ b/modules/meta_parser.py @@ -0,0 +1,144 @@ +import json +import gradio as gr +import modules.config + + +def load_parameter_button_click(raw_prompt_txt): + loaded_parameter_dict = json.loads(raw_prompt_txt) + assert isinstance(loaded_parameter_dict, dict) + + results = [True] + + try: + h = loaded_parameter_dict.get('Prompt', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Negative Prompt', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Styles', None) + h = eval(h) + assert isinstance(h, list) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Performance', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Resolution', None) + width, height = eval(h) + formatted = modules.config.add_ratio(f'{width}*{height}') + if formatted in modules.config.available_aspect_ratios: + results.append(formatted) + results.append(-1) + results.append(-1) + else: + results.append(gr.update()) + results.append(width) + results.append(height) + except: + results.append(gr.update()) + results.append(gr.update()) + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Sharpness', None) + assert h is not None + h = float(h) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Guidance Scale', None) + assert h is not None + h = float(h) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('ADM Guidance', None) + p, n, e = eval(h) + results.append(float(p)) + results.append(float(n)) + results.append(float(e)) + except: + results.append(gr.update()) + results.append(gr.update()) + results.append(gr.update()) + + try: + h = 
loaded_parameter_dict.get('Base Model', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Refiner Model', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Refiner Switch', None) + assert h is not None + h = float(h) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Sampler', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Scheduler', None) + assert isinstance(h, str) + results.append(h) + except: + results.append(gr.update()) + + try: + h = loaded_parameter_dict.get('Seed', None) + assert h is not None + h = int(h) + results.append(False) + results.append(h) + except: + results.append(gr.update()) + results.append(gr.update()) + + results.append(gr.update(visible=True)) + results.append(gr.update(visible=False)) + + for i in range(1, 6): + try: + n, w = loaded_parameter_dict.get(f'LoRA {i}').split(' : ') + w = float(w) + results.append(n) + results.append(w) + except: + results.append(gr.update()) + results.append(gr.update()) + + return results diff --git a/modules/private_logger.py b/modules/private_logger.py index b91f68d3..83ba9e36 100644 --- a/modules/private_logger.py +++ b/modules/private_logger.py @@ -1,6 +1,8 @@ import os import args_manager import modules.config +import json +import urllib.parse from PIL import Image from modules.util import generate_temp_filename @@ -36,10 +38,22 @@ def log(img, dic): ".image-container img { height: auto; max-width: 512px; display: block; padding-right:10px; } " ".image-container div { text-align: center; padding: 4px; } " "hr { border-color: gray; } " + "button { background-color: black; color: white; border: 1px solid grey; border-radius: 5px; padding: 5px 10px; text-align: center; display: inline-block; font-size: 16px; cursor: pointer; }" + "button:hover {background-color: grey; color: black;}" "" ) - begin_part = f"Fooocus Log {date_string}{css_styles}

Fooocus Log {date_string} (private)\nAll images are clean, without any hidden data/meta, and safe to share with others.\n\n"
+    js = (
+        ""
+    )
+
+    begin_part = f"Fooocus Log {date_string}{css_styles}{js}Fooocus Log {date_string} (private)\nAll images are clean, without any hidden data/meta, and safe to share with others.\n\n"
     end_part = f'\n'
     middle_part = log_cache.get(html_name, "")
@@ -57,8 +71,13 @@ def log(img, dic):
         item += f"{only_name}"
         item += ""
         for key, value in dic:
-            item += f"\n"
+            value_txt = str(value).replace('\n', '')
+            item += f"\n"
         item += ""
+
+        js_txt = urllib.parse.quote(json.dumps({k: v for k, v in dic}, indent=0), safe='')
+        item += f"
" + item += "" item += "\n\n" diff --git a/update_log.md b/update_log.md index 5c2efd87..ef906987 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,8 @@ +# 2.1.854 + +* Add a button to copy parameters to clipboard in log. +* Allow users to load parameters directly by pasting parameters to prompt. + # 2.1.853 * Add Marc K3nt3L's styles. Thanks [Marc K3nt3L](https://github.com/K3nt3L)! diff --git a/webui.py b/webui.py index 00b1e44e..35566b49 100644 --- a/webui.py +++ b/webui.py @@ -1,6 +1,7 @@ import gradio as gr import random import os +import json import time import shared import modules.config @@ -12,6 +13,7 @@ import modules.flags as flags import modules.gradio_hijack as grh import modules.advanced_parameters as advanced_parameters import modules.style_sorter as style_sorter +import modules.meta_parser import args_manager import copy @@ -100,7 +102,7 @@ with shared.gradio_root: elem_id='final_gallery') with gr.Row(elem_classes='type_row'): with gr.Column(scale=17): - prompt = gr.Textbox(show_label=False, placeholder="Type prompt here.", elem_id='positive_prompt', + prompt = gr.Textbox(show_label=False, placeholder="Type prompt here or paste parameters.", elem_id='positive_prompt', container=False, autofocus=True, elem_classes='type_row', lines=1024) default_prompt = modules.config.default_prompt @@ -109,6 +111,7 @@ with shared.gradio_root: with gr.Column(scale=3, min_width=0): generate_button = gr.Button(label="Generate", value="Generate", elem_classes='type_row', elem_id='generate_button', visible=True) + load_parameter_button = gr.Button(label="Load Parameters", value="Load Parameters", elem_classes='type_row', elem_id='load_parameter_button', visible=False) skip_button = gr.Button(label="Skip", value="Skip", elem_classes='type_row_half', visible=False) stop_button = gr.Button(label="Stop", value="Stop", elem_classes='type_row_half', elem_id='stop_button', visible=False) @@ -510,6 +513,49 @@ with shared.gradio_root: ctrls += [outpaint_selections, inpaint_input_image, inpaint_additional_prompt] ctrls += ip_ctrls + def parse_meta(raw_prompt_txt): + loaded_json = None + try: + if '{' in raw_prompt_txt: + if '}' in raw_prompt_txt: + if ':' in raw_prompt_txt: + loaded_json = json.loads(raw_prompt_txt) + assert isinstance(loaded_json, dict) + except: + loaded_json = None + + if loaded_json is None: + return gr.update(), gr.update(visible=True), gr.update(visible=False) + + return json.dumps(loaded_json), gr.update(visible=False), gr.update(visible=True) + + prompt.input(parse_meta, inputs=prompt, outputs=[prompt, generate_button, load_parameter_button], queue=False, show_progress=False) + + load_parameter_button.click(modules.meta_parser.load_parameter_button_click, inputs=prompt, outputs=[ + advanced_checkbox, + prompt, + negative_prompt, + style_selections, + performance_selection, + aspect_ratios_selection, + overwrite_width, + overwrite_height, + sharpness, + guidance_scale, + adm_scaler_positive, + adm_scaler_negative, + adm_scaler_end, + base_model, + refiner_model, + refiner_switch, + sampler_name, + scheduler_name, + seed_random, + image_seed, + generate_button, + load_parameter_button + ] + lora_ctrls, queue=False, show_progress=False) + generate_button.click(lambda: (gr.update(visible=True, interactive=True), gr.update(visible=True, interactive=True), gr.update(visible=False), []), outputs=[stop_button, skip_button, generate_button, gallery]) \ .then(fn=refresh_seed, inputs=[seed_random, image_seed], outputs=image_seed) \ 
.then(advanced_parameters.set_all_advanced_parameters, inputs=adps) \ From 81107298a87c3c1f8743c32ed1431083366e2d39 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Wed, 20 Dec 2023 19:58:53 -0800 Subject: [PATCH 20/23] minor fix (#1532) --- modules/meta_parser.py | 2 +- webui.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/meta_parser.py b/modules/meta_parser.py index 6898133d..78d73978 100644 --- a/modules/meta_parser.py +++ b/modules/meta_parser.py @@ -7,7 +7,7 @@ def load_parameter_button_click(raw_prompt_txt): loaded_parameter_dict = json.loads(raw_prompt_txt) assert isinstance(loaded_parameter_dict, dict) - results = [True] + results = [True, 1] try: h = loaded_parameter_dict.get('Prompt', None) diff --git a/webui.py b/webui.py index 35566b49..a5138abf 100644 --- a/webui.py +++ b/webui.py @@ -533,6 +533,7 @@ with shared.gradio_root: load_parameter_button.click(modules.meta_parser.load_parameter_button_click, inputs=prompt, outputs=[ advanced_checkbox, + image_number, prompt, negative_prompt, style_selections, From b0df0d57f62636fd8670d8f64482b3cde2aca05c Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Wed, 20 Dec 2023 22:32:44 -0800 Subject: [PATCH 21/23] Update readme.md (#1533) --- readme.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/readme.md b/readme.md index b970ee12..6b458e74 100644 --- a/readme.md +++ b/readme.md @@ -28,6 +28,8 @@ Fooocus has simplified the installation. Between pressing "download" and generat `[1]` David Holz, 2019. +**Recently many fake websites exist on Google when you search “fooocus”. Do not trust those – here is the only official source of Fooocus.** + ## [Installing Fooocus](#download) # Moving from Midjourney to Fooocus From 1f9a072d66e08d5f0aa73f7660ed017257ed6e73 Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Thu, 21 Dec 2023 17:42:45 -0800 Subject: [PATCH 22/23] Announcement --- update_log.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/update_log.md b/update_log.md index ef906987..216a2bc4 100644 --- a/update_log.md +++ b/update_log.md @@ -1,3 +1,5 @@ +**(2023 Dec 21) Hi all, the feature updating of Fooocus will be paused for about two or three weeks because we have some other workloads. See you soon and we will come back in mid or late Jan. However, you may still see updates if other collaborators are fixing bugs or solving problems.** + # 2.1.854 * Add a button to copy parameters to clipboard in log. 
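Looking back at the `modules/meta_parser.py` file added in patch 19: every output slot is filled with either a parsed value or `gr.update()`, so a partially valid paste updates only the fields it actually contains. Below is a minimal sketch of that fallback pattern, with a hypothetical `read_float` helper standing in for the per-key `try/except` blocks.

```python
import json

import gradio as gr


def read_float(params: dict, key: str):
    # Mirror of the per-field pattern in load_parameter_button_click(): return
    # the parsed value when the pasted JSON has a usable entry, otherwise
    # return gr.update() so the corresponding component is left unchanged.
    try:
        value = params.get(key, None)
        assert value is not None
        return float(value)
    except (AssertionError, TypeError, ValueError):
        return gr.update()


params = json.loads('{"Sharpness": "2.0", "Guidance Scale": "4.0"}')
outputs = [read_float(params, k) for k in ('Sharpness', 'Guidance Scale', 'Refiner Switch')]
# outputs -> [2.0, 4.0, gr.update()]; the missing key becomes a no-op update.
```

Applied per component, this is why `load_parameter_button_click` wraps each key in its own `try/except` instead of failing the whole paste when one field is missing or malformed.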
From 7b5bced6c2e98efa5c013624f6466ed0ba76970c Mon Sep 17 00:00:00 2001 From: lllyasviel Date: Fri, 22 Dec 2023 00:20:09 -0800 Subject: [PATCH 23/23] small url fix (#1551) --- fooocus_version.py | 2 +- modules/async_worker.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fooocus_version.py b/fooocus_version.py index 43705b4a..2511cfc7 100644 --- a/fooocus_version.py +++ b/fooocus_version.py @@ -1 +1 @@ -version = '2.1.854' +version = '2.1.855' diff --git a/modules/async_worker.py b/modules/async_worker.py index 81b942e9..fab2508e 100644 --- a/modules/async_worker.py +++ b/modules/async_worker.py @@ -277,9 +277,10 @@ def worker(): inpaint_image = HWC3(inpaint_image) if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \ and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0): + progressbar(async_task, 1, 'Downloading upscale models ...') + modules.config.downloading_upscale_model() if inpaint_parameterized: progressbar(async_task, 1, 'Downloading inpainter ...') - modules.config.downloading_upscale_model() inpaint_head_model_path, inpaint_patch_model_path = modules.config.downloading_inpaint_models( advanced_parameters.inpaint_engine) base_model_additional_loras += [(inpaint_patch_model_path, 1.0)]
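The hunk above moves the upscale-model download out of the `inpaint_parameterized` branch, so any inpaint or outpaint task fetches it before the engine-specific models. A toy sketch of the resulting control flow, with stub functions standing in for the `modules.config` helpers:

```python
def download_upscale_model():
    print('downloading upscale model ...')         # stand-in for modules.config.downloading_upscale_model()


def download_inpaint_models():
    print('downloading inpaint head / patch ...')  # stand-in for modules.config.downloading_inpaint_models()


def prepare_models(inpaint_parameterized: bool):
    # After the patch: the upscale model is fetched for every inpaint/outpaint
    # task, before branching on whether the parameterized engine is enabled.
    download_upscale_model()
    if inpaint_parameterized:
        download_inpaint_models()


prepare_models(inpaint_parameterized=False)  # still downloads the upscale model
```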