lllyasviel 2023-10-12 04:23:10 -07:00 committed by GitHub
parent 4c867c1b8b
commit e61aac34ca
147 changed files with 523 additions and 642 deletions

View File

@ -1,15 +1,15 @@
from comfy.options import enable_args_parsing from fcbh.options import enable_args_parsing
enable_args_parsing(False) enable_args_parsing(False)
import comfy.cli_args as comfy_cli import fcbh.cli_args as fcbh_cli
comfy_cli.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.") fcbh_cli.parser.add_argument("--share", action='store_true', help="Set whether to share on Gradio.")
comfy_cli.args = comfy_cli.parser.parse_args() fcbh_cli.args = fcbh_cli.parser.parse_args()
comfy_cli.args.disable_cuda_malloc = True fcbh_cli.args.disable_cuda_malloc = True
comfy_cli.args.auto_launch = True fcbh_cli.args.auto_launch = True
if getattr(comfy_cli.args, 'port', 8188) == 8188: if getattr(fcbh_cli.args, 'port', 8188) == 8188:
comfy_cli.args.port = None fcbh_cli.args.port = None
args = comfy_cli.args args = fcbh_cli.args

View File

@ -1,102 +0,0 @@
class Example:
"""
An example node
Class methods
-------------
INPUT_TYPES (dict):
Tell the main program input parameters of nodes.
Attributes
----------
RETURN_TYPES (`tuple`):
The type of each element in the output tuple.
RETURN_NAMES (`tuple`):
Optional: The name of each output in the output tuple.
FUNCTION (`str`):
The name of the entry-point method. For example, if `FUNCTION = "execute"` then it will run Example().execute()
OUTPUT_NODE ([`bool`]):
If this node is an output node that outputs a result/image from the graph. The SaveImage node is an example.
The backend iterates on these output nodes and tries to execute all their parents if their parent graph is properly connected.
Assumed to be False if not present.
CATEGORY (`str`):
The category the node should appear in the UI.
execute(s) -> tuple || None:
The entry point method. The name of this method must be the same as the value of property `FUNCTION`.
For example, if `FUNCTION = "execute"` then this method's name must be `execute`, if `FUNCTION = "foo"` then it must be `foo`.
"""
def __init__(self):
pass
@classmethod
def INPUT_TYPES(s):
"""
Return a dictionary which contains config for all input fields.
Some types (string): "MODEL", "VAE", "CLIP", "CONDITIONING", "LATENT", "IMAGE", "INT", "STRING", "FLOAT".
Input types "INT", "STRING" or "FLOAT" are special values for fields on the node.
The type can be a list for selection.
Returns: `dict`:
- Key input_fields_group (`string`): Can be either required, hidden or optional. A node class must have property `required`
- Value input_fields (`dict`): Contains input fields config:
* Key field_name (`string`): Name of an entry-point method's argument
* Value field_config (`tuple`):
+ First value is a string indicating the type of the field, or a list for selection.
+ Second value is a config for type "INT", "STRING" or "FLOAT".
"""
return {
"required": {
"image": ("IMAGE",),
"int_field": ("INT", {
"default": 0,
"min": 0, #Minimum value
"max": 4096, #Maximum value
"step": 64, #Slider's step
"display": "number" # Cosmetic only: display as "number" or "slider"
}),
"float_field": ("FLOAT", {
"default": 1.0,
"min": 0.0,
"max": 10.0,
"step": 0.01,
"round": 0.001, #The value represeting the precision to round to, will be set to the step value by default. Can be set to False to disable rounding.
"display": "number"}),
"print_to_screen": (["enable", "disable"],),
"string_field": ("STRING", {
"multiline": False, #True if you want the field to look like the one on the ClipTextEncode node
"default": "Hello World!"
}),
},
}
RETURN_TYPES = ("IMAGE",)
#RETURN_NAMES = ("image_output_name",)
FUNCTION = "test"
#OUTPUT_NODE = False
CATEGORY = "Example"
def test(self, image, string_field, int_field, float_field, print_to_screen):
if print_to_screen == "enable":
print(f"""Your input contains:
string_field aka input text: {string_field}
int_field: {int_field}
float_field: {float_field}
""")
#do some processing on the image, in this example I just invert it
image = 1.0 - image
return (image,)
# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
"Example": Example
}
# A dictionary that contains the friendly/humanly readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
"Example": "Example Node"
}
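
The deleted file above documents the custom-node contract (INPUT_TYPES, FUNCTION, RETURN_TYPES, NODE_CLASS_MAPPINGS). A minimal sketch of how a backend could drive a node through that contract; the `example_node` import path and the `run_node` helper are hypothetical, and the IMAGE tensor layout is an assumption, since the node only computes `1.0 - image`:

```python
import torch

# Hypothetical import path for the stand-alone custom node module shown above.
from example_node import NODE_CLASS_MAPPINGS


def run_node(node_name, **inputs):
    """Drive a node through the contract described in the docstring above."""
    cls = NODE_CLASS_MAPPINGS[node_name]
    node = cls()
    # Validate against the declared required inputs.
    required = cls.INPUT_TYPES()["required"]
    missing = [k for k in required if k not in inputs]
    if missing:
        raise ValueError(f"missing required inputs: {missing}")
    # Dispatch to the entry-point method named by FUNCTION; it returns a tuple
    # whose elements correspond to RETURN_TYPES.
    return getattr(node, cls.FUNCTION)(**inputs)


# Any batched float tensor works for this sketch; the node simply inverts it.
image = torch.rand(1, 512, 512, 3)
(inverted,) = run_node("Example", image=image, int_field=0, float_field=1.0,
                       print_to_screen="disable", string_field="Hello World!")
```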

View File

@ -13,7 +13,7 @@ from ..ldm.modules.diffusionmodules.util import (
from ..ldm.modules.attention import SpatialTransformer from ..ldm.modules.attention import SpatialTransformer
from ..ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample from ..ldm.modules.diffusionmodules.openaimodel import UNetModel, TimestepEmbedSequential, ResBlock, Downsample
from ..ldm.util import exists from ..ldm.util import exists
import comfy.ops import fcbh.ops
class ControlledUnetModel(UNetModel): class ControlledUnetModel(UNetModel):
#implemented in the ldm unet #implemented in the ldm unet
@ -54,7 +54,7 @@ class ControlNet(nn.Module):
adm_in_channels=None, adm_in_channels=None,
transformer_depth_middle=None, transformer_depth_middle=None,
device=None, device=None,
operations=comfy.ops, operations=fcbh.ops,
): ):
super().__init__() super().__init__()
assert use_spatial_transformer == True, "use_spatial_transformer has to be true" assert use_spatial_transformer == True, "use_spatial_transformer has to be true"

View File

@ -1,6 +1,6 @@
import argparse import argparse
import enum import enum
import comfy.options import fcbh.options
class EnumAction(argparse.Action): class EnumAction(argparse.Action):
""" """
@ -37,10 +37,10 @@ parser.add_argument("--listen", type=str, default="127.0.0.1", metavar="IP", nar
parser.add_argument("--port", type=int, default=8188, help="Set the listen port.") parser.add_argument("--port", type=int, default=8188, help="Set the listen port.")
parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.") parser.add_argument("--enable-cors-header", type=str, default=None, metavar="ORIGIN", nargs="?", const="*", help="Enable CORS (Cross-Origin Resource Sharing) with optional origin or allow all with default '*'.")
parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.") parser.add_argument("--extra-model-paths-config", type=str, default=None, metavar="PATH", nargs='+', action='append', help="Load one or more extra_model_paths.yaml files.")
parser.add_argument("--output-directory", type=str, default=None, help="Set the ComfyUI output directory.") parser.add_argument("--output-directory", type=str, default=None, help="Set the fcbh_backend output directory.")
parser.add_argument("--temp-directory", type=str, default=None, help="Set the ComfyUI temp directory (default is in the ComfyUI directory).") parser.add_argument("--temp-directory", type=str, default=None, help="Set the fcbh_backend temp directory (default is in the fcbh_backend directory).")
parser.add_argument("--input-directory", type=str, default=None, help="Set the ComfyUI input directory.") parser.add_argument("--input-directory", type=str, default=None, help="Set the fcbh_backend input directory.")
parser.add_argument("--auto-launch", action="store_true", help="Automatically launch ComfyUI in the default browser.") parser.add_argument("--auto-launch", action="store_true", help="Automatically launch fcbh_backend in the default browser.")
parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.") parser.add_argument("--disable-auto-launch", action="store_true", help="Disable auto launching the browser.")
parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.") parser.add_argument("--cuda-device", type=int, default=None, metavar="DEVICE_ID", help="Set the id of the cuda device this instance will use.")
cm_group = parser.add_mutually_exclusive_group() cm_group = parser.add_mutually_exclusive_group()
@ -86,7 +86,7 @@ vram_group.add_argument("--novram", action="store_true", help="When lowvram isn'
vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).") vram_group.add_argument("--cpu", action="store_true", help="To use the CPU for everything (slow).")
parser.add_argument("--disable-smart-memory", action="store_true", help="Force ComfyUI to agressively offload to regular ram instead of keeping models in vram when it can.") parser.add_argument("--disable-smart-memory", action="store_true", help="Force fcbh_backend to agressively offload to regular ram instead of keeping models in vram when it can.")
parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.") parser.add_argument("--dont-print-server", action="store_true", help="Don't print server output.")
@ -95,7 +95,7 @@ parser.add_argument("--windows-standalone-build", action="store_true", help="Win
parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.") parser.add_argument("--disable-metadata", action="store_true", help="Disable saving prompt metadata in files.")
if comfy.options.args_parsing: if fcbh.options.args_parsing:
args = parser.parse_args() args = parser.parse_args()
else: else:
args = parser.parse_args([]) args = parser.parse_args([])
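
The two hunks above show the gating mechanism: the launcher calls `enable_args_parsing(False)` before importing `fcbh.cli_args`, so the parser runs against an empty argument list and the host application can override individual fields afterwards (as the first file does with `disable_cuda_malloc`, `auto_launch`, and `port`). The options module itself is not part of this diff; a minimal sketch of what it presumably contains, inferred only from that usage:

```python
# fcbh/options.py -- sketch inferred from the usage above; this module is not
# shown in the commit, so treat the contents as an assumption.
args_parsing = True


def enable_args_parsing(enable=True):
    # Module-level switch checked by fcbh.cli_args: when False, the parser is
    # run against an empty argument list so library defaults apply.
    global args_parsing
    args_parsing = enable
```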

View File

@ -4,25 +4,25 @@ import os
import torch import torch
import contextlib import contextlib
import comfy.ops import fcbh.ops
import comfy.model_patcher import fcbh.model_patcher
import comfy.model_management import fcbh.model_management
class ClipVisionModel(): class ClipVisionModel():
def __init__(self, json_config): def __init__(self, json_config):
config = CLIPVisionConfig.from_json_file(json_config) config = CLIPVisionConfig.from_json_file(json_config)
self.load_device = comfy.model_management.text_encoder_device() self.load_device = fcbh.model_management.text_encoder_device()
offload_device = comfy.model_management.text_encoder_offload_device() offload_device = fcbh.model_management.text_encoder_offload_device()
self.dtype = torch.float32 self.dtype = torch.float32
if comfy.model_management.should_use_fp16(self.load_device, prioritize_performance=False): if fcbh.model_management.should_use_fp16(self.load_device, prioritize_performance=False):
self.dtype = torch.float16 self.dtype = torch.float16
with comfy.ops.use_comfy_ops(offload_device, self.dtype): with fcbh.ops.use_fcbh_ops(offload_device, self.dtype):
with modeling_utils.no_init_weights(): with modeling_utils.no_init_weights():
self.model = CLIPVisionModelWithProjection(config) self.model = CLIPVisionModelWithProjection(config)
self.model.to(self.dtype) self.model.to(self.dtype)
self.patcher = comfy.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device) self.patcher = fcbh.model_patcher.ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
self.processor = CLIPImageProcessor(crop_size=224, self.processor = CLIPImageProcessor(crop_size=224,
do_center_crop=True, do_center_crop=True,
do_convert_rgb=True, do_convert_rgb=True,
@ -40,7 +40,7 @@ class ClipVisionModel():
img = torch.clip((255. * image), 0, 255).round().int() img = torch.clip((255. * image), 0, 255).round().int()
img = list(map(lambda a: a, img)) img = list(map(lambda a: a, img))
inputs = self.processor(images=img, return_tensors="pt") inputs = self.processor(images=img, return_tensors="pt")
comfy.model_management.load_model_gpu(self.patcher) fcbh.model_management.load_model_gpu(self.patcher)
pixel_values = inputs['pixel_values'].to(self.load_device) pixel_values = inputs['pixel_values'].to(self.load_device)
if self.dtype != torch.float32: if self.dtype != torch.float32:
@ -48,7 +48,7 @@ class ClipVisionModel():
else: else:
precision_scope = lambda a, b: contextlib.nullcontext(a) precision_scope = lambda a, b: contextlib.nullcontext(a)
with precision_scope(comfy.model_management.get_autocast_device(self.load_device), torch.float32): with precision_scope(fcbh.model_management.get_autocast_device(self.load_device), torch.float32):
outputs = self.model(pixel_values=pixel_values, output_hidden_states=True) outputs = self.model(pixel_values=pixel_values, output_hidden_states=True)
for k in outputs: for k in outputs:

View File

@ -1,13 +1,13 @@
import torch import torch
import math import math
import os import os
import comfy.utils import fcbh.utils
import comfy.model_management import fcbh.model_management
import comfy.model_detection import fcbh.model_detection
import comfy.model_patcher import fcbh.model_patcher
import comfy.cldm.cldm import fcbh.cldm.cldm
import comfy.t2i_adapter.adapter import fcbh.t2i_adapter.adapter
def broadcast_image_to(tensor, target_batch_size, batched_number): def broadcast_image_to(tensor, target_batch_size, batched_number):
@ -37,7 +37,7 @@ class ControlBase:
self.timestep_range = None self.timestep_range = None
if device is None: if device is None:
device = comfy.model_management.get_torch_device() device = fcbh.model_management.get_torch_device()
self.device = device self.device = device
self.previous_controlnet = None self.previous_controlnet = None
self.global_average_pooling = False self.global_average_pooling = False
@ -130,7 +130,7 @@ class ControlNet(ControlBase):
def __init__(self, control_model, global_average_pooling=False, device=None): def __init__(self, control_model, global_average_pooling=False, device=None):
super().__init__(device) super().__init__(device)
self.control_model = control_model self.control_model = control_model
self.control_model_wrapped = comfy.model_patcher.ModelPatcher(self.control_model, load_device=comfy.model_management.get_torch_device(), offload_device=comfy.model_management.unet_offload_device()) self.control_model_wrapped = fcbh.model_patcher.ModelPatcher(self.control_model, load_device=fcbh.model_management.get_torch_device(), offload_device=fcbh.model_management.unet_offload_device())
self.global_average_pooling = global_average_pooling self.global_average_pooling = global_average_pooling
def get_control(self, x_noisy, t, cond, batched_number): def get_control(self, x_noisy, t, cond, batched_number):
@ -150,7 +150,7 @@ class ControlNet(ControlBase):
if self.cond_hint is not None: if self.cond_hint is not None:
del self.cond_hint del self.cond_hint
self.cond_hint = None self.cond_hint = None
self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(self.control_model.dtype).to(self.device) self.cond_hint = fcbh.utils.common_upscale(self.cond_hint_original, x_noisy.shape[3] * 8, x_noisy.shape[2] * 8, 'nearest-exact', "center").to(self.control_model.dtype).to(self.device)
if x_noisy.shape[0] != self.cond_hint.shape[0]: if x_noisy.shape[0] != self.cond_hint.shape[0]:
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number) self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
@ -249,24 +249,24 @@ class ControlLora(ControlNet):
controlnet_config.pop("out_channels") controlnet_config.pop("out_channels")
controlnet_config["hint_channels"] = self.control_weights["input_hint_block.0.weight"].shape[1] controlnet_config["hint_channels"] = self.control_weights["input_hint_block.0.weight"].shape[1]
controlnet_config["operations"] = ControlLoraOps() controlnet_config["operations"] = ControlLoraOps()
self.control_model = comfy.cldm.cldm.ControlNet(**controlnet_config) self.control_model = fcbh.cldm.cldm.ControlNet(**controlnet_config)
dtype = model.get_dtype() dtype = model.get_dtype()
self.control_model.to(dtype) self.control_model.to(dtype)
self.control_model.to(comfy.model_management.get_torch_device()) self.control_model.to(fcbh.model_management.get_torch_device())
diffusion_model = model.diffusion_model diffusion_model = model.diffusion_model
sd = diffusion_model.state_dict() sd = diffusion_model.state_dict()
cm = self.control_model.state_dict() cm = self.control_model.state_dict()
for k in sd: for k in sd:
weight = comfy.model_management.resolve_lowvram_weight(sd[k], diffusion_model, k) weight = fcbh.model_management.resolve_lowvram_weight(sd[k], diffusion_model, k)
try: try:
comfy.utils.set_attr(self.control_model, k, weight) fcbh.utils.set_attr(self.control_model, k, weight)
except: except:
pass pass
for k in self.control_weights: for k in self.control_weights:
if k not in {"lora_controlnet"}: if k not in {"lora_controlnet"}:
comfy.utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(comfy.model_management.get_torch_device())) fcbh.utils.set_attr(self.control_model, k, self.control_weights[k].to(dtype).to(fcbh.model_management.get_torch_device()))
def copy(self): def copy(self):
c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling) c = ControlLora(self.control_weights, global_average_pooling=self.global_average_pooling)
@ -283,18 +283,18 @@ class ControlLora(ControlNet):
return out return out
def inference_memory_requirements(self, dtype): def inference_memory_requirements(self, dtype):
return comfy.utils.calculate_parameters(self.control_weights) * comfy.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype) return fcbh.utils.calculate_parameters(self.control_weights) * fcbh.model_management.dtype_size(dtype) + ControlBase.inference_memory_requirements(self, dtype)
def load_controlnet(ckpt_path, model=None): def load_controlnet(ckpt_path, model=None):
controlnet_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True) controlnet_data = fcbh.utils.load_torch_file(ckpt_path, safe_load=True)
if "lora_controlnet" in controlnet_data: if "lora_controlnet" in controlnet_data:
return ControlLora(controlnet_data) return ControlLora(controlnet_data)
controlnet_config = None controlnet_config = None
if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format if "controlnet_cond_embedding.conv_in.weight" in controlnet_data: #diffusers format
use_fp16 = comfy.model_management.should_use_fp16() use_fp16 = fcbh.model_management.should_use_fp16()
controlnet_config = comfy.model_detection.unet_config_from_diffusers_unet(controlnet_data, use_fp16) controlnet_config = fcbh.model_detection.unet_config_from_diffusers_unet(controlnet_data, use_fp16)
diffusers_keys = comfy.utils.unet_to_diffusers(controlnet_config) diffusers_keys = fcbh.utils.unet_to_diffusers(controlnet_config)
diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight" diffusers_keys["controlnet_mid_block.weight"] = "middle_block_out.0.weight"
diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias" diffusers_keys["controlnet_mid_block.bias"] = "middle_block_out.0.bias"
@ -353,16 +353,16 @@ def load_controlnet(ckpt_path, model=None):
return net return net
if controlnet_config is None: if controlnet_config is None:
use_fp16 = comfy.model_management.should_use_fp16() use_fp16 = fcbh.model_management.should_use_fp16()
controlnet_config = comfy.model_detection.model_config_from_unet(controlnet_data, prefix, use_fp16, True).unet_config controlnet_config = fcbh.model_detection.model_config_from_unet(controlnet_data, prefix, use_fp16, True).unet_config
controlnet_config.pop("out_channels") controlnet_config.pop("out_channels")
controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1] controlnet_config["hint_channels"] = controlnet_data["{}input_hint_block.0.weight".format(prefix)].shape[1]
control_model = comfy.cldm.cldm.ControlNet(**controlnet_config) control_model = fcbh.cldm.cldm.ControlNet(**controlnet_config)
if pth: if pth:
if 'difference' in controlnet_data: if 'difference' in controlnet_data:
if model is not None: if model is not None:
comfy.model_management.load_models_gpu([model]) fcbh.model_management.load_models_gpu([model])
model_sd = model.model_state_dict() model_sd = model.model_state_dict()
for x in controlnet_data: for x in controlnet_data:
c_m = "control_model." c_m = "control_model."
@ -425,7 +425,7 @@ class T2IAdapter(ControlBase):
self.control_input = None self.control_input = None
self.cond_hint = None self.cond_hint = None
width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8) width, height = self.scale_image_to(x_noisy.shape[3] * 8, x_noisy.shape[2] * 8)
self.cond_hint = comfy.utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device) self.cond_hint = fcbh.utils.common_upscale(self.cond_hint_original, width, height, 'nearest-exact', "center").float().to(self.device)
if self.channels_in == 1 and self.cond_hint.shape[1] > 1: if self.channels_in == 1 and self.cond_hint.shape[1] > 1:
self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True) self.cond_hint = torch.mean(self.cond_hint, 1, keepdim=True)
if x_noisy.shape[0] != self.cond_hint.shape[0]: if x_noisy.shape[0] != self.cond_hint.shape[0]:
@ -458,12 +458,12 @@ def load_t2i_adapter(t2i_data):
prefix_replace["adapter.body.{}.resnets.{}.".format(i, j)] = "body.{}.".format(i * 2 + j) prefix_replace["adapter.body.{}.resnets.{}.".format(i, j)] = "body.{}.".format(i * 2 + j)
prefix_replace["adapter.body.{}.".format(i, j)] = "body.{}.".format(i * 2) prefix_replace["adapter.body.{}.".format(i, j)] = "body.{}.".format(i * 2)
prefix_replace["adapter."] = "" prefix_replace["adapter."] = ""
t2i_data = comfy.utils.state_dict_prefix_replace(t2i_data, prefix_replace) t2i_data = fcbh.utils.state_dict_prefix_replace(t2i_data, prefix_replace)
keys = t2i_data.keys() keys = t2i_data.keys()
if "body.0.in_conv.weight" in keys: if "body.0.in_conv.weight" in keys:
cin = t2i_data['body.0.in_conv.weight'].shape[1] cin = t2i_data['body.0.in_conv.weight'].shape[1]
model_ad = comfy.t2i_adapter.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4) model_ad = fcbh.t2i_adapter.adapter.Adapter_light(cin=cin, channels=[320, 640, 1280, 1280], nums_rb=4)
elif 'conv_in.weight' in keys: elif 'conv_in.weight' in keys:
cin = t2i_data['conv_in.weight'].shape[1] cin = t2i_data['conv_in.weight'].shape[1]
channel = t2i_data['conv_in.weight'].shape[0] channel = t2i_data['conv_in.weight'].shape[0]
@ -475,7 +475,7 @@ def load_t2i_adapter(t2i_data):
xl = False xl = False
if cin == 256 or cin == 768: if cin == 256 or cin == 768:
xl = True xl = True
model_ad = comfy.t2i_adapter.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl) model_ad = fcbh.t2i_adapter.adapter.Adapter(cin=cin, channels=[channel, channel*2, channel*4, channel*4][:4], nums_rb=2, ksize=ksize, sk=True, use_conv=use_conv, xl=xl)
else: else:
return None return None
missing, unexpected = model_ad.load_state_dict(t2i_data) missing, unexpected = model_ad.load_state_dict(t2i_data)

View File

@ -1,7 +1,7 @@
import json import json
import os import os
import comfy.sd import fcbh.sd
def first_file(path, filenames): def first_file(path, filenames):
for f in filenames: for f in filenames:
@ -23,14 +23,14 @@ def load_diffusers(model_path, output_vae=True, output_clip=True, embedding_dire
if text_encoder2_path is not None: if text_encoder2_path is not None:
text_encoder_paths.append(text_encoder2_path) text_encoder_paths.append(text_encoder2_path)
unet = comfy.sd.load_unet(unet_path) unet = fcbh.sd.load_unet(unet_path)
clip = None clip = None
if output_clip: if output_clip:
clip = comfy.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory) clip = fcbh.sd.load_clip(text_encoder_paths, embedding_directory=embedding_directory)
vae = None vae = None
if output_vae: if output_vae:
vae = comfy.sd.VAE(ckpt_path=vae_path) vae = fcbh.sd.VAE(ckpt_path=vae_path)
return (unet, clip, vae) return (unet, clip, vae)

View File

@ -3,11 +3,11 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from contextlib import contextmanager from contextlib import contextmanager
from comfy.ldm.modules.diffusionmodules.model import Encoder, Decoder from fcbh.ldm.modules.diffusionmodules.model import Encoder, Decoder
from comfy.ldm.modules.distributions.distributions import DiagonalGaussianDistribution from fcbh.ldm.modules.distributions.distributions import DiagonalGaussianDistribution
from comfy.ldm.util import instantiate_from_config from fcbh.ldm.util import instantiate_from_config
from comfy.ldm.modules.ema import LitEma from fcbh.ldm.modules.ema import LitEma
# class AutoencoderKL(pl.LightningModule): # class AutoencoderKL(pl.LightningModule):
class AutoencoderKL(torch.nn.Module): class AutoencoderKL(torch.nn.Module):

View File

@ -4,7 +4,7 @@ import torch
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
from comfy.ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor from fcbh.ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor
class DDIMSampler(object): class DDIMSampler(object):

View File

@ -9,14 +9,14 @@ from typing import Optional, Any
from .diffusionmodules.util import checkpoint from .diffusionmodules.util import checkpoint
from .sub_quadratic_attention import efficient_dot_product_attention from .sub_quadratic_attention import efficient_dot_product_attention
from comfy import model_management from fcbh import model_management
if model_management.xformers_enabled(): if model_management.xformers_enabled():
import xformers import xformers
import xformers.ops import xformers.ops
from comfy.cli_args import args from fcbh.cli_args import args
import comfy.ops import fcbh.ops
# CrossAttn precision handling # CrossAttn precision handling
if args.dont_upcast_attention: if args.dont_upcast_attention:
@ -53,7 +53,7 @@ def init_(tensor):
# feedforward # feedforward
class GEGLU(nn.Module): class GEGLU(nn.Module):
def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=comfy.ops): def __init__(self, dim_in, dim_out, dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
self.proj = operations.Linear(dim_in, dim_out * 2, dtype=dtype, device=device) self.proj = operations.Linear(dim_in, dim_out * 2, dtype=dtype, device=device)
@ -63,7 +63,7 @@ class GEGLU(nn.Module):
class FeedForward(nn.Module): class FeedForward(nn.Module):
def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=comfy.ops): def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0., dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
inner_dim = int(dim * mult) inner_dim = int(dim * mult)
dim_out = default(dim_out, dim) dim_out = default(dim_out, dim)
@ -310,7 +310,7 @@ else:
optimized_attention = attention_sub_quad optimized_attention = attention_sub_quad
class CrossAttention(nn.Module): class CrossAttention(nn.Module):
def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., dtype=None, device=None, operations=comfy.ops): def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0., dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
inner_dim = dim_head * heads inner_dim = dim_head * heads
context_dim = default(context_dim, query_dim) context_dim = default(context_dim, query_dim)
@ -340,7 +340,7 @@ class CrossAttention(nn.Module):
class BasicTransformerBlock(nn.Module): class BasicTransformerBlock(nn.Module):
def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True, def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True,
disable_self_attn=False, dtype=None, device=None, operations=comfy.ops): disable_self_attn=False, dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
self.disable_self_attn = disable_self_attn self.disable_self_attn = disable_self_attn
self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout, self.attn1 = CrossAttention(query_dim=dim, heads=n_heads, dim_head=d_head, dropout=dropout,
@ -482,7 +482,7 @@ class SpatialTransformer(nn.Module):
def __init__(self, in_channels, n_heads, d_head, def __init__(self, in_channels, n_heads, d_head,
depth=1, dropout=0., context_dim=None, depth=1, dropout=0., context_dim=None,
disable_self_attn=False, use_linear=False, disable_self_attn=False, use_linear=False,
use_checkpoint=True, dtype=None, device=None, operations=comfy.ops): use_checkpoint=True, dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
if exists(context_dim) and not isinstance(context_dim, list): if exists(context_dim) and not isinstance(context_dim, list):
context_dim = [context_dim] * depth context_dim = [context_dim] * depth

View File

@ -6,8 +6,8 @@ import numpy as np
from einops import rearrange from einops import rearrange
from typing import Optional, Any from typing import Optional, Any
from comfy import model_management from fcbh import model_management
import comfy.ops import fcbh.ops
if model_management.xformers_enabled_vae(): if model_management.xformers_enabled_vae():
import xformers import xformers
@ -48,7 +48,7 @@ class Upsample(nn.Module):
super().__init__() super().__init__()
self.with_conv = with_conv self.with_conv = with_conv
if self.with_conv: if self.with_conv:
self.conv = comfy.ops.Conv2d(in_channels, self.conv = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
@ -78,7 +78,7 @@ class Downsample(nn.Module):
self.with_conv = with_conv self.with_conv = with_conv
if self.with_conv: if self.with_conv:
# no asymmetric padding in torch conv, must do it ourselves # no asymmetric padding in torch conv, must do it ourselves
self.conv = comfy.ops.Conv2d(in_channels, self.conv = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=3, kernel_size=3,
stride=2, stride=2,
@ -105,30 +105,30 @@ class ResnetBlock(nn.Module):
self.swish = torch.nn.SiLU(inplace=True) self.swish = torch.nn.SiLU(inplace=True)
self.norm1 = Normalize(in_channels) self.norm1 = Normalize(in_channels)
self.conv1 = comfy.ops.Conv2d(in_channels, self.conv1 = fcbh.ops.Conv2d(in_channels,
out_channels, out_channels,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
padding=1) padding=1)
if temb_channels > 0: if temb_channels > 0:
self.temb_proj = comfy.ops.Linear(temb_channels, self.temb_proj = fcbh.ops.Linear(temb_channels,
out_channels) out_channels)
self.norm2 = Normalize(out_channels) self.norm2 = Normalize(out_channels)
self.dropout = torch.nn.Dropout(dropout, inplace=True) self.dropout = torch.nn.Dropout(dropout, inplace=True)
self.conv2 = comfy.ops.Conv2d(out_channels, self.conv2 = fcbh.ops.Conv2d(out_channels,
out_channels, out_channels,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
padding=1) padding=1)
if self.in_channels != self.out_channels: if self.in_channels != self.out_channels:
if self.use_conv_shortcut: if self.use_conv_shortcut:
self.conv_shortcut = comfy.ops.Conv2d(in_channels, self.conv_shortcut = fcbh.ops.Conv2d(in_channels,
out_channels, out_channels,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
padding=1) padding=1)
else: else:
self.nin_shortcut = comfy.ops.Conv2d(in_channels, self.nin_shortcut = fcbh.ops.Conv2d(in_channels,
out_channels, out_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
@ -199,22 +199,22 @@ class AttnBlock(nn.Module):
self.in_channels = in_channels self.in_channels = in_channels
self.norm = Normalize(in_channels) self.norm = Normalize(in_channels)
self.q = comfy.ops.Conv2d(in_channels, self.q = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.k = comfy.ops.Conv2d(in_channels, self.k = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.v = comfy.ops.Conv2d(in_channels, self.v = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.proj_out = comfy.ops.Conv2d(in_channels, self.proj_out = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
@ -254,22 +254,22 @@ class MemoryEfficientAttnBlock(nn.Module):
self.in_channels = in_channels self.in_channels = in_channels
self.norm = Normalize(in_channels) self.norm = Normalize(in_channels)
self.q = comfy.ops.Conv2d(in_channels, self.q = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.k = comfy.ops.Conv2d(in_channels, self.k = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.v = comfy.ops.Conv2d(in_channels, self.v = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.proj_out = comfy.ops.Conv2d(in_channels, self.proj_out = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
@ -305,22 +305,22 @@ class MemoryEfficientAttnBlockPytorch(nn.Module):
self.in_channels = in_channels self.in_channels = in_channels
self.norm = Normalize(in_channels) self.norm = Normalize(in_channels)
self.q = comfy.ops.Conv2d(in_channels, self.q = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.k = comfy.ops.Conv2d(in_channels, self.k = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.v = comfy.ops.Conv2d(in_channels, self.v = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.proj_out = comfy.ops.Conv2d(in_channels, self.proj_out = fcbh.ops.Conv2d(in_channels,
in_channels, in_channels,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
@ -390,14 +390,14 @@ class Model(nn.Module):
# timestep embedding # timestep embedding
self.temb = nn.Module() self.temb = nn.Module()
self.temb.dense = nn.ModuleList([ self.temb.dense = nn.ModuleList([
comfy.ops.Linear(self.ch, fcbh.ops.Linear(self.ch,
self.temb_ch), self.temb_ch),
comfy.ops.Linear(self.temb_ch, fcbh.ops.Linear(self.temb_ch,
self.temb_ch), self.temb_ch),
]) ])
# downsampling # downsampling
self.conv_in = comfy.ops.Conv2d(in_channels, self.conv_in = fcbh.ops.Conv2d(in_channels,
self.ch, self.ch,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
@ -466,7 +466,7 @@ class Model(nn.Module):
# end # end
self.norm_out = Normalize(block_in) self.norm_out = Normalize(block_in)
self.conv_out = comfy.ops.Conv2d(block_in, self.conv_out = fcbh.ops.Conv2d(block_in,
out_ch, out_ch,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
@ -539,7 +539,7 @@ class Encoder(nn.Module):
self.in_channels = in_channels self.in_channels = in_channels
# downsampling # downsampling
self.conv_in = comfy.ops.Conv2d(in_channels, self.conv_in = fcbh.ops.Conv2d(in_channels,
self.ch, self.ch,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
@ -584,7 +584,7 @@ class Encoder(nn.Module):
# end # end
self.norm_out = Normalize(block_in) self.norm_out = Normalize(block_in)
self.conv_out = comfy.ops.Conv2d(block_in, self.conv_out = fcbh.ops.Conv2d(block_in,
2*z_channels if double_z else z_channels, 2*z_channels if double_z else z_channels,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
@ -640,7 +640,7 @@ class Decoder(nn.Module):
self.z_shape, np.prod(self.z_shape))) self.z_shape, np.prod(self.z_shape)))
# z to block_in # z to block_in
self.conv_in = comfy.ops.Conv2d(z_channels, self.conv_in = fcbh.ops.Conv2d(z_channels,
block_in, block_in,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
@ -682,7 +682,7 @@ class Decoder(nn.Module):
# end # end
self.norm_out = Normalize(block_in) self.norm_out = Normalize(block_in)
self.conv_out = comfy.ops.Conv2d(block_in, self.conv_out = fcbh.ops.Conv2d(block_in,
out_ch, out_ch,
kernel_size=3, kernel_size=3,
stride=1, stride=1,

View File

@ -14,8 +14,8 @@ from .util import (
timestep_embedding, timestep_embedding,
) )
from ..attention import SpatialTransformer from ..attention import SpatialTransformer
from comfy.ldm.util import exists from fcbh.ldm.util import exists
import comfy.ops import fcbh.ops
class TimestepBlock(nn.Module): class TimestepBlock(nn.Module):
""" """
@ -70,7 +70,7 @@ class Upsample(nn.Module):
upsampling occurs in the inner-two dimensions. upsampling occurs in the inner-two dimensions.
""" """
def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=comfy.ops): def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
self.channels = channels self.channels = channels
self.out_channels = out_channels or channels self.out_channels = out_channels or channels
@ -106,7 +106,7 @@ class Downsample(nn.Module):
downsampling occurs in the inner-two dimensions. downsampling occurs in the inner-two dimensions.
""" """
def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=comfy.ops): def __init__(self, channels, use_conv, dims=2, out_channels=None, padding=1, dtype=None, device=None, operations=fcbh.ops):
super().__init__() super().__init__()
self.channels = channels self.channels = channels
self.out_channels = out_channels or channels self.out_channels = out_channels or channels
@ -156,7 +156,7 @@ class ResBlock(TimestepBlock):
down=False, down=False,
dtype=None, dtype=None,
device=None, device=None,
operations=comfy.ops operations=fcbh.ops
): ):
super().__init__() super().__init__()
self.channels = channels self.channels = channels
@ -316,7 +316,7 @@ class UNetModel(nn.Module):
adm_in_channels=None, adm_in_channels=None,
transformer_depth_middle=None, transformer_depth_middle=None,
device=None, device=None,
operations=comfy.ops, operations=fcbh.ops,
): ):
super().__init__() super().__init__()
assert use_spatial_transformer == True, "use_spatial_transformer has to be true" assert use_spatial_transformer == True, "use_spatial_transformer has to be true"

View File

@ -4,7 +4,7 @@ import numpy as np
from functools import partial from functools import partial
from .util import extract_into_tensor, make_beta_schedule from .util import extract_into_tensor, make_beta_schedule
from comfy.ldm.util import default from fcbh.ldm.util import default
class AbstractLowScaleModel(nn.Module): class AbstractLowScaleModel(nn.Module):

View File

@ -15,8 +15,8 @@ import torch.nn as nn
import numpy as np import numpy as np
from einops import repeat from einops import repeat
from comfy.ldm.util import instantiate_from_config from fcbh.ldm.util import instantiate_from_config
import comfy.ops import fcbh.ops
def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
if schedule == "linear": if schedule == "linear":
@ -233,7 +233,7 @@ def conv_nd(dims, *args, **kwargs):
if dims == 1: if dims == 1:
return nn.Conv1d(*args, **kwargs) return nn.Conv1d(*args, **kwargs)
elif dims == 2: elif dims == 2:
return comfy.ops.Conv2d(*args, **kwargs) return fcbh.ops.Conv2d(*args, **kwargs)
elif dims == 3: elif dims == 3:
return nn.Conv3d(*args, **kwargs) return nn.Conv3d(*args, **kwargs)
raise ValueError(f"unsupported dimensions: {dims}") raise ValueError(f"unsupported dimensions: {dims}")
@ -243,7 +243,7 @@ def linear(*args, **kwargs):
""" """
Create a linear module. Create a linear module.
""" """
return comfy.ops.Linear(*args, **kwargs) return fcbh.ops.Linear(*args, **kwargs)
def avg_pool_nd(dims, *args, **kwargs): def avg_pool_nd(dims, *args, **kwargs):

View File

@ -24,7 +24,7 @@ except ImportError:
from torch import Tensor from torch import Tensor
from typing import List from typing import List
from comfy import model_management from fcbh import model_management
def dynamic_slice( def dynamic_slice(
x: Tensor, x: Tensor,

View File

@ -1,4 +1,4 @@
import comfy.utils import fcbh.utils
LORA_CLIP_MAP = { LORA_CLIP_MAP = {
"mlp.fc1": "mlp_fc1", "mlp.fc1": "mlp_fc1",
@ -183,7 +183,7 @@ def model_lora_keys_unet(model, key_map={}):
key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_") key_lora = k[len("diffusion_model."):-len(".weight")].replace(".", "_")
key_map["lora_unet_{}".format(key_lora)] = k key_map["lora_unet_{}".format(key_lora)] = k
diffusers_keys = comfy.utils.unet_to_diffusers(model.model_config.unet_config) diffusers_keys = fcbh.utils.unet_to_diffusers(model.model_config.unet_config)
for k in diffusers_keys: for k in diffusers_keys:
if k.endswith(".weight"): if k.endswith(".weight"):
unet_key = "diffusion_model.{}".format(diffusers_keys[k]) unet_key = "diffusion_model.{}".format(diffusers_keys[k])

View File

@ -1,9 +1,9 @@
import torch import torch
from comfy.ldm.modules.diffusionmodules.openaimodel import UNetModel from fcbh.ldm.modules.diffusionmodules.openaimodel import UNetModel
from comfy.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation from fcbh.ldm.modules.encoders.noise_aug_modules import CLIPEmbeddingNoiseAugmentation
from comfy.ldm.modules.diffusionmodules.util import make_beta_schedule from fcbh.ldm.modules.diffusionmodules.util import make_beta_schedule
from comfy.ldm.modules.diffusionmodules.openaimodel import Timestep from fcbh.ldm.modules.diffusionmodules.openaimodel import Timestep
import comfy.model_management import fcbh.model_management
import numpy as np import numpy as np
from enum import Enum from enum import Enum
from . import utils from . import utils
@ -98,7 +98,7 @@ class BaseModel(torch.nn.Module):
unet_sd = self.diffusion_model.state_dict() unet_sd = self.diffusion_model.state_dict()
unet_state_dict = {} unet_state_dict = {}
for k in unet_sd: for k in unet_sd:
unet_state_dict[k] = comfy.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k) unet_state_dict[k] = fcbh.model_management.resolve_lowvram_weight(unet_sd[k], self.diffusion_model, k)
unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict) unet_state_dict = self.model_config.process_unet_state_dict_for_saving(unet_state_dict)
vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict) vae_state_dict = self.model_config.process_vae_state_dict_for_saving(vae_state_dict)

View File

@ -1,5 +1,5 @@
import comfy.supported_models import fcbh.supported_models
import comfy.supported_models_base import fcbh.supported_models_base
def count_blocks(state_dict_keys, prefix_string): def count_blocks(state_dict_keys, prefix_string):
count = 0 count = 0
@ -109,7 +109,7 @@ def detect_unet_config(state_dict, key_prefix, use_fp16):
return unet_config return unet_config
def model_config_from_unet_config(unet_config): def model_config_from_unet_config(unet_config):
for model_config in comfy.supported_models.models: for model_config in fcbh.supported_models.models:
if model_config.matches(unet_config): if model_config.matches(unet_config):
return model_config(unet_config) return model_config(unet_config)
@ -120,7 +120,7 @@ def model_config_from_unet(state_dict, unet_key_prefix, use_fp16, use_base_if_no
unet_config = detect_unet_config(state_dict, unet_key_prefix, use_fp16) unet_config = detect_unet_config(state_dict, unet_key_prefix, use_fp16)
model_config = model_config_from_unet_config(unet_config) model_config = model_config_from_unet_config(unet_config)
if model_config is None and use_base_if_no_match: if model_config is None and use_base_if_no_match:
return comfy.supported_models_base.BASE(unet_config) return fcbh.supported_models_base.BASE(unet_config)
else: else:
return model_config return model_config

View File

@ -1,7 +1,7 @@
import psutil import psutil
from enum import Enum from enum import Enum
from comfy.cli_args import args from fcbh.cli_args import args
import comfy.utils import fcbh.utils
import torch import torch
import sys import sys
@ -681,7 +681,7 @@ def soft_empty_cache(force=False):
def resolve_lowvram_weight(weight, model, key): def resolve_lowvram_weight(weight, model, key):
if weight.device == torch.device("meta"): #lowvram NOTE: this depends on the inner working of the accelerate library so it might break. if weight.device == torch.device("meta"): #lowvram NOTE: this depends on the inner working of the accelerate library so it might break.
key_split = key.split('.') # I have no idea why they don't just leave the weight there instead of using the meta device. key_split = key.split('.') # I have no idea why they don't just leave the weight there instead of using the meta device.
op = comfy.utils.get_attr(model, '.'.join(key_split[:-1])) op = fcbh.utils.get_attr(model, '.'.join(key_split[:-1]))
weight = op._hf_hook.weights_map[key_split[-1]] weight = op._hf_hook.weights_map[key_split[-1]]
return weight return weight

View File

@ -2,8 +2,8 @@ import torch
import copy import copy
import inspect import inspect
import comfy.utils import fcbh.utils
import comfy.model_management import fcbh.model_management
class ModelPatcher: class ModelPatcher:
def __init__(self, model, load_device, offload_device, size=0, current_device=None): def __init__(self, model, load_device, offload_device, size=0, current_device=None):
@ -162,11 +162,11 @@ class ModelPatcher:
self.backup[key] = weight.to(self.offload_device) self.backup[key] = weight.to(self.offload_device)
if device_to is not None: if device_to is not None:
temp_weight = comfy.model_management.cast_to_device(weight, device_to, torch.float32, copy=True) temp_weight = fcbh.model_management.cast_to_device(weight, device_to, torch.float32, copy=True)
else: else:
temp_weight = weight.to(torch.float32, copy=True) temp_weight = weight.to(torch.float32, copy=True)
out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype) out_weight = self.calculate_weight(self.patches[key], temp_weight, key).to(weight.dtype)
comfy.utils.set_attr(self.model, key, out_weight) fcbh.utils.set_attr(self.model, key, out_weight)
del temp_weight del temp_weight
if device_to is not None: if device_to is not None:
@ -193,15 +193,15 @@ class ModelPatcher:
if w1.shape != weight.shape: if w1.shape != weight.shape:
print("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, w1.shape, weight.shape)) print("WARNING SHAPE MISMATCH {} WEIGHT NOT MERGED {} != {}".format(key, w1.shape, weight.shape))
else: else:
weight += alpha * comfy.model_management.cast_to_device(w1, weight.device, weight.dtype) weight += alpha * fcbh.model_management.cast_to_device(w1, weight.device, weight.dtype)
elif len(v) == 4: #lora/locon elif len(v) == 4: #lora/locon
mat1 = comfy.model_management.cast_to_device(v[0], weight.device, torch.float32) mat1 = fcbh.model_management.cast_to_device(v[0], weight.device, torch.float32)
mat2 = comfy.model_management.cast_to_device(v[1], weight.device, torch.float32) mat2 = fcbh.model_management.cast_to_device(v[1], weight.device, torch.float32)
if v[2] is not None: if v[2] is not None:
alpha *= v[2] / mat2.shape[0] alpha *= v[2] / mat2.shape[0]
if v[3] is not None: if v[3] is not None:
#locon mid weights, hopefully the math is fine because I didn't properly test it #locon mid weights, hopefully the math is fine because I didn't properly test it
mat3 = comfy.model_management.cast_to_device(v[3], weight.device, torch.float32) mat3 = fcbh.model_management.cast_to_device(v[3], weight.device, torch.float32)
final_shape = [mat2.shape[1], mat2.shape[0], mat3.shape[2], mat3.shape[3]] final_shape = [mat2.shape[1], mat2.shape[0], mat3.shape[2], mat3.shape[3]]
mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1), mat3.transpose(0, 1).flatten(start_dim=1)).reshape(final_shape).transpose(0, 1) mat2 = torch.mm(mat2.transpose(0, 1).flatten(start_dim=1), mat3.transpose(0, 1).flatten(start_dim=1)).reshape(final_shape).transpose(0, 1)
try: try:
@ -220,23 +220,23 @@ class ModelPatcher:
if w1 is None: if w1 is None:
dim = w1_b.shape[0] dim = w1_b.shape[0]
w1 = torch.mm(comfy.model_management.cast_to_device(w1_a, weight.device, torch.float32), w1 = torch.mm(fcbh.model_management.cast_to_device(w1_a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1_b, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w1_b, weight.device, torch.float32))
else: else:
w1 = comfy.model_management.cast_to_device(w1, weight.device, torch.float32) w1 = fcbh.model_management.cast_to_device(w1, weight.device, torch.float32)
if w2 is None: if w2 is None:
dim = w2_b.shape[0] dim = w2_b.shape[0]
if t2 is None: if t2 is None:
w2 = torch.mm(comfy.model_management.cast_to_device(w2_a, weight.device, torch.float32), w2 = torch.mm(fcbh.model_management.cast_to_device(w2_a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2_b, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w2_b, weight.device, torch.float32))
else: else:
w2 = torch.einsum('i j k l, j r, i p -> p r k l', w2 = torch.einsum('i j k l, j r, i p -> p r k l',
comfy.model_management.cast_to_device(t2, weight.device, torch.float32), fcbh.model_management.cast_to_device(t2, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2_b, weight.device, torch.float32), fcbh.model_management.cast_to_device(w2_b, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2_a, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w2_a, weight.device, torch.float32))
else: else:
w2 = comfy.model_management.cast_to_device(w2, weight.device, torch.float32) w2 = fcbh.model_management.cast_to_device(w2, weight.device, torch.float32)
if len(w2.shape) == 4: if len(w2.shape) == 4:
w1 = w1.unsqueeze(2).unsqueeze(2) w1 = w1.unsqueeze(2).unsqueeze(2)
@ -258,19 +258,19 @@ class ModelPatcher:
t1 = v[5] t1 = v[5]
t2 = v[6] t2 = v[6]
m1 = torch.einsum('i j k l, j r, i p -> p r k l', m1 = torch.einsum('i j k l, j r, i p -> p r k l',
comfy.model_management.cast_to_device(t1, weight.device, torch.float32), fcbh.model_management.cast_to_device(t1, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1b, weight.device, torch.float32), fcbh.model_management.cast_to_device(w1b, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1a, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w1a, weight.device, torch.float32))
m2 = torch.einsum('i j k l, j r, i p -> p r k l', m2 = torch.einsum('i j k l, j r, i p -> p r k l',
comfy.model_management.cast_to_device(t2, weight.device, torch.float32), fcbh.model_management.cast_to_device(t2, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2b, weight.device, torch.float32), fcbh.model_management.cast_to_device(w2b, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2a, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w2a, weight.device, torch.float32))
else: else:
m1 = torch.mm(comfy.model_management.cast_to_device(w1a, weight.device, torch.float32), m1 = torch.mm(fcbh.model_management.cast_to_device(w1a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w1b, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w1b, weight.device, torch.float32))
m2 = torch.mm(comfy.model_management.cast_to_device(w2a, weight.device, torch.float32), m2 = torch.mm(fcbh.model_management.cast_to_device(w2a, weight.device, torch.float32),
comfy.model_management.cast_to_device(w2b, weight.device, torch.float32)) fcbh.model_management.cast_to_device(w2b, weight.device, torch.float32))
try: try:
weight += (alpha * m1 * m2).reshape(weight.shape).type(weight.dtype) weight += (alpha * m1 * m2).reshape(weight.shape).type(weight.dtype)
@ -283,7 +283,7 @@ class ModelPatcher:
keys = list(self.backup.keys()) keys = list(self.backup.keys())
for k in keys: for k in keys:
comfy.utils.set_attr(self.model, k, self.backup[k]) fcbh.utils.set_attr(self.model, k, self.backup[k])
self.backup = {} self.backup = {}

View File

@ -28,7 +28,7 @@ def conv_nd(dims, *args, **kwargs):
raise ValueError(f"unsupported dimensions: {dims}") raise ValueError(f"unsupported dimensions: {dims}")
@contextmanager @contextmanager
def use_comfy_ops(device=None, dtype=None): # Kind of an ugly hack but I can't think of a better way def use_fcbh_ops(device=None, dtype=None): # Kind of an ugly hack but I can't think of a better way
old_torch_nn_linear = torch.nn.Linear old_torch_nn_linear = torch.nn.Linear
force_device = device force_device = device
force_dtype = dtype force_dtype = dtype
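
Only the opening lines of `use_fcbh_ops` appear in this hunk; the visible pattern (save `torch.nn.Linear`, remember a forced device and dtype) is the usual monkey-patch-and-restore context manager that lets the CLIP vision loader above build its model directly on the offload device in the chosen dtype. A rough, hypothetical sketch of that pattern, not the actual body of `use_fcbh_ops`:

```python
import torch
from contextlib import contextmanager


@contextmanager
def use_patched_linear(device=None, dtype=None):
    # Swap torch.nn.Linear for a subclass that forces device/dtype, so any
    # module constructed inside the context allocates its weights there.
    old_linear = torch.nn.Linear

    class ForcedLinear(old_linear):
        def __init__(self, *args, **kwargs):
            kwargs.setdefault("device", device)
            kwargs.setdefault("dtype", dtype)
            super().__init__(*args, **kwargs)

    torch.nn.Linear = ForcedLinear
    try:
        yield
    finally:
        # Restore the original class even if model construction raises.
        torch.nn.Linear = old_linear
```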

View File

@ -1,7 +1,7 @@
import torch import torch
import comfy.model_management import fcbh.model_management
import comfy.samplers import fcbh.samplers
import comfy.utils import fcbh.utils
import math import math
import numpy as np import numpy as np
@ -29,7 +29,7 @@ def prepare_mask(noise_mask, shape, device):
noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear") noise_mask = torch.nn.functional.interpolate(noise_mask.reshape((-1, 1, noise_mask.shape[-2], noise_mask.shape[-1])), size=(shape[2], shape[3]), mode="bilinear")
noise_mask = noise_mask.round() noise_mask = noise_mask.round()
noise_mask = torch.cat([noise_mask] * shape[1], dim=1) noise_mask = torch.cat([noise_mask] * shape[1], dim=1)
noise_mask = comfy.utils.repeat_to_batch_size(noise_mask, shape[0]) noise_mask = fcbh.utils.repeat_to_batch_size(noise_mask, shape[0])
noise_mask = noise_mask.to(device) noise_mask = noise_mask.to(device)
return noise_mask return noise_mask
@ -37,7 +37,7 @@ def broadcast_cond(cond, batch, device):
"""broadcasts conditioning to the batch size""" """broadcasts conditioning to the batch size"""
copy = [] copy = []
for p in cond: for p in cond:
t = comfy.utils.repeat_to_batch_size(p[0], batch) t = fcbh.utils.repeat_to_batch_size(p[0], batch)
t = t.to(device) t = t.to(device)
copy += [[t] + p[1:]] copy += [[t] + p[1:]]
return copy return copy
@ -78,7 +78,7 @@ def prepare_sampling(model, noise_shape, positive, negative, noise_mask):
real_model = None real_model = None
models, inference_memory = get_additional_models(positive, negative, model.model_dtype()) models, inference_memory = get_additional_models(positive, negative, model.model_dtype())
comfy.model_management.load_models_gpu([model] + models, comfy.model_management.batch_area_memory(noise_shape[0] * noise_shape[2] * noise_shape[3]) + inference_memory) fcbh.model_management.load_models_gpu([model] + models, fcbh.model_management.batch_area_memory(noise_shape[0] * noise_shape[2] * noise_shape[3]) + inference_memory)
real_model = model.model real_model = model.model
positive_copy = broadcast_cond(positive, noise_shape[0], device) positive_copy = broadcast_cond(positive, noise_shape[0], device)
@ -92,7 +92,7 @@ def sample(model, noise, steps, cfg, sampler_name, scheduler, positive, negative
noise = noise.to(model.load_device) noise = noise.to(model.load_device)
latent_image = latent_image.to(model.load_device) latent_image = latent_image.to(model.load_device)
sampler = comfy.samplers.KSampler(real_model, steps=steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options) sampler = fcbh.samplers.KSampler(real_model, steps=steps, device=model.load_device, sampler=sampler_name, scheduler=scheduler, denoise=denoise, model_options=model.model_options)
samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed) samples = sampler.sample(noise, positive_copy, negative_copy, cfg=cfg, latent_image=latent_image, start_step=start_step, last_step=last_step, force_full_denoise=force_full_denoise, denoise_mask=noise_mask, sigmas=sigmas, callback=callback, disable_pbar=disable_pbar, seed=seed)
samples = samples.cpu() samples = samples.cpu()
@@ -106,7 +106,7 @@ def sample_custom(model, noise, cfg, sampler, sigmas, positive, negative, latent
     latent_image = latent_image.to(model.load_device)
     sigmas = sigmas.to(model.load_device)
-    samples = comfy.samplers.sample(real_model, noise, positive_copy, negative_copy, cfg, model.load_device, sampler, sigmas, model_options=model.model_options, latent_image=latent_image, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
+    samples = fcbh.samplers.sample(real_model, noise, positive_copy, negative_copy, cfg, model.load_device, sampler, sigmas, model_options=model.model_options, latent_image=latent_image, denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar, seed=seed)
     samples = samples.cpu()
     cleanup_additional_models(models)
     return samples

View File

@@ -2,12 +2,12 @@ from .k_diffusion import sampling as k_diffusion_sampling
 from .k_diffusion import external as k_diffusion_external
 from .extra_samplers import uni_pc
 import torch
-from comfy import model_management
+from fcbh import model_management
 from .ldm.models.diffusion.ddim import DDIMSampler
 from .ldm.modules.diffusionmodules.util import make_ddim_timesteps
 import math
-from comfy import model_base
-import comfy.utils
+from fcbh import model_base
+import fcbh.utils
 def lcm(a, b): #TODO: eventually replace by math.lcm (added in python3.9)
     return abs(a*b) // math.gcd(a, b)
@@ -539,7 +539,7 @@ def encode_adm(model, conds, batch_size, width, height, device, prompt_type):
         if adm_out is not None:
             x[1] = x[1].copy()
-            x[1]["adm_encoded"] = comfy.utils.repeat_to_batch_size(adm_out, batch_size).to(device)
+            x[1]["adm_encoded"] = fcbh.utils.repeat_to_batch_size(adm_out, batch_size).to(device)
     return conds

View File

@@ -2,12 +2,12 @@ import torch
 import contextlib
 import math
-from comfy import model_management
+from fcbh import model_management
 from .ldm.util import instantiate_from_config
 from .ldm.models.autoencoder import AutoencoderKL
 import yaml
-import comfy.utils
+import fcbh.utils
 from . import clip_vision
 from . import gligen
@@ -19,10 +19,10 @@ from . import sd1_clip
 from . import sd2_clip
 from . import sdxl_clip
-import comfy.model_patcher
-import comfy.lora
-import comfy.t2i_adapter.adapter
-import comfy.supported_models_base
+import fcbh.model_patcher
+import fcbh.lora
+import fcbh.t2i_adapter.adapter
+import fcbh.supported_models_base
 def load_model_weights(model, sd):
     m, u = model.load_state_dict(sd, strict=False)
@@ -50,14 +50,14 @@ def load_clip_weights(model, sd):
         if ids.dtype == torch.float32:
             sd['cond_stage_model.transformer.text_model.embeddings.position_ids'] = ids.round()
-    sd = comfy.utils.transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.text_model.", 24)
+    sd = fcbh.utils.transformers_convert(sd, "cond_stage_model.model.", "cond_stage_model.transformer.text_model.", 24)
     return load_model_weights(model, sd)
 def load_lora_for_models(model, clip, lora, strength_model, strength_clip):
-    key_map = comfy.lora.model_lora_keys_unet(model.model)
-    key_map = comfy.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)
-    loaded = comfy.lora.load_lora(lora, key_map)
+    key_map = fcbh.lora.model_lora_keys_unet(model.model)
+    key_map = fcbh.lora.model_lora_keys_clip(clip.cond_stage_model, key_map)
+    loaded = fcbh.lora.load_lora(lora, key_map)
     new_modelpatcher = model.clone()
     k = new_modelpatcher.add_patches(loaded, strength_model)
     new_clip = clip.clone()
@@ -90,7 +90,7 @@ class CLIP:
         self.cond_stage_model = clip(**(params))
         self.tokenizer = tokenizer(embedding_directory=embedding_directory)
-        self.patcher = comfy.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
+        self.patcher = fcbh.model_patcher.ModelPatcher(self.cond_stage_model, load_device=load_device, offload_device=offload_device)
         self.layer_idx = None
     def clone(self):
@@ -149,7 +149,7 @@ class VAE:
             self.first_stage_model = AutoencoderKL(**(config['params']))
         self.first_stage_model = self.first_stage_model.eval()
         if ckpt_path is not None:
-            sd = comfy.utils.load_torch_file(ckpt_path)
+            sd = fcbh.utils.load_torch_file(ckpt_path)
             if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
                 sd = diffusers_convert.convert_vae_state_dict(sd)
             m, u = self.first_stage_model.load_state_dict(sd, strict=False)
@@ -164,29 +164,29 @@ class VAE:
         self.first_stage_model.to(self.vae_dtype)
     def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
-        steps = samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
-        steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
-        steps += samples.shape[0] * comfy.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
-        pbar = comfy.utils.ProgressBar(steps)
+        steps = samples.shape[0] * fcbh.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x, tile_y, overlap)
+        steps += samples.shape[0] * fcbh.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x // 2, tile_y * 2, overlap)
+        steps += samples.shape[0] * fcbh.utils.get_tiled_scale_steps(samples.shape[3], samples.shape[2], tile_x * 2, tile_y // 2, overlap)
+        pbar = fcbh.utils.ProgressBar(steps)
         decode_fn = lambda a: (self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)) + 1.0).float()
         output = torch.clamp((
-            (comfy.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = 8, pbar = pbar) +
-            comfy.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = 8, pbar = pbar) +
-            comfy.utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = 8, pbar = pbar))
+            (fcbh.utils.tiled_scale(samples, decode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = 8, pbar = pbar) +
+            fcbh.utils.tiled_scale(samples, decode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = 8, pbar = pbar) +
+            fcbh.utils.tiled_scale(samples, decode_fn, tile_x, tile_y, overlap, upscale_amount = 8, pbar = pbar))
             / 3.0) / 2.0, min=0.0, max=1.0)
         return output
     def encode_tiled_(self, pixel_samples, tile_x=512, tile_y=512, overlap = 64):
-        steps = pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap)
-        steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap)
-        steps += pixel_samples.shape[0] * comfy.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
-        pbar = comfy.utils.ProgressBar(steps)
+        steps = pixel_samples.shape[0] * fcbh.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x, tile_y, overlap)
+        steps += pixel_samples.shape[0] * fcbh.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x // 2, tile_y * 2, overlap)
+        steps += pixel_samples.shape[0] * fcbh.utils.get_tiled_scale_steps(pixel_samples.shape[3], pixel_samples.shape[2], tile_x * 2, tile_y // 2, overlap)
+        pbar = fcbh.utils.ProgressBar(steps)
         encode_fn = lambda a: self.first_stage_model.encode((2. * a - 1.).to(self.vae_dtype).to(self.device)).sample().float()
-        samples = comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
-        samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
-        samples += comfy.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
+        samples = fcbh.utils.tiled_scale(pixel_samples, encode_fn, tile_x, tile_y, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
+        samples += fcbh.utils.tiled_scale(pixel_samples, encode_fn, tile_x * 2, tile_y // 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
+        samples += fcbh.utils.tiled_scale(pixel_samples, encode_fn, tile_x // 2, tile_y * 2, overlap, upscale_amount = (1/8), out_channels=4, pbar=pbar)
         samples /= 3.0
         return samples
@@ -257,10 +257,10 @@ class StyleModel:
 def load_style_model(ckpt_path):
-    model_data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
+    model_data = fcbh.utils.load_torch_file(ckpt_path, safe_load=True)
     keys = model_data.keys()
     if "style_embedding" in keys:
-        model = comfy.t2i_adapter.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
+        model = fcbh.t2i_adapter.adapter.StyleAdapter(width=1024, context_dim=768, num_head=8, n_layes=3, num_token=8)
     else:
         raise Exception("invalid style model {}".format(ckpt_path))
     model.load_state_dict(model_data)
@@ -270,14 +270,14 @@ def load_style_model(ckpt_path):
 def load_clip(ckpt_paths, embedding_directory=None):
     clip_data = []
     for p in ckpt_paths:
-        clip_data.append(comfy.utils.load_torch_file(p, safe_load=True))
+        clip_data.append(fcbh.utils.load_torch_file(p, safe_load=True))
     class EmptyClass:
         pass
     for i in range(len(clip_data)):
         if "transformer.resblocks.0.ln_1.weight" in clip_data[i]:
-            clip_data[i] = comfy.utils.transformers_convert(clip_data[i], "", "text_model.", 32)
+            clip_data[i] = fcbh.utils.transformers_convert(clip_data[i], "", "text_model.", 32)
     clip_target = EmptyClass()
     clip_target.params = {}
@@ -306,11 +306,11 @@ def load_clip(ckpt_paths, embedding_directory=None):
     return clip
 def load_gligen(ckpt_path):
-    data = comfy.utils.load_torch_file(ckpt_path, safe_load=True)
+    data = fcbh.utils.load_torch_file(ckpt_path, safe_load=True)
     model = gligen.load_gligen(data)
     if model_management.should_use_fp16():
         model = model.half()
-    return comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())
+    return fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device())
 def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_clip=True, embedding_directory=None, state_dict=None, config=None):
     #TODO: this function is a mess and should be removed eventually
@@ -346,12 +346,12 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
             pass
     if state_dict is None:
-        state_dict = comfy.utils.load_torch_file(ckpt_path)
+        state_dict = fcbh.utils.load_torch_file(ckpt_path)
     class EmptyClass:
         pass
-    model_config = comfy.supported_models_base.BASE({})
+    model_config = fcbh.supported_models_base.BASE({})
     from . import latent_formats
     model_config.latent_format = latent_formats.SD15(scale_factor=scale_factor)
@@ -392,10 +392,10 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl
         w.cond_stage_model = clip.cond_stage_model
         load_clip_weights(w, state_dict)
-    return (comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)
+    return (fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device), clip, vae)
 def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True):
-    sd = comfy.utils.load_torch_file(ckpt_path)
+    sd = fcbh.utils.load_torch_file(ckpt_path)
     sd_keys = sd.keys()
     clip = None
     clipvision = None
@@ -404,7 +404,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
     model_patcher = None
     clip_target = None
-    parameters = comfy.utils.calculate_parameters(sd, "model.diffusion_model.")
+    parameters = fcbh.utils.calculate_parameters(sd, "model.diffusion_model.")
     fp16 = model_management.should_use_fp16(model_params=parameters)
     class WeightsLoader(torch.nn.Module):
@@ -447,7 +447,7 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
         print("left over keys:", left_over)
     if output_model:
-        model_patcher = comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device(), current_device=inital_load_device)
+        model_patcher = fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=model_management.unet_offload_device(), current_device=inital_load_device)
         if inital_load_device != torch.device("cpu"):
             print("loaded straight to GPU")
             model_management.load_model_gpu(model_patcher)
@@ -456,8 +456,8 @@ def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, o
 def load_unet(unet_path): #load unet in diffusers format
-    sd = comfy.utils.load_torch_file(unet_path)
-    parameters = comfy.utils.calculate_parameters(sd)
+    sd = fcbh.utils.load_torch_file(unet_path)
+    parameters = fcbh.utils.calculate_parameters(sd)
     fp16 = model_management.should_use_fp16(model_params=parameters)
     if "input_blocks.0.0.weight" in sd: #ldm
         model_config = model_detection.model_config_from_unet(sd, "", fp16)
@@ -471,7 +471,7 @@ def load_unet(unet_path): #load unet in diffusers format
         print("ERROR UNSUPPORTED UNET", unet_path)
         return None
-    diffusers_keys = comfy.utils.unet_to_diffusers(model_config.unet_config)
+    diffusers_keys = fcbh.utils.unet_to_diffusers(model_config.unet_config)
     new_sd = {}
     for k in diffusers_keys:
@@ -483,9 +483,9 @@ def load_unet(unet_path): #load unet in diffusers format
     model = model_config.get_model(new_sd, "")
     model = model.to(offload_device)
     model.load_model_weights(new_sd, "")
-    return comfy.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device)
+    return fcbh.model_patcher.ModelPatcher(model, load_device=model_management.get_torch_device(), offload_device=offload_device)
 def save_checkpoint(output_path, model, clip, vae, metadata=None):
     model_management.load_models_gpu([model, clip.load_model()])
     sd = model.model.state_dict_for_saving(clip.get_sd(), vae.get_sd())
-    comfy.utils.save_torch_file(sd, output_path, metadata=metadata)
+    fcbh.utils.save_torch_file(sd, output_path, metadata=metadata)

View File

@@ -1,7 +1,7 @@
 import os
 from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextConfig, modeling_utils
-import comfy.ops
+import fcbh.ops
 import torch
 import traceback
 import zipfile
@@ -54,7 +54,7 @@ class SD1ClipModel(torch.nn.Module, ClipTokenWeightEncoder):
                 textmodel_json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sd1_clip_config.json")
             config = CLIPTextConfig.from_json_file(textmodel_json_config)
             self.num_layers = config.num_hidden_layers
-            with comfy.ops.use_comfy_ops(device, dtype):
+            with fcbh.ops.use_fcbh_ops(device, dtype):
                 with modeling_utils.no_init_weights():
                     self.transformer = CLIPTextModel(config)

View File

@@ -1,4 +1,4 @@
-from comfy import sd1_clip
+from fcbh import sd1_clip
 import torch
 import os

View File

@@ -1,4 +1,4 @@
-from comfy import sd1_clip
+from fcbh import sd1_clip
 import torch
 import os

View File

@@ -6,7 +6,7 @@ Tiny AutoEncoder for Stable Diffusion
 import torch
 import torch.nn as nn
-import comfy.utils
+import fcbh.utils
 def conv(n_in, n_out, **kwargs):
     return nn.Conv2d(n_in, n_out, 3, padding=1, **kwargs)
@@ -52,9 +52,9 @@ class TAESD(nn.Module):
         self.encoder = Encoder()
         self.decoder = Decoder()
         if encoder_path is not None:
-            self.encoder.load_state_dict(comfy.utils.load_torch_file(encoder_path, safe_load=True))
+            self.encoder.load_state_dict(fcbh.utils.load_torch_file(encoder_path, safe_load=True))
         if decoder_path is not None:
-            self.decoder.load_state_dict(comfy.utils.load_torch_file(decoder_path, safe_load=True))
+            self.decoder.load_state_dict(fcbh.utils.load_torch_file(decoder_path, safe_load=True))
     @staticmethod
     def scale_latents(x):

View File

@@ -1,7 +1,7 @@
 import torch
 import math
 import struct
-import comfy.checkpoint_pickle
+import fcbh.checkpoint_pickle
 import safetensors.torch
 import numpy as np
 from PIL import Image
@@ -19,7 +19,7 @@ def load_torch_file(ckpt, safe_load=False, device=None):
         if safe_load:
             pl_sd = torch.load(ckpt, map_location=device, weights_only=True)
         else:
-            pl_sd = torch.load(ckpt, map_location=device, pickle_module=comfy.checkpoint_pickle)
+            pl_sd = torch.load(ckpt, map_location=device, pickle_module=fcbh.checkpoint_pickle)
         if "global_step" in pl_sd:
             print(f"Global Step: {pl_sd['global_step']}")
         if "state_dict" in pl_sd:

Some files were not shown because too many files have changed in this diff.