Revert "skip free_memory for interrogate"
This reverts commit 316fe24f65.
Add the interrogate action to the queue rather than keeping the 400 MB BLIP model loaded in VRAM.
parent 64c1a3dcce
commit 13bb314cfa
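For context on why the flag is being removed, here is a minimal, self-contained sketch of the behaviour involved. This is not the real model_management module; the names, sizes, and budget are made up. Skipping the free-memory step loads a model on top of whatever is already resident, which is how the ~400 MB BLIP model ended up pinned in VRAM:

loaded = []            # (name, size_mb) pairs currently resident, oldest first
used_mb = 0
BUDGET_MB = 4096       # hypothetical VRAM budget

def free_memory(required_mb):
    # evict oldest models until required_mb additionally fits in the budget
    global used_mb
    while loaded and used_mb + required_mb > BUDGET_MB:
        _, size = loaded.pop(0)
        used_mb -= size

def load_model_gpu(name, size_mb, should_free_memory=True):
    # pre-revert signature; after this revert the free step runs unconditionally
    global used_mb
    if should_free_memory:
        free_memory(size_mb)
    loaded.append((name, size_mb))
    used_mb += size_mb

load_model_gpu("diffusion_model", 3900)                 # fits: 3900 MB resident
load_model_gpu("blip", 400, should_free_memory=False)   # skips eviction
print(loaded, used_mb)                                  # both resident, 4300 MB > budget

With the revert, the should_free_memory=False escape hatch is gone, so loading BLIP goes through the same free_memory path as every other model and the previously loaded model can be evicted instead of both staying resident.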
@@ -47,7 +47,7 @@ class Interrogator:
         self.blip_model = ModelPatcher(model, load_device=self.load_device, offload_device=self.offload_device)

-        model_management.load_model_gpu(self.blip_model, should_free_memory=False)
+        model_management.load_model_gpu(self.blip_model)

         gpu_image = transforms.Compose([
             transforms.ToTensor(),
@@ -373,7 +373,7 @@ def free_memory(memory_required, device, keep_loaded=[]):
     if mem_free_torch > mem_free_total * 0.25:
         soft_empty_cache()

-def load_models_gpu(models, memory_required=0, should_free_memory=True):
+def load_models_gpu(models, memory_required=0):
     global vram_state

     inference_memory = minimum_inference_memory()
@@ -396,7 +396,7 @@ def load_models_gpu(models, memory_required=0, should_free_memory=True):
     if len(models_to_load) == 0:
         devs = set(map(lambda a: a.device, models_already_loaded))
         for d in devs:
-            if d != torch.device("cpu") and should_free_memory:
+            if d != torch.device("cpu"):
                 free_memory(extra_mem, d, models_already_loaded)
         return
@@ -408,7 +408,7 @@ def load_models_gpu(models, memory_required=0, should_free_memory=True):
         total_memory_required[loaded_model.device] = total_memory_required.get(loaded_model.device, 0) + loaded_model.model_memory_required(loaded_model.device)

     for device in total_memory_required:
-        if device != torch.device("cpu") and should_free_memory:
+        if device != torch.device("cpu"):
             free_memory(total_memory_required[device] * 1.3 + extra_mem, device, models_already_loaded)

     for loaded_model in models_to_load:
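As a side note on the unchanged free_memory call in the hunk above: the amount requested is the model's footprint scaled by 1.3 plus the extra inference reserve. A quick worked example with made-up numbers (extra_mem here is only a stand-in for the value derived from minimum_inference_memory()):

model_bytes = 4.0 * 1024**3            # hypothetical 4 GiB model footprint
extra_mem = 1.0 * 1024**3              # stand-in for the inference reserve
to_free = model_bytes * 1.3 + extra_mem
print(f"{to_free / 1024**3:.1f} GiB requested from free_memory")  # 6.2 GiB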
@@ -436,8 +436,8 @@ def load_models_gpu(models, memory_required=0, should_free_memory=True):
     return

-def load_model_gpu(model, should_free_memory=True):
-    return load_models_gpu([model], should_free_memory=should_free_memory)
+def load_model_gpu(model):
+    return load_models_gpu([model])

 def cleanup_models():
     to_delete = []
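The commit message suggests queuing the interrogate action instead of special-casing its memory handling. A rough, generic sketch of that direction follows; the queue, worker, and action names are hypothetical and not taken from this repository:

import queue
import threading

tasks = queue.Queue()

def worker():
    while True:
        action, payload = tasks.get()
        if action == "interrogate":
            # load BLIP via the normal load_model_gpu path, caption the image,
            # and let a later task's free_memory call evict it if VRAM is needed
            print("interrogate:", payload)
        elif action == "generate":
            print("generate:", payload)
        tasks.task_done()

threading.Thread(target=worker, daemon=True).start()
tasks.put(("interrogate", "photo_01.png"))
tasks.put(("generate", "a watercolor landscape"))
tasks.join()

Because every action flows through the same worker, the shared model manager decides what stays in VRAM between tasks rather than a per-caller flag.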