diff --git a/README_CLI.md b/README_CLI.md new file mode 100644 index 00000000..0bea33fc --- /dev/null +++ b/README_CLI.md @@ -0,0 +1,84 @@ +# Fooocus CLI (Command Line Interface) + +This is a custom Command Line Interface for Fooocus, allowing you to generate images programmatically without launching the web browser. It supports advanced features like model selection, custom styles, LoRAs, and batch generation. + +## 🚀 Quick Start + +From the Fooocus directory, run: + +```bash +python entry_with_update.py --prompt "a beautiful landscape" --output "landscape.png" +``` + +For Windows Standalone users (assuming you are in the install folder): +```powershell +.\python_embeded\python.exe Fooocus\entry_with_update.py --prompt "test" +``` + +## 🛠️ Usage & Arguments + +### Basic Usage + +```bash +python entry_with_update.py --prompt "your prompt" --aspect-ratio "1024x1024" +``` + +### Advanced Usage + +```bash +python entry_with_update.py ^ + --prompt "cyberpunk detective, neon rain" ^ + --negative-prompt "bright, sunny" ^ + --steps 30 ^ + --cfg-scale 7.0 ^ + --style "Fooocus V2" --style "Fooocus Cyberpunk" ^ + --base-model "juggernautXL_v8Rundiffusion.safetensors" ^ + --lora "sd_xl_offset_example-lora_1.0.safetensors:0.5" ^ + --image-number 2 ^ + --output "detective.png" +``` + +### Argument Reference + +| Category | Argument | Description | Default | +| :--- | :--- | :--- | :--- | +| **Core** | `--prompt` | **(Required)** The positive prompt text. | N/A | +| | `--negative-prompt` | The negative prompt text. | "" | +| | `--output` | Output filename, relative to the command run location. | N/A | +| | `--seed` | Seed number for reproducibility. `-1` is random. | -1 | +| | `--image-number` | Number of images to generate in sequence. | 1 | +| **Performance** | `--performance` | Preset: `Speed`, `Quality`, `Extreme Speed`. | Speed | +| | `--steps` | Exact number of sampling steps (overrides performance). | N/A | +| | `--aspect-ratio` | Dimensions (e.g., `1152x896`, `1024x1024`). 
| 1152x896 | +| | `--cfg-scale` | Guidance scale (how strictly to follow prompt). | 7.0 | +| | `--sharpness` | Image sharpness filter strength. | 2.0 | +| | `--sampler` | Sampler method name. | dpmpp_2m_sde_gpu | +| | `--scheduler` | Scheduler name. | karras | +| **Models** | `--base-model` | Filename of the base checkpoint. | (Config Default) | +| | `--refiner-model` | Filename of the refiner checkpoint. | (Config Default) | +| | `--refiner-switch` | Step ratio to switch to refiner (0.0-1.0). | 0.5 | +| | `--lora` | Load LoRA: `filename:weight`. Use flag multiple times. | N/A | +| **Styles** | `--style` | Style name. Use flag multiple times. | (Fooocus Defaults) | + +## 🤖 AI Agent Integration + +If you want to teach an AI agent to use this tool, provide it with the following specification: + +### Tool: `generate_image_fooocus` + +**Description:** Generates images locally using Fooocus via CLI. +**Execution:** `d:\Fooocus_win64_2-5-0\fooocus-cli.bat` + +**Parameters:** +* `prompt`: String (Required) +* `negative_prompt`: String +* `output`: String (Filename) +* `aspect_ratio`: String (e.g., "1024x1024") +* `base_model`: String (Checkpoint filename) +* `style`: List[String] (Style names) +* `lora`: List[String] (Format "name:weight") +* `steps`: Integer +* `cfg_scale`: Float + +**Notes:** +* Output is saved relative to `d:\Fooocus_win64_2-5-0\Fooocus\` if a relative path is given in python, but the batch wrapper usually handles CWD. Absolute paths are recommended for the `--output` argument to ensure files are saved exactly where intended. 
diff --git a/args_manager.py b/args_manager.py index bb622c23..4970756b 100644 --- a/args_manager.py +++ b/args_manager.py @@ -40,6 +40,25 @@ args_parser.parser.add_argument("--always-download-new-model", action='store_tru args_parser.parser.add_argument("--rebuild-hash-cache", help="Generates missing model and LoRA hashes.", type=int, nargs="?", metavar="CPU_NUM_THREADS", const=-1) +# CLI Arguments +args_parser.parser.add_argument("--prompt", type=str, default=None, help="Positive prompt for CLI mode.") +args_parser.parser.add_argument("--negative-prompt", type=str, default="", help="Negative prompt for CLI mode.") +args_parser.parser.add_argument("--seed", type=int, default=-1, help="Seed for CLI mode.") +args_parser.parser.add_argument("--output", type=str, default=None, help="Output filename for CLI mode.") +args_parser.parser.add_argument("--performance", type=str, default="Speed", choices=["Speed", "Quality", "Extreme Speed"], help="Performance mode for CLI mode.") +args_parser.parser.add_argument("--aspect-ratio", type=str, default="1152\u00d7896", help="Aspect ratio for CLI mode.") +args_parser.parser.add_argument("--base-model", type=str, default=None, help="Base model filename.") +args_parser.parser.add_argument("--refiner-model", type=str, default=None, help="Refiner model filename.") +args_parser.parser.add_argument("--refiner-switch", type=float, default=0.5, help="Refiner switch point (0.0-1.0).") +args_parser.parser.add_argument("--lora", action='append', help="LoRA definition in format 'filename:weight'. Can be specified multiple times.") +args_parser.parser.add_argument("--style", action='append', help="Style name. 
Can be specified multiple times.") +args_parser.parser.add_argument("--cfg-scale", type=float, default=7.0, help="CFG Scale (default 7.0).") +args_parser.parser.add_argument("--sampler", type=str, default="dpmpp_2m_sde_gpu", help="Sampler name.") +args_parser.parser.add_argument("--scheduler", type=str, default="karras", help="Scheduler name.") +args_parser.parser.add_argument("--sharpness", type=float, default=2.0, help="Image sharpness (default 2.0).") +args_parser.parser.add_argument("--image-number", type=int, default=1, help="Number of images to generate (default 1).") +args_parser.parser.add_argument("--steps", type=int, default=-1, help="Number of steps (overrides performance).") + args_parser.parser.set_defaults( disable_cuda_malloc=True, in_browser=True, diff --git a/launch.py b/launch.py index eae5b19e..acbcd119 100644 --- a/launch.py +++ b/launch.py @@ -27,9 +27,9 @@ TRY_INSTALL_XFORMERS = False def prepare_environment(): - torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu121") + torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu128") torch_command = os.environ.get('TORCH_COMMAND', - f"pip install torch==2.1.0 torchvision==0.16.0 --extra-index-url {torch_index_url}") + f"pip install torch==2.7.1 torchvision==0.22.1 --extra-index-url {torch_index_url}") requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt") print(f"Python {sys.version}") @@ -149,4 +149,15 @@ config.default_base_model_name, config.checkpoint_downloads = download_models( config.update_files() init_cache(config.model_filenames, config.paths_checkpoints, config.lora_filenames, config.paths_loras) +if args.prompt is not None: + try: + from modules.cli_worker import run_cli + run_cli(args) + sys.exit(0) + except Exception as e: + print(f"[CLI] Error: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + from webui import * diff --git a/ldm_patched/modules/model_sampling.py 
b/ldm_patched/modules/model_sampling.py index 8971b4e6..4ddd9ab8 100644 --- a/ldm_patched/modules/model_sampling.py +++ b/ldm_patched/modules/model_sampling.py @@ -58,7 +58,7 @@ class ModelSamplingDiscrete(torch.nn.Module): else: betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s) alphas = 1. - betas - alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_cumprod = torch.cumprod(torch.tensor(alphas, dtype=torch.float32), dim=0) timesteps, = betas.shape self.num_timesteps = int(timesteps) diff --git a/modules/cli_worker.py b/modules/cli_worker.py new file mode 100644 index 00000000..cd83c7f5 --- /dev/null +++ b/modules/cli_worker.py @@ -0,0 +1,201 @@ +""" +Fooocus CLI Worker + +This module handles the programmatic generation of images using the Fooocus pipeline. +It is invoked by launch.py when CLI arguments are detected. +""" +import os +import random +import torch + +import modules.default_pipeline as pipeline +import modules.async_worker as worker +import modules.flags as flags +import modules.config +from modules.util import get_enabled_loras +from modules.sdxl_styles import apply_style +from modules.private_logger import log + +def run_cli(args): + prompt = args.prompt + negative_prompt = args.negative_prompt + seed = args.seed + + performance = args.performance + aspect_ratio = args.aspect_ratio + + # 1. 
Configuration & Models + # Styles + if args.style: + styles = args.style + else: + styles = ["Fooocus V2", "Fooocus Photograph", "Fooocus Enhance"] + + # Models + base_model = args.base_model if args.base_model else modules.config.default_base_model_name + refiner_model = args.refiner_model if args.refiner_model else modules.config.default_refiner_model_name + + # LoRAs (convert 'name:weight' to (enabled, name, weight)) + if args.lora: + loras = [] + for l in args.lora: + try: + if ':' in l: + parts = l.rsplit(':', 1) + name = parts[0] + weight = float(parts[1]) + else: + name = l + weight = 1.0 + loras.append((True, name, weight)) + except Exception as e: + print(f"[CLI] Warning: Could not parse LoRA '{l}': {e}") + else: + loras = modules.config.default_loras + + print(f"\n[CLI] Settings:") + print(f" Prompt: {prompt}") + print(f" Negative: {negative_prompt}") + print(f" Styles: {styles}") + print(f" Models: Base='{base_model}', Refiner='{refiner_model}'") + print(f" LoRAs: {len(loras)} enabled") + print(f" Performance: {performance}") + + # Refresh Pipeline + try: + pipeline.refresh_everything( + refiner_model_name=refiner_model, + base_model_name=base_model, + loras=get_enabled_loras(loras), + vae_name=modules.config.default_vae + ) + except Exception as e: + print(f"[CLI] Error loading models: {e}") + return + + # 2. Process Prompts + positive_basic_workloads = [prompt] + negative_basic_workloads = [negative_prompt] + + for s in styles: + if s == "Fooocus V2": + continue + try: + p, n, _ = apply_style(s, positive=prompt) + positive_basic_workloads += p + negative_basic_workloads += n + except Exception as e: + print(f"[CLI] Warning: Failed to apply style '{s}': {e}") + + # 3. 
Expansion + if "Fooocus V2" in styles: + seed_for_expansion = seed if seed != -1 else random.randint(0, 2**32 - 1) + expansion = pipeline.final_expansion(prompt, seed_for_expansion) + print(f"[CLI] Expansion: {expansion}") + positive_prompts = positive_basic_workloads + [expansion] + else: + expansion = "" + positive_prompts = positive_basic_workloads + + # 4. Encoding + print("[CLI] Encoding prompts...") + c = pipeline.clip_encode(texts=positive_prompts, pool_top_k=len(positive_basic_workloads)) + uc = pipeline.clip_encode(texts=negative_basic_workloads, pool_top_k=len(negative_basic_workloads)) + + # 5. Parameters + perf_obj = flags.Performance(performance) + steps = perf_obj.steps() + if args.steps > 0: + steps = args.steps + + switch = int(steps * args.refiner_switch) + + try: + if '\u00d7' in aspect_ratio: + w_h = aspect_ratio.split('\u00d7') + elif 'x' in aspect_ratio: + w_h = aspect_ratio.split('x') + else: + w_h = aspect_ratio.split(',') # fallback? no, probably just error out to default + + width = int(w_h[0]) + height = int(w_h[1]) + except: + width, height = 1152, 896 + + print(f"[CLI] Resolution: {width}x{height}, Steps: {steps}, Sampler: {args.sampler}") + + # Initialize PatchSettings + from modules.patch import PatchSettings, patch_settings + pid = os.getpid() + patch_settings[pid] = PatchSettings( + sharpness=args.sharpness, + adm_scaler_end=0.3, + positive_adm_scale=1.5, + negative_adm_scale=0.8, + controlnet_softness=0.25, + adaptive_cfg=7.0 + ) + + # 6. 
Generation Loop + def callback(step, x0, x, total_steps, y): + print(f" Step {step}/{total_steps}", end="\r") + + total_images = args.image_number + print(f"[CLI] Generating {total_images} image(s)...") + + for i in range(total_images): + current_seed = seed + if current_seed == -1: + current_seed = random.randint(0, 2**32 - 1) + elif i > 0: + current_seed += 1 # Auto-increment seed + + print(f"\n[CLI] Image {i+1}/{total_images} (Seed: {current_seed})") + + with torch.no_grad(): + imgs = pipeline.process_diffusion( + positive_cond=c, + negative_cond=uc, + steps=steps, + switch=switch, + width=width, + height=height, + image_seed=current_seed, + callback=callback, + sampler_name=args.sampler, + scheduler_name=args.scheduler, + latent=None, + denoise=1.0, + cfg_scale=args.cfg_scale, + refiner_swap_method="joint" + ) + + # 7. Save + log_task = { + 'log_positive_prompt': prompt, + 'log_negative_prompt': negative_prompt, + 'expansion': expansion, + 'styles': styles, + 'task_seed': current_seed, + 'base_model_name': base_model + } + + for img_idx, img in enumerate(imgs): + d = [('Prompt', 'prompt', prompt)] + path = log(img, d, None, "png", log_task) + print(f"[CLI] Saved to: {path}") + + if args.output: + if total_images > 1: + # Append index to filename if multiple images + root, ext = os.path.splitext(args.output) + final_output = f"{root}_{i+1}{ext}" + else: + final_output = args.output + + import shutil + shutil.copy(path, final_output) + print(f"[CLI] Copied to: {final_output}") + + print("\n[CLI] Generation complete!") diff --git a/modules/patch_clip.py b/modules/patch_clip.py index 06b7f01b..bcff086f 100644 --- a/modules/patch_clip.py +++ b/modules/patch_clip.py @@ -122,8 +122,28 @@ def patched_SDClipModel_forward(self, tokens): if tokens[x, y] == max_token: break - outputs = self.transformer(input_ids=tokens, attention_mask=attention_mask, - output_hidden_states=self.layer == "hidden") + try: + outputs = self.transformer(input_ids=tokens, 
attention_mask=attention_mask, + output_hidden_states=self.layer == "hidden") + except TypeError: + # Fallback for ldm_patched.modules.clip_model.CLIPTextModel_ + # which uses a different signature than transformers.CLIPTextModel + intermediate_output = self.layer_idx if self.layer == "hidden" else None + # Note: ldm_patched expects positional input_tokens + outputs = self.transformer(tokens, attention_mask=attention_mask, + intermediate_output=intermediate_output) + + # Handle ldm_patched return type (tuple) vs transformers return type (BaseModelOutputWithPooling) + if isinstance(outputs, tuple): + class MockOutput: + pass + out_obj = MockOutput() + out_obj.last_hidden_state = outputs[0] + # output[1] is intermediate + out_obj.hidden_states = {self.layer_idx: outputs[1]} if outputs[1] is not None else None + out_obj.pooler_output = outputs[2] + outputs = out_obj + self.transformer.set_input_embeddings(backup_embeds) if self.layer == "last": @@ -131,7 +151,12 @@ def patched_SDClipModel_forward(self, tokens): elif self.layer == "pooled": z = outputs.pooler_output[:, None, :] else: - z = outputs.hidden_states[self.layer_idx] + # For ldm_patched, we stored it in the dict with key layer_idx + if isinstance(outputs.hidden_states, dict): + z = outputs.hidden_states[self.layer_idx] + else: + z = outputs.hidden_states[self.layer_idx] + if self.layer_norm_hidden_state: z = self.transformer.text_model.final_layer_norm(z) diff --git a/readme.md b/readme.md index 6f34a950..298470bf 100644 --- a/readme.md +++ b/readme.md @@ -428,6 +428,15 @@ Processed only for positive prompt. Applies a LoRA to the prompt. The LoRA file must be located in the `models/loras` directory. +## Command Line Interface (CLI) + +Fooocus now supports a headless Command Line Interface for generating images without the Web UI. + +See [README_CLI.md](README_CLI.md) for detailed instructions. 
+ +Quick Example: +`python entry_with_update.py --prompt "astronaut in space" --output "space.png"` + ## Advanced Features [Click here to browse the advanced features.](https://github.com/lllyasviel/Fooocus/discussions/117)