diff --git a/tools/cli/README.md b/tools/cli/README.md
index 7b8b8692e9..505c8f4223 100644
--- a/tools/cli/README.md
+++ b/tools/cli/README.md
@@ -158,7 +158,7 @@
 | `-mmu, --mmproj-url URL` | URL to a multimodal projector file. see tools/mtmd/README.md<br/>(env: LLAMA_ARG_MMPROJ_URL) |
 | `--mmproj-auto, --no-mmproj, --no-mmproj-auto` | whether to use multimodal projector file (if available), useful when using -hf (default: enabled)<br/>(env: LLAMA_ARG_MMPROJ_AUTO) |
 | `--mmproj-offload, --no-mmproj-offload` | whether to enable GPU offloading for multimodal projector (default: enabled)<br/>(env: LLAMA_ARG_MMPROJ_OFFLOAD) |
-| `--image, --audio FILE` | path to an image or audio file. use with multimodal models, use comma-separated values for multiple files |
+| `--image, --audio FILE` | path or URL to an image or audio file. use with multimodal models, use comma-separated values for multiple files |
 | `--image-min-tokens N` | minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)<br/>(env: LLAMA_ARG_IMAGE_MIN_TOKENS) |
 | `--image-max-tokens N` | maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)<br/>(env: LLAMA_ARG_IMAGE_MAX_TOKENS) |
 | `-otd, --override-tensor-draft <tensor name pattern>=<buffer type>,...` | override tensor buffer type for draft model |
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 2f0ffea1c2..7886942112 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -140,21 +140,78 @@ struct cli_context {
         return curr_content;
     }
 
-    // TODO: support remote files in the future (http, https, etc)
-    std::string load_input_file(const std::string & fname, bool is_media) {
-        std::ifstream file(fname, std::ios::binary);
+    // fetch `url` into `out`; logs the reason and returns false on a non-200 status,
+    // an empty body, an exception, or when the build has no network support
+    bool load_data_from_url(const std::string & url, std::vector<char> & out) {
+#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
+        try {
+            common_remote_params params;
+            params.headers.push_back("User-Agent: llama.cpp/" + build_info);
+            params.max_size = 1024 * 1024 * 10; // 10MB
+            params.timeout = 10; // seconds
+            auto [http_code, data] = common_remote_get_content(url, params);
+            if (http_code != 200) {
+                console::error("Failed to fetch from URL: %s, HTTP code: %ld\n", url.c_str(), http_code);
+                return false;
+            }
+            if (data.empty()) {
+                console::error("Fetched empty content from URL: %s\n", url.c_str());
+                return false;
+            }
+            out = std::move(data);
+            return true;
+        } catch (const std::exception & e) {
+            console::error("Exception while fetching from URL: %s, error: %s\n", url.c_str(), e.what());
+            return false;
+        }
+#else
+        console::error("Network support is disabled. Compile with LLAMA_USE_CURL or LLAMA_USE_HTTPLIB to enable URL loading.\n");
+        GGML_UNUSED(url);
+        GGML_UNUSED(out);
+        return false;
+#endif
+    }
+
+    // read the whole file at `path` into `out` in binary mode; returns false if it cannot be opened
+    bool load_data_from_file(const std::string & path, std::vector<char> & out) {
+        std::ifstream file(path, std::ios::binary);
         if (!file) {
+            console::error("Failed to open file: %s\n", path.c_str());
+            return false;
+        }
+        out.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+        return true;
+    }
+
+    // load input from local path or url
+    // media: the bytes are appended to input_files and the mtmd marker is returned
+    // text: the content itself is returned; "" is returned when loading fails
+    std::string load_input_file(const std::string & source, bool is_media) {
+        static auto is_url = [](const std::string & s) {
+            // rfind(prefix, 0) can only match at index 0, so this is a prefix test
+            return s.rfind("http://", 0) == 0 || s.rfind("https://", 0) == 0;
+        };
+
+        std::vector<char> data;
+
+        bool success = false;
+        if (is_url(source)) {
+            success = load_data_from_url(source, data);
+        } else {
+            success = load_data_from_file(source, data);
+        }
+
+        if (!success) {
             return "";
         }
+
         if (is_media) {
             raw_buffer buf;
-            buf.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+            buf.assign(data.begin(), data.end());
             input_files.push_back(std::move(buf));
             return mtmd_default_marker();
-        } else {
-            std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
-            return content;
         }
+        return std::string(data.data(), data.size());
     }
 };
 
@@ -247,10 +304,10 @@ int main(int argc, char ** argv) {
             console::log(" /clear clear the chat history\n");
             console::log(" /read add a text file\n");
             if (inf.has_inp_image) {
-                console::log(" /image add an image file\n");
+                console::log(" /image add an image file (supports URL)\n");
             }
             if (inf.has_inp_audio) {
-                console::log(" /audio add an audio file\n");
+                console::log(" /audio add an audio file (supports URL)\n");
             }
             console::log("\n");
 