diff --git a/tools/cli/README.md b/tools/cli/README.md
index 7b8b8692e9..505c8f4223 100644
--- a/tools/cli/README.md
+++ b/tools/cli/README.md
@@ -158,7 +158,7 @@
| `-mmu, --mmproj-url URL` | URL to a multimodal projector file. see tools/mtmd/README.md
(env: LLAMA_ARG_MMPROJ_URL) |
| `--mmproj-auto, --no-mmproj, --no-mmproj-auto` | whether to use multimodal projector file (if available), useful when using -hf (default: enabled)
(env: LLAMA_ARG_MMPROJ_AUTO) |
| `--mmproj-offload, --no-mmproj-offload` | whether to enable GPU offloading for multimodal projector (default: enabled)
(env: LLAMA_ARG_MMPROJ_OFFLOAD) |
-| `--image, --audio FILE` | path to an image or audio file. use with multimodal models, use comma-separated values for multiple files |
+| `--image, --audio FILE` | path or http(s) URL to an image or audio file. use with multimodal models, use comma-separated values for multiple files |
| `--image-min-tokens N` | minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)
(env: LLAMA_ARG_IMAGE_MIN_TOKENS) |
| `--image-max-tokens N` | maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)
(env: LLAMA_ARG_IMAGE_MAX_TOKENS) |
| `-otd, --override-tensor-draft =,...` | override tensor buffer type for draft model |
diff --git a/tools/cli/cli.cpp b/tools/cli/cli.cpp
index 2f0ffea1c2..7886942112 100644
--- a/tools/cli/cli.cpp
+++ b/tools/cli/cli.cpp
@@ -140,21 +140,72 @@ struct cli_context {
return curr_content;
}
- // TODO: support remote files in the future (http, https, etc)
- std::string load_input_file(const std::string & fname, bool is_media) {
- std::ifstream file(fname, std::ios::binary);
+    bool load_data_from_url(const std::string & url, std::vector<char> & out) { // fetch `url` into `out`; logs and returns false on any failure
+#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
+        try {
+            common_remote_params params;
+            params.headers.push_back("User-Agent: llama.cpp/" + build_info);
+            params.max_size = 1024 * 1024 * 10; // 10MB
+            params.timeout = 10; // seconds
+            auto [http_code, data] = common_remote_get_content(url, params);
+            if (http_code != 200) { // any status other than 200 is reported as a failure
+                console::error("Failed to fetch from URL: %s, HTTP code: %ld\n", url.c_str(), http_code);
+                return false;
+            }
+            if (data.empty()) { // an empty body is rejected rather than handed back to the caller
+                console::error("Fetched empty content from URL: %s\n", url.c_str());
+                return false;
+            }
+            out = std::move(data); // `out` is only written on success
+            return true;
+        } catch (const std::exception & e) {
+            console::error("Exception while fetching from URL: %s, error: %s\n", url.c_str(), e.what());
+            return false;
+        }
+#else
+        console::error("Network support is disabled. Compile with LLAMA_USE_CURL or LLAMA_USE_HTTPLIB to enable URL loading.\n");
+        GGML_UNUSED(url);
+        GGML_UNUSED(out);
+        return false;
+#endif
+    }
+
+    bool load_data_from_file(const std::string & path, std::vector<char> & out) { // read the whole file at `path` (binary mode) into `out`
+        std::ifstream file(path, std::ios::binary);
         if (!file) {
+            console::error("Failed to open file: %s\n", path.c_str());
+            return false;
+        }
+        out.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); // extra parentheses avoid the most-vexing-parse
+        return true;
+    }
+
+    // load input from a local path or an http(s) URL; returns "" on failure, the media marker for media input, otherwise the raw content
+    std::string load_input_file(const std::string & source, bool is_media) {
+        static auto is_url = [](const std::string & s) {
+            return s.rfind("http://", 0) == 0 || s.rfind("https://", 0) == 0; // rfind(prefix, 0) tests only position 0, no full-string scan
+        };
+
+        std::vector<char> data;
+
+        bool success = false;
+        if (is_url(source)) {
+            success = load_data_from_url(source, data);
+        } else {
+            success = load_data_from_file(source, data);
+        }
+
+        if (!success) {
             return "";
         }
+
         if (is_media) {
             raw_buffer buf;
-            buf.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
+            buf.assign(data.begin(), data.end());
             input_files.push_back(std::move(buf));
             return mtmd_default_marker();
-        } else {
-            std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
-            return content;
         }
+        return std::string(data.data(), data.size());
     }
};
@@ -247,10 +298,10 @@ int main(int argc, char ** argv) {
console::log(" /clear clear the chat history\n");
console::log(" /read add a text file\n");
if (inf.has_inp_image) {
- console::log(" /image add an image file\n");
+ console::log(" /image add an image file (supports URL)\n");
}
if (inf.has_inp_audio) {
- console::log(" /audio add an audio file\n");
+ console::log(" /audio add an audio file (supports URL)\n");
}
console::log("\n");