diff --git a/common/arg.cpp b/common/arg.cpp
index b864ca8c2b..eab26b67f2 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2480,14 +2480,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.models_dir = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
     add_opt(common_arg(
         {"--max-models"}, "N",
         string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.max_models),
         [](common_params & params, int value) {
             params.max_models = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MAX_MODELS"));
     add_opt(common_arg(
         {"--jinja"},
         "use jinja template for chat (default: disabled)",