llama : allow using iGPUs with --device (#15951)
* llama : allow using iGPUs with --device
* mtmd : allow iGPU
* rpc-server : allow iGPU
parent 55758b00ca
commit 50f4281a6f
@@ -1304,7 +1304,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val
     } else {
         for (const auto & device : dev_names) {
             auto * dev = ggml_backend_dev_by_name(device.c_str());
-            if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
+            if (!dev || ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU) {
                 throw std::invalid_argument(string_format("invalid device: %s", device.c_str()));
             }
             devices.push_back(dev);
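The net effect of this check is that --device now accepts any non-CPU device reported by ggml, including integrated GPUs, instead of only devices typed as discrete GPUs. Below is a minimal sketch of the relaxed acceptance rule, assuming ggml-backend.h and a linked ggml build; which device names appear depends on the backends compiled in.

// Sketch: enumerate registered devices and mark which ones the relaxed
// --device check would now accept (anything that is not a CPU device).
#include "ggml-backend.h"
#include <cstdio>

int main() {
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        // old rule: only GGML_BACKEND_DEVICE_TYPE_GPU passed; new rule: only CPU is rejected
        bool accepted = ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU;
        printf("%-16s %s\n", ggml_backend_dev_name(dev),
               accepted ? "usable with --device" : "rejected (CPU)");
    }
    return 0;
}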
@@ -1314,7 +1314,7 @@ static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & val
     return devices;
 }
 
-static void add_rpc_devices(std::string servers) {
+static void add_rpc_devices(const std::string & servers) {
     auto rpc_servers = string_split<std::string>(servers, ',');
     if (rpc_servers.empty()) {
         throw std::invalid_argument("no RPC servers specified");
@@ -2516,24 +2516,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--list-devices"},
         "print list of available devices and exit",
         [](common_params &) {
-            std::vector<ggml_backend_dev_t> rpc_devices;
-            std::vector<ggml_backend_dev_t> all_devices;
+            std::vector<ggml_backend_dev_t> devices;
             for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
                 auto * dev = ggml_backend_dev_get(i);
-                if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
-                    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
-                    if (ggml_backend_reg_name(reg) == std::string("RPC")) {
-                        rpc_devices.push_back(dev);
-                    } else {
-                        all_devices.push_back(dev);
-                    }
+                if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) {
+                    devices.push_back(dev);
                 }
             }
-            // insert RPC devices in front
-            all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end());
             printf("Available devices:\n");
-            for (size_t i = 0; i < all_devices.size(); ++i) {
-                auto * dev = all_devices[i];
+            for (auto * dev : devices) {
                 size_t free, total;
                 ggml_backend_dev_memory(dev, &free, &total);
                 printf("  %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024);
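After this hunk, --list-devices simply prints every non-CPU device in registration order; RPC devices are no longer collected separately and moved to the front of the list. The following self-contained sketch reproduces the simplified listing loop outside of common_params_parser_init, assuming ggml-backend.h and linking against ggml.

// Sketch: list all non-CPU devices with their memory, mirroring the new --list-devices loop.
#include "ggml-backend.h"
#include <cstdio>
#include <vector>

int main() {
    std::vector<ggml_backend_dev_t> devices;
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        ggml_backend_dev_t dev = ggml_backend_dev_get(i);
        if (ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_CPU) {
            devices.push_back(dev); // discrete GPUs, iGPUs and accelerators all qualify
        }
    }
    printf("Available devices:\n");
    for (ggml_backend_dev_t dev : devices) {
        size_t free, total;
        ggml_backend_dev_memory(dev, &free, &total);
        printf("  %s: %s (%zu MiB, %zu MiB free)\n",
               ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
               total / 1024 / 1024, free / 1024 / 1024);
    }
    return 0;
}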
@@ -406,6 +406,7 @@ struct clip_ctx {
             }
             if (!backend) {
                 backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr);
+                backend = backend ? backend : ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr);
             }
         }
 
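For mtmd, the added line gives integrated GPUs a chance when no discrete GPU backend can be initialized. A minimal sketch of the resulting preference order, assuming ggml-backend.h; the CPU step at the end is only there to keep the sketch self-contained and is not part of this hunk.

// Sketch: try a discrete GPU first, then an integrated GPU, then fall back to CPU.
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_t backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr);
    if (!backend) {
        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU, nullptr);
    }
    if (!backend) {
        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
    }
    if (backend) {
        printf("selected backend: %s\n", ggml_backend_name(backend));
        ggml_backend_free(backend);
    }
    return 0;
}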
@@ -227,15 +227,7 @@ static ggml_backend_t create_backend(const rpc_server_params & params) {
         }
     }
 
-    // try to initialize a GPU backend first
-    if (!backend) {
-        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_GPU, nullptr);
-    }
-
-    // if there aren't GPU backends fallback to CPU backend
-    if (!backend) {
-        backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr);
-    }
+    backend = ggml_backend_init_best();
 
     if (backend) {
         fprintf(stderr, "%s: using %s backend\n", __func__, ggml_backend_name(backend));
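On the rpc-server side, the manual GPU-then-CPU fallback collapses into a single call: ggml_backend_init_best() asks ggml for the best available backend, which now covers iGPU-only machines as well. A minimal sketch, assuming ggml-backend.h and a linked ggml build.

// Sketch: let ggml pick the best available backend (GPU, iGPU or CPU).
#include "ggml-backend.h"
#include <cstdio>

int main() {
    ggml_backend_t backend = ggml_backend_init_best();
    if (!backend) {
        fprintf(stderr, "no usable backend found\n");
        return 1;
    }
    fprintf(stderr, "using %s backend\n", ggml_backend_name(backend));
    ggml_backend_free(backend);
    return 0;
}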