use no_alloc instead of fixing f32 fallback
This commit is contained in:
parent
d6fc8fe0c7
commit
646f0a7d78
|
|
@@ -1434,6 +1434,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|||
|
||||
mparams.progress_callback = params.load_progress_callback;
|
||||
mparams.progress_callback_user_data = params.load_progress_callback_user_data;
|
||||
mparams.no_alloc = params.no_alloc;
|
||||
|
||||
return mparams;
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -679,6 +679,7 @@ struct common_params {
|
|||
// return false from callback to abort model loading or true to continue
|
||||
llama_progress_callback load_progress_callback = NULL;
|
||||
void * load_progress_callback_user_data = NULL;
|
||||
bool no_alloc = false; // Don't allocate model buffers
|
||||
};
|
||||
|
||||
// call once at the start of a program if it uses libcommon
|
||||
|
|
|
|||
|
|
@@ -1215,10 +1215,6 @@ struct ggml_tensor * llama_model_loader::create_tensor(
|
|||
const int64_t tid = gguf_find_tensor(metadata, tn.str().c_str());
|
||||
if (tid != -1) {
|
||||
type = gguf_get_tensor_type(metadata, tid);
|
||||
} else if (flags & TENSOR_NOT_REQUIRED) {
|
||||
// If the tensor is not found and not required, return nullptr to allow
|
||||
// the caller to fall back
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// for tensors that are not required some of the dimensions can be invalid:
|
||||
|
|
|
|||
|
|
@@ -167,6 +167,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
model_params.devices = params.devices.data();
|
||||
model_params.no_alloc = true;
|
||||
|
||||
model.reset(llama_model_init_from_user(gguf_ctx.get(), set_tensor_data, nullptr, model_params));
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue