Compare commits

...

3 Commits

Author SHA1 Message Date
Aadeshveer Singh 58062860af
ggml : use WARP_SIZE/2 for argmax reduction offset (#18092) 2025-12-17 11:47:01 +08:00
Yuri Khrustalev 2973a65ecb
gguf-py : allow converting multi-tensor models from read-only locations (#18100) 2025-12-17 02:27:03 +01:00
Johannes Gäßler d0794e89d9
llama-fit-params: force disable mlock (#18103) 2025-12-17 00:50:12 +01:00
3 changed files with 6 additions and 5 deletions

View File

@ -21,7 +21,7 @@ static __global__ void argmax_f32(const float * __restrict__ x, int32_t * __rest
} }
#pragma unroll #pragma unroll
for (int offset = 16; offset > 0; offset >>= 1) { for (int offset = WARP_SIZE/2; offset > 0; offset >>= 1) {
const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE); const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE);
const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE); const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE);
if (val > maxval) { if (val > maxval) {
@ -50,7 +50,7 @@ static __global__ void argmax_f32(const float * __restrict__ x, int32_t * __rest
argmax = shared_argmax[lane_id]; argmax = shared_argmax[lane_id];
} }
#pragma unroll #pragma unroll
for (int offset = 16; offset > 0; offset >>= 1) { for (int offset = WARP_SIZE/2; offset > 0; offset >>= 1) {
const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE); const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE);
const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE); const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE);
if (val > maxval) { if (val > maxval) {

View File

@ -288,7 +288,7 @@ class LocalTensor:
data_range: LocalTensorRange data_range: LocalTensorRange
def mmap_bytes(self) -> np.ndarray: def mmap_bytes(self) -> np.ndarray:
return np.memmap(self.data_range.filename, offset=self.data_range.offset, shape=self.data_range.size) return np.memmap(self.data_range.filename, mode='r', offset=self.data_range.offset, shape=self.data_range.size)
class SafetensorsLocal: class SafetensorsLocal:

View File

@ -71,8 +71,9 @@ static std::vector<llama_device_memory_data> llama_get_device_memory_data(
}, &ud); }, &ud);
llama_model_params mparams_copy = *mparams; llama_model_params mparams_copy = *mparams;
mparams_copy.no_alloc = true; mparams_copy.no_alloc = true;
mparams_copy.use_mmap = false; mparams_copy.use_mmap = false;
mparams_copy.use_mlock = false;
llama_model * model = llama_model_load_from_file(path_model, mparams_copy); llama_model * model = llama_model_load_from_file(path_model, mparams_copy);
if (model == nullptr) { if (model == nullptr) {