Compare commits

..

No commits in common. "master" and "b7442" have entirely different histories.

2 changed files with 3 additions and 3 deletions

View File

@ -21,7 +21,7 @@ static __global__ void argmax_f32(const float * __restrict__ x, int32_t * __rest
}
#pragma unroll
for (int offset = WARP_SIZE/2; offset > 0; offset >>= 1) {
for (int offset = 16; offset > 0; offset >>= 1) {
const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE);
const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE);
if (val > maxval) {
@ -50,7 +50,7 @@ static __global__ void argmax_f32(const float * __restrict__ x, int32_t * __rest
argmax = shared_argmax[lane_id];
}
#pragma unroll
for (int offset = WARP_SIZE/2; offset > 0; offset >>= 1) {
for (int offset = 16; offset > 0; offset >>= 1) {
const float val = __shfl_xor_sync(0xFFFFFFFF, maxval, offset, WARP_SIZE);
const int col = __shfl_xor_sync(0xFFFFFFFF, argmax, offset, WARP_SIZE);
if (val > maxval) {

View File

@ -288,7 +288,7 @@ class LocalTensor:
data_range: LocalTensorRange
def mmap_bytes(self) -> np.ndarray:
return np.memmap(self.data_range.filename, mode='r', offset=self.data_range.offset, shape=self.data_range.size)
return np.memmap(self.data_range.filename, offset=self.data_range.offset, shape=self.data_range.size)
class SafetensorsLocal: