sampling : add assertions for contiguous tensors in async copy functions
This commit is contained in:
parent
883a87043a
commit
a02adf4211
|
|
@ -1231,6 +1231,8 @@ static void copy_tensor_async_ints(
|
||||||
const uint32_t row = it->second;
|
const uint32_t row = it->second;
|
||||||
GGML_ASSERT(row < sampled_size);
|
GGML_ASSERT(row < sampled_size);
|
||||||
|
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(tensor) && "sampled tokens tensor must be contiguous for async copy");
|
||||||
|
|
||||||
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, tensor);
|
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, tensor);
|
||||||
ggml_backend_tensor_get_async(backend, tensor, sampled + row, 0, sizeof(sampled[row]));
|
ggml_backend_tensor_get_async(backend, tensor, sampled + row, 0, sizeof(sampled[row]));
|
||||||
}
|
}
|
||||||
|
|
@ -1253,6 +1255,8 @@ static void copy_tensor_async_floats(
|
||||||
const uint32_t row = it->second;
|
const uint32_t row = it->second;
|
||||||
GGML_ASSERT(row < counts.size());
|
GGML_ASSERT(row < counts.size());
|
||||||
|
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(tensor) && "logits/probs tensor must be contiguous for async copy");
|
||||||
|
|
||||||
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, tensor);
|
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, tensor);
|
||||||
float * row_ptr = dst + (size_t) row * stride;
|
float * row_ptr = dst + (size_t) row * stride;
|
||||||
ggml_backend_tensor_get_async(backend, tensor, row_ptr, 0, ggml_nbytes(tensor));
|
ggml_backend_tensor_get_async(backend, tensor, row_ptr, 0, ggml_nbytes(tensor));
|
||||||
|
|
@ -1279,6 +1283,8 @@ static void copy_tensor_async_candidates(
|
||||||
const uint32_t row = it->second;
|
const uint32_t row = it->second;
|
||||||
GGML_ASSERT(row < counts.size());
|
GGML_ASSERT(row < counts.size());
|
||||||
|
|
||||||
|
GGML_ASSERT(ggml_is_contiguous(tensor) && "candidates tensor must be contiguous for async copy");
|
||||||
|
|
||||||
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, tensor);
|
ggml_backend_t backend = ggml_backend_sched_get_tensor_backend(sched, tensor);
|
||||||
llama_token * row_ptr = dst + (size_t) row * stride;
|
llama_token * row_ptr = dst + (size_t) row * stride;
|
||||||
ggml_backend_tensor_get_async(backend, tensor, row_ptr, 0, ggml_nbytes(tensor));
|
ggml_backend_tensor_get_async(backend, tensor, row_ptr, 0, ggml_nbytes(tensor));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue