ggml-alloc : fix reuse-parent logic for misaligned sizes (#17884)
This commit is contained in:
parent
d9f8f60618
commit
c6f6e4f96a
|
|
@ -312,16 +312,9 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is a very naive implementation, but for our case the number of free blocks should be very small
|
// this is a very naive implementation, but for our case the number of free blocks should be very small
|
||||||
static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct ggml_tensor * tensor) {
|
static void ggml_dyn_tallocr_free_bytes(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
|
||||||
size = aligned_offset(NULL, size, alloc->alignment);
|
size = aligned_offset(NULL, size, alloc->alignment);
|
||||||
|
|
||||||
AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
|
|
||||||
__func__, tensor->name, addr.chunk, addr.offset, size, alloc->chunks[addr.chunk]->n_free_blocks);
|
|
||||||
|
|
||||||
#ifdef GGML_ALLOCATOR_DEBUG
|
|
||||||
remove_allocated_tensor(alloc, addr, tensor);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
|
struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
|
||||||
|
|
||||||
// see if we can merge with an existing block
|
// see if we can merge with an existing block
|
||||||
|
|
@ -357,8 +350,6 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
|
||||||
}
|
}
|
||||||
// otherwise, add a new block
|
// otherwise, add a new block
|
||||||
ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
|
ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
|
||||||
|
|
||||||
GGML_UNUSED(tensor);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
|
static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
|
||||||
|
|
@ -616,13 +607,17 @@ static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_ten
|
||||||
|
|
||||||
GGML_ASSERT(parent_size >= node_size);
|
GGML_ASSERT(parent_size >= node_size);
|
||||||
|
|
||||||
if (parent_size > node_size) {
|
// note: we want after the freeing the chunks to continue to be aligned
|
||||||
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
|
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
|
||||||
|
parent_size = aligned_offset(NULL, parent_size, p_alloc->alignment);
|
||||||
|
node_size = aligned_offset(NULL, node_size, p_alloc->alignment);
|
||||||
|
|
||||||
|
if (parent_size > node_size) {
|
||||||
struct buffer_address p_addr = p_hn->addr;
|
struct buffer_address p_addr = p_hn->addr;
|
||||||
p_addr.offset += node_size;
|
p_addr.offset += node_size;
|
||||||
size_t extra_size = parent_size - node_size;
|
size_t extra_size = parent_size - node_size;
|
||||||
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
|
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
|
||||||
ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
|
ggml_dyn_tallocr_free_bytes(p_alloc, p_addr, extra_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -706,7 +701,14 @@ static void ggml_gallocr_free_node(ggml_gallocr_t galloc, struct ggml_tensor * n
|
||||||
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
|
struct ggml_dyn_tallocr * alloc = galloc->buf_tallocs[buffer_id];
|
||||||
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
|
ggml_backend_buffer_type_t buft = galloc->bufts[buffer_id];
|
||||||
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
|
size_t size = ggml_backend_buft_get_alloc_size(buft, node);
|
||||||
ggml_dyn_tallocr_free_tensor(alloc, hn->addr, size, node);
|
|
||||||
|
AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
|
||||||
|
__func__, node->name, hn->addr.chunk, hn->addr.offset, size, alloc->chunks[hn->addr.chunk]->n_free_blocks);
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
remove_allocated_tensor(alloc, hn->addr, node);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ggml_dyn_tallocr_free_bytes(alloc, hn->addr, size);
|
||||||
hn->allocated = false;
|
hn->allocated = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue