llama-mmap: support the hugepage feature with a page size of 2M or 1G, which can greatly speed up loading huge models when the system has enough RAM to pre-allocate a hugetlbfs model file
This commit is contained in:
parent
4375415b4a
commit
1e78084c62
|
|
@ -42,3 +42,13 @@ if (BUILD_SHARED_LIBS)
|
||||||
target_compile_definitions(llama PRIVATE LLAMA_BUILD)
|
target_compile_definitions(llama PRIVATE LLAMA_BUILD)
|
||||||
target_compile_definitions(llama PUBLIC LLAMA_SHARED)
|
target_compile_definitions(llama PUBLIC LLAMA_SHARED)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
if (GGML_USING_HUGE_PAGE_2M)
|
||||||
|
message(STATUS " GGML_USING_HUGE_PAGE_2M is set in cmake")
|
||||||
|
target_compile_definitions(llama PRIVATE GGML_USING_HUGE_PAGE_2M=1)
|
||||||
|
endif()
|
||||||
|
if (GGML_USING_HUGE_PAGE_1G)
|
||||||
|
message(STATUS " GGML_USING_HUGE_PAGE_1G is set in cmake")
|
||||||
|
target_compile_definitions(llama PRIVATE GGML_USING_HUGE_PAGE_1G=1)
|
||||||
|
endif()
|
||||||
|
|
@ -54,6 +54,15 @@ static std::string llama_format_win_err(DWORD err) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// llama_mmap support for hugepagesz=2M and 1G
|
||||||
|
#ifdef GGML_USING_HUGE_PAGE_2M
|
||||||
|
#define HUGE_PAGE_SIZE 2097152
|
||||||
|
#endif
|
||||||
|
#ifdef GGML_USING_HUGE_PAGE_1G
|
||||||
|
#define HUGE_PAGE_SIZE 1073741824
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// llama_file
|
// llama_file
|
||||||
|
|
||||||
struct llama_file::impl {
|
struct llama_file::impl {
|
||||||
|
|
@ -274,9 +283,16 @@ struct llama_mmap::impl {
|
||||||
std::vector<std::pair<size_t, size_t>> mapped_fragments;
|
std::vector<std::pair<size_t, size_t>> mapped_fragments;
|
||||||
|
|
||||||
impl(struct llama_file * file, size_t prefetch, bool numa) {
|
impl(struct llama_file * file, size_t prefetch, bool numa) {
|
||||||
size = file->size();
|
|
||||||
int fd = file->file_id();
|
int fd = file->file_id();
|
||||||
int flags = MAP_SHARED;
|
int flags = MAP_SHARED;
|
||||||
|
#if defined(GGML_USING_HUGE_PAGE_2M) || defined(GGML_USING_HUGE_PAGE_1G)
|
||||||
|
// hugepage support requires mmap size to be aligned with pagesize
|
||||||
|
// and this is true even for normal 4K pages in mmap; only some OSes relax it
|
||||||
|
size = (file->size() + HUGE_PAGE_SIZE - 1) / HUGE_PAGE_SIZE * HUGE_PAGE_SIZE;
|
||||||
|
flags |= MAP_HUGETLB;
|
||||||
|
#else
|
||||||
|
size = file->size();
|
||||||
|
#endif
|
||||||
if (numa) { prefetch = 0; }
|
if (numa) { prefetch = 0; }
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
|
if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
|
||||||
|
|
@ -285,25 +301,25 @@ struct llama_mmap::impl {
|
||||||
}
|
}
|
||||||
if (prefetch) { flags |= MAP_POPULATE; }
|
if (prefetch) { flags |= MAP_POPULATE; }
|
||||||
#endif
|
#endif
|
||||||
addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
|
addr = mmap(NULL, size, PROT_READ, flags, fd, 0);
|
||||||
if (addr == MAP_FAILED) {
|
if (addr == MAP_FAILED) {
|
||||||
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
|
throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prefetch > 0) {
|
if (prefetch > 0) {
|
||||||
if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
|
if (posix_madvise(addr, std::min(size, prefetch), POSIX_MADV_WILLNEED)) {
|
||||||
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
|
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (numa) {
|
if (numa) {
|
||||||
if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) {
|
if (posix_madvise(addr, size, POSIX_MADV_RANDOM)) {
|
||||||
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
|
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mapped_fragments.emplace_back(0, file->size());
|
mapped_fragments.emplace_back(0, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void align_range(size_t * first, size_t * last, size_t page_size) {
|
static void align_range(size_t * first, size_t * last, size_t page_size) {
|
||||||
|
|
@ -319,7 +335,11 @@ struct llama_mmap::impl {
|
||||||
}
|
}
|
||||||
|
|
||||||
void unmap_fragment(size_t first, size_t last) {
|
void unmap_fragment(size_t first, size_t last) {
|
||||||
|
#if defined(GGML_USING_HUGE_PAGE_2M) || defined(GGML_USING_HUGE_PAGE_1G)
|
||||||
|
int page_size = HUGE_PAGE_SIZE;
|
||||||
|
#else
|
||||||
int page_size = sysconf(_SC_PAGESIZE);
|
int page_size = sysconf(_SC_PAGESIZE);
|
||||||
|
#endif
|
||||||
align_range(&first, &last, page_size);
|
align_range(&first, &last, page_size);
|
||||||
size_t len = last - first;
|
size_t len = last - first;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue