From 1e78084c6223023e8efae939daabded6df63e974 Mon Sep 17 00:00:00 2001
From: nick huang
Date: Sun, 23 Mar 2025 09:08:17 +0800
Subject: [PATCH] llama-mmap: support the hugepage feature (page size 2M or
 1G), which can greatly speed up loading a huge model when the system has
 enough RAM to pre-allocate the hugetlbfs model file

---
 src/CMakeLists.txt | 10 ++++++++++
 src/llama-mmap.cpp | 30 +++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b340dae5b2..90bc92bb0f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -42,3 +42,13 @@ if (BUILD_SHARED_LIBS)
     target_compile_definitions(llama PRIVATE LLAMA_BUILD)
     target_compile_definitions(llama PUBLIC LLAMA_SHARED)
 endif()
+
+
+if (GGML_USING_HUGE_PAGE_2M)
+message(STATUS " GGML_USING_HUGE_PAGE_2M is set in cmake")
+    target_compile_definitions(llama PRIVATE GGML_USING_HUGE_PAGE_2M=1)
+endif()
+if (GGML_USING_HUGE_PAGE_1G)
+message(STATUS " GGML_USING_HUGE_PAGE_1G is set in cmake")
+    target_compile_definitions(llama PRIVATE GGML_USING_HUGE_PAGE_1G=1)
+endif()
\ No newline at end of file
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
index 3970b7485f..898b11f798 100644
--- a/src/llama-mmap.cpp
+++ b/src/llama-mmap.cpp
@@ -54,6 +54,15 @@ static std::string llama_format_win_err(DWORD err) {
 }
 #endif
 
+// llama_mmap support for hugepagesz=2M and 1G (NOTE(review): enabling both options redefines HUGE_PAGE_SIZE - they should be mutually exclusive)
+#ifdef GGML_USING_HUGE_PAGE_2M
+#define HUGE_PAGE_SIZE 2097152
+#endif
+#ifdef GGML_USING_HUGE_PAGE_1G
+#define HUGE_PAGE_SIZE 1073741824
+#endif
+
+
 // llama_file
 
 struct llama_file::impl {
@@ -274,9 +283,16 @@ struct llama_mmap::impl {
     std::vector<std::pair<size_t, size_t>> mapped_fragments;
 
     impl(struct llama_file * file, size_t prefetch, bool numa) {
-        size = file->size();
         int fd = file->file_id();
         int flags = MAP_SHARED;
+#if defined(GGML_USING_HUGE_PAGE_2M) || defined(GGML_USING_HUGE_PAGE_1G)
+        // hugepage support requires the mmap size to be aligned to the page size
+        // (this holds even for normal 4K pages in mmap; only some OSes relax it)
+        size = (file->size() + HUGE_PAGE_SIZE - 1) / HUGE_PAGE_SIZE * HUGE_PAGE_SIZE;
+        flags |= MAP_HUGETLB;
+#else
+        size = file->size();
+#endif
         if (numa) { prefetch = 0; }
 #ifdef __linux__
         if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
@@ -285,25 +301,25 @@ struct llama_mmap::impl {
         }
         if (prefetch) { flags |= MAP_POPULATE; }
 #endif
-        addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
+        addr = mmap(NULL, size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
             throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
         }
 
         if (prefetch > 0) {
-            if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
+            if (posix_madvise(addr, std::min(size, prefetch), POSIX_MADV_WILLNEED)) {
                 LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
                         strerror(errno));
             }
         }
         if (numa) {
-            if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) {
+            if (posix_madvise(addr, size, POSIX_MADV_RANDOM)) {
                 LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
                         strerror(errno));
             }
         }
 
-        mapped_fragments.emplace_back(0, file->size());
+        mapped_fragments.emplace_back(0, size);
     }
 
     static void align_range(size_t * first, size_t * last, size_t page_size) {
@@ -319,7 +335,11 @@ struct llama_mmap::impl {
     }
 
     void unmap_fragment(size_t first, size_t last) {
+#if defined(GGML_USING_HUGE_PAGE_2M) || defined(GGML_USING_HUGE_PAGE_1G)
+        int page_size = HUGE_PAGE_SIZE;
+#else
         int page_size = sysconf(_SC_PAGESIZE);
+#endif
         align_range(&first, &last, page_size);
 
         size_t len = last - first;