1#include "llama-mmap.h"
  2
  3#include "llama-impl.h"
  4
  5#include "ggml.h"
  6
  7#include <cstring>
  8#include <climits>
  9#include <stdexcept>
 10#include <cerrno>
 11#include <algorithm>
 12
 13#ifdef __has_include
 14    #if __has_include(<unistd.h>)
 15        #include <unistd.h>
 16        #include <fcntl.h>
 17        #include <sys/stat.h>
 18        #if defined(_POSIX_MAPPED_FILES)
 19            #include <sys/mman.h>
 20        #endif
 21        #if defined(_POSIX_MEMLOCK_RANGE)
 22            #include <sys/resource.h>
 23        #endif
 24    #endif
 25#endif
 26
 27#if defined(_WIN32)
 28    #define WIN32_LEAN_AND_MEAN
 29    #ifndef NOMINMAX
 30        #define NOMINMAX
 31    #endif
 32    #include <windows.h>
 33    #ifndef PATH_MAX
 34        #define PATH_MAX MAX_PATH
 35    #endif
 36    #include <io.h>
 37#endif
 38
 39#if defined(__APPLE__)
 40#include <TargetConditionals.h>
 41#endif
 42
 43// TODO: consider moving to llama-impl.h if needed in more places
 44#if defined(_WIN32)
 45static std::string llama_format_win_err(DWORD err) {
 46    LPSTR buf;
 47    size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
 48                                 NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL);
 49    if (!size) {
 50        return "FormatMessageA failed";
 51    }
 52    std::string ret(buf, size);
 53    LocalFree(buf);
 54    return ret;
 55}
 56#endif
 57
 58// llama_file
 59
 60struct llama_file::impl {
 61#if defined(_WIN32)
 62    HANDLE fp_win32;
 63    std::string GetErrorMessageWin32(DWORD error_code) const {
 64        std::string ret;
 65        LPSTR lpMsgBuf = NULL;
 66        DWORD bufLen = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
 67                                    NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&lpMsgBuf, 0, NULL);
 68        if (!bufLen) {
 69            ret = format("Win32 error code: %lx", error_code);
 70        } else {
 71            ret = lpMsgBuf;
 72            LocalFree(lpMsgBuf);
 73        }
 74
 75        return ret;
 76    }
 77
 78    impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) {
 79        fp = ggml_fopen(fname, mode);
 80        if (fp == NULL) {
 81            throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
 82        }
 83        fp_win32 = (HANDLE) _get_osfhandle(_fileno(fp));
 84        seek(0, SEEK_END);
 85        size = tell();
 86        seek(0, SEEK_SET);
 87    }
 88
 89    size_t tell() const {
 90        LARGE_INTEGER li;
 91        li.QuadPart = 0;
 92        BOOL ret = SetFilePointerEx(fp_win32, li, &li, FILE_CURRENT);
 93        if (!ret) {
 94            throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
 95        }
 96
 97        return li.QuadPart;
 98    }
 99
100    void seek(size_t offset, int whence) const {
101        static_assert(SEEK_SET == FILE_BEGIN, "SEEK_SET != FILE_BEGIN");
102        static_assert(SEEK_CUR == FILE_CURRENT, "SEEK_CUR != FILE_CURRENT");
103        static_assert(SEEK_END == FILE_END, "SEEK_END != FILE_END");
104
105        LARGE_INTEGER li;
106        li.QuadPart = offset;
107        BOOL ret = SetFilePointerEx(fp_win32, li, NULL, whence);
108        if (!ret) {
109            throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
110        }
111    }
112
113    void read_raw(void * ptr, size_t len) {
114        size_t bytes_read = 0;
115        while (bytes_read < len) {
116            size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
117            DWORD chunk_read = 0;
118            BOOL result = ReadFile(fp_win32, reinterpret_cast<char*>(ptr) + bytes_read, chunk_size, &chunk_read, NULL);
119            if (!result) {
120                throw std::runtime_error(format("read error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
121            }
122            if (chunk_read < chunk_size || chunk_read == 0) {
123                throw std::runtime_error("unexpectedly reached end of file");
124            }
125
126            bytes_read += chunk_read;
127        }
128    }
129
130    uint32_t read_u32() {
131        uint32_t val;
132        read_raw(&val, sizeof(val));
133        return val;
134    }
135
136    void write_raw(const void * ptr, size_t len) const {
137        size_t bytes_written = 0;
138        while (bytes_written < len) {
139            size_t chunk_size = std::min<size_t>(len - bytes_written, 64*1024*1024);
140            DWORD chunk_written = 0;
141            BOOL result = WriteFile(fp_win32, reinterpret_cast<char const*>(ptr) + bytes_written, chunk_size, &chunk_written, NULL);
142            if (!result) {
143                throw std::runtime_error(format("write error: %s", GetErrorMessageWin32(GetLastError()).c_str()));
144            }
145            if (chunk_written < chunk_size || chunk_written == 0) {
146                throw std::runtime_error("unexpectedly failed to write bytes");
147            }
148
149            bytes_written += chunk_written;
150        }
151    }
152
153    void write_u32(uint32_t val) const {
154        write_raw(&val, sizeof(val));
155    }
156
157    bool has_direct_io() const {
158        return true;
159    }
160
161    ~impl() {
162        if (fp) {
163            std::fclose(fp);
164        }
165    }
166#else
167    impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) : fname(fname) {
168#ifdef __linux__
169        // Try unbuffered I/O for read only
170        if (use_direct_io && std::strcmp(mode, "rb") == 0) {
171            if (init_fd()) {
172                return;
173            }
174            LLAMA_LOG_WARN("Failed to open file '%s' with error: %s. Falling back to buffered I/O",
175                           fname, strerror(errno));
176        }
177#endif
178        init_fp(mode);
179    }
180
181#ifdef __linux__
182    bool init_fd() {
183        fd = open(fname.c_str(), O_RDONLY | O_DIRECT);
184
185        if (fd != -1) {
186            struct stat file_stats{};
187            fstat(fd, &file_stats);
188
189            size = file_stats.st_size;
190            alignment = file_stats.st_blksize;
191
192            off_t ret = lseek(fd, 0, SEEK_SET);
193            if (ret == -1) {
194                throw std::runtime_error(format("seek error: %s", strerror(errno)));
195            }
196            return true;
197        }
198        return false;
199    }
200#endif
201
202    void init_fp(const char * mode) {
203        fp = ggml_fopen(fname.c_str(), mode);
204        if (fp == NULL) {
205            throw std::runtime_error(format("failed to open %s: %s", fname.c_str(), strerror(errno)));
206        }
207        seek(0, SEEK_END);
208        size = tell();
209        seek(0, SEEK_SET);
210    }
211
212    size_t tell() const {
213        if (fd == -1) {
214            long ret = std::ftell(fp);
215            if (ret == -1) {
216                throw std::runtime_error(format("ftell error: %s", strerror(errno)));
217            }
218
219            return (size_t) ret;
220        }
221
222        off_t pos = lseek(fd, 0, SEEK_CUR);
223        if (pos == -1) {
224            throw std::runtime_error(format("lseek error: %s", strerror(errno)));
225        }
226        return (size_t) pos;
227    }
228
229    void seek(size_t offset, int whence) const {
230        off_t ret = 0;
231        if (fd == -1) {
232            ret = std::fseek(fp, (long) offset, whence);
233        } else {
234            ret = lseek(fd, offset, whence);
235        }
236        if (ret == -1) {
237            throw std::runtime_error(format("seek error: %s", strerror(errno)));
238        }
239    }
240
241    void read_raw_unsafe(void * ptr, size_t len) {
242        if (len == 0) {
243            return;
244        }
245        errno = 0;
246        if (fd == -1) {
247            const size_t curr_off = tell();
248            const size_t to_read = std::min(len, size - curr_off);
249
250            std::size_t ret = std::fread(ptr, to_read, 1, fp);
251            if (ferror(fp)) {
252                throw std::runtime_error(format("read error: %s", strerror(errno)));
253            }
254            if (to_read > 0 && ret != 1) {
255                throw std::runtime_error("unexpectedly reached end of file");
256            }
257        } else {
258            size_t bytes_read = 0;
259            while (bytes_read < len) {
260                const size_t to_read = len - bytes_read;
261                ssize_t ret = ::read(fd, reinterpret_cast<char *>(ptr) + bytes_read, to_read);
262
263                if (ret == -1) {
264                    if (errno == EINTR) {
265                        continue;  // Interrupted by signal, retry
266                    }
267                    // Fallback to std::fread in case the DMA controller cannot access the buffer
268                    if (errno == EFAULT || errno == EINVAL) {
269                        LLAMA_LOG_WARN("%s: Falling back to buffered IO due to %s\n", __func__, strerror(errno));
270                        auto curr_off = tell();
271                        close(fd);
272                        fd = -1;
273                        alignment = 1;
274                        init_fp("rb");
275                        seek(curr_off, SEEK_SET);
276                        read_raw_unsafe(ptr, len);
277                        return;
278                    }
279                    throw std::runtime_error(format("read error: %s", strerror(errno)));
280                }
281                if (ret == 0) {
282                    // EOF: allow if this read was only pulling alignment padding past file end
283                    off_t pos = lseek(fd, 0, SEEK_CUR);
284                    if (pos != -1 && (size_t) pos == size) {
285                        std::memset(reinterpret_cast<char *>(ptr) + bytes_read, 0, len - bytes_read);
286                        return;
287                    }
288                    throw std::runtime_error("unexpectedly reached end of file");
289                }
290
291                bytes_read += (size_t) ret;
292            }
293        }
294    }
295
296    void read_aligned_chunk(void * dest, size_t size) {
297        size_t offset = tell();
298        off_t aligned_offset = offset & ~(alignment - 1);
299        off_t offset_from_alignment = offset - aligned_offset;
300        size_t bytes_to_read = (offset_from_alignment + size + alignment - 1) & ~(alignment - 1);
301
302        void * raw_buffer = nullptr;
303        int ret = posix_memalign(&raw_buffer, alignment, bytes_to_read);
304        if (ret != 0) {
305            throw std::runtime_error(format("posix_memalign failed with error %d", ret));
306        }
307
308        struct aligned_buffer_deleter {
309            void operator()(void * p) const { free(p); }
310        };
311        std::unique_ptr<void, aligned_buffer_deleter> buffer(raw_buffer);
312
313        seek(aligned_offset, SEEK_SET);
314        read_raw_unsafe(buffer.get(), bytes_to_read);
315
316        uintptr_t actual_data = reinterpret_cast<uintptr_t>(buffer.get()) + offset_from_alignment;
317        memcpy(dest, reinterpret_cast<void *>(actual_data), size);
318    }
319
320    void read_raw(void * ptr, size_t len) {
321        if (has_direct_io()) {
322            read_aligned_chunk(ptr, len);
323        } else {
324            read_raw_unsafe(ptr, len);
325        }
326    }
327
328    uint32_t read_u32() {
329        uint32_t ret;
330        read_raw(&ret, sizeof(ret));
331        return ret;
332    }
333
334    void write_raw(const void * ptr, size_t len) const {
335        if (len == 0) {
336            return;
337        }
338        errno = 0;
339        size_t ret = std::fwrite(ptr, len, 1, fp);
340        if (ret != 1) {
341            throw std::runtime_error(format("write error: %s", strerror(errno)));
342        }
343    }
344
345    void write_u32(uint32_t val) const {
346        write_raw(&val, sizeof(val));
347    }
348
349    bool has_direct_io() const {
350        return fd != -1 && alignment > 1;
351    }
352
353    ~impl() {
354        if (fd != -1) {
355            close(fd);
356        } else {
357            std::fclose(fp);
358        }
359    }
360    int fd = -1;
361    std::string fname;
362#endif
363
364    size_t read_alignment() const {
365        return alignment;
366    }
367
368    size_t alignment = 1;
369
370    FILE * fp{};
371    size_t size{};
372};
373
374llama_file::llama_file(const char * fname, const char * mode, const bool use_direct_io) :
375    pimpl(std::make_unique<impl>(fname, mode, use_direct_io)) {}
376llama_file::~llama_file() = default;
377
378size_t llama_file::tell() const { return pimpl->tell(); }
379size_t llama_file::size() const { return pimpl->size; }
380
381size_t llama_file::read_alignment() const { return pimpl->read_alignment(); }
382bool llama_file::has_direct_io() const { return pimpl->has_direct_io(); }
383
384int llama_file::file_id() const {
385#ifdef _WIN32
386    return _fileno(pimpl->fp);
387#else
388    if (pimpl->fd != -1) {
389        return pimpl->fd;
390    }
391#if defined(fileno)
392    return fileno(pimpl->fp);
393#else
394    return ::fileno(pimpl->fp);
395#endif
396#endif
397}
398
399void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
400void llama_file::read_raw(void * ptr, size_t len) { pimpl->read_raw(ptr, len); }
401#ifdef _WIN32
402void llama_file::read_raw_unsafe(void * ptr, size_t len) { pimpl->read_raw(ptr, len); }
403#else
404void llama_file::read_raw_unsafe(void * ptr, size_t len) { pimpl->read_raw_unsafe(ptr, len); }
405#endif
406
407uint32_t llama_file::read_u32() { return pimpl->read_u32(); }
408
409void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
410void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
411
412// llama_mmap
413
414struct llama_mmap::impl {
415#ifdef _POSIX_MAPPED_FILES
416    std::vector<std::pair<size_t, size_t>> mapped_fragments;
417
418    impl(struct llama_file * file, size_t prefetch, bool numa) {
419        size = file->size();
420        int fd = file->file_id();
421        int flags = MAP_SHARED;
422        if (numa) { prefetch = 0; }
423#ifdef __linux__
424        if (posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) {
425            LLAMA_LOG_WARN("warning: posix_fadvise(.., POSIX_FADV_SEQUENTIAL) failed: %s\n",
426                    strerror(errno));
427        }
428        if (prefetch) { flags |= MAP_POPULATE; }
429#endif
430        addr = mmap(NULL, file->size(), PROT_READ, flags, fd, 0);
431        if (addr == MAP_FAILED) {
432            throw std::runtime_error(format("mmap failed: %s", strerror(errno)));
433        }
434
435        if (prefetch > 0) {
436            if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
437                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
438                        strerror(errno));
439            }
440        }
441        if (numa) {
442            if (posix_madvise(addr, file->size(), POSIX_MADV_RANDOM)) {
443                LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_RANDOM) failed: %s\n",
444                        strerror(errno));
445            }
446        }
447
448        mapped_fragments.emplace_back(0, file->size());
449    }
450
451    static void align_range(size_t * first, size_t * last, size_t page_size) {
452        size_t offset_in_page = *first & (page_size - 1);
453        size_t offset_to_page = offset_in_page == 0 ? 0 : page_size - offset_in_page;
454        *first += offset_to_page;
455
456        *last = *last & ~(page_size - 1);
457
458        if (*last <= *first) {
459            *last = *first;
460        }
461    }
462
463    void unmap_fragment(size_t first, size_t last) {
464        int page_size = sysconf(_SC_PAGESIZE);
465        align_range(&first, &last, page_size);
466        size_t len = last - first;
467
468        if (len == 0) {
469            return;
470        }
471
472        GGML_ASSERT(first % page_size == 0);
473        GGML_ASSERT(last % page_size == 0);
474        GGML_ASSERT(last > first);
475
476        void * next_page_start = (uint8_t *) addr + first;
477
478        if (munmap(next_page_start, len)) {
479            LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
480        }
481
482        std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
483        for (const auto & frag : mapped_fragments) {
484            if (frag.first < first && frag.second > last) {
485                new_mapped_fragments.emplace_back(frag.first, first);
486                new_mapped_fragments.emplace_back(last, frag.second);
487            } else if (frag.first < first && frag.second > first) {
488                new_mapped_fragments.emplace_back(frag.first, first);
489            } else if (frag.first < last && frag.second > last) {
490                new_mapped_fragments.emplace_back(last, frag.second);
491            } else if (frag.first >= first && frag.second <= last) {
492            } else {
493                new_mapped_fragments.push_back(frag);
494            }
495        }
496        mapped_fragments = std::move(new_mapped_fragments);
497    }
498
499    ~impl() {
500        for (const auto & frag : mapped_fragments) {
501            if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
502                LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
503            }
504        }
505    }
506#elif defined(_WIN32)
    // Maps the whole file read-only through a Win32 file mapping.
    //   prefetch: when nonzero, up to min(size, prefetch) bytes are passed to
    //             PrefetchVirtualMemory if that function can be resolved at run time
    //   numa:     unused in this implementation
    // Throws std::runtime_error when the mapping object or the view cannot be created.
    impl(struct llama_file * file, size_t prefetch, bool numa) {
        GGML_UNUSED(numa);

        size = file->size();

        HANDLE hFile = (HANDLE) _get_osfhandle(file->file_id());

        HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);

        if (hMapping == NULL) {
            DWORD error = GetLastError();
            throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
        }

        addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
        // capture the error code before CloseHandle gets a chance to change it
        DWORD error = GetLastError();
        // the mapping handle is closed right away, on success and on failure alike
        CloseHandle(hMapping);

        if (addr == NULL) {
            throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
        }

        if (prefetch > 0) {
#if _WIN32_WINNT >= 0x602
            // PrefetchVirtualMemory is looked up dynamically instead of being linked directly
            BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
            HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");

            pPrefetchVirtualMemory = (decltype(pPrefetchVirtualMemory))(void *) GetProcAddress(hKernel32, "PrefetchVirtualMemory");

            if (pPrefetchVirtualMemory) {
                // advise the kernel to preemptively load the initial pages; failure is only logged
                WIN32_MEMORY_RANGE_ENTRY range;
                range.VirtualAddress = addr;
                range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
                if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
                    LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
                            llama_format_win_err(GetLastError()).c_str());
                }
            }
#else
            LLAMA_LOG_DEBUG("skipping PrefetchVirtualMemory because _WIN32_WINNT < 0x602\n");
#endif
        }
    }
550
551    void unmap_fragment(size_t first, size_t last) {
552        GGML_UNUSED(first);
553        GGML_UNUSED(last);
554    }
555
556    ~impl() {
557        if (!UnmapViewOfFile(addr)) {
558            LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
559                    llama_format_win_err(GetLastError()).c_str());
560        }
561    }
562#else
563    impl(struct llama_file * file, size_t prefetch, bool numa) {
564        GGML_UNUSED(file);
565        GGML_UNUSED(prefetch);
566        GGML_UNUSED(numa);
567
568        throw std::runtime_error("mmap not supported");
569    }
570
571    void unmap_fragment(size_t first, size_t last) {
572        GGML_UNUSED(first);
573        GGML_UNUSED(last);
574
575        throw std::runtime_error("mmap not supported");
576    }
577#endif
578
579    void * addr;
580    size_t size;
581};
582
583llama_mmap::llama_mmap(struct llama_file * file, size_t prefetch, bool numa) : pimpl(std::make_unique<impl>(file, prefetch, numa)) {}
584llama_mmap::~llama_mmap() = default;
585
586size_t llama_mmap::size() const { return pimpl->size; }
587void * llama_mmap::addr() const { return pimpl->addr; }
588
589void llama_mmap::unmap_fragment(size_t first, size_t last) { pimpl->unmap_fragment(first, last); }
590
591#if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
592const bool llama_mmap::SUPPORTED  = true;
593#else
594const bool llama_mmap::SUPPORTED  = false;
595#endif
596
597// llama_mlock
598
599struct llama_mlock::impl {
600#ifdef _POSIX_MEMLOCK_RANGE
601    static size_t lock_granularity() {
602        return (size_t) sysconf(_SC_PAGESIZE);
603    }
604
605    bool raw_lock(const void * addr, size_t size) const {
606        if (!mlock(addr, size)) {
607            return true;
608        }
609
610#ifdef __APPLE__
611#define MLOCK_SUGGESTION \
612        "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
613        "decreasing 'vm.global_no_user_wire_amount'.  Also try increasing RLIMIT_MEMLOCK (ulimit -l).\n"
614#else
615#define MLOCK_SUGGESTION \
616        "Try increasing RLIMIT_MEMLOCK ('ulimit -l' as root).\n"
617#endif
618
619        char* errmsg = std::strerror(errno);
620        bool suggest = (errno == ENOMEM);
621#if defined(TARGET_OS_VISION) || defined(TARGET_OS_TV) || defined(_AIX) || defined(__HAIKU__)
622        // visionOS/tvOS/Haiku don't support RLIMIT_MEMLOCK
623        // Skip resource limit checks on these platforms
624        suggest = false;
625#else
626        struct rlimit lock_limit;
627        if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
628            suggest = false;
629        }
630        if (suggest && ((uint64_t)lock_limit.rlim_max > (uint64_t)lock_limit.rlim_cur + size)) {
631            suggest = false;
632        }
633#endif
634
635        LLAMA_LOG_WARN("warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
636                size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
637        return false;
638    }
639
640    static void raw_unlock(void * addr, size_t size) {
641        if (munlock(addr, size)) {
642            LLAMA_LOG_WARN("warning: failed to munlock buffer: %s\n", std::strerror(errno));
643        }
644    }
645#elif defined(_WIN32)
646    static size_t lock_granularity() {
647        SYSTEM_INFO si;
648        GetSystemInfo(&si);
649        return (size_t) si.dwPageSize;
650    }
651
652    bool raw_lock(void * ptr, size_t len) const {
653        for (int tries = 1; ; tries++) {
654            if (VirtualLock(ptr, len)) {
655                return true;
656            }
657            if (tries == 2) {
658                LLAMA_LOG_WARN("warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
659                    len, size, llama_format_win_err(GetLastError()).c_str());
660                return false;
661            }
662
663            SIZE_T min_ws_size, max_ws_size;
664            if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
665                LLAMA_LOG_WARN("warning: GetProcessWorkingSetSize failed: %s\n",
666                        llama_format_win_err(GetLastError()).c_str());
667                return false;
668            }
669            size_t increment = len + 1048576;
670            min_ws_size += increment;
671            max_ws_size += increment;
672            if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
673                LLAMA_LOG_WARN("warning: SetProcessWorkingSetSize failed: %s\n",
674                        llama_format_win_err(GetLastError()).c_str());
675                return false;
676            }
677        }
678    }
679
680    static void raw_unlock(void * ptr, size_t len) {
681        if (!VirtualUnlock(ptr, len)) {
682            LLAMA_LOG_WARN("warning: failed to VirtualUnlock buffer: %s\n",
683                    llama_format_win_err(GetLastError()).c_str());
684        }
685    }
686#else
687    static size_t lock_granularity() {
688        return (size_t) 65536;
689    }
690
691    bool raw_lock(const void * addr, size_t len) const {
692        LLAMA_LOG_WARN("warning: mlock not supported on this system\n");
693        return false;
694    }
695
696    static void raw_unlock(const void * addr, size_t len) {}
697#endif
698
699    impl() : addr(NULL), size(0), failed_already(false) {}
700
701    void init(void * ptr) {
702        GGML_ASSERT(addr == NULL && size == 0);
703        addr = ptr;
704    }
705
706    void grow_to(size_t target_size) {
707        GGML_ASSERT(addr);
708        if (failed_already) {
709            return;
710        }
711        size_t granularity = lock_granularity();
712        target_size = (target_size + granularity - 1) & ~(granularity - 1);
713        if (target_size > size) {
714            if (raw_lock((uint8_t *) addr + size, target_size - size)) {
715                size = target_size;
716            } else {
717                failed_already = true;
718            }
719        }
720    }
721
722    void * addr;
723    size_t size;
724
725    bool failed_already;
726};
727
728llama_mlock::llama_mlock() : pimpl(std::make_unique<impl>()) {}
729llama_mlock::~llama_mlock() = default;
730
731void llama_mlock::init(void * ptr) { pimpl->init(ptr); }
732void llama_mlock::grow_to(size_t target_size) { pimpl->grow_to(target_size); }
733
734#if defined(_POSIX_MEMLOCK_RANGE) || defined(_WIN32)
735const bool llama_mlock::SUPPORTED = true;
736#else
737const bool llama_mlock::SUPPORTED = false;
738#endif
739
// Returns the value of PATH_MAX (defined as MAX_PATH on Windows when not already provided).
size_t llama_path_max() {
    const size_t path_max = PATH_MAX;
    return path_max;
}