llmnpc - llama.cpp/common/ngram-cache.h

Path: llmnpc / llama.cpp / common / ngram-cache.h (raw)
  1#pragma once
  2
  3#include "llama.h"
  4
  5#include <unordered_map>
  6#include <string>
  7#include <vector>
  8
  9#define LLAMA_NGRAM_MIN    1
 10#define LLAMA_NGRAM_MAX    4
 11#define LLAMA_NGRAM_STATIC 2
 12
 13// Data structures to map n-grams to empirical token probabilities:
 14
 15struct common_ngram {
 16    llama_token tokens[LLAMA_NGRAM_MAX];
 17
 18    common_ngram() {
 19        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
 20            tokens[i] = LLAMA_TOKEN_NULL;
 21        }
 22    }
 23
 24    common_ngram(const llama_token * input, const int ngram_size) {
 25        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
 26            tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
 27        }
 28    }
 29
 30    bool operator==(const common_ngram & other) const {
 31        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
 32            if (tokens[i] != other.tokens[i]) {
 33                return false;
 34            }
 35        }
 36        return true;
 37    }
 38};
 39
 40struct common_token_hash_function {
 41    size_t operator()(const llama_token token) const {
 42        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
 43        return token * 11400714819323198485llu;
 44    }
 45};
 46
 47struct common_ngram_hash_function {
 48    size_t operator()(const common_ngram & ngram) const {
 49        size_t hash = common_token_hash_function{}(ngram.tokens[0]);
 50        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
 51            hash ^= common_token_hash_function{}(ngram.tokens[i]);
 52        }
 53        return hash;
 54    }
 55};
 56
 57// token -> number of times token has been seen
 58typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;
 59
 60// n-gram -> empirical distribution of following tokens
 61typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;
 62
 63
 64// Update an ngram cache with tokens.
 65// ngram_cache:         the cache to modify.
 66// ngram_min/ngram_max: the min/max size of the ngrams to extract from inp_data.
 67// inp_data:            the token sequence with which to update ngram_cache.
 68// nnew:                how many new tokens have been appended to inp_data since the last call to this function.
 69// print_progress:      whether to print progress to stderr.
 70//
 71// In order to get correct results inp_data can ONLY BE APPENDED TO.
 72// Changes in the middle need a complete rebuild.
 73void common_ngram_cache_update(
 74    common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
 75
 76// Try to draft tokens from ngram caches.
 77// inp:                the tokens generated so far.
 78// draft:              the token sequence to draft. Expected to initially contain the previously sampled token.
 79// n_draft:            maximum number of tokens to add to draft.
 80// ngram_min/gram_max: the min/max size of the ngrams in nc_context and nc_dynamic.
 81// nc_context:         ngram cache based on current context.
 82// nc_dynamic:         ngram cache based on previous user generations.
 83// nc_static:          ngram cache generated from a large text corpus, used for validation.
 84void common_ngram_cache_draft(
 85    std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
 86    common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);
 87
 88// Save an ngram cache to a file.
 89// ngram_cache: the ngram cache to save.
 90// filename:    the path under which to save the ngram cache.
 91void common_ngram_cache_save(common_ngram_cache & ngram_cache, const std::string & filename);
 92
 93// Load an ngram cache saved with common_ngram_cache_save.
 94// filename: the path from which to load the ngram cache.
 95// returns:  an ngram cache containing the information saved to filename.
 96common_ngram_cache common_ngram_cache_load(const std::string & filename);
 97
 98// Merge two ngram caches.
 99// ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
100// ngram_cache_add:    the ngram cache to add to ngram_cache_target.
101void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);