summaryrefslogtreecommitdiff
path: root/llama.cpp/common/ngram-cache.h
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/common/ngram-cache.h
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/common/ngram-cache.h')
-rw-r--r--llama.cpp/common/ngram-cache.h101
1 files changed, 101 insertions, 0 deletions
diff --git a/llama.cpp/common/ngram-cache.h b/llama.cpp/common/ngram-cache.h
new file mode 100644
index 0000000..6e7cfea
--- /dev/null
+++ b/llama.cpp/common/ngram-cache.h
@@ -0,0 +1,101 @@
+#pragma once
+
+#include "llama.h"
+
+#include <unordered_map>
+#include <string>
+#include <vector>
+
+#define LLAMA_NGRAM_MIN 1
+#define LLAMA_NGRAM_MAX 4
+#define LLAMA_NGRAM_STATIC 2
+
+// Data structures to map n-grams to empirical token probabilities:
+
+struct common_ngram {
+ llama_token tokens[LLAMA_NGRAM_MAX];
+
+ common_ngram() {
+ for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
+ tokens[i] = LLAMA_TOKEN_NULL;
+ }
+ }
+
+ common_ngram(const llama_token * input, const int ngram_size) {
+ for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
+ tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
+ }
+ }
+
+ bool operator==(const common_ngram & other) const {
+ for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
+ if (tokens[i] != other.tokens[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
+struct common_token_hash_function {
+ size_t operator()(const llama_token token) const {
+ // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+ return token * 11400714819323198485llu;
+ }
+};
+
+struct common_ngram_hash_function {
+ size_t operator()(const common_ngram & ngram) const {
+ size_t hash = common_token_hash_function{}(ngram.tokens[0]);
+ for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
+ hash ^= common_token_hash_function{}(ngram.tokens[i]);
+ }
+ return hash;
+ }
+};
+
+// token -> number of times token has been seen
+typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;
+
+// n-gram -> empirical distribution of following tokens
+typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;
+
+
+// Update an ngram cache with tokens.
+// ngram_cache: the cache to modify.
+// ngram_min/ngram_max: the min/max size of the ngrams to extract from inp_data.
+// inp_data: the token sequence with which to update ngram_cache.
+// nnew: how many new tokens have been appended to inp_data since the last call to this function.
+// print_progress: whether to print progress to stderr.
+//
+// In order to get correct results inp_data can ONLY BE APPENDED TO.
+// Changes in the middle need a complete rebuild.
+void common_ngram_cache_update(
+ common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
+
+// Try to draft tokens from ngram caches.
+// inp: the tokens generated so far.
+// draft: the token sequence to draft. Expected to initially contain the previously sampled token.
+// n_draft: maximum number of tokens to add to draft.
+// ngram_min/gram_max: the min/max size of the ngrams in nc_context and nc_dynamic.
+// nc_context: ngram cache based on current context.
+// nc_dynamic: ngram cache based on previous user generations.
+// nc_static: ngram cache generated from a large text corpus, used for validation.
+void common_ngram_cache_draft(
+ std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
+ common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);
+
+// Save an ngram cache to a file.
+// ngram_cache: the ngram cache to save.
+// filename: the path under which to save the ngram cache.
+void common_ngram_cache_save(common_ngram_cache & ngram_cache, const std::string & filename);
+
+// Load an ngram cache saved with common_ngram_cache_save.
+// filename: the path from which to load the ngram cache.
+// returns: an ngram cache containing the information saved to filename.
+common_ngram_cache common_ngram_cache_load(const std::string & filename);
+
+// Merge two ngram caches.
+// ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
+// ngram_cache_add: the ngram cache to add to ngram_cache_target.
+void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);