1#pragma once
2
3#include <cstdint>
4#include <vector>
5#include <cstddef>
6
//
// common_ngram_mod
// ref: https://github.com/ggml-org/llama.cpp/pull/19164
//

// basic n-gram hasher
// Basic n-gram hasher backed by a flat std::vector of entries.
//
// Only the declaration is visible in this header; the member functions are
// defined out of line. Notes below that depend on that implementation are
// marked as assumptions to be confirmed against it.
struct common_ngram_mod {
    // Element type used both for the input tokens and for the stored entries.
    using entry_t = int32_t;

    // Sentinel entry value (-1); matches the "not found" result documented on get().
    static constexpr entry_t EMPTY = -1;

    // n    - n-gram size to hash (stored in the private member `n`)
    // size - NOTE(review): presumably the number of entries to allocate - confirm
    common_ngram_mod(uint16_t n, size_t size);

    // Maps `tokens` to a size_t.
    // NOTE(review): presumably the slot index into `entries` computed from the
    // first `n` tokens - confirm against the implementation.
    size_t idx(const entry_t * tokens) const;

    // Records the n-gram pointed to by `tokens`.
    // NOTE(review): how many tokens are read through the pointer is not visible
    // here - callers should confirm the required buffer length.
    void add(const entry_t * tokens);

    // Looks up the n-gram pointed to by `tokens`; returns -1 if not found.
    entry_t get(const entry_t * tokens) const;

    // NOTE(review): presumably restores the container to its empty state - confirm.
    void reset();

    // Accessors; presumably thin getters for `n` and `used` - confirm.
    size_t get_n() const;
    size_t get_used() const;

    // NOTE(review): presumably the entry count and its footprint in bytes - confirm.
    size_t size() const;
    size_t size_bytes() const;

private:
    // Default member initializers keep these from ever holding indeterminate
    // values, regardless of how a constructor is written.
    size_t n = 0; // ngram size to hash

    size_t used = 0; // NOTE(review): presumably the count of occupied entries - confirm

    std::vector<entry_t> entries;
};