#pragma once

#include "llama.h"

#include <vector>

struct llama_vocab;
struct llama_grammar;

// sampler chain

struct llama_sampler_chain {
    llama_sampler_chain_params params;

    // has .backend_init() been called?
    bool is_init = false;

    // a sampler in the chain, together with a flag indicating whether it is
    // executed by the backend
    struct info {
        bool is_backend;

        llama_sampler * ptr;
    };

    std::vector<info> samplers;

    // pre-allocated buffer for llama_sampler_sample to avoid repeated allocations
    std::vector<llama_token_data> cur;

    // timing

    mutable int64_t t_sample_us; // total time spent sampling, in microseconds

    mutable int32_t n_sample;    // number of sampled tokens
};
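
// example (sketch): typical construction and use of a sampler chain through the
// public API in llama.h; `ctx` is assumed to be an existing llama_context with
// freshly decoded logits:
//
//     llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
//
//     llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
//     llama_sampler_chain_add(chain, llama_sampler_init_temp(0.8f));
//     llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
//
//     // applies the chained samplers in order to the logits of the last token
//     // (idx == -1), reusing the pre-allocated `cur` buffer above
//     llama_token id = llama_sampler_sample(chain, ctx, -1);
//
//     llama_sampler_free(chain); // also frees the samplers added to the chain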

// variant of the DRY sampler used by the tests: takes the sequence breakers as
// pre-tokenized token sequences instead of strings
struct llama_sampler * llama_sampler_init_dry_testing(
        int32_t context_size,
        float   dry_multiplier,
        float   dry_base,
        int32_t dry_allowed_length,
        int32_t dry_penalty_last_n,
        const std::vector<std::vector<llama_token>> & seq_breakers);
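
// example (sketch): constructing a DRY sampler directly in a test; the token
// ids and parameter values below are placeholders (sequence-breaker ids are
// model-dependent):
//
//     const std::vector<std::vector<llama_token>> seq_breakers = {
//         { 13 },       // e.g. a single-token breaker such as a newline
//         { 345, 678 }, // a multi-token breaker sequence
//     };
//
//     llama_sampler * smpl = llama_sampler_init_dry_testing(
//         /*context_size       =*/ 4096,
//         /*dry_multiplier     =*/ 0.8f,
//         /*dry_base           =*/ 1.75f,
//         /*dry_allowed_length =*/ 2,
//         /*dry_penalty_last_n =*/ -1, // -1 = scan the entire context
//         seq_breakers);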