#pragma once

#include "llama.h"

#include <vector>

struct llama_vocab;
struct llama_grammar;

// sampler chain

struct llama_sampler_chain {
    llama_sampler_chain_params params;

    // whether .backend_init() has been called
    bool is_init = false;

    // a sampler in the chain
    struct info {
        bool is_backend;

        llama_sampler * ptr;
    };

    std::vector<info> samplers;

    // pre-allocated buffer, reused by llama_sampler_sample to avoid repeated allocations
    std::vector<llama_token_data> cur;

    // timing

    mutable int64_t t_sample_us = 0;

    mutable int32_t n_sample = 0;
};
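
// Example (sketch): building and sampling with a chain through the public
// llama.h API. Illustrative only; assumes a valid llama_context * ctx:
//
//   llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
//
//   llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
//   llama_sampler_chain_add(chain, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
//
//   // sample a token using the logits of the last output position
//   const llama_token tok = llama_sampler_sample(chain, ctx, -1);
//
//   llama_sampler_free(chain); // also frees the samplers added to the chain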

struct llama_sampler * llama_sampler_init_dry_testing(
        int32_t context_size,
        float   dry_multiplier,
        float   dry_base,
        int32_t dry_allowed_length,
        int32_t dry_penalty_last_n,
        const std::vector<std::vector<llama_token>> & seq_breakers);
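
// Example (sketch): constructing a DRY sampler directly for tests. The
// parameter values below are illustrative, not recommended defaults, and
// token id 13 as a sequence breaker is only an assumption (token ids are
// model-dependent):
//
//   const std::vector<std::vector<llama_token>> seq_breakers = {
//       { 13 }, // hypothetical newline token id
//   };
//
//   llama_sampler * smpl = llama_sampler_init_dry_testing(
//       /* context_size       = */ 4096,
//       /* dry_multiplier     = */ 0.8f,
//       /* dry_base           = */ 1.75f,
//       /* dry_allowed_length = */ 2,
//       /* dry_penalty_last_n = */ -1,
//       seq_breakers);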