diff options
Diffstat (limited to 'llama.cpp/src/llama-cparams.h')
| -rw-r--r-- | llama.cpp/src/llama-cparams.h | 44 |
1 file changed, 44 insertions, 0 deletions
#pragma once

#include "llama.h"

#include <cstdint>

// Hard upper bound on the number of distinct sequence ids a context can track.
#define LLAMA_MAX_SEQ 256

// Compute/runtime parameters for a llama context — the knobs chosen at
// context-creation time, as opposed to the model hyperparameters that are
// loaded from the model file. Plain aggregate; no behavior of its own.
struct llama_cparams {
    uint32_t n_ctx;             // context size used during inference
    uint32_t n_ctx_seq;         // context for a single sequence
    uint32_t n_batch;           // NOTE(review): presumably the logical max batch size — confirm against llama_context_params
    uint32_t n_ubatch;          // NOTE(review): presumably the physical max micro-batch size — confirm
    uint32_t n_seq_max;         // max number of parallel sequences (bounded by LLAMA_MAX_SEQ)
    int32_t  n_threads;         // number of threads to use for generation
    int32_t  n_threads_batch;   // number of threads to use for batch processing

    // RoPE (rotary position embedding) frequency parameters
    float rope_freq_base;
    float rope_freq_scale;

    uint32_t n_ctx_orig_yarn;   // NOTE(review): per the name, the original training context used for YaRN scaling — confirm
    // These hyperparameters are not exposed in GGUF, because all
    // existing YaRN models use the same values for them.
    float yarn_ext_factor;
    float yarn_attn_factor;
    float yarn_beta_fast;
    float yarn_beta_slow;

    // Feature flags. Semantics follow the matching llama_context_params
    // fields; names are self-descriptive except where noted.
    bool embeddings;
    bool causal_attn;
    bool offload_kqv;
    bool flash_attn;
    bool auto_fa;               // NOTE(review): presumably "auto-select flash attention" — confirm
    bool no_perf;               // NOTE(review): presumably "disable performance measurements" — confirm
    bool warmup;
    bool op_offload;
    bool kv_unified;
    bool pipeline_parallel;

    enum llama_pooling_type pooling_type;   // see llama_pooling_type in llama.h

    // Eval callback for the ggml backend scheduler; cb_eval_user_data is the
    // opaque pointer handed back on each invocation.
    ggml_backend_sched_eval_callback cb_eval;
    void * cb_eval_user_data;
};
