From b333b06772c89d96aacb5490d6a219fba7c09cc6 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Thu, 12 Feb 2026 20:57:17 +0100 Subject: Engage! --- llama.cpp/src/llama-cparams.h | 44 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 llama.cpp/src/llama-cparams.h (limited to 'llama.cpp/src/llama-cparams.h') diff --git a/llama.cpp/src/llama-cparams.h b/llama.cpp/src/llama-cparams.h new file mode 100644 index 0000000..2da3bbd --- /dev/null +++ b/llama.cpp/src/llama-cparams.h @@ -0,0 +1,44 @@ +#pragma once + +#include "llama.h" + +#include <cstdint> + +#define LLAMA_MAX_SEQ 256 + +struct llama_cparams { + uint32_t n_ctx; // context size used during inference + uint32_t n_ctx_seq; // context for a single sequence + uint32_t n_batch; + uint32_t n_ubatch; + uint32_t n_seq_max; + int32_t n_threads; // number of threads to use for generation + int32_t n_threads_batch; // number of threads to use for batch processing + + float rope_freq_base; + float rope_freq_scale; + + uint32_t n_ctx_orig_yarn; + // These hyperparameters are not exposed in GGUF, because all + // existing YaRN models use the same values for them. + float yarn_ext_factor; + float yarn_attn_factor; + float yarn_beta_fast; + float yarn_beta_slow; + + bool embeddings; + bool causal_attn; + bool offload_kqv; + bool flash_attn; + bool auto_fa; + bool no_perf; + bool warmup; + bool op_offload; + bool kv_unified; + bool pipeline_parallel; + + enum llama_pooling_type pooling_type; + + ggml_backend_sched_eval_callback cb_eval; + void * cb_eval_user_data; +}; -- cgit v1.2.3