From b333b06772c89d96aacb5490d6a219fba7c09cc6 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Thu, 12 Feb 2026 20:57:17 +0100 Subject: Engage! --- llama.cpp/src/llama-adapter.h | 86 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 llama.cpp/src/llama-adapter.h (limited to 'llama.cpp/src/llama-adapter.h') diff --git a/llama.cpp/src/llama-adapter.h b/llama.cpp/src/llama-adapter.h new file mode 100644 index 0000000..d275d25 --- /dev/null +++ b/llama.cpp/src/llama-adapter.h @@ -0,0 +1,86 @@ +#pragma once + +#include "llama.h" + +#include "ggml-cpp.h" + +#include +#include +#include + +// TODO: pimpl + +// +// llama_adapter_cvec +// + +struct llama_adapter_cvec { + ggml_tensor * tensor_for(int il) const; + + ggml_tensor * apply_to(ggml_context * ctx, ggml_tensor * cur, int il) const; + + bool apply( + const llama_model & model, + const float * data, + size_t len, + int32_t n_embd, + int32_t il_start, + int32_t il_end); + +private: + bool init(const llama_model & model); + + int32_t layer_start = -1; + int32_t layer_end = -1; + + std::vector ctxs; + std::vector bufs; + + std::vector tensors; // per layer +}; + +// +// llama_adapter_lora +// + +struct llama_adapter_lora_weight { + ggml_tensor * a = nullptr; + ggml_tensor * b = nullptr; + + // get actual scale based on rank and alpha + float get_scale(float alpha, float adapter_scale) const { + const float rank = (float) b->ne[0]; + const float scale = alpha ? adapter_scale * alpha / rank : adapter_scale; + return scale; + } + + llama_adapter_lora_weight() = default; + llama_adapter_lora_weight(ggml_tensor * a, ggml_tensor * b) : a(a), b(b) {} +}; + +struct llama_adapter_lora { + // map tensor name to lora_a_b + std::unordered_map ab_map; + + std::vector ctxs; + std::vector bufs; + + float alpha; + + // gguf metadata + std::unordered_map gguf_kv; + + // activated lora (aLoRA) + std::vector alora_invocation_tokens; + + llama_adapter_lora() = default; + ~llama_adapter_lora() = default; + + llama_adapter_lora_weight * get_weight(ggml_tensor * w); + + uint32_t get_n_nodes() const { + return ab_map.size() * 6u; // a, b, scale, add, 2 x mul_mat + } +}; + +using llama_adapter_loras = std::unordered_map; -- cgit v1.2.3