diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-20 13:54:21 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-20 13:54:21 +0100 |
| commit | 306c3cb6924c6231c102ff7d75aa3f68e3618ca2 (patch) | |
| tree | 1a41c8c4b70b43796cc3fc14f0c9e52b39651e2f /models.h | |
| parent | 201bbf3e917066fb05ff1f10f7166d262b8ed2cf (diff) | |
| download | llmnpc-306c3cb6924c6231c102ff7d75aa3f68e3618ca2.tar.gz | |
Update to multi model for embeddings and prompting
Diffstat (limited to 'models.h')
| -rw-r--r-- | models.h | 92 |
1 file changed, 88 insertions(+), 4 deletions(-)
@@ -5,55 +5,139 @@
 #include <stddef.h>
 #include <string.h>
 
+typedef enum {
+	PROMPT_STYLE_PLAIN = 0,
+	PROMPT_STYLE_CHAT = 1,
+	PROMPT_STYLE_T5 = 2,
+} PromptStyle;
+
 typedef struct {
 	const char *name;
 	const char *filepath;
+	const char *embed_model_name;
 	int n_gpu_layers;
 	bool use_mmap;
 	int n_ctx;
 	int n_batch;
 	bool embeddings;
+	int n_predict;
 	float temperature;
 	float min_p;
+	int top_k;
+	float top_p;
+	int repeat_last_n;
+	float repeat_penalty;
+	float freq_penalty;
+	float presence_penalty;
 	uint32_t seed;
+	PromptStyle prompt_style;
 } ModelConfig;
 
 ModelConfig models[] = {
 	{
+		.name = "qwen3",
+		.filepath = "models/Qwen3-0.6B-UD-Q6_K_XL.gguf",
+		.embed_model_name = "qwen3",
+		.n_gpu_layers = 0,
+		.use_mmap = false,
+		.n_ctx = 2048,
+		.n_batch = 4096,
+		.embeddings = false,
+		.n_predict = 128,
+		.temperature = 0.6f,
+		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
+		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_CHAT,
+	},
+	{
+		.name = "tinyllama-1.1b",
+		.filepath = "models/tinyllama-1.1b.gguf",
+		.embed_model_name = "qwen3",
+		.n_gpu_layers = 0,
+		.use_mmap = false,
+		.n_ctx = 2048,
+		.n_batch = 4096,
+		.embeddings = false,
+		.n_predict = 128,
+		.temperature = 0.7f,
+		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
+		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_PLAIN,
+	},
+	{
 		.name = "tinyllama-1",
 		.filepath = "models/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf",
+		.embed_model_name = "qwen3",
 		.n_gpu_layers = 0,
 		.use_mmap = false,
 		.n_ctx = 2048,
 		.n_batch = 4096,
 		.embeddings = false,
-		.temperature = 0.8f,
+		.n_predict = 128,
+		.temperature = 0.7f,
 		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
 		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_PLAIN,
 	},
 	{
 		.name = "flan-t5-small",
 		.filepath = "models/flan-t5-small.F16.gguf",
+		.embed_model_name = "qwen3",
 		.n_gpu_layers = 0,
 		.use_mmap = false,
 		.n_ctx = 512,
 		.n_batch = 512,
 		.embeddings = false,
-		.temperature = 0.8f,
+		.n_predict = 128,
+		.temperature = 0.2f,
 		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
 		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_T5,
 	},
 	{
 		.name = "phi-4-mini-instruct",
 		.filepath = "models/Phi-4-mini-instruct.Q2_K.gguf",
+		.embed_model_name = "qwen3",
 		.n_gpu_layers = 0,
 		.use_mmap = false,
-		.n_ctx = 131072,
+		.n_ctx = 4096,
 		.n_batch = 4096,
 		.embeddings = false,
-		.temperature = 0.8f,
+		.n_predict = 128,
+		.temperature = 0.6f,
 		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
 		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_CHAT,
 	},
 };
 
