#ifndef MODELS_H
#define MODELS_H

#include "llama.h"
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef enum {
    PROMPT_STYLE_PLAIN = 0, /* prompt is passed to the model verbatim */
    PROMPT_STYLE_CHAT  = 1, /* prompt is wrapped in the model's chat template */
    PROMPT_STYLE_T5    = 2, /* encoder-decoder (T5-style) prompting */
} PromptStyle;
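
/*
 * Illustrative helper (not part of the original configuration data): maps a
 * PromptStyle to a printable name, e.g. for logging which template a model
 * expects. A sketch only; how the CHAT and T5 styles are actually rendered
 * into prompts is up to the caller.
 */
static inline const char *prompt_style_name(PromptStyle style) {
    switch (style) {
        case PROMPT_STYLE_PLAIN: return "plain";
        case PROMPT_STYLE_CHAT:  return "chat";
        case PROMPT_STYLE_T5:    return "t5";
        default:                 return "unknown";
    }
}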

typedef struct {
    const char *name;             /* lookup key used by get_model_by_name() */
    const char *filepath;         /* path to the GGUF weights */
    const char *embed_model_name; /* name of the model used for embeddings */
    int n_gpu_layers;             /* layers to offload to the GPU (0 = CPU only) */
    bool use_mmap;                /* memory-map the model file instead of reading it */
    int n_ctx;                    /* context window size in tokens */
    int n_batch;                  /* maximum tokens processed per batch */
    bool embeddings;              /* open the context in embedding mode */
    int n_predict;                /* maximum tokens to generate */
    float temperature;            /* sampling temperature */
    float min_p;                  /* min-p sampling threshold */
    int top_k;                    /* top-k sampling cutoff */
    float top_p;                  /* nucleus (top-p) sampling cutoff */
    int repeat_last_n;            /* window of recent tokens considered for penalties */
    float repeat_penalty;         /* repetition penalty */
    float freq_penalty;           /* frequency penalty */
    float presence_penalty;       /* presence penalty */
    uint32_t seed;                /* RNG seed for sampling */
    PromptStyle prompt_style;     /* how prompts should be formatted for this model */
} ModelConfig;
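
/*
 * Sketch: helpers that copy a ModelConfig into llama.cpp's parameter structs.
 * These are illustrative additions, not part of the original header; they only
 * touch fields (n_gpu_layers, use_mmap, n_ctx, n_batch, embeddings) that
 * llama_model_params / llama_context_params expose in current llama.cpp.
 */
static inline struct llama_model_params model_params_from_config(const ModelConfig *cfg) {
    struct llama_model_params mp = llama_model_default_params();
    mp.n_gpu_layers = cfg->n_gpu_layers; /* 0 keeps every layer on the CPU */
    mp.use_mmap     = cfg->use_mmap;
    return mp;
}

static inline struct llama_context_params context_params_from_config(const ModelConfig *cfg) {
    struct llama_context_params cp = llama_context_default_params();
    cp.n_ctx      = (uint32_t) cfg->n_ctx;
    cp.n_batch    = (uint32_t) cfg->n_batch;
    cp.embeddings = cfg->embeddings;
    return cp;
}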

/* Table of known model configurations, looked up by name via get_model_by_name(). */
static const ModelConfig models[] = {
    {
        .name = "qwen3",
        .filepath = "models/Qwen3-0.6B-UD-Q6_K_XL.gguf",
        .embed_model_name = "qwen3",
        .n_gpu_layers = 0,
        .use_mmap = false,
        .n_ctx = 2048,
        .n_batch = 4096,
        .embeddings = false,
        .n_predict = 128,
        .temperature = 0.6f,
        .min_p = 0.05f,
        .top_k = 40,
        .top_p = 0.9f,
        .repeat_last_n = 64,
        .repeat_penalty = 1.1f,
        .freq_penalty = 0.0f,
        .presence_penalty = 0.0f,
        .seed = LLAMA_DEFAULT_SEED,
        .prompt_style = PROMPT_STYLE_CHAT,
    },
    {
        .name = "tinyllama-1.1b",
        .filepath = "models/tinyllama-1.1b.gguf",
        .embed_model_name = "qwen3",
        .n_gpu_layers = 0,
        .use_mmap = false,
        .n_ctx = 2048,
        .n_batch = 4096,
        .embeddings = false,
        .n_predict = 128,
        .temperature = 0.7f,
        .min_p = 0.05f,
        .top_k = 40,
        .top_p = 0.9f,
        .repeat_last_n = 64,
        .repeat_penalty = 1.1f,
        .freq_penalty = 0.0f,
        .presence_penalty = 0.0f,
        .seed = LLAMA_DEFAULT_SEED,
        .prompt_style = PROMPT_STYLE_PLAIN,
    },
    {
        .name = "tinyllama-1",
        .filepath = "models/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf",
        .embed_model_name = "qwen3",
        .n_gpu_layers = 0,
        .use_mmap = false,
        .n_ctx = 2048,
        .n_batch = 4096,
        .embeddings = false,
        .n_predict = 128,
        .temperature = 0.7f,
        .min_p = 0.05f,
        .top_k = 40,
        .top_p = 0.9f,
        .repeat_last_n = 64,
        .repeat_penalty = 1.1f,
        .freq_penalty = 0.0f,
        .presence_penalty = 0.0f,
        .seed = LLAMA_DEFAULT_SEED,
        .prompt_style = PROMPT_STYLE_PLAIN,
    },
    {
        .name = "flan-t5-small",
        .filepath = "models/flan-t5-small.F16.gguf",
        .embed_model_name = "qwen3",
        .n_gpu_layers = 0,
        .use_mmap = false,
        .n_ctx = 512,
        .n_batch = 512,
        .embeddings = false,
        .n_predict = 128,
        .temperature = 0.2f,
        .min_p = 0.05f,
        .top_k = 40,
        .top_p = 0.9f,
        .repeat_last_n = 64,
        .repeat_penalty = 1.1f,
        .freq_penalty = 0.0f,
        .presence_penalty = 0.0f,
        .seed = LLAMA_DEFAULT_SEED,
        .prompt_style = PROMPT_STYLE_T5,
    },
    {
        .name = "phi-4-mini-instruct",
        .filepath = "models/Phi-4-mini-instruct.Q2_K.gguf",
        .embed_model_name = "qwen3",
        .n_gpu_layers = 0,
        .use_mmap = false,
        .n_ctx = 4096,
        .n_batch = 4096,
        .embeddings = false,
        .n_predict = 128,
        .temperature = 0.6f,
        .min_p = 0.05f,
        .top_k = 40,
        .top_p = 0.9f,
        .repeat_last_n = 64,
        .repeat_penalty = 1.1f,
        .freq_penalty = 0.0f,
        .presence_penalty = 0.0f,
        .seed = LLAMA_DEFAULT_SEED,
        .prompt_style = PROMPT_STYLE_CHAT,
    },
};

/* Look up a model configuration by name; returns NULL if no entry matches. */
static inline const ModelConfig *get_model_by_name(const char *name) {
    if (name == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < sizeof(models) / sizeof(models[0]); i++) {
        if (models[i].name != NULL && strcmp(models[i].name, name) == 0) {
            return &models[i];
        }
    }
    return NULL;
}
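
/*
 * Example usage (a sketch, assuming the sampler-chain API present in recent
 * llama.cpp; exact entry points vary between versions):
 *
 *     const ModelConfig *cfg = get_model_by_name("qwen3");
 *     struct llama_sampler *chain =
 *         llama_sampler_chain_init(llama_sampler_chain_default_params());
 *     llama_sampler_chain_add(chain, llama_sampler_init_top_k(cfg->top_k));
 *     llama_sampler_chain_add(chain, llama_sampler_init_top_p(cfg->top_p, 1));
 *     llama_sampler_chain_add(chain, llama_sampler_init_min_p(cfg->min_p, 1));
 *     llama_sampler_chain_add(chain, llama_sampler_init_temp(cfg->temperature));
 *     llama_sampler_chain_add(chain, llama_sampler_init_dist(cfg->seed));
 *
 * The repeat/frequency/presence penalty fields are left out of the sketch
 * because the corresponding sampler constructor has changed signature across
 * llama.cpp releases.
 */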

#endif /* MODELS_H */