diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-20 13:54:21 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-20 13:54:21 +0100 |
| commit | 306c3cb6924c6231c102ff7d75aa3f68e3618ca2 (patch) | |
| tree | 1a41c8c4b70b43796cc3fc14f0c9e52b39651e2f /models.h | |
| parent | 201bbf3e917066fb05ff1f10f7166d262b8ed2cf (diff) | |
| download | llmnpc-306c3cb6924c6231c102ff7d75aa3f68e3618ca2.tar.gz | |
Update to multi model for embeddings and prompting
Diffstat (limited to 'models.h')
| -rw-r--r-- | models.h | 92 |
1 file changed, 88 insertions(+), 4 deletions(-)
@@ -5,55 +5,139 @@
 #include <stddef.h>
 #include <string.h>
 
+typedef enum {
+	PROMPT_STYLE_PLAIN = 0,
+	PROMPT_STYLE_CHAT = 1,
+	PROMPT_STYLE_T5 = 2,
+} PromptStyle;
+
 typedef struct {
 	const char *name;
 	const char *filepath;
+	const char *embed_model_name;
 	int n_gpu_layers;
 	bool use_mmap;
 	int n_ctx;
 	int n_batch;
 	bool embeddings;
+	int n_predict;
 	float temperature;
 	float min_p;
+	int top_k;
+	float top_p;
+	int repeat_last_n;
+	float repeat_penalty;
+	float freq_penalty;
+	float presence_penalty;
 	uint32_t seed;
+	PromptStyle prompt_style;
 } ModelConfig;
 
 ModelConfig models[] = {
 	{
+		.name = "qwen3",
+		.filepath = "models/Qwen3-0.6B-UD-Q6_K_XL.gguf",
+		.embed_model_name = "qwen3",
+		.n_gpu_layers = 0,
+		.use_mmap = false,
+		.n_ctx = 2048,
+		.n_batch = 4096,
+		.embeddings = false,
+		.n_predict = 128,
+		.temperature = 0.6f,
+		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
+		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_CHAT,
+	},
+	{
+		.name = "tinyllama-1.1b",
+		.filepath = "models/tinyllama-1.1b.gguf",
+		.embed_model_name = "qwen3",
+		.n_gpu_layers = 0,
+		.use_mmap = false,
+		.n_ctx = 2048,
+		.n_batch = 4096,
+		.embeddings = false,
+		.n_predict = 128,
+		.temperature = 0.7f,
+		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
+		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_PLAIN,
+	},
+	{
 		.name = "tinyllama-1",
 		.filepath = "models/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf",
+		.embed_model_name = "qwen3",
 		.n_gpu_layers = 0,
 		.use_mmap = false,
 		.n_ctx = 2048,
 		.n_batch = 4096,
 		.embeddings = false,
-		.temperature = 0.8f,
+		.n_predict = 128,
+		.temperature = 0.7f,
 		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
 		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_PLAIN,
 	},
 	{
 		.name = "flan-t5-small",
 		.filepath = "models/flan-t5-small.F16.gguf",
+		.embed_model_name = "qwen3",
 		.n_gpu_layers = 0,
 		.use_mmap = false,
 		.n_ctx = 512,
 		.n_batch = 512,
 		.embeddings = false,
-		.temperature = 0.8f,
+		.n_predict = 128,
+		.temperature = 0.2f,
 		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
 		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_T5,
 	},
 	{
 		.name = "phi-4-mini-instruct",
 		.filepath = "models/Phi-4-mini-instruct.Q2_K.gguf",
+		.embed_model_name = "qwen3",
 		.n_gpu_layers = 0,
 		.use_mmap = false,
-		.n_ctx = 131072,
+		.n_ctx = 4096,
 		.n_batch = 4096,
 		.embeddings = false,
-		.temperature = 0.8f,
+		.n_predict = 128,
+		.temperature = 0.6f,
 		.min_p = 0.05f,
+		.top_k = 40,
+		.top_p = 0.9f,
+		.repeat_last_n = 64,
+		.repeat_penalty = 1.1f,
+		.freq_penalty = 0.0f,
+		.presence_penalty = 0.0f,
 		.seed = LLAMA_DEFAULT_SEED,
+		.prompt_style = PROMPT_STYLE_CHAT,
 	},
 };
 
