1#ifndef MODELS_H
  2#define MODELS_H
  3
#include "llama.h"

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
  7
/*
 * How a raw user prompt is formatted before being fed to the model.
 * Selected per-model via ModelConfig.prompt_style below.
 */
typedef enum {
	PROMPT_STYLE_PLAIN = 0, /* prompt passed through as-is (base/completion models) */
	PROMPT_STYLE_CHAT = 1,  /* chat-template formatting (instruct models, e.g. qwen3, phi-4) */
	PROMPT_STYLE_T5 = 2,    /* encoder/decoder formatting for T5-family models */
} PromptStyle;
 13
/*
 * Static configuration for one loadable model: file location, context/batch
 * sizing, and llama.cpp-style sampling parameters.
 * NOTE(review): field meanings mirror llama.cpp conventions — confirm against
 * the code that consumes these values.
 */
typedef struct {
	const char *name;             /* short lookup key used by get_model_by_name() */
	const char *filepath;         /* path to the .gguf model file, relative to cwd */
	const char *embed_model_name; /* name of the model used for embeddings (see table below) */
	int n_gpu_layers;             /* layers to offload to GPU (0 = CPU only) */
	bool use_mmap;                /* memory-map the model file instead of reading it */
	int n_ctx;                    /* context window size in tokens */
	int n_batch;                  /* max tokens per decode batch */
	bool embeddings;              /* open the context in embedding mode */
	int n_predict;                /* max tokens to generate per request */
	float temperature;            /* sampling temperature */
	float min_p;                  /* min-p sampling cutoff */
	int top_k;                    /* top-k sampling cutoff */
	float top_p;                  /* nucleus (top-p) sampling cutoff */
	int repeat_last_n;            /* window of recent tokens considered for repeat penalty */
	float repeat_penalty;         /* penalty multiplier for repeated tokens */
	float freq_penalty;           /* frequency penalty (0 = disabled) */
	float presence_penalty;       /* presence penalty (0 = disabled) */
	uint32_t seed;                /* RNG seed; LLAMA_DEFAULT_SEED = randomize */
	PromptStyle prompt_style;     /* how to wrap the prompt for this model */
} ModelConfig;
 35
 36ModelConfig models[] = {
 37	{
 38		.name = "qwen3",
 39		.filepath = "models/Qwen3-0.6B-UD-Q6_K_XL.gguf",
 40		.embed_model_name = "qwen3",
 41		.n_gpu_layers = 0,
 42		.use_mmap = false,
 43		.n_ctx = 2048,
 44		.n_batch = 4096,
 45		.embeddings = false,
 46		.n_predict = 128,
 47		.temperature = 0.6f,
 48		.min_p = 0.05f,
 49		.top_k = 40,
 50		.top_p = 0.9f,
 51		.repeat_last_n = 64,
 52		.repeat_penalty = 1.1f,
 53		.freq_penalty = 0.0f,
 54		.presence_penalty = 0.0f,
 55		.seed = LLAMA_DEFAULT_SEED,
 56		.prompt_style = PROMPT_STYLE_CHAT,
 57	},
 58	{
 59		.name = "tinyllama-1.1b",
 60		.filepath = "models/tinyllama-1.1b.gguf",
 61		.embed_model_name = "qwen3",
 62		.n_gpu_layers = 0,
 63		.use_mmap = false,
 64		.n_ctx = 2048,
 65		.n_batch = 4096,
 66		.embeddings = false,
 67		.n_predict = 128,
 68		.temperature = 0.7f,
 69		.min_p = 0.05f,
 70		.top_k = 40,
 71		.top_p = 0.9f,
 72		.repeat_last_n = 64,
 73		.repeat_penalty = 1.1f,
 74		.freq_penalty = 0.0f,
 75		.presence_penalty = 0.0f,
 76		.seed = LLAMA_DEFAULT_SEED,
 77		.prompt_style = PROMPT_STYLE_PLAIN,
 78	},
 79	{
 80		.name = "tinyllama-1",
 81		.filepath = "models/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf",
 82		.embed_model_name = "qwen3",
 83		.n_gpu_layers = 0,
 84		.use_mmap = false,
 85		.n_ctx = 2048,
 86		.n_batch = 4096,
 87		.embeddings = false,
 88		.n_predict = 128,
 89		.temperature = 0.7f,
 90		.min_p = 0.05f,
 91		.top_k = 40,
 92		.top_p = 0.9f,
 93		.repeat_last_n = 64,
 94		.repeat_penalty = 1.1f,
 95		.freq_penalty = 0.0f,
 96		.presence_penalty = 0.0f,
 97		.seed = LLAMA_DEFAULT_SEED,
 98		.prompt_style = PROMPT_STYLE_PLAIN,
 99	},
100	{
101		.name = "flan-t5-small",
102		.filepath = "models/flan-t5-small.F16.gguf",
103		.embed_model_name = "qwen3",
104		.n_gpu_layers = 0,
105		.use_mmap = false,
106		.n_ctx = 512,
107		.n_batch = 512,
108		.embeddings = false,
109		.n_predict = 128,
110		.temperature = 0.2f,
111		.min_p = 0.05f,
112		.top_k = 40,
113		.top_p = 0.9f,
114		.repeat_last_n = 64,
115		.repeat_penalty = 1.1f,
116		.freq_penalty = 0.0f,
117		.presence_penalty = 0.0f,
118		.seed = LLAMA_DEFAULT_SEED,
119		.prompt_style = PROMPT_STYLE_T5,
120	},
121	{
122		.name = "phi-4-mini-instruct",
123		.filepath = "models/Phi-4-mini-instruct.Q2_K.gguf",
124		.embed_model_name = "qwen3",
125		.n_gpu_layers = 0,
126		.use_mmap = false,
127		.n_ctx = 4096,
128		.n_batch = 4096,
129		.embeddings = false,
130		.n_predict = 128,
131		.temperature = 0.6f,
132		.min_p = 0.05f,
133		.top_k = 40,
134		.top_p = 0.9f,
135		.repeat_last_n = 64,
136		.repeat_penalty = 1.1f,
137		.freq_penalty = 0.0f,
138		.presence_penalty = 0.0f,
139		.seed = LLAMA_DEFAULT_SEED,
140		.prompt_style = PROMPT_STYLE_CHAT,
141	},
142};
143
144const ModelConfig *get_model_by_name(const char *name) {
145	for (size_t i = 0; i < sizeof(models) / sizeof(models[0]); i++) {
146		if (models[i].name != NULL && strcmp(models[i].name, name) == 0) {
147			return &models[i];
148		}
149	}
150	return NULL;
151}
152
153#endif