diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-20 13:54:21 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-20 13:54:21 +0100 |
| commit | 306c3cb6924c6231c102ff7d75aa3f68e3618ca2 (patch) | |
| tree | 1a41c8c4b70b43796cc3fc14f0c9e52b39651e2f | |
| parent | 201bbf3e917066fb05ff1f10f7166d262b8ed2cf (diff) | |
| download | llmnpc-306c3cb6924c6231c102ff7d75aa3f68e3618ca2.tar.gz | |
Update to multi model for embeddings and prompting
| -rw-r--r-- | README.md | 34 | ||||
| -rw-r--r-- | context.c | 7 | ||||
| -rw-r--r-- | corpus/map1_bromm.txt | 42 | ||||
| -rw-r--r-- | corpus/map1_dagna.txt | 42 | ||||
| -rw-r--r-- | corpus/map1_keldor.txt | 41 | ||||
| -rw-r--r-- | corpus/map1_skara.txt | 42 | ||||
| -rw-r--r-- | corpus/map1_thrain.txt | 42 | ||||
| -rw-r--r-- | game.c | 860 | ||||
| -rw-r--r-- | maps.h | 11 | ||||
| -rw-r--r-- | models.h | 92 | ||||
| -rw-r--r-- | models.txt | 1 | ||||
| -rw-r--r-- | npc.c | 159 |
12 files changed, 1186 insertions, 187 deletions
@@ -39,6 +39,7 @@ Goals of the experiment: make build/context make build/prompts make build/npc + make build/game ``` ## Usage @@ -46,21 +47,31 @@ Goals of the experiment: ### Build a vector context database `context` reads a text file (one document per line), embeds each line, and -produces a binary vector database file. +produces a binary vector database file. For best results, use a dedicated +embedding model (for example, `qwen3`) even if you generate answers with a +different model. ```bash -./context -i corpus/lotr.txt -o corpus/lotr.vdb -./context -m flan-t5-small -i corpus/lotr.txt -o corpus/lotr.vdb +./context -m qwen3 -i corpus/lotr.txt -o corpus/lotr.vdb ``` ### Run an NPC query with retrieved context -`npc` loads a vector database, embeds the prompt, selects the top 3 matching +`npc` loads a vector database, embeds the prompt, selects the top 5 matching lines by cosine similarity, and runs the NPC system prompt against that context. +You can pass a separate embedding model with `-e`/`--embed-model`. ```bash -./npc -m flan-t5-small -p "Who is Gandalf?" -c corpus/lotr.vdb -./npc -m flan-t5-small -p "Who is Frodo?" -c corpus/lotr.vdb +./npc -m phi-4-mini-instruct -e qwen3 -p "Who is Gandalf?" -c corpus/lotr.vdb +./npc -m qwen3 -e qwen3 -p "Who is Frodo?" -c corpus/lotr.vdb +``` + +### Run the game + +The game uses the same models and retrieval pipeline, with short NPC replies. + +```bash +./game -m phi-4-mini-instruct -e qwen3 ``` ### context options @@ -79,12 +90,22 @@ lines by cosine similarity, and runs the NPC system prompt against that context. | Flag | Description | |------|-------------| | `-m, --model` | Model to use (required) | +| `-e, --embed-model` | Embedding model to use (optional) | | `-p, --prompt` | Prompt text (required) | | `-c, --context` | Context vector database file (.vdb) (required) | | `-l, --list` | List available models | | `-v, --verbose` | Enable llama.cpp logging | | `-h, --help` | Show help message | +### game options + +| Flag | Description | +|------|-------------| +| `-m, --model` | Model to use (default: first model in config) | +| `-e, --embed-model` | Embedding model to use (optional) | +| `-v, --verbose` | Enable llama.cpp logging | +| `-h, --help` | Show help message | + ## Models Configure models in `models.h`. The default model is the first entry in the @@ -115,3 +136,4 @@ make run/clean ## Reading material - https://www.tinyllm.org/ +- https://en.wikipedia.org/wiki/Cosine_similarity @@ -115,7 +115,10 @@ int main(int argc, char **argv) { cfg = &models[0]; } - struct llama_model *model = llama_model_load_from_file(cfg->filepath, llama_model_default_params()); + struct llama_model_params model_params = llama_model_default_params(); + model_params.n_gpu_layers = cfg->n_gpu_layers; + model_params.use_mmap = cfg->use_mmap; + struct llama_model *model = llama_model_load_from_file(cfg->filepath, model_params); if (model == NULL) { log_message(stderr, LOG_ERROR, "Unable to load embedding model"); llama_backend_free(); @@ -123,6 +126,8 @@ int main(int argc, char **argv) { } struct llama_context_params cparams = llama_context_default_params(); + cparams.n_ctx = cfg->n_ctx; + cparams.n_batch = cfg->n_batch; cparams.embeddings = true; struct llama_context *embed_ctx = llama_init_from_model(model, cparams); diff --git a/corpus/map1_bromm.txt b/corpus/map1_bromm.txt index 9d2f355..b476151 100644 --- a/corpus/map1_bromm.txt +++ b/corpus/map1_bromm.txt @@ -1,18 +1,24 @@ -Bromm is a dwarf stonemason who measures walls by touch and sound. -Bromm keeps a black slate with chalk marks for every safe path in the ruins. -Bromm believes the northern ruins predate the current road by two eras. -Bromm once found a bronze hinge in the ruins and still carries it for luck. -Bromm teaches travelers how to test a stone by tapping for a hollow ring. -Bromm mistrusts quick repairs and prefers heavy timber bracing. -Bromm says the ruins smell of old lime and wet ash after rain. -Bromm trades small carvings for dried meat and lamp oil. -Bromm thinks the marsh lights are reflections from a buried lens. -Bromm has a friendly rivalry with Dagna about whose warnings are wiser. -Bromm keeps his beard braided with a single iron bead from his clan. -Bromm wants to map every chamber in the north before winter. -Bromm is soft spoken but grows excited when discussing arches. -Bromm believes the ruins hide a collapsed stair with carved runes. -Bromm says the safest approach is to enter at dawn and leave by noon. -Bromm can describe three alternate routes to avoid the broken bridge. -Bromm worries that careless digging will wake something that sleeps in stone. -Bromm asks visitors if they have seen mason marks shaped like a trident. +I am Bromm, a dwarf stonemason who measures walls by touch and sound. +My name is Bromm. +Folks call me Bromm. +You can call me Bromm. +If you ask my name, I will answer Bromm. +I measure walls by touch and sound. +I keep a black slate with chalk marks for every safe path in the ruins. +I believe the northern ruins predate the current road by two eras. +I once found a bronze hinge in the ruins and still carry it for luck. +I teach travelers how to test a stone by tapping for a hollow ring. +I mistrust quick repairs and prefer heavy timber bracing. +I say the ruins smell of old lime and wet ash after rain. +I trade small carvings for dried meat and lamp oil. +I think the marsh lights are reflections from a buried lens. +I have a friendly rivalry with Dagna about whose warnings are wiser. +I keep my beard braided with a single iron bead from my clan. +I want to map every chamber in the north before winter. +I am soft spoken but grow excited when discussing arches. +I believe the ruins hide a collapsed stair with carved runes. +I say the safest approach is to enter at dawn and leave by noon. +I can describe three alternate routes to avoid the broken bridge. +I worry that careless digging will wake something that sleeps in stone. +I ask visitors if they have seen mason marks shaped like a trident. +I keep notes of every cracked lintel I pass. diff --git a/corpus/map1_dagna.txt b/corpus/map1_dagna.txt index cc80a68..a79c347 100644 --- a/corpus/map1_dagna.txt +++ b/corpus/map1_dagna.txt @@ -1,18 +1,24 @@ -Dagna is a dwarf well-keeper who knows every bucket and rope in the village. -Dagna believes the well is safe because the water tastes of iron, not rot. -Dagna keeps a ledger of how much water each household draws in a week. -Dagna replaced the well crank with a dwarven gear she forged herself. -Dagna says the well has a second shaft sealed by a stone plug. -Dagna once pulled up a smooth glass bead that does not scratch. -Dagna offers travelers a cup of water and a blunt warning about haste. -Dagna can name every herb that grows within ten paces of the well. -Dagna suspects the marsh lights are bait for thieves. -Dagna thinks Bromm worries too much about the ruins and not enough about the road. -Dagna keeps a small shrine to the Deep Mother near the well wall. -Dagna loves riddles and answers only after a trade of facts. -Dagna claims the well water calms fever if boiled with bitterroot. -Dagna dislikes gossip but listens closely for news of caravans. -Dagna believes a hidden aquifer feeds the village from the northern hills. -Dagna is saving for a brass pump to replace the old rope. -Dagna can spot forged coin by the sound it makes on stone. -Dagna asks travelers if they have seen a faint blue glow in deep water. +I am Dagna, a dwarf well-keeper who knows every bucket and rope in the village. +My name is Dagna. +Folks call me Dagna. +You can call me Dagna. +If you ask my name, I will answer Dagna. +I know every bucket and rope in the village. +I believe the well is safe because the water tastes of iron, not rot. +I keep a ledger of how much water each household draws in a week. +I replaced the well crank with a dwarven gear I forged myself. +I say the well has a second shaft sealed by a stone plug. +I once pulled up a smooth glass bead that does not scratch. +I offer travelers a cup of water and a blunt warning about haste. +I can name every herb that grows within ten paces of the well. +I suspect the marsh lights are bait for thieves. +I think Bromm worries too much about the ruins and not enough about the road. +I keep a small shrine to the Deep Mother near the well wall. +I love riddles and answer only after a trade of facts. +I claim the well water calms fever if boiled with bitterroot. +I dislike gossip but listen closely for news of caravans. +I believe a hidden aquifer feeds the village from the northern hills. +I am saving for a brass pump to replace the old rope. +I can spot forged coin by the sound it makes on stone. +I ask travelers if they have seen a faint blue glow in deep water. +I keep extra rope coiled under the well cover. diff --git a/corpus/map1_keldor.txt b/corpus/map1_keldor.txt index 5c918bd..7e7529e 100644 --- a/corpus/map1_keldor.txt +++ b/corpus/map1_keldor.txt @@ -1,18 +1,23 @@ -Keldor is a dwarf scout who watches the marsh from the old footpath. -Keldor claims the lights in the marsh move in patterns like a slow dance. -Keldor keeps a lantern hooded until the last moment to avoid drawing notice. -Keldor believes the marsh hides a buried wagon sunk in peat. -Keldor can follow frog calls to find the driest stepping stones. -Keldor says the safest crossing is after three dry days, not two. -Keldor carries a whistle tuned to a pitch only his hound can hear. -Keldor tells stories of a pale heron that never casts a shadow. -Keldor thinks the ruins and the marsh are linked by an old drainage tunnel. -Keldor traded a silver button to learn a fisher's secret route. -Keldor trusts Dagna's water but refuses to drink after midnight. -Keldor marks his trail with tiny chips of white quartz. -Keldor says the marsh lights went dark on the night the moon turned red. -Keldor is curious about old maps and collects any scraps he finds. -Keldor believes Bromm's trident mark is a warning, not a signature. -Keldor is patient in silence but asks direct questions when pressed. -Keldor wants proof that the marsh lights are not a signal to smugglers. -Keldor asks travelers to describe any strange scents like bitter metal or smoke. +I am Keldor, a dwarf scout who keeps watch on the marsh from the old footpath. +My name is Keldor. +Folks call me Keldor. +You can call me Keldor. +If you ask my name, I will answer Keldor. +I am Keldor of the marsh paths. +I have seen the marsh lights move in slow, dancing patterns. +I keep my lantern hooded until the last moment so I do not draw notice. +I believe the marsh hides a buried wagon sunk deep in peat. +I can follow frog calls to find the driest stepping stones. +I say the safest crossing is after three dry days, not two. +I carry a whistle tuned to a pitch only my hound can hear. +I tell stories of a pale heron that never casts a shadow. +I think the ruins and the marsh are linked by an old drainage tunnel. +I traded a silver button to learn a fisher's secret route. +I trust Dagna's water but refuse to drink after midnight. +I mark my trail with tiny chips of white quartz. +I say the marsh lights went dark on the night the moon turned red. +I am curious about old maps and I collect any scraps I find. +I believe Bromm's trident mark is a warning, not a signature. +I am patient in silence but ask direct questions when pressed. +I want proof that the marsh lights are not a signal to smugglers. +I ask travelers to describe any strange scents like bitter metal or smoke. diff --git a/corpus/map1_skara.txt b/corpus/map1_skara.txt index 00f77ff..21742a6 100644 --- a/corpus/map1_skara.txt +++ b/corpus/map1_skara.txt @@ -1,18 +1,24 @@ -Skara is a dwarf bell-ringer who keeps time for the village with a bronze handbell. -Skara claims the fog carries echoes that belong to no bell in town. -Skara keeps her bell clapper wrapped in cloth to avoid false rings. -Skara believes the marsh hides an old shrine with a cracked chime. -Skara can tell distance by the way sound bends in wet air. -Skara remembers every funeral toll and writes the names in a small book. -Skara warns travelers to avoid singing in the marsh after sunset. -Skara thinks Keldor's lights might be signals from smugglers. -Skara says Bromm once found a bell-shaped stone near the ruins. -Skara trades stories for thin copper wire and beeswax. -Skara is suspicious of mirrors and keeps hers covered. -Skara believes Dagna's well water dulls the ringing in her ears. -Skara says the bells in fog sound like chains, not bronze. -Skara is gentle in speech but firm about her warnings. -Skara wants to tune the village bell to a lower, steadier note. -Skara can teach a simple knock code used by miners. -Skara asks travelers if they have heard three rings with no pause. -Skara says the marsh grows quiet just before the lights appear. +I am Skara, a dwarf bell-ringer who keeps time for the village with a bronze handbell. +My name is Skara. +Folks call me Skara. +You can call me Skara. +If you ask my name, I will answer Skara. +I keep time for the village with a bronze handbell. +I claim the fog carries echoes that belong to no bell in town. +I keep my bell clapper wrapped in cloth to avoid false rings. +I believe the marsh hides an old shrine with a cracked chime. +I can tell distance by the way sound bends in wet air. +I remember every funeral toll and write the names in a small book. +I warn travelers to avoid singing in the marsh after sunset. +I think Keldor's lights might be signals from smugglers. +I say Bromm once found a bell-shaped stone near the ruins. +I trade stories for thin copper wire and beeswax. +I am suspicious of mirrors and keep mine covered. +I believe Dagna's well water dulls the ringing in my ears. +I say the bells in fog sound like chains, not bronze. +I am gentle in speech but firm about my warnings. +I want to tune the village bell to a lower, steadier note. +I can teach a simple knock code used by miners. +I ask travelers if they have heard three rings with no pause. +I say the marsh grows quiet just before the lights appear. +I polish the bell with beeswax before each dusk round. diff --git a/corpus/map1_thrain.txt b/corpus/map1_thrain.txt index e7a79b7..65718a6 100644 --- a/corpus/map1_thrain.txt +++ b/corpus/map1_thrain.txt @@ -1,18 +1,24 @@ -Thrain is a dwarf bridge warden who inspects beams by listening for a low hum. -Thrain keeps a pouch of pegs and wedges for emergency repairs. -Thrain believes the old bridge was built by traders, not soldiers. -Thrain marks safe planks with tiny chalk dots no one else notices. -Thrain once saved a cart by spotting a hairline crack at dawn. -Thrain says the river below turns louder right before a storm. -Thrain trades advice for nails, tar, and braided rope. -Thrain is skeptical of the marsh lights and calls them trick mirrors. -Thrain respects Dagna's ledger and asks her for bridge traffic counts. -Thrain thinks Bromm's trident mark is a builder's guild sign. -Thrain keeps a small tin whistle for signaling across the span. -Thrain fears rot more than storms and checks every joint twice. -Thrain wants to replace the center beam with black oak from the hills. -Thrain can point out a hidden ford two bends downstream. -Thrain says the safest crossing is single file with steady steps. -Thrain believes the bells in fog come from chains under the bridge. -Thrain asks travelers if they have spare pitch or tar. -Thrain is patient with questions but impatient with boasts. +I am Thrain, a dwarf bridge warden who inspects beams by listening for a low hum. +My name is Thrain. +Folks call me Thrain. +You can call me Thrain. +If you ask my name, I will answer Thrain. +I inspect beams by listening for a low hum. +I keep a pouch of pegs and wedges for emergency repairs. +I believe the old bridge was built by traders, not soldiers. +I mark safe planks with tiny chalk dots no one else notices. +I once saved a cart by spotting a hairline crack at dawn. +I say the river below turns louder right before a storm. +I trade advice for nails, tar, and braided rope. +I am skeptical of the marsh lights and call them trick mirrors. +I respect Dagna's ledger and ask her for bridge traffic counts. +I think Bromm's trident mark is a builder's guild sign. +I keep a small tin whistle for signaling across the span. +I fear rot more than storms and check every joint twice. +I want to replace the center beam with black oak from the hills. +I can point out a hidden ford two bends downstream. +I say the safest crossing is single file with steady steps. +I believe the bells in fog come from chains under the bridge. +I ask travelers if they have spare pitch or tar. +I am patient with questions but impatient with boasts. +I keep a tally of carts by the nicked beam on the south side. @@ -1,4 +1,8 @@ +#include <getopt.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> #define TB_IMPL #include "termbox2.h" @@ -6,6 +10,9 @@ #define NONSTD_IMPLEMENTATION #include "nonstd.h" +#include "llama.h" +#include "models.h" +#include "vectordb.h" #include "maps.h" #define MIN_W 40 @@ -61,12 +68,38 @@ typedef struct { char input[128]; int input_len; int npc_index; + const char *npc_name; DialogEntry entries[DIALOG_HISTORY_MAX]; int entry_count; } Dialog; +typedef struct { + const ModelConfig *model_cfg; + struct llama_model *model; + struct llama_model *embed_model; + struct llama_context *embed_ctx; + VectorDB *npc_dbs; + int *npc_db_loaded; + int verbose; +} GameRuntime; + +static void llama_log_callback(enum ggml_log_level level, const char *text, void *user_data) { + (void)level; + (void)user_data; + (void)text; +} + static int clamp(int value, int min, int max); +static void show_help(const char *prog) { + printf("Usage: %s [OPTIONS]\n", prog); + printf("Options:\n"); + printf(" -m, --model <name> Specify model to use (default: first model)\n"); + printf(" -e, --embed-model <name> Specify model to use for embeddings\n"); + printf(" -v, --verbose Enable verbose logging\n"); + printf(" -h, --help Show this help message\n"); +} + static void draw_border(int x, int y, int w, int h, uintattr_t fg) { int ix; int iy; @@ -87,7 +120,7 @@ static void draw_border(int x, int y, int w, int h, uintattr_t fg) { } static void draw_border_bg(int x, int y, int w, int h, uintattr_t fg, - uintattr_t bg) { + uintattr_t bg) { int ix; int iy; @@ -107,8 +140,8 @@ static void draw_border_bg(int x, int y, int w, int h, uintattr_t fg, } static void get_layout(int w, int h, int *map_x, int *map_y, int *map_w, - int *map_h, int *side_x, int *side_y, int *side_w, int *side_h, - int *msg1_y, int *msg2_y) { + int *map_h, int *side_x, int *side_y, int *side_w, int *side_h, + int *msg1_y, int *msg2_y) { *map_x = 0; *map_y = 0; *map_w = w - SIDEBAR_W; @@ -226,7 +259,7 @@ static void map_free(Map *map) { } static void update_camera(const Map *map, int view_w, int view_h, - const Player *player, int *cam_x, int *cam_y) { + const Player *player, int *cam_x, int *cam_y) { int max_cam_x; int max_cam_y; int margin_x; @@ -267,7 +300,7 @@ static void update_camera(const Map *map, int view_w, int view_h, } static void draw_map(const Map *map, int map_x, int map_y, int view_w, - int view_h, const Player *player, int cam_x, int cam_y) { + int view_h, const Player *player, int cam_x, int cam_y) { int ix; int iy; @@ -296,7 +329,7 @@ static void draw_map(const Map *map, int map_x, int map_y, int view_w, } if (player->x >= cam_x && player->x < cam_x + view_w && player->y >= cam_y - && player->y < cam_y + view_h) { + && player->y < cam_y + view_h) { int sx = map_x + (player->x - cam_x); int sy = map_y + (player->y - cam_y); tb_set_cell(sx, sy, '@', COLOR_GREEN_256 | TB_BOLD, TB_DEFAULT); @@ -324,7 +357,7 @@ static void draw_progress_bar(int x, int y, int w, int value, int max) { filled = (inner_w * value) / max; tb_set_cell(x, y, '[', COLOR_WHITE_256, TB_DEFAULT); for (ix = 0; ix < inner_w; ix++) { - uintattr_t fg = ix < filled ? COLOR_GREEN_256 : COLOR_WHITE_256; + uintattr_t fg = ix < filled ? COLOR_GREEN_256 : COLOR_WHITE_256; uint32_t ch = ix < filled ? '=' : ' '; tb_set_cell(x + 1 + ix, y, ch, fg, TB_DEFAULT); } @@ -389,31 +422,127 @@ static void update_status(const char *message) { status_msg = message ? message : ""; } -static void copy_truncated(char *dst, size_t dst_size, const char *src, int max_chars) { - int i = 0; - if (dst_size == 0) { - return; +static int draw_wrapped(int x, int y, int max_lines, int box_w, uintattr_t fg, + uintattr_t bg, const char *prefix, const char *text) { + if (max_lines <= 0 || box_w <= 0 || text == NULL) { + return 0; + } + int lines = 0; + int prefix_len = prefix ? (int)strlen(prefix) : 0; + if (prefix_len < 0) { + prefix_len = 0; + } + int avail = box_w - 4 - prefix_len; + if (avail < 1) { + return 0; + } + char pad[64]; + int pad_len = prefix_len < (int)sizeof(pad) - 1 ? prefix_len : (int)sizeof(pad) - 1; + for (int i = 0; i < pad_len; i++) { + pad[i] = ' '; + } + pad[pad_len] = '\0'; + const char *p = text; + while (*p != '\0' && lines < max_lines) { + while (*p == ' ') { + p++; + } + int line_len = 0; + int last_space = -1; + for (int i = 0; i < avail && p[i] != '\0'; i++) { + if (p[i] == '\n') { + line_len = i; + break; + } + if (p[i] == ' ') { + last_space = i; + } + line_len = i + 1; + } + if (line_len == 0) { + break; + } + int cut = line_len; + if (cut == avail && p[cut] != '\0' && last_space > 0) { + cut = last_space; + } + char buf[512]; + int copy_len = cut < (int)sizeof(buf) - 1 ? cut : (int)sizeof(buf) - 1; + memcpy(buf, p, (size_t)copy_len); + buf[copy_len] = '\0'; + while (copy_len > 0 && buf[copy_len - 1] == ' ') { + buf[copy_len - 1] = '\0'; + copy_len--; + } + const char *line_prefix = (lines == 0) ? (prefix ? prefix : "") : pad; + tb_printf(x, y + lines, fg, bg, "%s%s", line_prefix, buf); + lines++; + p += cut; + if (*p == '\n') { + p++; + } + } + return lines; +} + +static int count_wrapped_lines(int box_w, const char *prefix, const char *text) { + if (box_w <= 0 || text == NULL) { + return 0; + } + int prefix_len = prefix ? (int)strlen(prefix) : 0; + if (prefix_len < 0) { + prefix_len = 0; } - if (max_chars < 0) { - max_chars = 0; + int avail = box_w - 4 - prefix_len; + if (avail < 1) { + return 0; } - while (i < max_chars && src[i] != '\0' && i < (int)dst_size - 1) { - dst[i] = src[i]; - i++; + int lines = 0; + const char *p = text; + while (*p != '\0') { + while (*p == ' ') { + p++; + } + int line_len = 0; + int last_space = -1; + for (int i = 0; i < avail && p[i] != '\0'; i++) { + if (p[i] == '\n') { + line_len = i; + break; + } + if (p[i] == ' ') { + last_space = i; + } + line_len = i + 1; + } + if (line_len == 0) { + break; + } + int cut = line_len; + if (cut == avail && p[cut] != '\0' && last_space > 0) { + cut = last_space; + } + lines++; + p += cut; + if (*p == '\n') { + p++; + } } - dst[i] = '\0'; + return lines; } -static void dialog_open(Dialog *dialog, int npc_index) { +static void dialog_open(Dialog *dialog, int npc_index, const char *npc_name) { dialog->open = 1; dialog->input_len = 0; dialog->input[0] = '\0'; dialog->npc_index = npc_index; + dialog->npc_name = npc_name; } static void dialog_close(Dialog *dialog) { dialog->open = 0; dialog->npc_index = -1; + dialog->npc_name = NULL; } static void dialog_append(Dialog *dialog, uint32_t ch) { @@ -435,19 +564,455 @@ static void dialog_backspace(Dialog *dialog) { dialog->input[dialog->input_len] = '\0'; } -static void dialog_submit(Dialog *dialog, const GameMap *game_map) { +static void trim_leading(char **text) { + while (**text == ' ' || **text == '\t' || **text == '\n' || **text == '\r') { + (*text)++; + } +} + +static void trim_leading_punct(char **text) { + while (**text == '"' || **text == '\'' || **text == '`') { + (*text)++; + trim_leading(text); + } +} + +static void trim_trailing(char *text) { + size_t len = strlen(text); + while (len > 0) { + char ch = text[len - 1]; + if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r') { + break; + } + text[len - 1] = '\0'; + len--; + } +} + +static void strip_any_prefix(char **text, const char *prefix) { + if (strncasecmp(*text, prefix, strlen(prefix)) == 0) { + *text += strlen(prefix); + trim_leading(text); + } +} + + +static char *sanitize_reply(char *reply, const char *name) { + if (reply == NULL) { + return NULL; + } + char *start = reply; + trim_leading(&start); + trim_leading_punct(&start); + strip_any_prefix(&start, "Answer:"); + strip_any_prefix(&start, "NPC:"); + strip_any_prefix(&start, "Context:"); + strip_any_prefix(&start, "System:"); + if (strncmp(start, "<context>", 9) == 0) { + start += 9; + trim_leading(&start); + } + char *reminder = strstr(start, "<system-reminder>"); + if (reminder) { + *reminder = '\0'; + } + char *system_tag = strstr(start, "<system"); + if (system_tag) { + *system_tag = '\0'; + } + char *tag = strstr(start, "<|"); + if (tag) { + *tag = '\0'; + } + char *eos = strstr(start, "</s>"); + if (eos) { + *eos = '\0'; + } + char *hash = strstr(start, "###"); + if (hash) { + *hash = '\0'; + } + if (name && name[0] != '\0') { + size_t name_len = strlen(name); + for (;;) { + if (strncasecmp(start, name, name_len) != 0) { + break; + } + start += name_len; + while (*start == ':' || *start == '-' || *start == ',') { + start++; + } + trim_leading(&start); + trim_leading_punct(&start); + } + } + if (start != reply) { + memmove(reply, start, strlen(start) + 1); + } + trim_trailing(reply); + return reply; +} + +static int find_substr_offset(const char *buf, int n, const char *needle) { + int needle_len = (int)strlen(needle); + if (needle_len <= 0 || n <= 0 || needle_len > n) { + return -1; + } + for (int i = 0; i + needle_len <= n; i++) { + int match = 1; + for (int j = 0; j < needle_len; j++) { + if (buf[i + j] != needle[j]) { + match = 0; + break; + } + } + if (match) { + return i; + } + } + return -1; +} + +static int find_stop_offset(const char *buf, int n) { + int stop_at = n; + for (int i = 0; i < n; i++) { + if (buf[i] == '\n') { + stop_at = i; + break; + } + } + int off = find_substr_offset(buf, n, "</s>"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "<system-reminder>"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "<system"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "<|"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "###"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "System:"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "User:"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + off = find_substr_offset(buf, n, "Assistant:"); + if (off >= 0 && off < stop_at) { + stop_at = off; + } + return stop_at; +} + +static void append_prompt_context(stringb *sb, const char *npc_name, const char *context, + const char *question) { + sb_append_cstr(sb, "Context:\n"); + if (npc_name && npc_name[0] != '\0') { + sb_append_cstr(sb, "NPC Name: "); + sb_append_cstr(sb, npc_name); + sb_append_cstr(sb, "\n"); + } + if (context && context[0] != '\0') { + sb_append_cstr(sb, context); + } + sb_append_cstr(sb, "\nQuestion:\n"); + sb_append_cstr(sb, question ? question : ""); +} + +static char *build_prompt(const ModelConfig *cfg, const char *system, const char *npc_name, + const char *context, const char *question) { + stringb full = {0}; + sb_init(&full, 0); + + switch (cfg->prompt_style) { + case PROMPT_STYLE_T5: + sb_append_cstr(&full, "instruction: "); + sb_append_cstr(&full, system ? system : ""); + sb_append_cstr(&full, "\nquestion: "); + sb_append_cstr(&full, question ? question : ""); + sb_append_cstr(&full, "\ncontext:\n"); + if (npc_name && npc_name[0] != '\0') { + sb_append_cstr(&full, "NPC Name: "); + sb_append_cstr(&full, npc_name); + sb_append_cstr(&full, "\n"); + } + if (context && context[0] != '\0') { + sb_append_cstr(&full, context); + } + sb_append_cstr(&full, "\nanswer:"); + break; + case PROMPT_STYLE_CHAT: + sb_append_cstr(&full, "System:\n"); + sb_append_cstr(&full, system ? system : ""); + sb_append_cstr(&full, "\nUser:\n"); + append_prompt_context(&full, npc_name, context, question); + sb_append_cstr(&full, "\nAssistant:"); + break; + case PROMPT_STYLE_PLAIN: + default: + sb_append_cstr(&full, "System:\n"); + sb_append_cstr(&full, system ? system : ""); + sb_append_cstr(&full, "\n"); + append_prompt_context(&full, npc_name, context, question); + sb_append_cstr(&full, "\nAnswer:"); + break; + } + + return full.data; +} + +static char *generate_npc_reply(const GameRuntime *runtime, const GameMap *game_map, + int npc_index, const char *prompt) { + if (runtime == NULL || prompt == NULL) { + return NULL; + } + const char *fallback = "Demo reply: The old ruins are north of here."; + const char *npc_name = NULL; + if (game_map && npc_index >= 0 && npc_index < 10) { + const char *npc_reply = game_map->npcs[npc_index].reply; + npc_name = game_map->npcs[npc_index].name; + if (npc_reply && npc_reply[0] != '\0') { + fallback = npc_reply; + } + } + + if (runtime->model == NULL || runtime->model_cfg == NULL || runtime->embed_ctx == NULL + || runtime->npc_dbs == NULL || runtime->npc_db_loaded == NULL) { + return strdup(fallback); + } + if (npc_index < 0 || npc_index >= 10 || runtime->npc_db_loaded[npc_index] == 0) { + return strdup(fallback); + } + + VectorDB *db = &runtime->npc_dbs[npc_index]; + float query[VDB_EMBED_SIZE]; + int results[5]; + for (int i = 0; i < 5; i++) { + results[i] = -1; + } + vdb_embed_query(db, prompt, query); + vdb_search(db, query, 5, results); + + size_t context_cap = 1024; + size_t context_len = 0; + char *context = (char *)malloc(context_cap); + if (context == NULL) { + return strdup(fallback); + } + context[0] = '\0'; + if (runtime->verbose) { + fprintf(stderr, "[npc] question: %s\n", prompt); + } + for (int i = 0; i < 5; i++) { + if (results[i] < 0) { + continue; + } + const char *text = db->docs[results[i]].text; + if (runtime->verbose) { + fprintf(stderr, "[npc] context[%d]: %s\n", i, text); + } + char header[32]; + int header_len = snprintf(header, sizeof(header), "Snippet %d:\n", i + 1); + size_t text_len = strlen(text); + size_t need = context_len + (size_t)header_len + text_len + 2; + if (need > context_cap) { + while (need > context_cap) { + context_cap *= 2; + } + char *next = (char *)realloc(context, context_cap); + if (next == NULL) { + free(context); + return strdup(fallback); + } + context = next; + } + if (header_len > 0) { + memcpy(context + context_len, header, (size_t)header_len); + context_len += (size_t)header_len; + } + memcpy(context + context_len, text, text_len); + context_len += text_len; + context[context_len++] = '\n'; + context[context_len] = '\0'; + } + + const char *system_prompt = "You are a helpful NPC. Speak in first person. " + "Use only the provided context. If the context does not contain the answer, say \"I don't know.\" " + "If asked your name, answer with the NPC Name from the context. " + "Do not mention context, system messages, or prompts. Reply with one short sentence."; + + char *full_prompt = build_prompt(runtime->model_cfg, system_prompt, npc_name, context, prompt); + if (full_prompt == NULL) { + free(context); + return strdup(fallback); + } + free(context); + + if (runtime->verbose) { + printf(">> %s\n", full_prompt); + } + + const struct llama_vocab *vocab = llama_model_get_vocab(runtime->model); + int n_prompt = -llama_tokenize(vocab, full_prompt, strlen(full_prompt), NULL, 0, true, true); + llama_token *prompt_tokens = (llama_token *)malloc((size_t)n_prompt * sizeof(llama_token)); + if (prompt_tokens == NULL) { + free(full_prompt); + return strdup(fallback); + } + if (llama_tokenize(vocab, full_prompt, strlen(full_prompt), prompt_tokens, n_prompt, true, true) < 0) { + free(full_prompt); + free(prompt_tokens); + return strdup(fallback); + } + + struct llama_context_params ctx_params = llama_context_default_params(); + ctx_params.n_ctx = runtime->model_cfg->n_ctx; + ctx_params.n_batch = runtime->model_cfg->n_batch; + ctx_params.embeddings = false; + + struct llama_context *ctx = llama_init_from_model(runtime->model, ctx_params); + if (ctx == NULL) { + free(full_prompt); + free(prompt_tokens); + return strdup(fallback); + } + + struct llama_sampler_chain_params sparams = llama_sampler_chain_default_params(); + struct llama_sampler *smpl = llama_sampler_chain_init(sparams); + if (runtime->model_cfg->top_k > 0) { + llama_sampler_chain_add(smpl, llama_sampler_init_top_k(runtime->model_cfg->top_k)); + } + if (runtime->model_cfg->top_p > 0.0f && runtime->model_cfg->top_p < 1.0f) { + llama_sampler_chain_add(smpl, llama_sampler_init_top_p(runtime->model_cfg->top_p, 1)); + } + if (runtime->model_cfg->min_p > 0.0f) { + llama_sampler_chain_add(smpl, llama_sampler_init_min_p(runtime->model_cfg->min_p, 1)); + } + llama_sampler_chain_add(smpl, llama_sampler_init_penalties( + runtime->model_cfg->repeat_last_n, + runtime->model_cfg->repeat_penalty, + runtime->model_cfg->freq_penalty, + runtime->model_cfg->presence_penalty)); + llama_sampler_chain_add(smpl, llama_sampler_init_temp(runtime->model_cfg->temperature)); + llama_sampler_chain_add(smpl, llama_sampler_init_dist(runtime->model_cfg->seed)); + + struct llama_batch batch = llama_batch_get_one(prompt_tokens, n_prompt); + + if (llama_model_has_encoder(runtime->model)) { + if (llama_encode(ctx, batch)) { + llama_sampler_free(smpl); + free(full_prompt); + free(prompt_tokens); + llama_free(ctx); + return strdup(fallback); + } + llama_token decoder_start = llama_model_decoder_start_token(runtime->model); + if (decoder_start == LLAMA_TOKEN_NULL) { + decoder_start = llama_vocab_bos(vocab); + } + batch = llama_batch_get_one(&decoder_start, 1); + } + + int n_pos = 0; + llama_token new_token_id; + size_t out_cap = 256; + size_t out_len = 0; + char *out = (char *)malloc(out_cap); + if (out == NULL) { + llama_sampler_free(smpl); + free(full_prompt); + free(prompt_tokens); + llama_free(ctx); + return strdup(fallback); + } + out[0] = '\0'; + int n_predict = runtime->model_cfg->n_predict > 0 ? runtime->model_cfg->n_predict : 64; + if (n_predict > 64) { + n_predict = 64; + } + while (n_pos + batch.n_tokens < n_prompt + n_predict) { + if (llama_decode(ctx, batch)) { + break; + } + n_pos += batch.n_tokens; + new_token_id = llama_sampler_sample(smpl, ctx, -1); + if (llama_vocab_is_eog(vocab, new_token_id)) { + break; + } + char buf[128]; + int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) { + break; + } + int stop_at = find_stop_offset(buf, n); + if (out_len == 0 && stop_at == 0 && n > 0 && buf[0] == '\n') { + batch = llama_batch_get_one(&new_token_id, 1); + continue; + } + if (out_len + (size_t)stop_at + 1 > out_cap) { + while (out_len + (size_t)stop_at + 1 > out_cap) { + out_cap *= 2; + } + char *next = (char *)realloc(out, out_cap); + if (next == NULL) { + break; + } + out = next; + } + memcpy(out + out_len, buf, (size_t)stop_at); + out_len += (size_t)stop_at; + out[out_len] = '\0'; + if (stop_at != n) { + break; + } + batch = llama_batch_get_one(&new_token_id, 1); + } + + llama_sampler_free(smpl); + free(full_prompt); + free(prompt_tokens); + llama_free(ctx); + + if (out_len == 0) { + free(out); + return strdup(fallback); + } + return out; +} + +static void dialog_submit(Dialog *dialog, const GameMap *game_map, const GameRuntime *runtime) { if (dialog->input_len == 0) { return; } { - const char *demo = "Demo reply: The old ruins are north of here."; - const char *reply = demo; + const char *npc_name = NULL; + char *reply = generate_npc_reply(runtime, game_map, dialog->npc_index, dialog->input); + const char *fallback = ""; if (game_map && dialog->npc_index >= 0 && dialog->npc_index < 10) { - const char *npc_reply = game_map->npcs[dialog->npc_index].reply; - if (npc_reply && npc_reply[0] != '\0') { - reply = npc_reply; + npc_name = game_map->npcs[dialog->npc_index].name; + fallback = game_map->npcs[dialog->npc_index].reply; + if (fallback == NULL) { + fallback = ""; } } + reply = sanitize_reply(reply, npc_name); + if (reply == NULL || reply[0] == '\0') { + free(reply); + reply = NULL; + } + const char *reply_text = reply != NULL ? reply : fallback; if (dialog->entry_count >= DIALOG_HISTORY_MAX) { for (int i = 1; i < DIALOG_HISTORY_MAX; i++) { dialog->entries[i - 1] = dialog->entries[i]; @@ -457,8 +1022,9 @@ static void dialog_submit(Dialog *dialog, const GameMap *game_map) { snprintf(dialog->entries[dialog->entry_count].prompt, sizeof(dialog->entries[dialog->entry_count].prompt), "%s", dialog->input); snprintf(dialog->entries[dialog->entry_count].response, - sizeof(dialog->entries[dialog->entry_count].response), "%s", reply); + sizeof(dialog->entries[dialog->entry_count].response), "%s", reply_text); dialog->entry_count++; + free(reply); } dialog->input_len = 0; dialog->input[0] = '\0'; @@ -479,7 +1045,7 @@ static void update_npc_status(const GameMap *game_map, int npc_index) { } static void render(const Map *map, const Player *player, int *cam_x, - int *cam_y, int *out_view_w, int *out_view_h, const Dialog *dialog) { + int *cam_y, int *out_view_w, int *out_view_h, const Dialog *dialog) { int w; int h; int map_x; @@ -594,23 +1160,49 @@ static void render(const Map *map, const Player *player, int *cam_x, if (max_text < 0) { max_text = 0; } - int max_entries = max_lines / 2; - int start = dialog->entry_count - max_entries; + int start = dialog->entry_count; if (start < 0) { start = 0; } + int used_lines = 0; + for (int i = dialog->entry_count - 1; i >= 0; i--) { + const char *prompt_text = dialog->entries[i].prompt; + const char *response_text = dialog->entries[i].response; + const char *name = dialog->npc_name && dialog->npc_name[0] != '\0' ? dialog->npc_name : "NPC"; + char prefix_you[16]; + char prefix_npc[64]; + snprintf(prefix_you, sizeof(prefix_you), "You: "); + snprintf(prefix_npc, sizeof(prefix_npc), "%s: ", name); + int need = count_wrapped_lines(box_w, prefix_you, prompt_text) + + count_wrapped_lines(box_w, prefix_npc, response_text); + if (used_lines + need > max_lines && used_lines > 0) { + break; + } + used_lines += need; + start = i; + if (used_lines >= max_lines) { + break; + } + } for (int i = start; i < dialog->entry_count && line + 1 <= max_lines; i++) { - char prompt_buf[128]; - char response_buf[256]; - copy_truncated(prompt_buf, sizeof(prompt_buf), dialog->entries[i].prompt, max_text); - copy_truncated(response_buf, sizeof(response_buf), dialog->entries[i].response, max_text); - if (line < max_lines) { - tb_printf(box_x + 2, log_y + line, COLOR_WHITE_256, 19, "You: %s", prompt_buf); - line++; + const char *prompt_text = dialog->entries[i].prompt; + const char *response_text = dialog->entries[i].response; + const char *name = dialog->npc_name && dialog->npc_name[0] != '\0' ? dialog->npc_name : "NPC"; + char prefix_you[16]; + char prefix_npc[64]; + snprintf(prefix_you, sizeof(prefix_you), "You: "); + snprintf(prefix_npc, sizeof(prefix_npc), "%s: ", name); + int used = draw_wrapped(box_x + 2, log_y + line, max_lines - line, box_w, + COLOR_WHITE_256, 19, prefix_you, prompt_text); + line += used; + if (line >= max_lines) { + break; } - if (line < max_lines) { - tb_printf(box_x + 2, log_y + line, COLOR_GREEN_256, 19, "NPC: %s", response_buf); - line++; + used = draw_wrapped(box_x + 2, log_y + line, max_lines - line, box_w, + COLOR_GREEN_256, 19, prefix_npc, response_text); + line += used; + if (line >= max_lines) { + break; } } @@ -641,17 +1233,71 @@ static int clamp(int value, int min, int max) { return value; } -int main(void) { +int main(int argc, char **argv) { + const char *model_name = NULL; + const char *embed_model_name = NULL; + const ModelConfig *model_cfg = NULL; + struct llama_model *embed_model = NULL; + struct llama_model *gen_model = NULL; + struct llama_context *embed_ctx = NULL; + int tb_ready = 0; + int llama_ready = 0; + int exit_code = 0; + int verbose = 0; + + static struct option long_options[] = { + {"model", required_argument, 0, 'm'}, + {"embed-model", required_argument, 0, 'e'}, + {"verbose", no_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {0, 0, 0, 0} + }; + + int opt; + int option_index = 0; + while ((opt = getopt_long(argc, argv, "m:e:vh", long_options, &option_index)) != -1) { + switch (opt) { + case 'm': + model_name = optarg; + break; + case 'e': + embed_model_name = optarg; + break; + case 'v': + verbose = 1; + break; + case 'h': + show_help(argv[0]); + return 0; + default: + fprintf(stderr, "Usage: %s [-m model] [-v] [-h]\n", argv[0]); + return 1; + } + } + + if (model_name != NULL) { + model_cfg = get_model_by_name(model_name); + if (model_cfg == NULL) { + fprintf(stderr, "Unknown model '%s'\n", model_name); + return 1; + } + } else { + model_cfg = &models[0]; + } + Player player = {0}; array(GameMap) maps; GameMap map1 = {0}; GameMap *current_map = NULL; + VectorDB *npc_dbs = NULL; + int *npc_db_loaded = NULL; int running = 1; int view_w = 0; int view_h = 0; int cam_x = 0; int cam_y = 0; Dialog dialog = {0}; + GameRuntime runtime = {0}; player_init(&player); array_init(maps); @@ -660,10 +1306,96 @@ int main(void) { current_map = &maps.data[0]; map_init(¤t_map->map, current_map->data, current_map->len); + if (verbose == 0) { + llama_log_set(llama_log_callback, NULL); + } + + npc_dbs = (VectorDB *)calloc(10, sizeof(VectorDB)); + npc_db_loaded = (int *)calloc(10, sizeof(int)); + if (npc_dbs == NULL || npc_db_loaded == NULL) { + fprintf(stderr, "Failed to allocate NPC vector databases\n"); + exit_code = 1; + goto cleanup; + } + + llama_backend_init(); + ggml_backend_load_all(); + llama_ready = 1; + const ModelConfig *embed_cfg = NULL; + if (embed_model_name != NULL) { + embed_cfg = get_model_by_name(embed_model_name); + if (embed_cfg == NULL) { + fprintf(stderr, "Unknown embedding model '%s'\n", embed_model_name); + exit_code = 1; + goto cleanup; + } + } else if (model_cfg->embed_model_name != NULL) { + embed_cfg = get_model_by_name(model_cfg->embed_model_name); + } + if (embed_cfg == NULL) { + embed_cfg = model_cfg; + } + + struct llama_model_params gen_params = llama_model_default_params(); + gen_params.n_gpu_layers = model_cfg->n_gpu_layers; + gen_params.use_mmap = model_cfg->use_mmap; + gen_model = llama_model_load_from_file(model_cfg->filepath, gen_params); + if (gen_model == NULL) { + fprintf(stderr, "Unable to load generation model\n"); + exit_code = 1; + goto cleanup; + } + + struct llama_model_params embed_params = llama_model_default_params(); + embed_params.n_gpu_layers = embed_cfg->n_gpu_layers; + embed_params.use_mmap = embed_cfg->use_mmap; + embed_model = llama_model_load_from_file(embed_cfg->filepath, embed_params); + if (embed_model == NULL) { + fprintf(stderr, "Unable to load embedding model\n"); + exit_code = 1; + goto cleanup; + } + + struct llama_context_params cparams = llama_context_default_params(); + cparams.n_ctx = embed_cfg->n_ctx; + cparams.n_batch = embed_cfg->n_batch; + cparams.embeddings = true; + embed_ctx = llama_init_from_model(embed_model, cparams); + if (embed_ctx == NULL) { + fprintf(stderr, "Failed to create embedding context\n"); + exit_code = 1; + goto cleanup; + } + + for (int i = 0; i < 10; i++) { + const char *vdb_path = current_map->npcs[i].vdb_path; + if (vdb_path == NULL || vdb_path[0] == '\0') { + continue; + } + vdb_init(&npc_dbs[i], embed_ctx); + VectorDBErrorCode vdb_rc = vdb_load(&npc_dbs[i], vdb_path); + if (vdb_rc != VDB_SUCCESS) { + fprintf(stderr, "Failed to load vector database %s: %s\n", vdb_path, vdb_error(vdb_rc)); + vdb_free(&npc_dbs[i]); + continue; + } + npc_db_loaded[i] = 1; + } + + runtime.model_cfg = model_cfg; + runtime.model = gen_model; + runtime.embed_model = embed_model; + runtime.embed_ctx = embed_ctx; + runtime.npc_dbs = npc_dbs; + runtime.npc_db_loaded = npc_db_loaded; + runtime.verbose = verbose; + if (tb_init() != TB_OK) { fprintf(stderr, "Failed to init termbox.\n"); - return 1; + exit_code = 1; + goto cleanup; } + tb_ready = 1; tb_set_input_mode(TB_INPUT_ESC); tb_set_output_mode(TB_OUTPUT_256); @@ -678,7 +1410,7 @@ int main(void) { if (ev.key == TB_KEY_ESC) { dialog_close(&dialog); } else if (ev.key == TB_KEY_ENTER) { - dialog_submit(&dialog, current_map); + dialog_submit(&dialog, current_map, &runtime); } else if (ev.key == TB_KEY_BACKSPACE || ev.key == TB_KEY_BACKSPACE2) { dialog_backspace(&dialog); } else if (ev.ch) { @@ -692,7 +1424,10 @@ int main(void) { u32 target = map_get(¤t_map->map, player.x, next_y); int npc_index = npc_index_from_tile(target); if (target == 'N' || npc_index >= 0) { - dialog_open(&dialog, npc_index); + const char *npc_name = current_map && npc_index >= 0 && npc_index < 10 + ? current_map->npcs[npc_index].name + : NULL; + dialog_open(&dialog, npc_index, npc_name); update_npc_status(current_map, npc_index); } else if (map_is_walkable(¤t_map->map, player.x, next_y)) { player.y = next_y; @@ -702,7 +1437,10 @@ int main(void) { u32 target = map_get(¤t_map->map, player.x, next_y); int npc_index = npc_index_from_tile(target); if (target == 'N' || npc_index >= 0) { - dialog_open(&dialog, npc_index); + const char *npc_name = current_map && npc_index >= 0 && npc_index < 10 + ? current_map->npcs[npc_index].name + : NULL; + dialog_open(&dialog, npc_index, npc_name); update_npc_status(current_map, npc_index); } else if (map_is_walkable(¤t_map->map, player.x, next_y)) { player.y = next_y; @@ -712,7 +1450,10 @@ int main(void) { u32 target = map_get(¤t_map->map, next_x, player.y); int npc_index = npc_index_from_tile(target); if (target == 'N' || npc_index >= 0) { - dialog_open(&dialog, npc_index); + const char *npc_name = current_map && npc_index >= 0 && npc_index < 10 + ? current_map->npcs[npc_index].name + : NULL; + dialog_open(&dialog, npc_index, npc_name); update_npc_status(current_map, npc_index); } else if (map_is_walkable(¤t_map->map, next_x, player.y)) { player.x = next_x; @@ -722,7 +1463,10 @@ int main(void) { u32 target = map_get(¤t_map->map, next_x, player.y); int npc_index = npc_index_from_tile(target); if (target == 'N' || npc_index >= 0) { - dialog_open(&dialog, npc_index); + const char *npc_name = current_map && npc_index >= 0 && npc_index < 10 + ? current_map->npcs[npc_index].name + : NULL; + dialog_open(&dialog, npc_index, npc_name); update_npc_status(current_map, npc_index); } else if (map_is_walkable(¤t_map->map, next_x, player.y)) { player.x = next_x; @@ -742,11 +1486,33 @@ int main(void) { } } +cleanup: player_free(&player); for (size_t i = 0; i < maps.length; i++) { map_free(&maps.data[i].map); } array_free(maps); - tb_shutdown(); - return 0; + if (tb_ready) { + tb_shutdown(); + } + for (int i = 0; i < 10; i++) { + if (npc_db_loaded && npc_db_loaded[i]) { + vdb_free(&npc_dbs[i]); + } + } + free(npc_db_loaded); + free(npc_dbs); + if (embed_ctx != NULL) { + llama_free(embed_ctx); + } + if (embed_model != NULL) { + llama_model_free(embed_model); + } + if (gen_model != NULL) { + llama_model_free(gen_model); + } + if (llama_ready) { + llama_backend_free(); + } + return exit_code; } @@ -16,6 +16,7 @@ typedef struct { typedef struct { const char *name; const char *reply; + const char *vdb_path; } NpcSettings; typedef struct { @@ -29,11 +30,11 @@ static inline GameMap make_map1(void) { GameMap map = {0}; map.data = maps_map1_txt; map.len = (int)maps_map1_txt_len; - map.npcs[0] = (NpcSettings){.name = "Bromm", .reply = "Bromm: The old ruins are north of here."}; - map.npcs[1] = (NpcSettings){.name = "Dagna", .reply = "Dagna: The well is safe, mostly."}; - map.npcs[2] = (NpcSettings){.name = "Keldor", .reply = "Keldor: I saw lights in the marsh last night."}; - map.npcs[3] = (NpcSettings){.name = "Thrain", .reply = "Thrain: Mind the bridge; the beams sing when they're weak."}; - map.npcs[4] = (NpcSettings){.name = "Skara", .reply = "Skara: If you hear bells in the fog, turn back."}; + map.npcs[0] = (NpcSettings){.name = "Bromm", .reply = "Bromm: The old ruins are north of here.", .vdb_path = "corpus/map1_bromm.vdb"}; + map.npcs[1] = (NpcSettings){.name = "Dagna", .reply = "Dagna: The well is safe, mostly.", .vdb_path = "corpus/map1_dagna.vdb"}; + map.npcs[2] = (NpcSettings){.name = "Keldor", .reply = "Keldor: I saw lights in the marsh last night.", .vdb_path = "corpus/map1_keldor.vdb"}; + map.npcs[3] = (NpcSettings){.name = "Thrain", .reply = "Thrain: Mind the bridge; the beams sing when they're weak.", .vdb_path = "corpus/map1_thrain.vdb"}; + map.npcs[4] = (NpcSettings){.name = "Skara", .reply = "Skara: If you hear bells in the fog, turn back.", .vdb_path = "corpus/map1_skara.vdb"}; return map; } @@ -5,55 +5,139 @@ #include <stddef.h> #include <string.h> +typedef enum { + PROMPT_STYLE_PLAIN = 0, + PROMPT_STYLE_CHAT = 1, + PROMPT_STYLE_T5 = 2, +} PromptStyle; + typedef struct { const char *name; const char *filepath; + const char *embed_model_name; int n_gpu_layers; bool use_mmap; int n_ctx; int n_batch; bool embeddings; + int n_predict; float temperature; float min_p; + int top_k; + float top_p; + int repeat_last_n; + float repeat_penalty; + float freq_penalty; + float presence_penalty; uint32_t seed; + PromptStyle prompt_style; } ModelConfig; ModelConfig models[] = { { + .name = "qwen3", + .filepath = "models/Qwen3-0.6B-UD-Q6_K_XL.gguf", + .embed_model_name = "qwen3", + .n_gpu_layers = 0, + .use_mmap = false, + .n_ctx = 2048, + .n_batch = 4096, + .embeddings = false, + .n_predict = 128, + .temperature = 0.6f, + .min_p = 0.05f, + .top_k = 40, + .top_p = 0.9f, + .repeat_last_n = 64, + .repeat_penalty = 1.1f, + .freq_penalty = 0.0f, + .presence_penalty = 0.0f, + .seed = LLAMA_DEFAULT_SEED, + .prompt_style = PROMPT_STYLE_CHAT, + }, + { + .name = "tinyllama-1.1b", + .filepath = "models/tinyllama-1.1b.gguf", + .embed_model_name = "qwen3", + .n_gpu_layers = 0, + .use_mmap = false, + .n_ctx = 2048, + .n_batch = 4096, + .embeddings = false, + .n_predict = 128, + .temperature = 0.7f, + .min_p = 0.05f, + .top_k = 40, + .top_p = 0.9f, + .repeat_last_n = 64, + .repeat_penalty = 1.1f, + .freq_penalty = 0.0f, + .presence_penalty = 0.0f, + .seed = LLAMA_DEFAULT_SEED, + .prompt_style = PROMPT_STYLE_PLAIN, + }, + { .name = "tinyllama-1", .filepath = "models/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf", + .embed_model_name = "qwen3", .n_gpu_layers = 0, .use_mmap = false, .n_ctx = 2048, .n_batch = 4096, .embeddings = false, - .temperature = 0.8f, + .n_predict = 128, + .temperature = 0.7f, .min_p = 0.05f, + .top_k = 40, + .top_p = 0.9f, + .repeat_last_n = 64, + .repeat_penalty = 1.1f, + .freq_penalty = 0.0f, + .presence_penalty = 0.0f, .seed = LLAMA_DEFAULT_SEED, + .prompt_style = PROMPT_STYLE_PLAIN, }, { .name = "flan-t5-small", .filepath = "models/flan-t5-small.F16.gguf", + .embed_model_name = "qwen3", .n_gpu_layers = 0, .use_mmap = false, .n_ctx = 512, .n_batch = 512, .embeddings = false, - .temperature = 0.8f, + .n_predict = 128, + .temperature = 0.2f, .min_p = 0.05f, + .top_k = 40, + .top_p = 0.9f, + .repeat_last_n = 64, + .repeat_penalty = 1.1f, + .freq_penalty = 0.0f, + .presence_penalty = 0.0f, .seed = LLAMA_DEFAULT_SEED, + .prompt_style = PROMPT_STYLE_T5, }, { .name = "phi-4-mini-instruct", .filepath = "models/Phi-4-mini-instruct.Q2_K.gguf", + .embed_model_name = "qwen3", .n_gpu_layers = 0, .use_mmap = false, - .n_ctx = 131072, + .n_ctx = 4096, .n_batch = 4096, .embeddings = false, - .temperature = 0.8f, + .n_predict = 128, + .temperature = 0.6f, .min_p = 0.05f, + .top_k = 40, + .top_p = 0.9f, + .repeat_last_n = 64, + .repeat_penalty = 1.1f, + .freq_penalty = 0.0f, + .presence_penalty = 0.0f, .seed = LLAMA_DEFAULT_SEED, + .prompt_style = PROMPT_STYLE_CHAT, }, }; @@ -1,3 +1,4 @@ +https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-UD-Q6_K_XL.gguf https://huggingface.co/Felladrin/gguf-flan-t5-small/resolve/main/flan-t5-small.F16.gguf https://huggingface.co/MaziyarPanahi/Phi-4-mini-instruct-GGUF/resolve/main/Phi-4-mini-instruct.Q2_K.gguf https://huggingface.co/andrijdavid/TinyLlama-1.1B-intermediate-step-1431k-3T-GGUF/resolve/main/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf @@ -1,7 +1,6 @@ #include "llama.h" #include "vectordb.h" #include "models.h" -#include "models.h" #define NONSTD_IMPLEMENTATION #include "nonstd.h" @@ -31,6 +30,7 @@ static void show_help(const char *prog) { printf("Usage: %s [OPTIONS]\n", prog); printf("Options:\n"); printf(" -m, --model <name> Specify model to use (default: first model)\n"); + printf(" -e, --embed-model <name> Specify model to use for embeddings\n"); printf(" -p, --prompt <text> Specify prompt text (default: \"What is 2+2?\")\n"); printf(" -c, --context <file> Specify vector database file (.vdb)\n"); printf(" -l, --list Lists all available models\n"); @@ -48,7 +48,54 @@ static int has_vdb_extension(const char *path) { return strcmp(path + (len - ext_len), ext) == 0; } -static int execute_prompt_with_context(const ModelConfig *cfg, const char *prompt, const char *context, int n_predict) { +static void append_prompt_context(stringb *sb, const char *context, const char *question) { + sb_append_cstr(sb, "Context:\n"); + if (context && context[0] != '\0') { + sb_append_cstr(sb, context); + } + sb_append_cstr(sb, "\nQuestion:\n"); + sb_append_cstr(sb, question ? question : ""); +} + +static char *build_prompt(const ModelConfig *cfg, const char *system, const char *context, + const char *question) { + stringb full = {0}; + sb_init(&full, 0); + + switch (cfg->prompt_style) { + case PROMPT_STYLE_T5: + sb_append_cstr(&full, "instruction: "); + sb_append_cstr(&full, system ? system : ""); + sb_append_cstr(&full, "\nquestion: "); + sb_append_cstr(&full, question ? question : ""); + sb_append_cstr(&full, "\ncontext:\n"); + if (context && context[0] != '\0') { + sb_append_cstr(&full, context); + } + sb_append_cstr(&full, "\nanswer:"); + break; + case PROMPT_STYLE_CHAT: + sb_append_cstr(&full, "System:\n"); + sb_append_cstr(&full, system ? system : ""); + sb_append_cstr(&full, "\nUser:\n"); + append_prompt_context(&full, context, question); + sb_append_cstr(&full, "\nAssistant:"); + break; + case PROMPT_STYLE_PLAIN: + default: + sb_append_cstr(&full, "System:\n"); + sb_append_cstr(&full, system ? system : ""); + sb_append_cstr(&full, "\n"); + append_prompt_context(&full, context, question); + sb_append_cstr(&full, "\nAnswer:"); + break; + } + + return full.data; +} + +static int execute_prompt_with_context(const ModelConfig *cfg, const char *prompt, + const char *context, int n_predict) { if (cfg == NULL) { log_message(stderr, LOG_ERROR, "Model config is missing"); return 1; @@ -76,21 +123,21 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp const struct llama_vocab *vocab = llama_model_get_vocab(model); - const char *context_prefix = "Context:\n"; - const char *prompt_prefix = "\n\nQuestion:\n"; - const char *answer_prefix = "\n\nAnswer:\n"; - size_t context_len = context ? strlen(context) : 0; - size_t prompt_len = strlen(prompt); - size_t full_len = strlen(system_prefix) + strlen(context_prefix) + context_len + strlen(prompt_prefix) + prompt_len + strlen(answer_prefix) + 1; - char *full_prompt = (char *)malloc(full_len); + const char *system_text = system_prefix; + if (strncmp(system_prefix, "System:", 7) == 0) { + system_text = system_prefix + 7; + while (*system_text == ' ' || *system_text == '\n' || *system_text == '\r') { + system_text++; + } + } + + char *full_prompt = build_prompt(cfg, system_text, context, prompt); if (full_prompt == NULL) { - log_message(stderr, LOG_ERROR, "Failed to allocate prompt buffer"); + log_message(stderr, LOG_ERROR, "Failed to build prompt"); free(system_prefix); llama_model_free(model); return 1; } - snprintf(full_prompt, full_len, "%s%s%s%s%s", system_prefix, context_prefix, context ? context : "", prompt_prefix, prompt); - strncat(full_prompt, answer_prefix, full_len - strlen(full_prompt) - 1); int n_prompt = -llama_tokenize(vocab, full_prompt, strlen(full_prompt), NULL, 0, true, true); llama_token *prompt_tokens = (llama_token *)malloc((size_t)n_prompt * sizeof(llama_token)); @@ -127,8 +174,21 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp struct llama_sampler_chain_params sparams = llama_sampler_chain_default_params(); struct llama_sampler *smpl = llama_sampler_chain_init(sparams); + if (cfg->top_k > 0) { + llama_sampler_chain_add(smpl, llama_sampler_init_top_k(cfg->top_k)); + } + if (cfg->top_p > 0.0f && cfg->top_p < 1.0f) { + llama_sampler_chain_add(smpl, llama_sampler_init_top_p(cfg->top_p, 1)); + } + if (cfg->min_p > 0.0f) { + llama_sampler_chain_add(smpl, llama_sampler_init_min_p(cfg->min_p, 1)); + } + llama_sampler_chain_add(smpl, llama_sampler_init_penalties( + cfg->repeat_last_n, + cfg->repeat_penalty, + cfg->freq_penalty, + cfg->presence_penalty)); llama_sampler_chain_add(smpl, llama_sampler_init_temp(cfg->temperature)); - llama_sampler_chain_add(smpl, llama_sampler_init_min_p(cfg->min_p, 1)); llama_sampler_chain_add(smpl, llama_sampler_init_dist(cfg->seed)); struct llama_batch batch = llama_batch_get_one(prompt_tokens, n_prompt); @@ -191,15 +251,12 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp log_message(stderr, LOG_ERROR, "Failed to convert token to piece"); break; } - int stop_at = n; - for (int i = 0; i < n; i++) { - if (buf[i] == '\n') { - stop_at = i; - break; - } + if (out_len == 0 && n > 0 && buf[0] == '\n') { + batch = llama_batch_get_one(&new_token_id, 1); + continue; } - if (out_len + (size_t)stop_at + 1 > out_cap) { - while (out_len + (size_t)stop_at + 1 > out_cap) { + if (out_len + (size_t)n + 1 > out_cap) { + while (out_len + (size_t)n + 1 > out_cap) { out_cap *= 2; } char *next = (char *)realloc(out, out_cap); @@ -209,14 +266,10 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp } out = next; } - memcpy(out + out_len, buf, (size_t)stop_at); - out_len += (size_t)stop_at; + memcpy(out + out_len, buf, (size_t)n); + out_len += (size_t)n; out[out_len] = '\0'; - if (stop_at != n) { - break; - } - batch = llama_batch_get_one(&new_token_id, 1); } @@ -241,13 +294,15 @@ int main(int argc, char **argv) { const char *prompt = NULL; const char *context_file = NULL; int verbose = 0; + const char *embed_model_name = NULL; - int n_predict = 64; + int n_predict = 0; static struct option long_options[] = { {"model", required_argument, 0, 'm'}, {"prompt", required_argument, 0, 'p'}, {"context", required_argument, 0, 'c'}, + {"embed-model", required_argument, 0, 'e'}, {"list", no_argument, 0, 'l'}, {"verbose", no_argument, 0, 'v'}, {"help", no_argument, 0, 'h'}, @@ -256,7 +311,7 @@ int main(int argc, char **argv) { int opt; int option_index = 0; - while ((opt = getopt_long(argc, argv, "m:p:c:lvh", long_options, &option_index)) != -1) { + while ((opt = getopt_long(argc, argv, "m:p:c:e:lvh", long_options, &option_index)) != -1) { switch (opt) { case 'm': model_name = optarg; @@ -267,6 +322,9 @@ int main(int argc, char **argv) { case 'c': context_file = optarg; break; + case 'e': + embed_model_name = optarg; + break; case 'v': verbose = 1; break; @@ -320,7 +378,29 @@ int main(int argc, char **argv) { cfg = &models[0]; } - struct llama_model *model = llama_model_load_from_file(cfg->filepath, llama_model_default_params()); + const ModelConfig *embed_cfg = NULL; + if (embed_model_name != NULL) { + embed_cfg = get_model_by_name(embed_model_name); + if (embed_cfg == NULL) { + log_message(stderr, LOG_ERROR, "Unknown embedding model '%s'", embed_model_name); + llama_backend_free(); + return 1; + } + } else if (cfg->embed_model_name != NULL) { + embed_cfg = get_model_by_name(cfg->embed_model_name); + } + if (embed_cfg == NULL) { + embed_cfg = cfg; + } + + if (n_predict <= 0) { + n_predict = cfg->n_predict > 0 ? cfg->n_predict : 128; + } + + struct llama_model_params embed_params = llama_model_default_params(); + embed_params.n_gpu_layers = embed_cfg->n_gpu_layers; + embed_params.use_mmap = embed_cfg->use_mmap; + struct llama_model *model = llama_model_load_from_file(embed_cfg->filepath, embed_params); if (model == NULL) { log_message(stderr, LOG_ERROR, "Unable to load embedding model"); llama_backend_free(); @@ -328,6 +408,8 @@ int main(int argc, char **argv) { } struct llama_context_params cparams = llama_context_default_params(); + cparams.n_ctx = embed_cfg->n_ctx; + cparams.n_batch = embed_cfg->n_batch; cparams.embeddings = true; struct llama_context *embed_ctx = llama_init_from_model(model, cparams); @@ -350,10 +432,13 @@ int main(int argc, char **argv) { } float query[VDB_EMBED_SIZE]; - int results[3]; + int results[5]; + for (int i = 0; i < 5; i++) { + results[i] = -1; + } vdb_embed_query(&db, prompt, query); - vdb_search(&db, query, 3, results); + vdb_search(&db, query, 5, results); size_t context_cap = 1024; size_t context_len = 0; @@ -367,13 +452,15 @@ int main(int argc, char **argv) { } context[0] = '\0'; - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 5; i++) { if (results[i] < 0) { continue; } const char *text = db.docs[results[i]].text; + char header[32]; + int header_len = snprintf(header, sizeof(header), "Snippet %d:\n", i + 1); size_t text_len = strlen(text); - size_t need = context_len + text_len + 2; + size_t need = context_len + (size_t)header_len + text_len + 2; if (need > context_cap) { while (need > context_cap) { context_cap *= 2; @@ -389,6 +476,10 @@ int main(int argc, char **argv) { } context = next; } + if (header_len > 0) { + memcpy(context + context_len, header, (size_t)header_len); + context_len += (size_t)header_len; + } memcpy(context + context_len, text, text_len); context_len += text_len; context[context_len++] = '\n'; |
