summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md34
-rw-r--r--context.c7
-rw-r--r--corpus/map1_bromm.txt42
-rw-r--r--corpus/map1_dagna.txt42
-rw-r--r--corpus/map1_keldor.txt41
-rw-r--r--corpus/map1_skara.txt42
-rw-r--r--corpus/map1_thrain.txt42
-rw-r--r--game.c860
-rw-r--r--maps.h11
-rw-r--r--models.h92
-rw-r--r--models.txt1
-rw-r--r--npc.c159
12 files changed, 1186 insertions, 187 deletions
diff --git a/README.md b/README.md
index 7c5e7f7..6619175 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ Goals of the experiment:
make build/context
make build/prompts
make build/npc
+ make build/game
```
## Usage
@@ -46,21 +47,31 @@ Goals of the experiment:
### Build a vector context database
`context` reads a text file (one document per line), embeds each line, and
-produces a binary vector database file.
+produces a binary vector database file. For best results, use a dedicated
+embedding model (for example, `qwen3`) even if you generate answers with a
+different model.
```bash
-./context -i corpus/lotr.txt -o corpus/lotr.vdb
-./context -m flan-t5-small -i corpus/lotr.txt -o corpus/lotr.vdb
+./context -m qwen3 -i corpus/lotr.txt -o corpus/lotr.vdb
```
### Run an NPC query with retrieved context
-`npc` loads a vector database, embeds the prompt, selects the top 3 matching
+`npc` loads a vector database, embeds the prompt, selects the top 5 matching
lines by cosine similarity, and runs the NPC system prompt against that context.
+You can pass a separate embedding model with `-e`/`--embed-model`.
```bash
-./npc -m flan-t5-small -p "Who is Gandalf?" -c corpus/lotr.vdb
-./npc -m flan-t5-small -p "Who is Frodo?" -c corpus/lotr.vdb
+./npc -m phi-4-mini-instruct -e qwen3 -p "Who is Gandalf?" -c corpus/lotr.vdb
+./npc -m qwen3 -e qwen3 -p "Who is Frodo?" -c corpus/lotr.vdb
+```
+
+### Run the game
+
+The game uses the same models and retrieval pipeline, with short NPC replies.
+
+```bash
+./game -m phi-4-mini-instruct -e qwen3
```
### context options
@@ -79,12 +90,22 @@ lines by cosine similarity, and runs the NPC system prompt against that context.
| Flag | Description |
|------|-------------|
| `-m, --model` | Model to use (required) |
+| `-e, --embed-model` | Embedding model to use (optional) |
| `-p, --prompt` | Prompt text (required) |
| `-c, --context` | Context vector database file (.vdb) (required) |
| `-l, --list` | List available models |
| `-v, --verbose` | Enable llama.cpp logging |
| `-h, --help` | Show help message |
+### game options
+
+| Flag | Description |
+|------|-------------|
+| `-m, --model` | Model to use (default: first model in config) |
+| `-e, --embed-model` | Embedding model to use (optional) |
+| `-v, --verbose` | Enable llama.cpp logging |
+| `-h, --help` | Show help message |
+
## Models
Configure models in `models.h`. The default model is the first entry in the
@@ -115,3 +136,4 @@ make run/clean
## Reading material
- https://www.tinyllm.org/
+- https://en.wikipedia.org/wiki/Cosine_similarity
diff --git a/context.c b/context.c
index e7aa0cf..66b8cc2 100644
--- a/context.c
+++ b/context.c
@@ -115,7 +115,10 @@ int main(int argc, char **argv) {
cfg = &models[0];
}
- struct llama_model *model = llama_model_load_from_file(cfg->filepath, llama_model_default_params());
+ struct llama_model_params model_params = llama_model_default_params();
+ model_params.n_gpu_layers = cfg->n_gpu_layers;
+ model_params.use_mmap = cfg->use_mmap;
+ struct llama_model *model = llama_model_load_from_file(cfg->filepath, model_params);
if (model == NULL) {
log_message(stderr, LOG_ERROR, "Unable to load embedding model");
llama_backend_free();
@@ -123,6 +126,8 @@ int main(int argc, char **argv) {
}
struct llama_context_params cparams = llama_context_default_params();
+ cparams.n_ctx = cfg->n_ctx;
+ cparams.n_batch = cfg->n_batch;
cparams.embeddings = true;
struct llama_context *embed_ctx = llama_init_from_model(model, cparams);
diff --git a/corpus/map1_bromm.txt b/corpus/map1_bromm.txt
index 9d2f355..b476151 100644
--- a/corpus/map1_bromm.txt
+++ b/corpus/map1_bromm.txt
@@ -1,18 +1,24 @@
-Bromm is a dwarf stonemason who measures walls by touch and sound.
-Bromm keeps a black slate with chalk marks for every safe path in the ruins.
-Bromm believes the northern ruins predate the current road by two eras.
-Bromm once found a bronze hinge in the ruins and still carries it for luck.
-Bromm teaches travelers how to test a stone by tapping for a hollow ring.
-Bromm mistrusts quick repairs and prefers heavy timber bracing.
-Bromm says the ruins smell of old lime and wet ash after rain.
-Bromm trades small carvings for dried meat and lamp oil.
-Bromm thinks the marsh lights are reflections from a buried lens.
-Bromm has a friendly rivalry with Dagna about whose warnings are wiser.
-Bromm keeps his beard braided with a single iron bead from his clan.
-Bromm wants to map every chamber in the north before winter.
-Bromm is soft spoken but grows excited when discussing arches.
-Bromm believes the ruins hide a collapsed stair with carved runes.
-Bromm says the safest approach is to enter at dawn and leave by noon.
-Bromm can describe three alternate routes to avoid the broken bridge.
-Bromm worries that careless digging will wake something that sleeps in stone.
-Bromm asks visitors if they have seen mason marks shaped like a trident.
+I am Bromm, a dwarf stonemason who measures walls by touch and sound.
+My name is Bromm.
+Folks call me Bromm.
+You can call me Bromm.
+If you ask my name, I will answer Bromm.
+I measure walls by touch and sound.
+I keep a black slate with chalk marks for every safe path in the ruins.
+I believe the northern ruins predate the current road by two eras.
+I once found a bronze hinge in the ruins and still carry it for luck.
+I teach travelers how to test a stone by tapping for a hollow ring.
+I mistrust quick repairs and prefer heavy timber bracing.
+I say the ruins smell of old lime and wet ash after rain.
+I trade small carvings for dried meat and lamp oil.
+I think the marsh lights are reflections from a buried lens.
+I have a friendly rivalry with Dagna about whose warnings are wiser.
+I keep my beard braided with a single iron bead from my clan.
+I want to map every chamber in the north before winter.
+I am soft spoken but grow excited when discussing arches.
+I believe the ruins hide a collapsed stair with carved runes.
+I say the safest approach is to enter at dawn and leave by noon.
+I can describe three alternate routes to avoid the broken bridge.
+I worry that careless digging will wake something that sleeps in stone.
+I ask visitors if they have seen mason marks shaped like a trident.
+I keep notes of every cracked lintel I pass.
diff --git a/corpus/map1_dagna.txt b/corpus/map1_dagna.txt
index cc80a68..a79c347 100644
--- a/corpus/map1_dagna.txt
+++ b/corpus/map1_dagna.txt
@@ -1,18 +1,24 @@
-Dagna is a dwarf well-keeper who knows every bucket and rope in the village.
-Dagna believes the well is safe because the water tastes of iron, not rot.
-Dagna keeps a ledger of how much water each household draws in a week.
-Dagna replaced the well crank with a dwarven gear she forged herself.
-Dagna says the well has a second shaft sealed by a stone plug.
-Dagna once pulled up a smooth glass bead that does not scratch.
-Dagna offers travelers a cup of water and a blunt warning about haste.
-Dagna can name every herb that grows within ten paces of the well.
-Dagna suspects the marsh lights are bait for thieves.
-Dagna thinks Bromm worries too much about the ruins and not enough about the road.
-Dagna keeps a small shrine to the Deep Mother near the well wall.
-Dagna loves riddles and answers only after a trade of facts.
-Dagna claims the well water calms fever if boiled with bitterroot.
-Dagna dislikes gossip but listens closely for news of caravans.
-Dagna believes a hidden aquifer feeds the village from the northern hills.
-Dagna is saving for a brass pump to replace the old rope.
-Dagna can spot forged coin by the sound it makes on stone.
-Dagna asks travelers if they have seen a faint blue glow in deep water.
+I am Dagna, a dwarf well-keeper who knows every bucket and rope in the village.
+My name is Dagna.
+Folks call me Dagna.
+You can call me Dagna.
+If you ask my name, I will answer Dagna.
+I know every bucket and rope in the village.
+I believe the well is safe because the water tastes of iron, not rot.
+I keep a ledger of how much water each household draws in a week.
+I replaced the well crank with a dwarven gear I forged myself.
+I say the well has a second shaft sealed by a stone plug.
+I once pulled up a smooth glass bead that does not scratch.
+I offer travelers a cup of water and a blunt warning about haste.
+I can name every herb that grows within ten paces of the well.
+I suspect the marsh lights are bait for thieves.
+I think Bromm worries too much about the ruins and not enough about the road.
+I keep a small shrine to the Deep Mother near the well wall.
+I love riddles and answer only after a trade of facts.
+I claim the well water calms fever if boiled with bitterroot.
+I dislike gossip but listen closely for news of caravans.
+I believe a hidden aquifer feeds the village from the northern hills.
+I am saving for a brass pump to replace the old rope.
+I can spot forged coin by the sound it makes on stone.
+I ask travelers if they have seen a faint blue glow in deep water.
+I keep extra rope coiled under the well cover.
diff --git a/corpus/map1_keldor.txt b/corpus/map1_keldor.txt
index 5c918bd..7e7529e 100644
--- a/corpus/map1_keldor.txt
+++ b/corpus/map1_keldor.txt
@@ -1,18 +1,23 @@
-Keldor is a dwarf scout who watches the marsh from the old footpath.
-Keldor claims the lights in the marsh move in patterns like a slow dance.
-Keldor keeps a lantern hooded until the last moment to avoid drawing notice.
-Keldor believes the marsh hides a buried wagon sunk in peat.
-Keldor can follow frog calls to find the driest stepping stones.
-Keldor says the safest crossing is after three dry days, not two.
-Keldor carries a whistle tuned to a pitch only his hound can hear.
-Keldor tells stories of a pale heron that never casts a shadow.
-Keldor thinks the ruins and the marsh are linked by an old drainage tunnel.
-Keldor traded a silver button to learn a fisher's secret route.
-Keldor trusts Dagna's water but refuses to drink after midnight.
-Keldor marks his trail with tiny chips of white quartz.
-Keldor says the marsh lights went dark on the night the moon turned red.
-Keldor is curious about old maps and collects any scraps he finds.
-Keldor believes Bromm's trident mark is a warning, not a signature.
-Keldor is patient in silence but asks direct questions when pressed.
-Keldor wants proof that the marsh lights are not a signal to smugglers.
-Keldor asks travelers to describe any strange scents like bitter metal or smoke.
+I am Keldor, a dwarf scout who keeps watch on the marsh from the old footpath.
+My name is Keldor.
+Folks call me Keldor.
+You can call me Keldor.
+If you ask my name, I will answer Keldor.
+I am Keldor of the marsh paths.
+I have seen the marsh lights move in slow, dancing patterns.
+I keep my lantern hooded until the last moment so I do not draw notice.
+I believe the marsh hides a buried wagon sunk deep in peat.
+I can follow frog calls to find the driest stepping stones.
+I say the safest crossing is after three dry days, not two.
+I carry a whistle tuned to a pitch only my hound can hear.
+I tell stories of a pale heron that never casts a shadow.
+I think the ruins and the marsh are linked by an old drainage tunnel.
+I traded a silver button to learn a fisher's secret route.
+I trust Dagna's water but refuse to drink after midnight.
+I mark my trail with tiny chips of white quartz.
+I say the marsh lights went dark on the night the moon turned red.
+I am curious about old maps and I collect any scraps I find.
+I believe Bromm's trident mark is a warning, not a signature.
+I am patient in silence but ask direct questions when pressed.
+I want proof that the marsh lights are not a signal to smugglers.
+I ask travelers to describe any strange scents like bitter metal or smoke.
diff --git a/corpus/map1_skara.txt b/corpus/map1_skara.txt
index 00f77ff..21742a6 100644
--- a/corpus/map1_skara.txt
+++ b/corpus/map1_skara.txt
@@ -1,18 +1,24 @@
-Skara is a dwarf bell-ringer who keeps time for the village with a bronze handbell.
-Skara claims the fog carries echoes that belong to no bell in town.
-Skara keeps her bell clapper wrapped in cloth to avoid false rings.
-Skara believes the marsh hides an old shrine with a cracked chime.
-Skara can tell distance by the way sound bends in wet air.
-Skara remembers every funeral toll and writes the names in a small book.
-Skara warns travelers to avoid singing in the marsh after sunset.
-Skara thinks Keldor's lights might be signals from smugglers.
-Skara says Bromm once found a bell-shaped stone near the ruins.
-Skara trades stories for thin copper wire and beeswax.
-Skara is suspicious of mirrors and keeps hers covered.
-Skara believes Dagna's well water dulls the ringing in her ears.
-Skara says the bells in fog sound like chains, not bronze.
-Skara is gentle in speech but firm about her warnings.
-Skara wants to tune the village bell to a lower, steadier note.
-Skara can teach a simple knock code used by miners.
-Skara asks travelers if they have heard three rings with no pause.
-Skara says the marsh grows quiet just before the lights appear.
+I am Skara, a dwarf bell-ringer who keeps time for the village with a bronze handbell.
+My name is Skara.
+Folks call me Skara.
+You can call me Skara.
+If you ask my name, I will answer Skara.
+I keep time for the village with a bronze handbell.
+I claim the fog carries echoes that belong to no bell in town.
+I keep my bell clapper wrapped in cloth to avoid false rings.
+I believe the marsh hides an old shrine with a cracked chime.
+I can tell distance by the way sound bends in wet air.
+I remember every funeral toll and write the names in a small book.
+I warn travelers to avoid singing in the marsh after sunset.
+I think Keldor's lights might be signals from smugglers.
+I say Bromm once found a bell-shaped stone near the ruins.
+I trade stories for thin copper wire and beeswax.
+I am suspicious of mirrors and keep mine covered.
+I believe Dagna's well water dulls the ringing in my ears.
+I say the bells in fog sound like chains, not bronze.
+I am gentle in speech but firm about my warnings.
+I want to tune the village bell to a lower, steadier note.
+I can teach a simple knock code used by miners.
+I ask travelers if they have heard three rings with no pause.
+I say the marsh grows quiet just before the lights appear.
+I polish the bell with beeswax before each dusk round.
diff --git a/corpus/map1_thrain.txt b/corpus/map1_thrain.txt
index e7a79b7..65718a6 100644
--- a/corpus/map1_thrain.txt
+++ b/corpus/map1_thrain.txt
@@ -1,18 +1,24 @@
-Thrain is a dwarf bridge warden who inspects beams by listening for a low hum.
-Thrain keeps a pouch of pegs and wedges for emergency repairs.
-Thrain believes the old bridge was built by traders, not soldiers.
-Thrain marks safe planks with tiny chalk dots no one else notices.
-Thrain once saved a cart by spotting a hairline crack at dawn.
-Thrain says the river below turns louder right before a storm.
-Thrain trades advice for nails, tar, and braided rope.
-Thrain is skeptical of the marsh lights and calls them trick mirrors.
-Thrain respects Dagna's ledger and asks her for bridge traffic counts.
-Thrain thinks Bromm's trident mark is a builder's guild sign.
-Thrain keeps a small tin whistle for signaling across the span.
-Thrain fears rot more than storms and checks every joint twice.
-Thrain wants to replace the center beam with black oak from the hills.
-Thrain can point out a hidden ford two bends downstream.
-Thrain says the safest crossing is single file with steady steps.
-Thrain believes the bells in fog come from chains under the bridge.
-Thrain asks travelers if they have spare pitch or tar.
-Thrain is patient with questions but impatient with boasts.
+I am Thrain, a dwarf bridge warden who inspects beams by listening for a low hum.
+My name is Thrain.
+Folks call me Thrain.
+You can call me Thrain.
+If you ask my name, I will answer Thrain.
+I inspect beams by listening for a low hum.
+I keep a pouch of pegs and wedges for emergency repairs.
+I believe the old bridge was built by traders, not soldiers.
+I mark safe planks with tiny chalk dots no one else notices.
+I once saved a cart by spotting a hairline crack at dawn.
+I say the river below turns louder right before a storm.
+I trade advice for nails, tar, and braided rope.
+I am skeptical of the marsh lights and call them trick mirrors.
+I respect Dagna's ledger and ask her for bridge traffic counts.
+I think Bromm's trident mark is a builder's guild sign.
+I keep a small tin whistle for signaling across the span.
+I fear rot more than storms and check every joint twice.
+I want to replace the center beam with black oak from the hills.
+I can point out a hidden ford two bends downstream.
+I say the safest crossing is single file with steady steps.
+I believe the bells in fog come from chains under the bridge.
+I ask travelers if they have spare pitch or tar.
+I am patient with questions but impatient with boasts.
+I keep a tally of carts by the nicked beam on the south side.
diff --git a/game.c b/game.c
index f036fb2..13297c7 100644
--- a/game.c
+++ b/game.c
@@ -1,4 +1,8 @@
+#include <getopt.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
#define TB_IMPL
#include "termbox2.h"
@@ -6,6 +10,9 @@
#define NONSTD_IMPLEMENTATION
#include "nonstd.h"
+#include "llama.h"
+#include "models.h"
+#include "vectordb.h"
#include "maps.h"
#define MIN_W 40
@@ -61,12 +68,38 @@ typedef struct {
char input[128];
int input_len;
int npc_index;
+ const char *npc_name;
DialogEntry entries[DIALOG_HISTORY_MAX];
int entry_count;
} Dialog;
+typedef struct {
+ const ModelConfig *model_cfg;
+ struct llama_model *model;
+ struct llama_model *embed_model;
+ struct llama_context *embed_ctx;
+ VectorDB *npc_dbs;
+ int *npc_db_loaded;
+ int verbose;
+} GameRuntime;
+
+static void llama_log_callback(enum ggml_log_level level, const char *text, void *user_data) {
+ (void)level;
+ (void)user_data;
+ (void)text;
+}
+
static int clamp(int value, int min, int max);
+static void show_help(const char *prog) {
+ printf("Usage: %s [OPTIONS]\n", prog);
+ printf("Options:\n");
+ printf(" -m, --model <name> Specify model to use (default: first model)\n");
+ printf(" -e, --embed-model <name> Specify model to use for embeddings\n");
+ printf(" -v, --verbose Enable verbose logging\n");
+ printf(" -h, --help Show this help message\n");
+}
+
static void draw_border(int x, int y, int w, int h, uintattr_t fg) {
int ix;
int iy;
@@ -87,7 +120,7 @@ static void draw_border(int x, int y, int w, int h, uintattr_t fg) {
}
static void draw_border_bg(int x, int y, int w, int h, uintattr_t fg,
- uintattr_t bg) {
+ uintattr_t bg) {
int ix;
int iy;
@@ -107,8 +140,8 @@ static void draw_border_bg(int x, int y, int w, int h, uintattr_t fg,
}
static void get_layout(int w, int h, int *map_x, int *map_y, int *map_w,
- int *map_h, int *side_x, int *side_y, int *side_w, int *side_h,
- int *msg1_y, int *msg2_y) {
+ int *map_h, int *side_x, int *side_y, int *side_w, int *side_h,
+ int *msg1_y, int *msg2_y) {
*map_x = 0;
*map_y = 0;
*map_w = w - SIDEBAR_W;
@@ -226,7 +259,7 @@ static void map_free(Map *map) {
}
static void update_camera(const Map *map, int view_w, int view_h,
- const Player *player, int *cam_x, int *cam_y) {
+ const Player *player, int *cam_x, int *cam_y) {
int max_cam_x;
int max_cam_y;
int margin_x;
@@ -267,7 +300,7 @@ static void update_camera(const Map *map, int view_w, int view_h,
}
static void draw_map(const Map *map, int map_x, int map_y, int view_w,
- int view_h, const Player *player, int cam_x, int cam_y) {
+ int view_h, const Player *player, int cam_x, int cam_y) {
int ix;
int iy;
@@ -296,7 +329,7 @@ static void draw_map(const Map *map, int map_x, int map_y, int view_w,
}
if (player->x >= cam_x && player->x < cam_x + view_w && player->y >= cam_y
- && player->y < cam_y + view_h) {
+ && player->y < cam_y + view_h) {
int sx = map_x + (player->x - cam_x);
int sy = map_y + (player->y - cam_y);
tb_set_cell(sx, sy, '@', COLOR_GREEN_256 | TB_BOLD, TB_DEFAULT);
@@ -324,7 +357,7 @@ static void draw_progress_bar(int x, int y, int w, int value, int max) {
filled = (inner_w * value) / max;
tb_set_cell(x, y, '[', COLOR_WHITE_256, TB_DEFAULT);
for (ix = 0; ix < inner_w; ix++) {
- uintattr_t fg = ix < filled ? COLOR_GREEN_256 : COLOR_WHITE_256;
+ uintattr_t fg = ix < filled ? COLOR_GREEN_256 : COLOR_WHITE_256;
uint32_t ch = ix < filled ? '=' : ' ';
tb_set_cell(x + 1 + ix, y, ch, fg, TB_DEFAULT);
}
@@ -389,31 +422,127 @@ static void update_status(const char *message) {
status_msg = message ? message : "";
}
-static void copy_truncated(char *dst, size_t dst_size, const char *src, int max_chars) {
- int i = 0;
- if (dst_size == 0) {
- return;
+static int draw_wrapped(int x, int y, int max_lines, int box_w, uintattr_t fg,
+ uintattr_t bg, const char *prefix, const char *text) {
+ if (max_lines <= 0 || box_w <= 0 || text == NULL) {
+ return 0;
+ }
+ int lines = 0;
+ int prefix_len = prefix ? (int)strlen(prefix) : 0;
+ if (prefix_len < 0) {
+ prefix_len = 0;
+ }
+ int avail = box_w - 4 - prefix_len;
+ if (avail < 1) {
+ return 0;
+ }
+ char pad[64];
+ int pad_len = prefix_len < (int)sizeof(pad) - 1 ? prefix_len : (int)sizeof(pad) - 1;
+ for (int i = 0; i < pad_len; i++) {
+ pad[i] = ' ';
+ }
+ pad[pad_len] = '\0';
+ const char *p = text;
+ while (*p != '\0' && lines < max_lines) {
+ while (*p == ' ') {
+ p++;
+ }
+ int line_len = 0;
+ int last_space = -1;
+ for (int i = 0; i < avail && p[i] != '\0'; i++) {
+ if (p[i] == '\n') {
+ line_len = i;
+ break;
+ }
+ if (p[i] == ' ') {
+ last_space = i;
+ }
+ line_len = i + 1;
+ }
+ if (line_len == 0) {
+ break;
+ }
+ int cut = line_len;
+ if (cut == avail && p[cut] != '\0' && last_space > 0) {
+ cut = last_space;
+ }
+ char buf[512];
+ int copy_len = cut < (int)sizeof(buf) - 1 ? cut : (int)sizeof(buf) - 1;
+ memcpy(buf, p, (size_t)copy_len);
+ buf[copy_len] = '\0';
+ while (copy_len > 0 && buf[copy_len - 1] == ' ') {
+ buf[copy_len - 1] = '\0';
+ copy_len--;
+ }
+ const char *line_prefix = (lines == 0) ? (prefix ? prefix : "") : pad;
+ tb_printf(x, y + lines, fg, bg, "%s%s", line_prefix, buf);
+ lines++;
+ p += cut;
+ if (*p == '\n') {
+ p++;
+ }
+ }
+ return lines;
+}
+
+static int count_wrapped_lines(int box_w, const char *prefix, const char *text) {
+ if (box_w <= 0 || text == NULL) {
+ return 0;
+ }
+ int prefix_len = prefix ? (int)strlen(prefix) : 0;
+ if (prefix_len < 0) {
+ prefix_len = 0;
}
- if (max_chars < 0) {
- max_chars = 0;
+ int avail = box_w - 4 - prefix_len;
+ if (avail < 1) {
+ return 0;
}
- while (i < max_chars && src[i] != '\0' && i < (int)dst_size - 1) {
- dst[i] = src[i];
- i++;
+ int lines = 0;
+ const char *p = text;
+ while (*p != '\0') {
+ while (*p == ' ') {
+ p++;
+ }
+ int line_len = 0;
+ int last_space = -1;
+ for (int i = 0; i < avail && p[i] != '\0'; i++) {
+ if (p[i] == '\n') {
+ line_len = i;
+ break;
+ }
+ if (p[i] == ' ') {
+ last_space = i;
+ }
+ line_len = i + 1;
+ }
+ if (line_len == 0) {
+ break;
+ }
+ int cut = line_len;
+ if (cut == avail && p[cut] != '\0' && last_space > 0) {
+ cut = last_space;
+ }
+ lines++;
+ p += cut;
+ if (*p == '\n') {
+ p++;
+ }
}
- dst[i] = '\0';
+ return lines;
}
-static void dialog_open(Dialog *dialog, int npc_index) {
+static void dialog_open(Dialog *dialog, int npc_index, const char *npc_name) {
dialog->open = 1;
dialog->input_len = 0;
dialog->input[0] = '\0';
dialog->npc_index = npc_index;
+ dialog->npc_name = npc_name;
}
static void dialog_close(Dialog *dialog) {
dialog->open = 0;
dialog->npc_index = -1;
+ dialog->npc_name = NULL;
}
static void dialog_append(Dialog *dialog, uint32_t ch) {
@@ -435,19 +564,455 @@ static void dialog_backspace(Dialog *dialog) {
dialog->input[dialog->input_len] = '\0';
}
-static void dialog_submit(Dialog *dialog, const GameMap *game_map) {
+static void trim_leading(char **text) {
+ while (**text == ' ' || **text == '\t' || **text == '\n' || **text == '\r') {
+ (*text)++;
+ }
+}
+
+static void trim_leading_punct(char **text) {
+ while (**text == '"' || **text == '\'' || **text == '`') {
+ (*text)++;
+ trim_leading(text);
+ }
+}
+
+static void trim_trailing(char *text) {
+ size_t len = strlen(text);
+ while (len > 0) {
+ char ch = text[len - 1];
+ if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r') {
+ break;
+ }
+ text[len - 1] = '\0';
+ len--;
+ }
+}
+
+static void strip_any_prefix(char **text, const char *prefix) {
+ if (strncasecmp(*text, prefix, strlen(prefix)) == 0) {
+ *text += strlen(prefix);
+ trim_leading(text);
+ }
+}
+
+
+static char *sanitize_reply(char *reply, const char *name) {
+ if (reply == NULL) {
+ return NULL;
+ }
+ char *start = reply;
+ trim_leading(&start);
+ trim_leading_punct(&start);
+ strip_any_prefix(&start, "Answer:");
+ strip_any_prefix(&start, "NPC:");
+ strip_any_prefix(&start, "Context:");
+ strip_any_prefix(&start, "System:");
+ if (strncmp(start, "<context>", 9) == 0) {
+ start += 9;
+ trim_leading(&start);
+ }
+ char *reminder = strstr(start, "<system-reminder>");
+ if (reminder) {
+ *reminder = '\0';
+ }
+ char *system_tag = strstr(start, "<system");
+ if (system_tag) {
+ *system_tag = '\0';
+ }
+ char *tag = strstr(start, "<|");
+ if (tag) {
+ *tag = '\0';
+ }
+ char *eos = strstr(start, "</s>");
+ if (eos) {
+ *eos = '\0';
+ }
+ char *hash = strstr(start, "###");
+ if (hash) {
+ *hash = '\0';
+ }
+ if (name && name[0] != '\0') {
+ size_t name_len = strlen(name);
+ for (;;) {
+ if (strncasecmp(start, name, name_len) != 0) {
+ break;
+ }
+ start += name_len;
+ while (*start == ':' || *start == '-' || *start == ',') {
+ start++;
+ }
+ trim_leading(&start);
+ trim_leading_punct(&start);
+ }
+ }
+ if (start != reply) {
+ memmove(reply, start, strlen(start) + 1);
+ }
+ trim_trailing(reply);
+ return reply;
+}
+
+static int find_substr_offset(const char *buf, int n, const char *needle) {
+ int needle_len = (int)strlen(needle);
+ if (needle_len <= 0 || n <= 0 || needle_len > n) {
+ return -1;
+ }
+ for (int i = 0; i + needle_len <= n; i++) {
+ int match = 1;
+ for (int j = 0; j < needle_len; j++) {
+ if (buf[i + j] != needle[j]) {
+ match = 0;
+ break;
+ }
+ }
+ if (match) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+static int find_stop_offset(const char *buf, int n) {
+ int stop_at = n;
+ for (int i = 0; i < n; i++) {
+ if (buf[i] == '\n') {
+ stop_at = i;
+ break;
+ }
+ }
+ int off = find_substr_offset(buf, n, "</s>");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "<system-reminder>");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "<system");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "<|");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "###");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "System:");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "User:");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ off = find_substr_offset(buf, n, "Assistant:");
+ if (off >= 0 && off < stop_at) {
+ stop_at = off;
+ }
+ return stop_at;
+}
+
+static void append_prompt_context(stringb *sb, const char *npc_name, const char *context,
+ const char *question) {
+ sb_append_cstr(sb, "Context:\n");
+ if (npc_name && npc_name[0] != '\0') {
+ sb_append_cstr(sb, "NPC Name: ");
+ sb_append_cstr(sb, npc_name);
+ sb_append_cstr(sb, "\n");
+ }
+ if (context && context[0] != '\0') {
+ sb_append_cstr(sb, context);
+ }
+ sb_append_cstr(sb, "\nQuestion:\n");
+ sb_append_cstr(sb, question ? question : "");
+}
+
+static char *build_prompt(const ModelConfig *cfg, const char *system, const char *npc_name,
+ const char *context, const char *question) {
+ stringb full = {0};
+ sb_init(&full, 0);
+
+ switch (cfg->prompt_style) {
+ case PROMPT_STYLE_T5:
+ sb_append_cstr(&full, "instruction: ");
+ sb_append_cstr(&full, system ? system : "");
+ sb_append_cstr(&full, "\nquestion: ");
+ sb_append_cstr(&full, question ? question : "");
+ sb_append_cstr(&full, "\ncontext:\n");
+ if (npc_name && npc_name[0] != '\0') {
+ sb_append_cstr(&full, "NPC Name: ");
+ sb_append_cstr(&full, npc_name);
+ sb_append_cstr(&full, "\n");
+ }
+ if (context && context[0] != '\0') {
+ sb_append_cstr(&full, context);
+ }
+ sb_append_cstr(&full, "\nanswer:");
+ break;
+ case PROMPT_STYLE_CHAT:
+ sb_append_cstr(&full, "System:\n");
+ sb_append_cstr(&full, system ? system : "");
+ sb_append_cstr(&full, "\nUser:\n");
+ append_prompt_context(&full, npc_name, context, question);
+ sb_append_cstr(&full, "\nAssistant:");
+ break;
+ case PROMPT_STYLE_PLAIN:
+ default:
+ sb_append_cstr(&full, "System:\n");
+ sb_append_cstr(&full, system ? system : "");
+ sb_append_cstr(&full, "\n");
+ append_prompt_context(&full, npc_name, context, question);
+ sb_append_cstr(&full, "\nAnswer:");
+ break;
+ }
+
+ return full.data;
+}
+
+static char *generate_npc_reply(const GameRuntime *runtime, const GameMap *game_map,
+ int npc_index, const char *prompt) {
+ if (runtime == NULL || prompt == NULL) {
+ return NULL;
+ }
+ const char *fallback = "Demo reply: The old ruins are north of here.";
+ const char *npc_name = NULL;
+ if (game_map && npc_index >= 0 && npc_index < 10) {
+ const char *npc_reply = game_map->npcs[npc_index].reply;
+ npc_name = game_map->npcs[npc_index].name;
+ if (npc_reply && npc_reply[0] != '\0') {
+ fallback = npc_reply;
+ }
+ }
+
+ if (runtime->model == NULL || runtime->model_cfg == NULL || runtime->embed_ctx == NULL
+ || runtime->npc_dbs == NULL || runtime->npc_db_loaded == NULL) {
+ return strdup(fallback);
+ }
+ if (npc_index < 0 || npc_index >= 10 || runtime->npc_db_loaded[npc_index] == 0) {
+ return strdup(fallback);
+ }
+
+ VectorDB *db = &runtime->npc_dbs[npc_index];
+ float query[VDB_EMBED_SIZE];
+ int results[5];
+ for (int i = 0; i < 5; i++) {
+ results[i] = -1;
+ }
+ vdb_embed_query(db, prompt, query);
+ vdb_search(db, query, 5, results);
+
+ size_t context_cap = 1024;
+ size_t context_len = 0;
+ char *context = (char *)malloc(context_cap);
+ if (context == NULL) {
+ return strdup(fallback);
+ }
+ context[0] = '\0';
+ if (runtime->verbose) {
+ fprintf(stderr, "[npc] question: %s\n", prompt);
+ }
+ for (int i = 0; i < 5; i++) {
+ if (results[i] < 0) {
+ continue;
+ }
+ const char *text = db->docs[results[i]].text;
+ if (runtime->verbose) {
+ fprintf(stderr, "[npc] context[%d]: %s\n", i, text);
+ }
+ char header[32];
+ int header_len = snprintf(header, sizeof(header), "Snippet %d:\n", i + 1);
+ size_t text_len = strlen(text);
+ size_t need = context_len + (size_t)header_len + text_len + 2;
+ if (need > context_cap) {
+ while (need > context_cap) {
+ context_cap *= 2;
+ }
+ char *next = (char *)realloc(context, context_cap);
+ if (next == NULL) {
+ free(context);
+ return strdup(fallback);
+ }
+ context = next;
+ }
+ if (header_len > 0) {
+ memcpy(context + context_len, header, (size_t)header_len);
+ context_len += (size_t)header_len;
+ }
+ memcpy(context + context_len, text, text_len);
+ context_len += text_len;
+ context[context_len++] = '\n';
+ context[context_len] = '\0';
+ }
+
+ const char *system_prompt = "You are a helpful NPC. Speak in first person. "
+ "Use only the provided context. If the context does not contain the answer, say \"I don't know.\" "
+ "If asked your name, answer with the NPC Name from the context. "
+ "Do not mention context, system messages, or prompts. Reply with one short sentence.";
+
+ char *full_prompt = build_prompt(runtime->model_cfg, system_prompt, npc_name, context, prompt);
+ if (full_prompt == NULL) {
+ free(context);
+ return strdup(fallback);
+ }
+ free(context);
+
+ if (runtime->verbose) {
+ printf(">> %s\n", full_prompt);
+ }
+
+ const struct llama_vocab *vocab = llama_model_get_vocab(runtime->model);
+ int n_prompt = -llama_tokenize(vocab, full_prompt, strlen(full_prompt), NULL, 0, true, true);
+ llama_token *prompt_tokens = (llama_token *)malloc((size_t)n_prompt * sizeof(llama_token));
+ if (prompt_tokens == NULL) {
+ free(full_prompt);
+ return strdup(fallback);
+ }
+ if (llama_tokenize(vocab, full_prompt, strlen(full_prompt), prompt_tokens, n_prompt, true, true) < 0) {
+ free(full_prompt);
+ free(prompt_tokens);
+ return strdup(fallback);
+ }
+
+ struct llama_context_params ctx_params = llama_context_default_params();
+ ctx_params.n_ctx = runtime->model_cfg->n_ctx;
+ ctx_params.n_batch = runtime->model_cfg->n_batch;
+ ctx_params.embeddings = false;
+
+ struct llama_context *ctx = llama_init_from_model(runtime->model, ctx_params);
+ if (ctx == NULL) {
+ free(full_prompt);
+ free(prompt_tokens);
+ return strdup(fallback);
+ }
+
+ struct llama_sampler_chain_params sparams = llama_sampler_chain_default_params();
+ struct llama_sampler *smpl = llama_sampler_chain_init(sparams);
+ if (runtime->model_cfg->top_k > 0) {
+ llama_sampler_chain_add(smpl, llama_sampler_init_top_k(runtime->model_cfg->top_k));
+ }
+ if (runtime->model_cfg->top_p > 0.0f && runtime->model_cfg->top_p < 1.0f) {
+ llama_sampler_chain_add(smpl, llama_sampler_init_top_p(runtime->model_cfg->top_p, 1));
+ }
+ if (runtime->model_cfg->min_p > 0.0f) {
+ llama_sampler_chain_add(smpl, llama_sampler_init_min_p(runtime->model_cfg->min_p, 1));
+ }
+ llama_sampler_chain_add(smpl, llama_sampler_init_penalties(
+ runtime->model_cfg->repeat_last_n,
+ runtime->model_cfg->repeat_penalty,
+ runtime->model_cfg->freq_penalty,
+ runtime->model_cfg->presence_penalty));
+ llama_sampler_chain_add(smpl, llama_sampler_init_temp(runtime->model_cfg->temperature));
+ llama_sampler_chain_add(smpl, llama_sampler_init_dist(runtime->model_cfg->seed));
+
+ struct llama_batch batch = llama_batch_get_one(prompt_tokens, n_prompt);
+
+ if (llama_model_has_encoder(runtime->model)) {
+ if (llama_encode(ctx, batch)) {
+ llama_sampler_free(smpl);
+ free(full_prompt);
+ free(prompt_tokens);
+ llama_free(ctx);
+ return strdup(fallback);
+ }
+ llama_token decoder_start = llama_model_decoder_start_token(runtime->model);
+ if (decoder_start == LLAMA_TOKEN_NULL) {
+ decoder_start = llama_vocab_bos(vocab);
+ }
+ batch = llama_batch_get_one(&decoder_start, 1);
+ }
+
+ int n_pos = 0;
+ llama_token new_token_id;
+ size_t out_cap = 256;
+ size_t out_len = 0;
+ char *out = (char *)malloc(out_cap);
+ if (out == NULL) {
+ llama_sampler_free(smpl);
+ free(full_prompt);
+ free(prompt_tokens);
+ llama_free(ctx);
+ return strdup(fallback);
+ }
+ out[0] = '\0';
+ int n_predict = runtime->model_cfg->n_predict > 0 ? runtime->model_cfg->n_predict : 64;
+ if (n_predict > 64) {
+ n_predict = 64;
+ }
+ while (n_pos + batch.n_tokens < n_prompt + n_predict) {
+ if (llama_decode(ctx, batch)) {
+ break;
+ }
+ n_pos += batch.n_tokens;
+ new_token_id = llama_sampler_sample(smpl, ctx, -1);
+ if (llama_vocab_is_eog(vocab, new_token_id)) {
+ break;
+ }
+ char buf[128];
+ int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true);
+ if (n < 0) {
+ break;
+ }
+ int stop_at = find_stop_offset(buf, n);
+ if (out_len == 0 && stop_at == 0 && n > 0 && buf[0] == '\n') {
+ batch = llama_batch_get_one(&new_token_id, 1);
+ continue;
+ }
+ if (out_len + (size_t)stop_at + 1 > out_cap) {
+ while (out_len + (size_t)stop_at + 1 > out_cap) {
+ out_cap *= 2;
+ }
+ char *next = (char *)realloc(out, out_cap);
+ if (next == NULL) {
+ break;
+ }
+ out = next;
+ }
+ memcpy(out + out_len, buf, (size_t)stop_at);
+ out_len += (size_t)stop_at;
+ out[out_len] = '\0';
+ if (stop_at != n) {
+ break;
+ }
+ batch = llama_batch_get_one(&new_token_id, 1);
+ }
+
+ llama_sampler_free(smpl);
+ free(full_prompt);
+ free(prompt_tokens);
+ llama_free(ctx);
+
+ if (out_len == 0) {
+ free(out);
+ return strdup(fallback);
+ }
+ return out;
+}
+
+static void dialog_submit(Dialog *dialog, const GameMap *game_map, const GameRuntime *runtime) {
if (dialog->input_len == 0) {
return;
}
{
- const char *demo = "Demo reply: The old ruins are north of here.";
- const char *reply = demo;
+ const char *npc_name = NULL;
+ char *reply = generate_npc_reply(runtime, game_map, dialog->npc_index, dialog->input);
+ const char *fallback = "";
if (game_map && dialog->npc_index >= 0 && dialog->npc_index < 10) {
- const char *npc_reply = game_map->npcs[dialog->npc_index].reply;
- if (npc_reply && npc_reply[0] != '\0') {
- reply = npc_reply;
+ npc_name = game_map->npcs[dialog->npc_index].name;
+ fallback = game_map->npcs[dialog->npc_index].reply;
+ if (fallback == NULL) {
+ fallback = "";
}
}
+ reply = sanitize_reply(reply, npc_name);
+ if (reply == NULL || reply[0] == '\0') {
+ free(reply);
+ reply = NULL;
+ }
+ const char *reply_text = reply != NULL ? reply : fallback;
if (dialog->entry_count >= DIALOG_HISTORY_MAX) {
for (int i = 1; i < DIALOG_HISTORY_MAX; i++) {
dialog->entries[i - 1] = dialog->entries[i];
@@ -457,8 +1022,9 @@ static void dialog_submit(Dialog *dialog, const GameMap *game_map) {
snprintf(dialog->entries[dialog->entry_count].prompt,
sizeof(dialog->entries[dialog->entry_count].prompt), "%s", dialog->input);
snprintf(dialog->entries[dialog->entry_count].response,
- sizeof(dialog->entries[dialog->entry_count].response), "%s", reply);
+ sizeof(dialog->entries[dialog->entry_count].response), "%s", reply_text);
dialog->entry_count++;
+ free(reply);
}
dialog->input_len = 0;
dialog->input[0] = '\0';
@@ -479,7 +1045,7 @@ static void update_npc_status(const GameMap *game_map, int npc_index) {
}
static void render(const Map *map, const Player *player, int *cam_x,
- int *cam_y, int *out_view_w, int *out_view_h, const Dialog *dialog) {
+ int *cam_y, int *out_view_w, int *out_view_h, const Dialog *dialog) {
int w;
int h;
int map_x;
@@ -594,23 +1160,49 @@ static void render(const Map *map, const Player *player, int *cam_x,
if (max_text < 0) {
max_text = 0;
}
- int max_entries = max_lines / 2;
- int start = dialog->entry_count - max_entries;
+ int start = dialog->entry_count;
if (start < 0) {
start = 0;
}
+ int used_lines = 0;
+ for (int i = dialog->entry_count - 1; i >= 0; i--) {
+ const char *prompt_text = dialog->entries[i].prompt;
+ const char *response_text = dialog->entries[i].response;
+ const char *name = dialog->npc_name && dialog->npc_name[0] != '\0' ? dialog->npc_name : "NPC";
+ char prefix_you[16];
+ char prefix_npc[64];
+ snprintf(prefix_you, sizeof(prefix_you), "You: ");
+ snprintf(prefix_npc, sizeof(prefix_npc), "%s: ", name);
+ int need = count_wrapped_lines(box_w, prefix_you, prompt_text)
+ + count_wrapped_lines(box_w, prefix_npc, response_text);
+ if (used_lines + need > max_lines && used_lines > 0) {
+ break;
+ }
+ used_lines += need;
+ start = i;
+ if (used_lines >= max_lines) {
+ break;
+ }
+ }
for (int i = start; i < dialog->entry_count && line + 1 <= max_lines; i++) {
- char prompt_buf[128];
- char response_buf[256];
- copy_truncated(prompt_buf, sizeof(prompt_buf), dialog->entries[i].prompt, max_text);
- copy_truncated(response_buf, sizeof(response_buf), dialog->entries[i].response, max_text);
- if (line < max_lines) {
- tb_printf(box_x + 2, log_y + line, COLOR_WHITE_256, 19, "You: %s", prompt_buf);
- line++;
+ const char *prompt_text = dialog->entries[i].prompt;
+ const char *response_text = dialog->entries[i].response;
+ const char *name = dialog->npc_name && dialog->npc_name[0] != '\0' ? dialog->npc_name : "NPC";
+ char prefix_you[16];
+ char prefix_npc[64];
+ snprintf(prefix_you, sizeof(prefix_you), "You: ");
+ snprintf(prefix_npc, sizeof(prefix_npc), "%s: ", name);
+ int used = draw_wrapped(box_x + 2, log_y + line, max_lines - line, box_w,
+ COLOR_WHITE_256, 19, prefix_you, prompt_text);
+ line += used;
+ if (line >= max_lines) {
+ break;
}
- if (line < max_lines) {
- tb_printf(box_x + 2, log_y + line, COLOR_GREEN_256, 19, "NPC: %s", response_buf);
- line++;
+ used = draw_wrapped(box_x + 2, log_y + line, max_lines - line, box_w,
+ COLOR_GREEN_256, 19, prefix_npc, response_text);
+ line += used;
+ if (line >= max_lines) {
+ break;
}
}
@@ -641,17 +1233,71 @@ static int clamp(int value, int min, int max) {
return value;
}
-int main(void) {
+int main(int argc, char **argv) {
+ const char *model_name = NULL;
+ const char *embed_model_name = NULL;
+ const ModelConfig *model_cfg = NULL;
+ struct llama_model *embed_model = NULL;
+ struct llama_model *gen_model = NULL;
+ struct llama_context *embed_ctx = NULL;
+ int tb_ready = 0;
+ int llama_ready = 0;
+ int exit_code = 0;
+ int verbose = 0;
+
+ static struct option long_options[] = {
+ {"model", required_argument, 0, 'm'},
+ {"embed-model", required_argument, 0, 'e'},
+ {"verbose", no_argument, 0, 'v'},
+ {"help", no_argument, 0, 'h'},
+ {0, 0, 0, 0}
+ };
+
+ int opt;
+ int option_index = 0;
+ while ((opt = getopt_long(argc, argv, "m:e:vh", long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'm':
+ model_name = optarg;
+ break;
+ case 'e':
+ embed_model_name = optarg;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'h':
+ show_help(argv[0]);
+ return 0;
+ default:
+ fprintf(stderr, "Usage: %s [-m model] [-e embed-model] [-v] [-h]\n", argv[0]);
+ return 1;
+ }
+ }
+
+ if (model_name != NULL) {
+ model_cfg = get_model_by_name(model_name);
+ if (model_cfg == NULL) {
+ fprintf(stderr, "Unknown model '%s'\n", model_name);
+ return 1;
+ }
+ } else {
+ model_cfg = &models[0];
+ }
+
Player player = {0};
array(GameMap) maps;
GameMap map1 = {0};
GameMap *current_map = NULL;
+ VectorDB *npc_dbs = NULL;
+ int *npc_db_loaded = NULL;
int running = 1;
int view_w = 0;
int view_h = 0;
int cam_x = 0;
int cam_y = 0;
Dialog dialog = {0};
+ GameRuntime runtime = {0};
player_init(&player);
array_init(maps);
@@ -660,10 +1306,96 @@ int main(void) {
current_map = &maps.data[0];
map_init(&current_map->map, current_map->data, current_map->len);
+ if (verbose == 0) {
+ llama_log_set(llama_log_callback, NULL);
+ }
+
+ npc_dbs = (VectorDB *)calloc(10, sizeof(VectorDB));
+ npc_db_loaded = (int *)calloc(10, sizeof(int));
+ if (npc_dbs == NULL || npc_db_loaded == NULL) {
+ fprintf(stderr, "Failed to allocate NPC vector databases\n");
+ exit_code = 1;
+ goto cleanup;
+ }
+
+ llama_backend_init();
+ ggml_backend_load_all();
+ llama_ready = 1;
+ const ModelConfig *embed_cfg = NULL;
+ if (embed_model_name != NULL) {
+ embed_cfg = get_model_by_name(embed_model_name);
+ if (embed_cfg == NULL) {
+ fprintf(stderr, "Unknown embedding model '%s'\n", embed_model_name);
+ exit_code = 1;
+ goto cleanup;
+ }
+ } else if (model_cfg->embed_model_name != NULL) {
+ embed_cfg = get_model_by_name(model_cfg->embed_model_name);
+ }
+ if (embed_cfg == NULL) {
+ embed_cfg = model_cfg;
+ }
+
+ struct llama_model_params gen_params = llama_model_default_params();
+ gen_params.n_gpu_layers = model_cfg->n_gpu_layers;
+ gen_params.use_mmap = model_cfg->use_mmap;
+ gen_model = llama_model_load_from_file(model_cfg->filepath, gen_params);
+ if (gen_model == NULL) {
+ fprintf(stderr, "Unable to load generation model\n");
+ exit_code = 1;
+ goto cleanup;
+ }
+
+ struct llama_model_params embed_params = llama_model_default_params();
+ embed_params.n_gpu_layers = embed_cfg->n_gpu_layers;
+ embed_params.use_mmap = embed_cfg->use_mmap;
+ embed_model = llama_model_load_from_file(embed_cfg->filepath, embed_params);
+ if (embed_model == NULL) {
+ fprintf(stderr, "Unable to load embedding model\n");
+ exit_code = 1;
+ goto cleanup;
+ }
+
+ struct llama_context_params cparams = llama_context_default_params();
+ cparams.n_ctx = embed_cfg->n_ctx;
+ cparams.n_batch = embed_cfg->n_batch;
+ cparams.embeddings = true;
+ embed_ctx = llama_init_from_model(embed_model, cparams);
+ if (embed_ctx == NULL) {
+ fprintf(stderr, "Failed to create embedding context\n");
+ exit_code = 1;
+ goto cleanup;
+ }
+
+ for (int i = 0; i < 10; i++) {
+ const char *vdb_path = current_map->npcs[i].vdb_path;
+ if (vdb_path == NULL || vdb_path[0] == '\0') {
+ continue;
+ }
+ vdb_init(&npc_dbs[i], embed_ctx);
+ VectorDBErrorCode vdb_rc = vdb_load(&npc_dbs[i], vdb_path);
+ if (vdb_rc != VDB_SUCCESS) {
+ fprintf(stderr, "Failed to load vector database %s: %s\n", vdb_path, vdb_error(vdb_rc));
+ vdb_free(&npc_dbs[i]);
+ continue;
+ }
+ npc_db_loaded[i] = 1;
+ }
+
+ runtime.model_cfg = model_cfg;
+ runtime.model = gen_model;
+ runtime.embed_model = embed_model;
+ runtime.embed_ctx = embed_ctx;
+ runtime.npc_dbs = npc_dbs;
+ runtime.npc_db_loaded = npc_db_loaded;
+ runtime.verbose = verbose;
+
if (tb_init() != TB_OK) {
fprintf(stderr, "Failed to init termbox.\n");
- return 1;
+ exit_code = 1;
+ goto cleanup;
}
+ tb_ready = 1;
tb_set_input_mode(TB_INPUT_ESC);
tb_set_output_mode(TB_OUTPUT_256);
@@ -678,7 +1410,7 @@ int main(void) {
if (ev.key == TB_KEY_ESC) {
dialog_close(&dialog);
} else if (ev.key == TB_KEY_ENTER) {
- dialog_submit(&dialog, current_map);
+ dialog_submit(&dialog, current_map, &runtime);
} else if (ev.key == TB_KEY_BACKSPACE || ev.key == TB_KEY_BACKSPACE2) {
dialog_backspace(&dialog);
} else if (ev.ch) {
@@ -692,7 +1424,10 @@ int main(void) {
u32 target = map_get(&current_map->map, player.x, next_y);
int npc_index = npc_index_from_tile(target);
if (target == 'N' || npc_index >= 0) {
- dialog_open(&dialog, npc_index);
+ const char *npc_name = current_map && npc_index >= 0 && npc_index < 10
+ ? current_map->npcs[npc_index].name
+ : NULL;
+ dialog_open(&dialog, npc_index, npc_name);
update_npc_status(current_map, npc_index);
} else if (map_is_walkable(&current_map->map, player.x, next_y)) {
player.y = next_y;
@@ -702,7 +1437,10 @@ int main(void) {
u32 target = map_get(&current_map->map, player.x, next_y);
int npc_index = npc_index_from_tile(target);
if (target == 'N' || npc_index >= 0) {
- dialog_open(&dialog, npc_index);
+ const char *npc_name = current_map && npc_index >= 0 && npc_index < 10
+ ? current_map->npcs[npc_index].name
+ : NULL;
+ dialog_open(&dialog, npc_index, npc_name);
update_npc_status(current_map, npc_index);
} else if (map_is_walkable(&current_map->map, player.x, next_y)) {
player.y = next_y;
@@ -712,7 +1450,10 @@ int main(void) {
u32 target = map_get(&current_map->map, next_x, player.y);
int npc_index = npc_index_from_tile(target);
if (target == 'N' || npc_index >= 0) {
- dialog_open(&dialog, npc_index);
+ const char *npc_name = current_map && npc_index >= 0 && npc_index < 10
+ ? current_map->npcs[npc_index].name
+ : NULL;
+ dialog_open(&dialog, npc_index, npc_name);
update_npc_status(current_map, npc_index);
} else if (map_is_walkable(&current_map->map, next_x, player.y)) {
player.x = next_x;
@@ -722,7 +1463,10 @@ int main(void) {
u32 target = map_get(&current_map->map, next_x, player.y);
int npc_index = npc_index_from_tile(target);
if (target == 'N' || npc_index >= 0) {
- dialog_open(&dialog, npc_index);
+ const char *npc_name = current_map && npc_index >= 0 && npc_index < 10
+ ? current_map->npcs[npc_index].name
+ : NULL;
+ dialog_open(&dialog, npc_index, npc_name);
update_npc_status(current_map, npc_index);
} else if (map_is_walkable(&current_map->map, next_x, player.y)) {
player.x = next_x;
@@ -742,11 +1486,33 @@ int main(void) {
}
}
+cleanup:
player_free(&player);
for (size_t i = 0; i < maps.length; i++) {
map_free(&maps.data[i].map);
}
array_free(maps);
- tb_shutdown();
- return 0;
+ if (tb_ready) {
+ tb_shutdown();
+ }
+ for (int i = 0; i < 10; i++) {
+ if (npc_db_loaded && npc_db_loaded[i]) {
+ vdb_free(&npc_dbs[i]);
+ }
+ }
+ free(npc_db_loaded);
+ free(npc_dbs);
+ if (embed_ctx != NULL) {
+ llama_free(embed_ctx);
+ }
+ if (embed_model != NULL) {
+ llama_model_free(embed_model);
+ }
+ if (gen_model != NULL) {
+ llama_model_free(gen_model);
+ }
+ if (llama_ready) {
+ llama_backend_free();
+ }
+ return exit_code;
}
diff --git a/maps.h b/maps.h
index f3cd617..69096bf 100644
--- a/maps.h
+++ b/maps.h
@@ -16,6 +16,7 @@ typedef struct {
typedef struct {
const char *name;
const char *reply;
+ const char *vdb_path;
} NpcSettings;
typedef struct {
@@ -29,11 +30,11 @@ static inline GameMap make_map1(void) {
GameMap map = {0};
map.data = maps_map1_txt;
map.len = (int)maps_map1_txt_len;
- map.npcs[0] = (NpcSettings){.name = "Bromm", .reply = "Bromm: The old ruins are north of here."};
- map.npcs[1] = (NpcSettings){.name = "Dagna", .reply = "Dagna: The well is safe, mostly."};
- map.npcs[2] = (NpcSettings){.name = "Keldor", .reply = "Keldor: I saw lights in the marsh last night."};
- map.npcs[3] = (NpcSettings){.name = "Thrain", .reply = "Thrain: Mind the bridge; the beams sing when they're weak."};
- map.npcs[4] = (NpcSettings){.name = "Skara", .reply = "Skara: If you hear bells in the fog, turn back."};
+ map.npcs[0] = (NpcSettings){.name = "Bromm", .reply = "Bromm: The old ruins are north of here.", .vdb_path = "corpus/map1_bromm.vdb"};
+ map.npcs[1] = (NpcSettings){.name = "Dagna", .reply = "Dagna: The well is safe, mostly.", .vdb_path = "corpus/map1_dagna.vdb"};
+ map.npcs[2] = (NpcSettings){.name = "Keldor", .reply = "Keldor: I saw lights in the marsh last night.", .vdb_path = "corpus/map1_keldor.vdb"};
+ map.npcs[3] = (NpcSettings){.name = "Thrain", .reply = "Thrain: Mind the bridge; the beams sing when they're weak.", .vdb_path = "corpus/map1_thrain.vdb"};
+ map.npcs[4] = (NpcSettings){.name = "Skara", .reply = "Skara: If you hear bells in the fog, turn back.", .vdb_path = "corpus/map1_skara.vdb"};
return map;
}
diff --git a/models.h b/models.h
index e296971..f2f1bc8 100644
--- a/models.h
+++ b/models.h
@@ -5,55 +5,139 @@
#include <stddef.h>
#include <string.h>
+typedef enum {
+ PROMPT_STYLE_PLAIN = 0,
+ PROMPT_STYLE_CHAT = 1,
+ PROMPT_STYLE_T5 = 2,
+} PromptStyle;
+
typedef struct {
const char *name;
const char *filepath;
+ const char *embed_model_name;
int n_gpu_layers;
bool use_mmap;
int n_ctx;
int n_batch;
bool embeddings;
+ int n_predict;
float temperature;
float min_p;
+ int top_k;
+ float top_p;
+ int repeat_last_n;
+ float repeat_penalty;
+ float freq_penalty;
+ float presence_penalty;
uint32_t seed;
+ PromptStyle prompt_style;
} ModelConfig;
ModelConfig models[] = {
{
+ .name = "qwen3",
+ .filepath = "models/Qwen3-0.6B-UD-Q6_K_XL.gguf",
+ .embed_model_name = "qwen3",
+ .n_gpu_layers = 0,
+ .use_mmap = false,
+ .n_ctx = 2048,
+ .n_batch = 4096,
+ .embeddings = false,
+ .n_predict = 128,
+ .temperature = 0.6f,
+ .min_p = 0.05f,
+ .top_k = 40,
+ .top_p = 0.9f,
+ .repeat_last_n = 64,
+ .repeat_penalty = 1.1f,
+ .freq_penalty = 0.0f,
+ .presence_penalty = 0.0f,
+ .seed = LLAMA_DEFAULT_SEED,
+ .prompt_style = PROMPT_STYLE_CHAT,
+ },
+ {
+ .name = "tinyllama-1.1b",
+ .filepath = "models/tinyllama-1.1b.gguf",
+ .embed_model_name = "qwen3",
+ .n_gpu_layers = 0,
+ .use_mmap = false,
+ .n_ctx = 2048,
+ .n_batch = 4096,
+ .embeddings = false,
+ .n_predict = 128,
+ .temperature = 0.7f,
+ .min_p = 0.05f,
+ .top_k = 40,
+ .top_p = 0.9f,
+ .repeat_last_n = 64,
+ .repeat_penalty = 1.1f,
+ .freq_penalty = 0.0f,
+ .presence_penalty = 0.0f,
+ .seed = LLAMA_DEFAULT_SEED,
+ .prompt_style = PROMPT_STYLE_PLAIN,
+ },
+ {
.name = "tinyllama-1",
.filepath = "models/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf",
+ .embed_model_name = "qwen3",
.n_gpu_layers = 0,
.use_mmap = false,
.n_ctx = 2048,
.n_batch = 4096,
.embeddings = false,
- .temperature = 0.8f,
+ .n_predict = 128,
+ .temperature = 0.7f,
.min_p = 0.05f,
+ .top_k = 40,
+ .top_p = 0.9f,
+ .repeat_last_n = 64,
+ .repeat_penalty = 1.1f,
+ .freq_penalty = 0.0f,
+ .presence_penalty = 0.0f,
.seed = LLAMA_DEFAULT_SEED,
+ .prompt_style = PROMPT_STYLE_PLAIN,
},
{
.name = "flan-t5-small",
.filepath = "models/flan-t5-small.F16.gguf",
+ .embed_model_name = "qwen3",
.n_gpu_layers = 0,
.use_mmap = false,
.n_ctx = 512,
.n_batch = 512,
.embeddings = false,
- .temperature = 0.8f,
+ .n_predict = 128,
+ .temperature = 0.2f,
.min_p = 0.05f,
+ .top_k = 40,
+ .top_p = 0.9f,
+ .repeat_last_n = 64,
+ .repeat_penalty = 1.1f,
+ .freq_penalty = 0.0f,
+ .presence_penalty = 0.0f,
.seed = LLAMA_DEFAULT_SEED,
+ .prompt_style = PROMPT_STYLE_T5,
},
{
.name = "phi-4-mini-instruct",
.filepath = "models/Phi-4-mini-instruct.Q2_K.gguf",
+ .embed_model_name = "qwen3",
.n_gpu_layers = 0,
.use_mmap = false,
- .n_ctx = 131072,
+ .n_ctx = 4096,
.n_batch = 4096,
.embeddings = false,
- .temperature = 0.8f,
+ .n_predict = 128,
+ .temperature = 0.6f,
.min_p = 0.05f,
+ .top_k = 40,
+ .top_p = 0.9f,
+ .repeat_last_n = 64,
+ .repeat_penalty = 1.1f,
+ .freq_penalty = 0.0f,
+ .presence_penalty = 0.0f,
.seed = LLAMA_DEFAULT_SEED,
+ .prompt_style = PROMPT_STYLE_CHAT,
},
};
diff --git a/models.txt b/models.txt
index 3b7a3bd..186e84f 100644
--- a/models.txt
+++ b/models.txt
@@ -1,3 +1,4 @@
+https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-UD-Q6_K_XL.gguf
https://huggingface.co/Felladrin/gguf-flan-t5-small/resolve/main/flan-t5-small.F16.gguf
https://huggingface.co/MaziyarPanahi/Phi-4-mini-instruct-GGUF/resolve/main/Phi-4-mini-instruct.Q2_K.gguf
https://huggingface.co/andrijdavid/TinyLlama-1.1B-intermediate-step-1431k-3T-GGUF/resolve/main/TinyLlama-1.1B-intermediate-step-1431k-3T-Q2_K.gguf
diff --git a/npc.c b/npc.c
index 335eee7..6c8b089 100644
--- a/npc.c
+++ b/npc.c
@@ -1,7 +1,6 @@
#include "llama.h"
#include "vectordb.h"
#include "models.h"
-#include "models.h"
#define NONSTD_IMPLEMENTATION
#include "nonstd.h"
@@ -31,6 +30,7 @@ static void show_help(const char *prog) {
printf("Usage: %s [OPTIONS]\n", prog);
printf("Options:\n");
printf(" -m, --model <name> Specify model to use (default: first model)\n");
+ printf(" -e, --embed-model <name> Specify model to use for embeddings\n");
printf(" -p, --prompt <text> Specify prompt text (default: \"What is 2+2?\")\n");
printf(" -c, --context <file> Specify vector database file (.vdb)\n");
printf(" -l, --list Lists all available models\n");
@@ -48,7 +48,54 @@ static int has_vdb_extension(const char *path) {
return strcmp(path + (len - ext_len), ext) == 0;
}
-static int execute_prompt_with_context(const ModelConfig *cfg, const char *prompt, const char *context, int n_predict) {
+static void append_prompt_context(stringb *sb, const char *context, const char *question) {
+ sb_append_cstr(sb, "Context:\n");
+ if (context && context[0] != '\0') {
+ sb_append_cstr(sb, context);
+ }
+ sb_append_cstr(sb, "\nQuestion:\n");
+ sb_append_cstr(sb, question ? question : "");
+}
+
+static char *build_prompt(const ModelConfig *cfg, const char *system, const char *context,
+ const char *question) {
+ stringb full = {0};
+ sb_init(&full, 0);
+
+ switch (cfg->prompt_style) {
+ case PROMPT_STYLE_T5:
+ sb_append_cstr(&full, "instruction: ");
+ sb_append_cstr(&full, system ? system : "");
+ sb_append_cstr(&full, "\nquestion: ");
+ sb_append_cstr(&full, question ? question : "");
+ sb_append_cstr(&full, "\ncontext:\n");
+ if (context && context[0] != '\0') {
+ sb_append_cstr(&full, context);
+ }
+ sb_append_cstr(&full, "\nanswer:");
+ break;
+ case PROMPT_STYLE_CHAT:
+ sb_append_cstr(&full, "System:\n");
+ sb_append_cstr(&full, system ? system : "");
+ sb_append_cstr(&full, "\nUser:\n");
+ append_prompt_context(&full, context, question);
+ sb_append_cstr(&full, "\nAssistant:");
+ break;
+ case PROMPT_STYLE_PLAIN:
+ default:
+ sb_append_cstr(&full, "System:\n");
+ sb_append_cstr(&full, system ? system : "");
+ sb_append_cstr(&full, "\n");
+ append_prompt_context(&full, context, question);
+ sb_append_cstr(&full, "\nAnswer:");
+ break;
+ }
+
+ return full.data;
+}
+
+static int execute_prompt_with_context(const ModelConfig *cfg, const char *prompt,
+ const char *context, int n_predict) {
if (cfg == NULL) {
log_message(stderr, LOG_ERROR, "Model config is missing");
return 1;
@@ -76,21 +123,21 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp
const struct llama_vocab *vocab = llama_model_get_vocab(model);
- const char *context_prefix = "Context:\n";
- const char *prompt_prefix = "\n\nQuestion:\n";
- const char *answer_prefix = "\n\nAnswer:\n";
- size_t context_len = context ? strlen(context) : 0;
- size_t prompt_len = strlen(prompt);
- size_t full_len = strlen(system_prefix) + strlen(context_prefix) + context_len + strlen(prompt_prefix) + prompt_len + strlen(answer_prefix) + 1;
- char *full_prompt = (char *)malloc(full_len);
+ const char *system_text = system_prefix;
+ if (strncmp(system_prefix, "System:", 7) == 0) {
+ system_text = system_prefix + 7;
+ while (*system_text == ' ' || *system_text == '\n' || *system_text == '\r') {
+ system_text++;
+ }
+ }
+
+ char *full_prompt = build_prompt(cfg, system_text, context, prompt);
if (full_prompt == NULL) {
- log_message(stderr, LOG_ERROR, "Failed to allocate prompt buffer");
+ log_message(stderr, LOG_ERROR, "Failed to build prompt");
free(system_prefix);
llama_model_free(model);
return 1;
}
- snprintf(full_prompt, full_len, "%s%s%s%s%s", system_prefix, context_prefix, context ? context : "", prompt_prefix, prompt);
- strncat(full_prompt, answer_prefix, full_len - strlen(full_prompt) - 1);
int n_prompt = -llama_tokenize(vocab, full_prompt, strlen(full_prompt), NULL, 0, true, true);
llama_token *prompt_tokens = (llama_token *)malloc((size_t)n_prompt * sizeof(llama_token));
@@ -127,8 +174,21 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp
struct llama_sampler_chain_params sparams = llama_sampler_chain_default_params();
struct llama_sampler *smpl = llama_sampler_chain_init(sparams);
+ if (cfg->top_k > 0) {
+ llama_sampler_chain_add(smpl, llama_sampler_init_top_k(cfg->top_k));
+ }
+ if (cfg->top_p > 0.0f && cfg->top_p < 1.0f) {
+ llama_sampler_chain_add(smpl, llama_sampler_init_top_p(cfg->top_p, 1));
+ }
+ if (cfg->min_p > 0.0f) {
+ llama_sampler_chain_add(smpl, llama_sampler_init_min_p(cfg->min_p, 1));
+ }
+ llama_sampler_chain_add(smpl, llama_sampler_init_penalties(
+ cfg->repeat_last_n,
+ cfg->repeat_penalty,
+ cfg->freq_penalty,
+ cfg->presence_penalty));
llama_sampler_chain_add(smpl, llama_sampler_init_temp(cfg->temperature));
- llama_sampler_chain_add(smpl, llama_sampler_init_min_p(cfg->min_p, 1));
llama_sampler_chain_add(smpl, llama_sampler_init_dist(cfg->seed));
struct llama_batch batch = llama_batch_get_one(prompt_tokens, n_prompt);
@@ -191,15 +251,12 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp
log_message(stderr, LOG_ERROR, "Failed to convert token to piece");
break;
}
- int stop_at = n;
- for (int i = 0; i < n; i++) {
- if (buf[i] == '\n') {
- stop_at = i;
- break;
- }
+ if (out_len == 0 && n > 0 && buf[0] == '\n') {
+ batch = llama_batch_get_one(&new_token_id, 1);
+ continue;
}
- if (out_len + (size_t)stop_at + 1 > out_cap) {
- while (out_len + (size_t)stop_at + 1 > out_cap) {
+ if (out_len + (size_t)n + 1 > out_cap) {
+ while (out_len + (size_t)n + 1 > out_cap) {
out_cap *= 2;
}
char *next = (char *)realloc(out, out_cap);
@@ -209,14 +266,10 @@ static int execute_prompt_with_context(const ModelConfig *cfg, const char *promp
}
out = next;
}
- memcpy(out + out_len, buf, (size_t)stop_at);
- out_len += (size_t)stop_at;
+ memcpy(out + out_len, buf, (size_t)n);
+ out_len += (size_t)n;
out[out_len] = '\0';
- if (stop_at != n) {
- break;
- }
-
batch = llama_batch_get_one(&new_token_id, 1);
}
@@ -241,13 +294,15 @@ int main(int argc, char **argv) {
const char *prompt = NULL;
const char *context_file = NULL;
int verbose = 0;
+ const char *embed_model_name = NULL;
- int n_predict = 64;
+ int n_predict = 0;
static struct option long_options[] = {
{"model", required_argument, 0, 'm'},
{"prompt", required_argument, 0, 'p'},
{"context", required_argument, 0, 'c'},
+ {"embed-model", required_argument, 0, 'e'},
{"list", no_argument, 0, 'l'},
{"verbose", no_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
@@ -256,7 +311,7 @@ int main(int argc, char **argv) {
int opt;
int option_index = 0;
- while ((opt = getopt_long(argc, argv, "m:p:c:lvh", long_options, &option_index)) != -1) {
+ while ((opt = getopt_long(argc, argv, "m:p:c:e:lvh", long_options, &option_index)) != -1) {
switch (opt) {
case 'm':
model_name = optarg;
@@ -267,6 +322,9 @@ int main(int argc, char **argv) {
case 'c':
context_file = optarg;
break;
+ case 'e':
+ embed_model_name = optarg;
+ break;
case 'v':
verbose = 1;
break;
@@ -320,7 +378,29 @@ int main(int argc, char **argv) {
cfg = &models[0];
}
- struct llama_model *model = llama_model_load_from_file(cfg->filepath, llama_model_default_params());
+ const ModelConfig *embed_cfg = NULL;
+ if (embed_model_name != NULL) {
+ embed_cfg = get_model_by_name(embed_model_name);
+ if (embed_cfg == NULL) {
+ log_message(stderr, LOG_ERROR, "Unknown embedding model '%s'", embed_model_name);
+ llama_backend_free();
+ return 1;
+ }
+ } else if (cfg->embed_model_name != NULL) {
+ embed_cfg = get_model_by_name(cfg->embed_model_name);
+ }
+ if (embed_cfg == NULL) {
+ embed_cfg = cfg;
+ }
+
+ if (n_predict <= 0) {
+ n_predict = cfg->n_predict > 0 ? cfg->n_predict : 128;
+ }
+
+ struct llama_model_params embed_params = llama_model_default_params();
+ embed_params.n_gpu_layers = embed_cfg->n_gpu_layers;
+ embed_params.use_mmap = embed_cfg->use_mmap;
+ struct llama_model *model = llama_model_load_from_file(embed_cfg->filepath, embed_params);
if (model == NULL) {
log_message(stderr, LOG_ERROR, "Unable to load embedding model");
llama_backend_free();
@@ -328,6 +408,8 @@ int main(int argc, char **argv) {
}
struct llama_context_params cparams = llama_context_default_params();
+ cparams.n_ctx = embed_cfg->n_ctx;
+ cparams.n_batch = embed_cfg->n_batch;
cparams.embeddings = true;
struct llama_context *embed_ctx = llama_init_from_model(model, cparams);
@@ -350,10 +432,13 @@ int main(int argc, char **argv) {
}
float query[VDB_EMBED_SIZE];
- int results[3];
+ int results[5];
+ for (int i = 0; i < 5; i++) {
+ results[i] = -1;
+ }
vdb_embed_query(&db, prompt, query);
- vdb_search(&db, query, 3, results);
+ vdb_search(&db, query, 5, results);
size_t context_cap = 1024;
size_t context_len = 0;
@@ -367,13 +452,15 @@ int main(int argc, char **argv) {
}
context[0] = '\0';
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < 5; i++) {
if (results[i] < 0) {
continue;
}
const char *text = db.docs[results[i]].text;
+ char header[32];
+ int header_len = snprintf(header, sizeof(header), "Snippet %d:\n", i + 1);
size_t text_len = strlen(text);
- size_t need = context_len + text_len + 2;
+ size_t need = context_len + (size_t)header_len + text_len + 2;
if (need > context_cap) {
while (need > context_cap) {
context_cap *= 2;
@@ -389,6 +476,10 @@ int main(int argc, char **argv) {
}
context = next;
}
+ if (header_len > 0) {
+ memcpy(context + context_len, header, (size_t)header_len);
+ context_len += (size_t)header_len;
+ }
memcpy(context + context_len, text, text_len);
context_len += text_len;
context[context_len++] = '\n';