Simple Vector Database

Author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-13 03:29:25 +0100
Committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-02-13 03:29:25 +0100
Commit a1a595a3305727d30e16e856f4faf95980643e1c
-rw-r--r-- Dockerfile    3
-rw-r--r-- Makefile      6
-rw-r--r-- context.txt  33
-rw-r--r-- prompt.c    408
-rw-r--r-- vectordb.c   92
-rw-r--r-- vectordb.h   29
6 files changed, 515 insertions(+), 56 deletions(-)
diff --git a/Dockerfile b/Dockerfile
...
 RUN apt-get install -y libstdc++6
 
 COPY prompt /app/prompt
+COPY context.txt /app/context.txt
 COPY models/ /app/models/
 
-# ENTRYPOINT ["bash"]
+ENTRYPOINT ["bash"]
diff --git a/Makefile b/Makefile
...
 
 help: .help
 
-prompt: prompt.c models.h # Build prompt binary for testing
-	$(CC) $(CFLAGS) prompt.c -o prompt $(LDFLAGS)
+prompt: prompt.c vectordb.c models.h # Build prompt binary for testing
+	$(CC) $(CFLAGS) prompt.c vectordb.c -o prompt $(LDFLAGS)
 
 llamacpp: .assure # Build llama.cpp libraries
 	mkdir $(LLAMA_DIR)/build && \
...
 
 docker: .assure # Runs prompt in Docker container
 	docker build -t promptd .
-	docker run -it promptd bash
+	docker run -it promptd
 
 clean: # Cleans up all the build artefacts
 	-rm -f prompt
...
diff --git a/context.txt b/context.txt
-Gandalf: wizard, Lord of the Rings, grey beard, staff, Istari, Grey Pilgrim, Mithrandir, fought Sauron, helped destroy One Ring.
-
-Frodo: hobbit, Lord of the Rings, Bilbo's nephew, Shire, carried One Ring to Mount Doom, Fellowship of the Ring.
-
-Example: Who is Gandalf? Gandalf is a wizard from The Lord of the Rings.
-Example: Who is Frodo? Frodo is a hobbit from The Lord of the Rings.
-Example: Who is Harry Potter? I don't have that information.
-
-Answer this question. Use only the facts from above. If unknown, say "I don't have that information." Just give the answer, no prefix:
+Gandalf is a wizard in The Lord of the Rings with a grey beard and a staff.
+Gandalf is one of the Istari and is called the Grey Pilgrim and Mithrandir.
+Gandalf fought Sauron and helped destroy the One Ring.
+Frodo Baggins is a hobbit in The Lord of the Rings and is Bilbo's nephew.
+Frodo is from the Shire and carried the One Ring to Mount Doom.
+Frodo is a member of the Fellowship of the Ring.
+Samwise Gamgee is a hobbit from the Shire in The Lord of the Rings.
+Samwise is Frodo's loyal companion and a member of the Fellowship of the Ring.
+Aragorn is a man in The Lord of the Rings and is known as Strider.
+Aragorn is a ranger, a leader of Men, and a member of the Fellowship of the Ring.
+Legolas is an elf in The Lord of the Rings and a skilled archer.
+Legolas is a member of the Fellowship of the Ring.
+Gimli is a dwarf in The Lord of the Rings and a warrior.
+Gimli is a member of the Fellowship of the Ring.
+Boromir is a man from Gondor in The Lord of the Rings.
+Boromir is a member of the Fellowship of the Ring.
+The One Ring is a powerful ring in The Lord of the Rings that was created by Sauron.
+The One Ring corrupts its bearer and must be destroyed in Mount Doom.
+Sauron is the Dark Lord in The Lord of the Rings and created the One Ring.
+Sauron is an enemy of the free peoples of Middle-earth.
+Mordor is the realm of Sauron in The Lord of the Rings and contains Mount Doom.
+Mount Doom is a volcano in Mordor in The Lord of the Rings where the One Ring was destroyed.
+The Shire is the homeland of hobbits in The Lord of the Rings and the home of Frodo and Samwise.
+Gondor is a kingdom of Men in The Lord of the Rings and the home of Boromir.
diff --git a/prompt.c b/prompt.c
 #include "llama.h"
+#include "vectordb.h"
 #include "models.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <getopt.h>
+#include <ctype.h>
 
-static void show_help(const char *prog) {
-    printf("Usage: %s [OPTIONS]\n", prog);
-    printf("Options:\n");
-    printf("  -m, --model <name>    Specify model to use (default: first model)\n");
-    printf("  -p, --prompt <text>   Specify prompt text (default: \"What is 2+2?\")\n");
-    printf("  -h, --help            Show this help message\n");
+#define MAX_TOKENS 512
+#define MAX_TOKEN_LEN 32
+
+static const char *refusal_text = "I don't have that information.";
+
+static void llama_log_callback(enum ggml_log_level level, const char *text, void *user_data) {
+    (void)level;
+    (void)user_data;
+    (void)text;
 }
 
-int main(int argc, char **argv) {
-    const char *model_name = NULL;
-    const char *prompt = NULL;
-
-    int n_predict = 64;
-
-    static struct option long_options[] = {
-        {"model", required_argument, 0, 'm'},
-        {"prompt", required_argument, 0, 'p'},
-        {"help", no_argument, 0, 'h'},
-        {0, 0, 0, 0}
+static int is_stopword(const char *token, size_t len) {
+    static const char *stopwords[] = {
+        "a", "an", "the", "is", "are", "was", "were", "of", "to", "in", "on",
+        "for", "with", "and", "or", "not", "if", "then", "else", "from", "by",
+        "as", "at", "it", "its", "this", "that", "these", "those", "who", "what",
+        "when", "where", "why", "how", "which", "about", "into", "over", "under",
+        "be", "been", "being", "do", "does", "did", "but", "so", "than"
     };
+    for (size_t i = 0; i < sizeof(stopwords) / sizeof(stopwords[0]); i++) {
+        if (strlen(stopwords[i]) == len && strncmp(stopwords[i], token, len) == 0) {
+            return 1;
+        }
+    }
+    return 0;
+}
 
-    int opt;
-    int option_index = 0;
-    while ((opt = getopt_long(argc, argv, "m:p:h", long_options, &option_index)) != -1) {
-        switch (opt) {
-            case 'm':
-                model_name = optarg;
-                break;
-            case 'p':
-                prompt = optarg;
+static int token_exists(char tokens[MAX_TOKENS][MAX_TOKEN_LEN], int count, const char *token) {
+    for (int i = 0; i < count; i++) {
+        if (strcmp(tokens[i], token) == 0) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static int collect_tokens(const char *text, char tokens[MAX_TOKENS][MAX_TOKEN_LEN]) {
+    int count = 0;
+    char buf[MAX_TOKEN_LEN];
+    int len = 0;
+    for (const unsigned char *p = (const unsigned char *)text; ; p++) {
+        if (isalnum(*p)) {
+            if (len < MAX_TOKEN_LEN - 1) {
+                buf[len++] = (char)tolower(*p);
+            }
+        } else {
+            if (len > 0) {
+                buf[len] = '\0';
+                if (len >= 4 && !is_stopword(buf, (size_t)len)) {
+                    if (!token_exists(tokens, count, buf) && count < MAX_TOKENS) {
+                        strncpy(tokens[count], buf, MAX_TOKEN_LEN - 1);
+                        tokens[count][MAX_TOKEN_LEN - 1] = '\0';
+                        count++;
+                    }
+                }
+                len = 0;
+            }
+            if (*p == '\0') {
                 break;
-            case 'h':
-                show_help(argv[0]);
-                return 0;
-            default:
-                fprintf(stderr, "Usage: %s [-m model] [-p prompt] [-h]\n", argv[0]);
-                return 1;
+            }
         }
     }
+    return count;
+}
 
-    if (prompt == NULL) {
-        printf("Prompt must be provided. Exiting...");
-        return 1;
+static int has_overlap(const char *a, const char *b) {
+    if (a == NULL || b == NULL) {
+        return 0;
     }
+    char tokens[MAX_TOKENS][MAX_TOKEN_LEN];
+    int token_count = collect_tokens(b, tokens);
+    if (token_count == 0) {
+        return 0;
+    }
+    char buf[MAX_TOKEN_LEN];
+    int len = 0;
+    for (const unsigned char *p = (const unsigned char *)a; ; p++) {
+        if (isalnum(*p)) {
+            if (len < MAX_TOKEN_LEN - 1) {
+                buf[len++] = (char)tolower(*p);
+            }
+        } else {
+            if (len > 0) {
+                buf[len] = '\0';
+                if (len >= 4 && !is_stopword(buf, (size_t)len)) {
+                    if (token_exists(tokens, token_count, buf)) {
+                        return 1;
+                    }
+                }
+                len = 0;
+            }
+            if (*p == '\0') {
+                break;
+            }
+        }
+    }
+    return 0;
+}
 
+static int execute_prompt(const char *model_name, const char *prompt, const char *context, int n_predict) {
     const model_config *cfg = NULL;
     if (model_name != NULL) {
         cfg = get_model_by_name(model_name);
...
         cfg = &models[0];
     }
 
+    if (!has_overlap(prompt, context)) {
+        printf("------------ Prompt: %s\n", prompt);
+        printf("------------ Response: %s\n", refusal_text);
+        return 0;
+    }
+
     ggml_backend_load_all();
 
     struct llama_model_params model_params = llama_model_default_params();
...
 
     const struct llama_vocab *vocab = llama_model_get_vocab(model);
 
-    int n_prompt = -llama_tokenize(vocab, prompt, strlen(prompt), NULL, 0, true, true);
+    const char *system_prefix = "System: Answer using only the Context. If the answer is not explicitly stated in Context, respond exactly: I don't have that information.\n\n";
+    const char *context_prefix = "Context:\n";
+    const char *prompt_prefix = "\n\nQuestion:\n";
+    const char *answer_prefix = "\n\nAnswer:\n";
+    size_t context_len = context ? strlen(context) : 0;
+    size_t prompt_len = strlen(prompt);
+    size_t full_len = strlen(system_prefix) + strlen(context_prefix) + context_len + strlen(prompt_prefix) + prompt_len + strlen(answer_prefix) + 1;
+    char *full_prompt = (char *)malloc(full_len);
+    if (full_prompt == NULL) {
+        fprintf(stderr, "Error: failed to allocate prompt buffer\n");
+        llama_model_free(model);
+        return 1;
+    }
+    snprintf(full_prompt, full_len, "%s%s%s%s%s", system_prefix, context_prefix, context ? context : "", prompt_prefix, prompt);
+    strncat(full_prompt, answer_prefix, full_len - strlen(full_prompt) - 1);
+
+    int n_prompt = -llama_tokenize(vocab, full_prompt, strlen(full_prompt), NULL, 0, true, true);
     llama_token *prompt_tokens = (llama_token *)malloc(n_prompt * sizeof(llama_token));
-    if (llama_tokenize(vocab, prompt, strlen(prompt), prompt_tokens, n_prompt, true, true) < 0) {
+    if (llama_tokenize(vocab, full_prompt, strlen(full_prompt), prompt_tokens, n_prompt, true, true) < 0) {
         fprintf(stderr, "Error: failed to tokenize the prompt\n");
+        free(full_prompt);
         free(prompt_tokens);
         llama_model_free(model);
         return 1;
...
     struct llama_context *ctx = llama_init_from_model(model, ctx_params);
     if (ctx == NULL) {
         fprintf(stderr, "Error: failed to create the llama_context\n");
+        free(full_prompt);
         free(prompt_tokens);
         llama_model_free(model);
         return 1;
...
     llama_sampler_chain_add(smpl, llama_sampler_init_dist(cfg->seed));
 
     struct llama_batch batch = llama_batch_get_one(prompt_tokens, n_prompt);
 
     if (llama_model_has_encoder(model)) {
         if (llama_encode(ctx, batch)) {
             fprintf(stderr, "Error: failed to encode prompt\n");
+            llama_sampler_free(smpl);
+            free(full_prompt);
+            free(prompt_tokens);
+            llama_free(ctx);
+            llama_model_free(model);
             return 1;
         }
 
...
         batch = llama_batch_get_one(&decoder_start, 1);
     }
 
-    printf("Prompt: %s\n", prompt);
-    printf("Response: ");
+    printf("------------ Prompt: %s\n", prompt);
+    printf("------------ Response: ");
     fflush(stdout);
 
     int n_pos = 0;
     llama_token new_token_id;
+    size_t out_cap = 256;
+    size_t out_len = 0;
+    char *out = (char *)malloc(out_cap);
+    if (out == NULL) {
+        fprintf(stderr, "Error: failed to allocate output buffer\n");
+        free(full_prompt);
+        free(prompt_tokens);
+        llama_sampler_free(smpl);
+        llama_free(ctx);
+        llama_model_free(model);
+        return 1;
+    }
+    out[0] = '\0';
 
     while (n_pos + batch.n_tokens < n_prompt + n_predict) {
         if (llama_decode(ctx, batch)) {
...
             fprintf(stderr, "Error: failed to convert token to piece\n");
             break;
         }
-        printf("%.*s", n, buf);
-        fflush(stdout);
+        int stop_at = n;
+        for (int i = 0; i < n; i++) {
+            if (buf[i] == '\n') {
+                stop_at = i;
+                break;
+            }
+        }
+        if (out_len + (size_t)stop_at + 1 > out_cap) {
+            while (out_len + (size_t)stop_at + 1 > out_cap) {
+                out_cap *= 2;
+            }
+            char *next = (char *)realloc(out, out_cap);
+            if (next == NULL) {
+                fprintf(stderr, "Error: failed to grow output buffer\n");
+                break;
+            }
+            out = next;
+        }
+        memcpy(out + out_len, buf, (size_t)stop_at);
+        out_len += (size_t)stop_at;
+        out[out_len] = '\0';
+
+        if (stop_at != n) {
+            break;
+        }
 
         batch = llama_batch_get_one(&new_token_id, 1);
     }
 
-    printf("\n");
+    if (!has_overlap(out, context)) {
+        strcpy(out, refusal_text);
+        out_len = strlen(out);
+    }
+
+    printf("%s\n", out);
 
+    free(full_prompt);
     free(prompt_tokens);
+    free(out);
     llama_sampler_free(smpl);
     llama_free(ctx);
     llama_model_free(model);
 
     return 0;
 }
+
+static char *generate_context(const char *model_name, const char *context_file, const char *prompt) {
+    FILE *context_fp = fopen(context_file, "r");
+    if (context_fp == NULL) {
+        fprintf(stderr, "Error: unable to open context file %s\n", context_file);
+        return NULL;
+    }
+
+    llama_backend_init();
+
+    const model_config *cfg = NULL;
+    if (model_name != NULL) {
+        cfg = get_model_by_name(model_name);
+        if (cfg == NULL) {
+            fprintf(stderr, "Error: unknown model '%s'\n", model_name);
+            fclose(context_fp);
+            llama_backend_free();
+            return NULL;
+        }
+    } else {
+        cfg = &models[0];
+    }
+
+    /* struct llama_model *model = llama_load_model_from_file(cfg->filepath, llama_model_default_params()); */
+    struct llama_model *model = llama_model_load_from_file(cfg->filepath, llama_model_default_params());
+    if (model == NULL) {
+        fprintf(stderr, "Error: unable to load embedding model\n");
+        fclose(context_fp);
+        llama_backend_free();
+        return NULL;
+    }
+
+    struct llama_context_params cparams = llama_context_default_params();
+    cparams.embeddings = true;
+
+    /* struct llama_context *embed_ctx = llama_new_context_with_model(model, cparams); */
+    struct llama_context *embed_ctx = llama_init_from_model(model, cparams);
+    if (embed_ctx == NULL) {
+        fprintf(stderr, "Error: failed to create embedding context\n");
+        llama_model_free(model);
+        fclose(context_fp);
+        llama_backend_free();
+        return NULL;
+    }
+
+    VectorDB db;
+    vdb_init(&db, embed_ctx);
+
+    char line[1024];
+    while (fgets(line, sizeof(line), context_fp) != NULL) {
+        size_t len = strlen(line);
+        while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
+            line[len - 1] = '\0';
+            len--;
+        }
+        if (len == 0) {
+            continue;
+        }
+        vdb_add_document(&db, line);
+    }
+
+    float query[VDB_EMBED_SIZE];
+    int results[3];
+
+    vdb_embed_query(&db, prompt, query);
+    vdb_search(&db, query, 3, results);
+
+    size_t context_cap = 1024;
+    size_t context_len = 0;
+    char *context = (char *)malloc(context_cap);
+    if (context == NULL) {
+        fprintf(stderr, "Error: failed to allocate context buffer\n");
+        fclose(context_fp);
+        llama_free(embed_ctx);
+        llama_model_free(model);
+        llama_backend_free();
+        return NULL;
+    }
+    context[0] = '\0';
+
+    for (int i = 0; i < 3; i++) {
+        if (results[i] < 0) {
+            continue;
+        }
+        const char *text = db.docs[results[i]].text;
+        size_t text_len = strlen(text);
+        size_t need = context_len + text_len + 2;
+        if (need > context_cap) {
+            while (need > context_cap) {
+                context_cap *= 2;
+            }
+            char *next = (char *)realloc(context, context_cap);
+            if (next == NULL) {
+                fprintf(stderr, "Error: failed to grow context buffer\n");
+                free(context);
+                fclose(context_fp);
+                llama_free(embed_ctx);
+                llama_model_free(model);
+                llama_backend_free();
+                return NULL;
+            }
+            context = next;
+        }
+        memcpy(context + context_len, text, text_len);
+        context_len += text_len;
+        context[context_len++] = '\n';
+        context[context_len] = '\0';
+    }
+
+    fclose(context_fp);
+    llama_free(embed_ctx);
+    llama_model_free(model);
+    llama_backend_free();
+
+    return context;
+}
+
+static void show_help(const char *prog) {
+    printf("Usage: %s [OPTIONS]\n", prog);
+    printf("Options:\n");
+    printf("  -m, --model <name>    Specify model to use (default: first model)\n");
+    printf("  -p, --prompt <text>   Specify prompt text (default: \"What is 2+2?\")\n");
+    printf("  -c, --context <text>  Specify context file\n");
+    printf("  -v, --verbose         Enable verbose logging\n");
+    printf("  -h, --help            Show this help message\n");
+}
+
+int main(int argc, char **argv) {
+    const char *model_name = NULL;
+    const char *prompt = NULL;
+    const char *context_file = NULL;
+    int verbose = 0;
+
+    int n_predict = 64;
+
+    static struct option long_options[] = {
+        {"model", required_argument, 0, 'm'},
+        {"prompt", required_argument, 0, 'p'},
+        {"context", required_argument, 0, 'c'},
+        {"verbose", no_argument, 0, 'v'},
+        {"help", no_argument, 0, 'h'},
+        {0, 0, 0, 0}
+    };
+
+    int opt;
+    int option_index = 0;
+    while ((opt = getopt_long(argc, argv, "m:p:c:vh", long_options, &option_index)) != -1) {
+        switch (opt) {
+            case 'm':
+                model_name = optarg;
+                break;
+            case 'p':
+                prompt = optarg;
+                break;
+            case 'c':
+                context_file = optarg;
+                break;
+            case 'v':
+                verbose = 1;
+                break;
+            case 'h':
+                show_help(argv[0]);
+                return 0;
+            default:
+                fprintf(stderr, "Usage: %s [-m model] [-p prompt] [-h]\n", argv[0]);
+                return 1;
+        }
+    }
+
+    if (verbose == 0) {
+        llama_log_set(llama_log_callback, NULL);
+    }
+
+    if (prompt == NULL) {
+        printf("Prompt must be provided. Exiting...");
+        return 1;
+    }
+
+    if (context_file == NULL) {
+        printf("Context file must be provided. Exiting...");
+        return 1;
+    }
+
+    char *context = generate_context(model_name, context_file, prompt);
+    if (context == NULL) {
+        return 1;
+    }
+
+    int rc = execute_prompt(model_name, prompt, context, n_predict);
+    free(context);
+    return rc;
+}
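
Aside: the has_overlap() guard above is plain keyword matching, not an embedding comparison. collect_tokens() lowercases each alphanumeric run, keeps only words of at least four characters that are not stopwords, and has_overlap() reports whether the question (and, after generation, the answer) shares at least one such keyword with the retrieved context. The standalone sketch below reimplements that rule so it can be compiled and tried without llama.cpp; it is an illustration, not part of the commit, and it uses an abbreviated stopword list and skips the deduplication that token_exists() performs.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Abbreviated stopword list; prompt.c carries a much longer one. */
static const char *stops[] = { "that", "this", "what", "when", "where", "which", "with", "from" };

static int is_stop(const char *w) {
    for (size_t i = 0; i < sizeof(stops) / sizeof(stops[0]); i++)
        if (strcmp(stops[i], w) == 0) return 1;
    return 0;
}

/* Same rule as prompt.c: a keyword is a lowercased alphanumeric run of
 * four or more characters that is not a stopword; two strings overlap
 * when they share at least one keyword. */
static int overlap(const char *a, const char *b) {
    char keys[64][32];
    int nkeys = 0;
    char buf[32];
    int len = 0;
    /* pass 1: collect the keywords of b */
    for (const unsigned char *p = (const unsigned char *)b; ; p++) {
        if (isalnum(*p)) {
            if (len < 31) buf[len++] = (char)tolower(*p);
        } else {
            buf[len] = '\0';
            if (len >= 4 && !is_stop(buf) && nkeys < 64) strcpy(keys[nkeys++], buf);
            len = 0;
            if (*p == '\0') break;
        }
    }
    /* pass 2: does a contain any of them? */
    len = 0;
    for (const unsigned char *p = (const unsigned char *)a; ; p++) {
        if (isalnum(*p)) {
            if (len < 31) buf[len++] = (char)tolower(*p);
        } else {
            buf[len] = '\0';
            if (len >= 4 && !is_stop(buf))
                for (int i = 0; i < nkeys; i++)
                    if (strcmp(keys[i], buf) == 0) return 1;
            len = 0;
            if (*p == '\0') break;
        }
    }
    return 0;
}

int main(void) {
    const char *ctx = "Gandalf is a wizard in The Lord of the Rings with a grey beard and a staff.";
    printf("%d\n", overlap("Who is Gandalf?", ctx));      /* 1: shares "gandalf" */
    printf("%d\n", overlap("Who is Harry Potter?", ctx)); /* 0: no shared keyword */
    return 0;
}

Against the shipped context.txt facts, "Who is Gandalf?" overlaps on the keyword gandalf, while "Who is Harry Potter?" shares no qualifying keyword, which is exactly the case routed to refusal_text.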
diff --git a/vectordb.c b/vectordb.c
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "llama.h"
+#include "vectordb.h"
+
+static float cosine_similarity(float *a, float *b, int n) {
+	float dot = 0, normA = 0, normB = 0;
+	for (int i = 0; i < n; i++) {
+		dot += a[i] * b[i];
+		normA += a[i] * a[i];
+		normB += b[i] * b[i];
+	}
+	return dot / (sqrtf(normA) * sqrtf(normB) + 1e-8f);
+}
+
+static void embed_text(struct llama_context *ctx, const char *text, float *out) {
+	llama_token tokens[512];
+	const struct llama_model *model = llama_get_model(ctx);
+	const struct llama_vocab *vocab = llama_model_get_vocab(model);
+	int n_tokens = llama_tokenize(
+			vocab,
+			text,
+			strlen(text),
+			tokens,
+			512,
+			true,
+			true
+			);
+	if (n_tokens < 0) {
+		return;
+	}
+
+	struct llama_batch batch = llama_batch_get_one(tokens, n_tokens);
+	llama_decode(ctx, batch);
+
+	const float *emb = llama_get_embeddings(ctx);
+	memcpy(out, emb, sizeof(float) * VDB_EMBED_SIZE);
+
+}
+
+void vdb_init(VectorDB *db, struct llama_context *embed_ctx) {
+	memset(db, 0, sizeof(VectorDB));
+	db->embed_ctx = embed_ctx;
+}
+
+void vdb_free(VectorDB *db) {
+	(void)db; // nothing yet (future persistence etc.)
+}
+
+void vdb_add_document(VectorDB *db, const char *text) {
+	if (db->count >= VDB_MAX_DOCS) {
+		printf("VectorDB full!\n");
+		return;
+	}
+
+	VectorDoc *doc = &db->docs[db->count++];
+	strncpy(doc->text, text, VDB_MAX_TEXT - 1);
+	doc->text[VDB_MAX_TEXT - 1] = 0;
+
+	printf("Embedding doc %d...\n", db->count);
+	embed_text(db->embed_ctx, text, doc->embedding);
+}
+
+void vdb_embed_query(VectorDB *db, const char *text, float *out_embedding) {
+	embed_text(db->embed_ctx, text, out_embedding);
+}
+
+void vdb_search(VectorDB *db, float *query, int top_k, int *results) {
+	float best_scores[top_k];
+	for (int i = 0; i < top_k; i++) {
+		best_scores[i] = -1.0f;
+		results[i] = -1;
+	}
+
+	for (int i = 0; i < db->count; i++) {
+		float score = cosine_similarity(query, db->docs[i].embedding, VDB_EMBED_SIZE);
+
+		for (int j = 0; j < top_k; j++) {
+			if (score > best_scores[j]) {
+				for (int k = top_k - 1; k > j; k--) {
+					best_scores[k] = best_scores[k - 1];
+					results[k] = results[k - 1];
+				}
+				best_scores[j] = score;
+				results[j] = i;
+				break;
+			}
+		}
+	}
+}
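
Aside: vdb_search() ranks documents by cosine similarity, which measures the angle between two embedding vectors and ignores their magnitudes: about 1.0 for vectors pointing the same way, 0.0 for orthogonal ones. The 1e-8f term only guards against a zero division for all-zero vectors. A tiny self-contained check of the same formula (compile with -lm), separate from the commit:

#include <math.h>
#include <stdio.h>

/* Same formula as cosine_similarity() above, including the 1e-8f
 * guard that avoids dividing by zero for all-zero vectors. */
static float cosine(const float *a, const float *b, int n) {
    float dot = 0, na = 0, nb = 0;
    for (int i = 0; i < n; i++) {
        dot += a[i] * b[i];
        na += a[i] * a[i];
        nb += b[i] * b[i];
    }
    return dot / (sqrtf(na) * sqrtf(nb) + 1e-8f);
}

int main(void) {
    float x[] = {1, 0}; /* along the first axis   */
    float y[] = {1, 1}; /* 45 degrees away from x */
    float z[] = {0, 1}; /* orthogonal to x        */
    printf("%f\n", cosine(x, y, 2)); /* ~0.707107 */
    printf("%f\n", cosine(x, z, 2)); /*  0.000000 */
    printf("%f\n", cosine(x, x, 2)); /* ~1.000000 */
    return 0;
}

The nested loop in vdb_search() is then just an insertion sort that keeps the top_k best scores, shifting weaker candidates down as better ones arrive; with VDB_MAX_DOCS at 1000, a brute-force scan over every document is entirely adequate.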
diff --git a/vectordb.h b/vectordb.h
+#ifndef VECTORDB_H
+#define VECTORDB_H
+
+#include "llama.h"
+
+#define VDB_MAX_DOCS 1000
+#define VDB_EMBED_SIZE 768
+#define VDB_MAX_TEXT 1024
+
+typedef struct {
+	float embedding[VDB_EMBED_SIZE];
+	char text[VDB_MAX_TEXT];
+} VectorDoc;
+
+typedef struct {
+	VectorDoc docs[VDB_MAX_DOCS];
+	int count;
+	struct llama_context *embed_ctx;
+} VectorDB;
+
+void vdb_init(VectorDB *db, struct llama_context *embed_ctx);
+void vdb_free(VectorDB *db);
+
+void vdb_add_document(VectorDB *db, const char *text);
+
+void vdb_embed_query(VectorDB *db, const char *text, float *out_embedding);
+void vdb_search(VectorDB *db, float *query_embedding, int top_k, int *results);
+
+#endif
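
Aside: the header above is the whole public API. Below is a minimal sketch of a caller, mirroring what generate_context() in prompt.c does; it is not part of the commit, the model path is a placeholder, and it assumes the chosen GGUF embedding model produces vectors of exactly VDB_EMBED_SIZE (768) floats. Every llama.cpp call used here is one the commit itself uses.

#include <stdio.h>

#include "llama.h"
#include "vectordb.h"

/* Kept static rather than on the stack: the struct weighs about 4 MB
 * (1000 docs x (768 floats + 1 KB of text)). */
static VectorDB db;

int main(void) {
    llama_backend_init();

    /* Placeholder path; any GGUF embedding model whose embedding
     * width matches VDB_EMBED_SIZE will do. */
    struct llama_model *model = llama_model_load_from_file("models/embedding.gguf", llama_model_default_params());
    if (model == NULL) return 1;

    struct llama_context_params cparams = llama_context_default_params();
    cparams.embeddings = true; /* embedding mode, as in generate_context() */
    struct llama_context *embed_ctx = llama_init_from_model(model, cparams);
    if (embed_ctx == NULL) {
        llama_model_free(model);
        return 1;
    }

    vdb_init(&db, embed_ctx);
    vdb_add_document(&db, "Gandalf is a wizard in The Lord of the Rings.");
    vdb_add_document(&db, "Frodo is from the Shire and carried the One Ring to Mount Doom.");
    vdb_add_document(&db, "Mordor is the realm of Sauron and contains Mount Doom.");

    float query[VDB_EMBED_SIZE];
    int results[2];
    vdb_embed_query(&db, "Who is Gandalf?", query);
    vdb_search(&db, query, 2, results);

    for (int i = 0; i < 2; i++) {
        if (results[i] >= 0) {
            printf("#%d: %s\n", i + 1, db.docs[results[i]].text);
        }
    }

    vdb_free(&db);
    llama_free(embed_ctx);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}

Build it the way the Makefile builds prompt: compiled and linked together with vectordb.c against the llama.cpp libraries.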