diff --git a/Makefile b/Makefile index ba4deab754b73eb0f2807116dffea151651618bc..0d27bd63dd90b94474531e98ec891cad23384175 100644 --- a/Makefile +++ b/Makefile @@ -10,9 +10,12 @@ LDFLAGS = -L$(LLAMA_DIR)/build/src -L$(LLAMA_DIR)/build/ggml/src \ -lpthread -lm -ldl -lstdc++ -g \ -lllama -lggml -lggml-cpu -lggml-base +PROMPT_TXT := $(wildcard prompts/*.txt) +PROMPT_HEADERS := $(PROMPT_TXT:.txt=.h) + help: .help -build/npc: run/system-prompt npc.c vectordb.c models.h # Build npc binary for testing +build/npc: build/prompts npc.c vectordb.c models.h # Build npc binary for testing $(CC) $(CFLAGS) npc.c vectordb.c -o npc $(LDFLAGS) build/context: context.c vectordb.c models.h # Build context binary for testing @@ -23,6 +26,8 @@ mkdir $(LLAMA_DIR)/build && \ cd $(LLAMA_DIR)/build && \ cmake ../ -DBUILD_SHARED_LIBS=OFF && \ make -j8 + +build/prompts: $(PROMPT_HEADERS) # Generate C style header run/fetch-models: .assure # Fetch GGUF models -mkdir -p models @@ -32,10 +37,10 @@ run/docker: .assure # Runs npc in Docker container docker build -t npcd . docker run -it npcd -run/system-prompt: .assure # Generate C style header - xxd -i system_prompt.txt > system_prompt.h - run/clean: # Cleans up all the build artefacts -rm -f npc cd $(LLAMA_DIR)/build && make clean -rm -Rf $(LLAMA_DIR)/build + +prompts/%.h: prompts/%.txt .assure + xxd -i $< > $@ diff --git a/README.md b/README.md index fee82f8cf6f6c4aefe1548f9b97cc4ace5f3cdb4..a20bcef2b94b92d223ccc45f116b337cca8822cb 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,18 @@ -# llmnpc +An experiment using tiny LLMs as NPCs that could be embedded into a game. + +Goals of the experiment: -Command-line tooling for NPC-focused LLM experiments with lightweight context -retrieval, powered by [llama.cpp](https://github.com/ggerganov/llama.cpp). +- Have the LLM run only on the CPU; this is why small LLMs have been chosen in this + experiment, so they can be used in other games. +- To produce a simple C library that can be reused elsewhere. 
+- Test existing small and tiny LLMs and provide some useful results on how they + behave. + +> [!NOTE] +> This project is just for fun, to see how LLMs would fare as NPCs. Because of +> the non-deterministic nature of LLMs, the results vary and are often quite +> funny. A lot of tweaking would be needed to make this really useful in real +> games, but it is not impossible. ## Building @@ -26,6 +37,7 @@ 3. Build binaries: ```bash make build/context + make build/prompts make build/npc ``` @@ -37,8 +49,8 @@ `context` reads a text file (one document per line), embeds each line, and produces a binary vector database file. ```bash -./context -i context.txt -o context.vdb -./context -m flan-t5-small -i context.txt -o context.vdb +./context -i corpus/lotr.txt -o corpus/lotr.vdb +./context -m flan-t5-small -i corpus/lotr.txt -o corpus/lotr.vdb ``` ### Run an NPC query with retrieved context @@ -47,8 +59,8 @@ `npc` loads a vector database, embeds the prompt, selects the top 3 matching lines by cosine similarity, and runs the NPC system prompt against that context. ```bash -./npc -m flan-t5-small -p "Who is Gandalf?" -c context.vdb -./npc -m flan-t5-small -p "Who is Frodo?" -c context.vdb +./npc -m flan-t5-small -p "Who is Gandalf?" -c corpus/lotr.vdb +./npc -m flan-t5-small -p "Who is Frodo?" 
-c corpus/lotr.vdb ``` ### context options diff --git a/context.txt b/corpus/lotr.txt rename from context.txt rename to corpus/lotr.txt diff --git a/npc.c b/npc.c index 01c980c3484d16d7866160bb4e24dff5cee39932..5450866ebc367cb0e6f4878a7cf1d44d306028f6 100644 --- a/npc.c +++ b/npc.c @@ -11,7 +11,7 @@ #include #include #include -#include "system_prompt.h" +#include "prompts/lotr.h" static void llama_log_callback(enum ggml_log_level level, const char *text, void *user_data) { (void)level; @@ -54,13 +54,13 @@ log_message(stderr, LOG_ERROR, "Model config is missing"); return 1; } - char *system_prefix = (char *)malloc(system_prompt_txt_len + 1); + char *system_prefix = (char *)malloc(prompts_lotr_txt_len + 1); if (system_prefix == NULL) { log_message(stderr, LOG_ERROR, "Failed to allocate system prompt"); return 1; } - memcpy(system_prefix, system_prompt_txt, system_prompt_txt_len); - system_prefix[system_prompt_txt_len] = '\0'; + memcpy(system_prefix, prompts_lotr_txt, prompts_lotr_txt_len); + system_prefix[prompts_lotr_txt_len] = '\0'; ggml_backend_load_all(); diff --git a/system_prompt.h b/prompts/lotr.h rename from system_prompt.h rename to prompts/lotr.h index 119b7f2d606c607a78d915aae0f7ecff05d55231..521002129e803b1fdd901c48c457ac6407995add 100644 --- a/system_prompt.h +++ b/prompts/lotr.h @@ -1,4 +1,4 @@ -unsigned char system_prompt_txt[] = { +unsigned char prompts_lotr_txt[] = { 0x53, 0x79, 0x73, 0x74, 0x65, 0x6d, 0x3a, 0x20, 0x41, 0x6e, 0x73, 0x77, 0x65, 0x72, 0x20, 0x75, 0x73, 0x69, 0x6e, 0x67, 0x20, 0x6f, 0x6e, 0x6c, 0x79, 0x20, 0x74, 0x68, 0x65, 0x20, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x78, @@ -12,4 +12,4 @@ 0x20, 0x64, 0x6f, 0x6e, 0x27, 0x74, 0x20, 0x68, 0x61, 0x76, 0x65, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x0a }; -unsigned int system_prompt_txt_len = 138; +unsigned int prompts_lotr_txt_len = 138; diff --git a/system_prompt.txt b/prompts/lotr.txt rename from system_prompt.txt rename to 
prompts/lotr.txt