summary refs log tree commit diff
path: root/llama.cpp/tools/server/tests/unit/test_basic.py
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/tools/server/tests/unit/test_basic.py')
-rw-r--r-- llama.cpp/tools/server/tests/unit/test_basic.py 96
1 files changed, 96 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/tests/unit/test_basic.py b/llama.cpp/tools/server/tests/unit/test_basic.py
new file mode 100644
index 0000000..3405be3
--- /dev/null
+++ b/llama.cpp/tools/server/tests/unit/test_basic.py
@@ -0,0 +1,96 @@
+import pytest
+import requests
+from utils import *
+
+server = ServerPreset.tinyllama2()  # module-level handle used by every test; rebound by the autouse fixture `create_server`
+
+
+@pytest.fixture(autouse=True)
+def create_server():
+ global server
+ server = ServerPreset.tinyllama2()
+
+
+def test_server_start_simple():
+ global server
+ server.start()
+ res = server.make_request("GET", "/health")
+ assert res.status_code == 200
+
+
+def test_server_props():
+ global server
+ server.start()
+ res = server.make_request("GET", "/props")
+ assert res.status_code == 200
+ assert ".gguf" in res.body["model_path"]
+ assert res.body["total_slots"] == server.n_slots
+ default_val = res.body["default_generation_settings"]
+ assert server.n_ctx is not None and server.n_slots is not None
+ assert default_val["n_ctx"] == server.n_ctx / server.n_slots
+ assert default_val["params"]["seed"] == server.seed
+
+
+def test_server_models():
+ global server
+ server.start()
+ res = server.make_request("GET", "/models")
+ assert res.status_code == 200
+ assert len(res.body["data"]) == 1
+ assert res.body["data"][0]["id"] == server.model_alias
+
+
+def test_server_slots():
+ global server
+
+ # without slots endpoint enabled, this should return error
+ server.server_slots = False
+ server.start()
+ res = server.make_request("GET", "/slots")
+ assert res.status_code == 501 # ERROR_TYPE_NOT_SUPPORTED
+ assert "error" in res.body
+ server.stop()
+
+ # with slots endpoint enabled, this should return slots info
+ server.server_slots = True
+ server.n_slots = 2
+ server.start()
+ res = server.make_request("GET", "/slots")
+ assert res.status_code == 200
+ assert len(res.body) == server.n_slots
+ assert server.n_ctx is not None and server.n_slots is not None
+ assert res.body[0]["n_ctx"] == server.n_ctx / server.n_slots
+ assert "params" not in res.body[0]
+
+
+def test_load_split_model():
+ global server
+ server.offline = False
+ server.model_hf_repo = "ggml-org/models"
+ server.model_hf_file = "tinyllamas/split/stories15M-q8_0-00001-of-00003.gguf"
+ server.model_alias = "tinyllama-split"
+ server.start()
+ res = server.make_request("POST", "/completion", data={
+ "n_predict": 16,
+ "prompt": "Hello",
+ "temperature": 0.0,
+ })
+ assert res.status_code == 200
+ assert match_regex("(little|girl)+", res.body["content"])
+
+
+def test_no_webui():
+ global server
+ # default: webui enabled
+ server.start()
+ url = f"http://{server.server_host}:{server.server_port}"
+ res = requests.get(url)
+ assert res.status_code == 200
+ assert "<!doctype html>" in res.text
+ server.stop()
+
+ # with --no-webui
+ server.no_webui = True
+ server.start()
+ res = requests.get(url)
+ assert res.status_code == 404