1import pytest
2import requests
3from utils import *
4
# Module-level server handle shared by the tests in this file; the
# create_server fixture rebinds it to a new preset instance.
server = ServerPreset.tinyllama2()
6
7
@pytest.fixture(autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a new tinyllama2 preset.

    Declared ``autouse`` so pytest applies it to every test in this module
    without the tests having to request it.
    """
    global server
    fresh_preset = ServerPreset.tinyllama2()
    server = fresh_preset
12
13
def test_server_start_simple():
    """Start the server and check that GET /health answers with HTTP 200."""
    server.start()
    health = server.make_request("GET", "/health")
    assert health.status_code == 200
19
20
def test_server_props():
    """Check that GET /props reflects the configuration the server was started with.

    Verifies the model path, the slot count, the per-slot context size and
    the default sampling seed.
    """
    server.start()
    response = server.make_request("GET", "/props")
    assert response.status_code == 200

    props = response.body
    assert ".gguf" in props["model_path"]
    assert props["total_slots"] == server.n_slots

    defaults = props["default_generation_settings"]
    # Guard the division below: both values must be configured.
    assert server.n_ctx is not None and server.n_slots is not None
    # The reported n_ctx is compared against the total context split per slot.
    ctx_per_slot = server.n_ctx / server.n_slots
    assert defaults["n_ctx"] == ctx_per_slot
    assert defaults["params"]["seed"] == server.seed
32
33
def test_server_models():
    """Check that GET /models lists exactly one model whose id is the alias."""
    server.start()
    response = server.make_request("GET", "/models")
    assert response.status_code == 200

    listed_models = response.body["data"]
    assert len(listed_models) == 1
    assert listed_models[0]["id"] == server.model_alias
41
42
def test_server_slots():
    """Exercise GET /slots with the slots endpoint disabled, then enabled.

    The server is started twice: first with ``server_slots = False``, where
    an error response is expected, then with ``server_slots = True`` and two
    slots, where one entry per slot is expected.
    """
    # Phase 1: without slots endpoint enabled, this should return error
    server.server_slots = False
    server.start()
    disabled = server.make_request("GET", "/slots")
    assert disabled.status_code == 501  # ERROR_TYPE_NOT_SUPPORTED
    assert "error" in disabled.body
    server.stop()

    # Phase 2: with slots endpoint enabled, this should return slots info
    server.server_slots = True
    server.n_slots = 2
    server.start()
    enabled = server.make_request("GET", "/slots")
    assert enabled.status_code == 200

    slots = enabled.body
    assert len(slots) == server.n_slots
    # Guard the division below: both values must be configured.
    assert server.n_ctx is not None and server.n_slots is not None
    first_slot = slots[0]
    assert first_slot["n_ctx"] == server.n_ctx / server.n_slots
    assert "params" not in first_slot
64
65
def test_load_split_model():
    """Start the server on a multi-part GGUF model and run a short completion.

    The model file is the first of three parts ("00001-of-00003") from the
    ``ggml-org/models`` repo. ``offline`` is switched off, presumably so the
    file can be downloaded -- confirm against the helper in ``utils``.
    """
    server.offline = False
    server.model_hf_repo = "ggml-org/models"
    server.model_hf_file = "tinyllamas/split/stories15M-q8_0-00001-of-00003.gguf"
    server.model_alias = "tinyllama-split"
    server.start()

    completion_request = {
        "n_predict": 16,
        "prompt": "Hello",
        "temperature": 0.0,
    }
    response = server.make_request("POST", "/completion", data=completion_request)
    assert response.status_code == 200

    generated_text = response.body["content"]
    assert match_regex("(little|girl)+", generated_text)
80
81
def test_no_webui():
    """Check that the root URL serves the web UI by default and 404s when disabled.

    The server is started twice: first with its default settings, where the
    root URL must return an HTML document, then with ``no_webui = True``,
    where the same URL must return HTTP 404.
    """
    # requests applies no timeout by default, so an unresponsive server would
    # block the test run forever; bound each raw HTTP call explicitly.
    request_timeout_s = 10

    # default: webui enabled
    server.start()
    url = f"http://{server.server_host}:{server.server_port}"
    res = requests.get(url, timeout=request_timeout_s)
    assert res.status_code == 200
    assert "<!doctype html>" in res.text
    server.stop()

    # with --no-webui
    server.no_webui = True
    server.start()
    res = requests.get(url, timeout=request_timeout_s)
    assert res.status_code == 404