1import pytest
 2import requests
 3from utils import *
 4
 5server = ServerPreset.tinyllama2()
 6
 7
 8@pytest.fixture(autouse=True)
 9def create_server():
10    global server
11    server = ServerPreset.tinyllama2()
12
13
def test_server_start_simple():
    """Start the default preset and check that ``GET /health`` answers 200."""
    # `server` is only read here, so no `global` declaration is required.
    server.start()
    health = server.make_request("GET", "/health")
    assert health.status_code == 200
19
20
def test_server_props():
    """Check that ``GET /props`` matches the configuration held by ``server``.

    Verifies the model path, the slot count, the per-slot context size and
    the default seed reported by the endpoint.
    """
    server.start()
    response = server.make_request("GET", "/props")
    assert response.status_code == 200

    props = response.body
    assert ".gguf" in props["model_path"]
    assert props["total_slots"] == server.n_slots

    generation_defaults = props["default_generation_settings"]
    # Rule out None before dividing; also narrows the types for checkers.
    assert server.n_ctx is not None and server.n_slots is not None
    # The reported context size is expected to be the total split across slots.
    ctx_per_slot = server.n_ctx / server.n_slots
    assert generation_defaults["n_ctx"] == ctx_per_slot
    assert generation_defaults["params"]["seed"] == server.seed
32
33
def test_server_models():
    """Check that ``GET /models`` lists exactly one model with the server's alias."""
    server.start()
    response = server.make_request("GET", "/models")
    assert response.status_code == 200

    models = response.body["data"]
    assert len(models) == 1
    assert models[0]["id"] == server.model_alias
41
42
def test_server_slots():
    """Exercise ``GET /slots`` with the endpoint disabled, then enabled.

    The same ``server`` object is started twice: first with
    ``server_slots = False`` (an error response is expected), then, after a
    stop, with ``server_slots = True`` and two slots.
    """
    # Phase 1 -- endpoint disabled: the request must be rejected.
    server.server_slots = False
    server.start()
    disabled = server.make_request("GET", "/slots")
    assert disabled.status_code == 501  # ERROR_TYPE_NOT_SUPPORTED
    assert "error" in disabled.body
    server.stop()

    # Phase 2 -- endpoint enabled: expect one entry per configured slot.
    server.server_slots = True
    server.n_slots = 2
    server.start()
    enabled = server.make_request("GET", "/slots")
    assert enabled.status_code == 200

    slots = enabled.body
    assert len(slots) == server.n_slots
    # Rule out None before dividing; also narrows the types for checkers.
    assert server.n_ctx is not None and server.n_slots is not None
    first_slot = slots[0]
    assert first_slot["n_ctx"] == server.n_ctx / server.n_slots
    # Slot entries are expected not to carry a "params" key.
    assert "params" not in first_slot
64
65
def test_load_split_model():
    """Start the server on a split model file and run a short completion.

    The server is pointed at the first of three shards (per the file name) in
    the ``ggml-org/models`` repo, then asked for 16 tokens at temperature 0.
    """
    # `offline = False` presumably allows the model to be fetched -- confirm
    # against the server helper in `utils`.
    server.offline = False
    server.model_hf_repo = "ggml-org/models"
    server.model_hf_file = "tinyllamas/split/stories15M-q8_0-00001-of-00003.gguf"
    server.model_alias = "tinyllama-split"
    server.start()

    completion_request = {
        "n_predict": 16,
        "prompt": "Hello",
        "temperature": 0.0,
    }
    response = server.make_request("POST", "/completion", data=completion_request)
    assert response.status_code == 200
    assert match_regex("(little|girl)+", response.body["content"])
80
81
def test_no_webui():
    """Check that the root URL serves the web UI by default and 404s without it.

    The server is started twice on the same host/port: once with the default
    settings (an HTML page is expected at ``/``), and once with
    ``no_webui = True`` (a 404 is expected at ``/``).

    The root URL is fetched with ``requests`` directly rather than through
    ``server.make_request``. Both calls pass an explicit timeout, because
    ``requests`` waits indefinitely when none is given and a stuck server
    would then hang the whole test run instead of failing this test.
    """
    request_timeout = 10  # seconds; applies to each direct HTTP call below

    # default: webui enabled
    server.start()
    url = f"http://{server.server_host}:{server.server_port}"
    res = requests.get(url, timeout=request_timeout)
    assert res.status_code == 200
    assert "<!doctype html>" in res.text
    server.stop()

    # with --no-webui
    server.no_webui = True
    server.start()
    res = requests.get(url, timeout=request_timeout)
    assert res.status_code == 404