1import pytest
2import time
3from utils import *
4
# Module-level server handle shared by the tests in this file. The autouse
# fixture `create_server` rebinds it to a fresh preset before each test, so
# this initial value mainly ensures the name exists at import time.
server = ServerPreset.tinyllama2()
6
7
@pytest.fixture(autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a new tinyllama2 preset.

    Declared with ``autouse=True``, so pytest runs it for every test in this
    module without the test having to request it.
    """
    global server
    fresh_preset = ServerPreset.tinyllama2()
    server = fresh_preset
12
13
def test_server_sleep():
    """Check that the server reports sleeping when idle and wakes on a request.

    The server is started with ``sleep_idle_seconds = 1`` and left idle for
    two seconds. ``/health`` and ``/props`` must still answer with 200, and
    ``/props`` must report ``is_sleeping`` as true. A ``/completion`` request
    is then sent, after which ``/props`` must report ``is_sleeping`` as false.
    """
    # No `global` declaration is needed: the module-level `server` is only
    # read and mutated here, never rebound.
    server.sleep_idle_seconds = 1
    server.start()

    # wait a bit so that server can go to sleep
    time.sleep(2)

    # make sure these endpoints are still responsive after sleep
    res = server.make_request("GET", "/health")
    assert res.status_code == 200
    res = server.make_request("GET", "/props")
    assert res.status_code == 200
    # Identity comparison: a truthy non-boolean value must not pass.
    assert res.body["is_sleeping"] is True

    # make a generation request to wake up the server
    res = server.make_request("POST", "/completion", data={
        "n_predict": 1,
        "prompt": "Hello",
    })
    assert res.status_code == 200

    # it should no longer be sleeping
    res = server.make_request("GET", "/props")
    assert res.status_code == 200
    assert res.body["is_sleeping"] is False