1import pytest
 2import time
 3from utils import *
 4
# Module-level server handle shared by the tests in this file. The autouse
# `create_server` fixture rebinds it to a fresh preset before every test.
server = ServerPreset.tinyllama2()
 6
 7
 8@pytest.fixture(autouse=True)
 9def create_server():
10    global server
11    server = ServerPreset.tinyllama2()
12
13
def test_server_sleep():
    """Check that an idle server reports sleeping and wakes up on a request.

    The server is started with ``sleep_idle_seconds = 1``. After waiting past
    that timeout, ``/health`` and ``/props`` must still answer with 200 and
    ``/props`` must report ``is_sleeping`` as ``True``. A ``/completion``
    request must then succeed, after which ``/props`` must report
    ``is_sleeping`` as ``False``.
    """
    server.sleep_idle_seconds = 1
    server.start()

    # wait longer than the 1-second idle timeout so the server can go to sleep
    time.sleep(2)

    # make sure these endpoints are still responsive after sleep
    res = server.make_request("GET", "/health")
    assert res.status_code == 200
    res = server.make_request("GET", "/props")
    assert res.status_code == 200
    # identity check: the flag must be a real boolean, not merely truthy
    assert res.body["is_sleeping"] is True

    # make a generation request to wake up the server
    res = server.make_request("POST", "/completion", data={
        "n_predict": 1,
        "prompt": "Hello",
    })
    assert res.status_code == 200

    # it should no longer be sleeping
    res = server.make_request("GET", "/props")
    assert res.status_code == 200
    assert res.body["is_sleeping"] is False