1import pytest
2from utils import *
3
# Module-level server handle shared by the tests below; the autouse
# `create_server` fixture rebinds it to a new preset instance.
server = ServerPreset.tinyllama2()
5
6
@pytest.fixture(autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a new tinyllama2 preset.

    Declared ``autouse`` so tests in this module get it without requesting it.
    """
    # ``global`` is required here: the assignment must replace the module
    # attribute rather than create a function-local name.
    global server
    server = ServerPreset.tinyllama2()
11
12
def test_tokenize_detokenize():
    """Round-trip a prompt through ``/tokenize`` and ``/detokenize``.

    The detokenized text, after stripping surrounding whitespace, must equal
    the original prompt.
    """
    server.start()
    prompt = "What is the capital of France ?"

    # Step 1: text -> tokens
    tokenized = server.make_request("POST", "/tokenize", data={"content": prompt})
    assert tokenized.status_code == 200
    token_ids = tokenized.body["tokens"]
    assert len(token_ids) > 5

    # Step 2: tokens -> text
    detokenized = server.make_request("POST", "/detokenize", data={"tokens": token_ids})
    assert detokenized.status_code == 200
    assert detokenized.body["content"].strip() == prompt
29
30
def test_tokenize_with_bos():
    """Check that ``add_special`` makes the first returned token the BOS id.

    The expected BOS id is hard-coded to 1 for the model used by this test.
    """
    server.start()
    expected_bos_id = 1
    payload = {
        "content": "What is the capital of France ?",
        "add_special": True,
    }
    response = server.make_request("POST", "/tokenize", data=payload)
    assert response.status_code == 200
    first_token = response.body["tokens"][0]
    assert first_token == expected_bos_id
43
44
def test_tokenize_with_pieces():
    """Check that ``with_pieces`` yields an id and a non-empty piece per token.

    The prompt mixes ASCII text, a CJK character and an emoji so that
    non-ASCII input is exercised as well.
    """
    server.start()
    content = "This is a test string with unicode 媽 and emoji 🤗"
    res_tok = server.make_request("POST", "/tokenize", data={
        "content": content,
        "with_pieces": True,
    })
    assert res_tok.status_code == 200
    tokens = res_tok.body["tokens"]
    # Guard against a vacuous pass: with an empty list the loop below would
    # execute no assertions and the test would succeed without checking anything.
    assert len(tokens) > 0
    for token in tokens:
        assert "id" in token
        assert token["id"] > 0
        assert "piece" in token
        assert len(token["piece"]) > 0