1import pytest
 2from utils import *
 3
# Shared server handle used by every test in this module. The autouse
# fixture `create_server` below rebinds it to a fresh preset before each test.
server = ServerPreset.tinyllama2()
 5
 6
 7@pytest.fixture(autouse=True)
 8def create_server():
 9    global server
10    server = ServerPreset.tinyllama2()
11
12
def test_tokenize_detokenize():
    """Round-trip a prompt through /tokenize and then /detokenize."""
    prompt = "What is the capital of France ?"
    server.start()

    # Text -> tokens.
    tokenize_payload = {"content": prompt}
    tokenized = server.make_request("POST", "/tokenize", data=tokenize_payload)
    assert tokenized.status_code == 200
    token_ids = tokenized.body["tokens"]
    assert len(token_ids) > 5

    # Tokens -> text; surrounding whitespace is stripped before comparing.
    detokenize_payload = {"tokens": token_ids}
    detokenized = server.make_request("POST", "/detokenize", data=detokenize_payload)
    assert detokenized.status_code == 200
    restored = detokenized.body["content"].strip()
    assert restored == prompt
29
30
def test_tokenize_with_bos():
    """Check that tokenizing with ``add_special`` yields the BOS id first."""
    expected_bos_id = 1
    server.start()
    response = server.make_request(
        "POST",
        "/tokenize",
        data={
            "content": "What is the capital of France ?",
            "add_special": True,
        },
    )
    assert response.status_code == 200
    first_token = response.body["tokens"][0]
    assert first_token == expected_bos_id
43
44
def test_tokenize_with_pieces():
    """Check that /tokenize with ``with_pieces`` returns id/piece entries.

    Every returned token must carry a positive ``id`` and a non-empty
    ``piece``. The input mixes ASCII, a CJK character and an emoji.
    """
    server.start()
    content = "This is a test string with unicode 媽 and emoji 🤗"
    res_tok = server.make_request("POST", "/tokenize", data={
        "content": content,
        "with_pieces": True,
    })
    assert res_tok.status_code == 200
    tokens = res_tok.body["tokens"]
    # Guard against a vacuous pass: with an empty list the loop below
    # would run zero assertions and the test would succeed.
    assert len(tokens) > 0
    for token in tokens:
        assert "id" in token
        assert token["id"] > 0
        assert "piece" in token
        assert len(token["piece"]) > 0