diff options
Diffstat (limited to 'llama.cpp/tools/server/tests/unit/test_tokenize.py')
| -rw-r--r-- | llama.cpp/tools/server/tests/unit/test_tokenize.py | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/tests/unit/test_tokenize.py b/llama.cpp/tools/server/tests/unit/test_tokenize.py
new file mode 100644
index 0000000..424cac5
--- /dev/null
+++ b/llama.cpp/tools/server/tests/unit/test_tokenize.py
@@ -0,0 +1,59 @@
+import pytest
+from utils import *
+
+server = ServerPreset.tinyllama2()
+
+
+@pytest.fixture(autouse=True)
+def create_server():
+    global server
+    server = ServerPreset.tinyllama2()
+
+
+def test_tokenize_detokenize():
+    global server
+    server.start()
+    # tokenize
+    content = "What is the capital of France ?"
+    res_tok = server.make_request("POST", "/tokenize", data={
+        "content": content
+    })
+    assert res_tok.status_code == 200
+    assert len(res_tok.body["tokens"]) > 5
+    # detokenize
+    res_detok = server.make_request("POST", "/detokenize", data={
+        "tokens": res_tok.body["tokens"],
+    })
+    assert res_detok.status_code == 200
+    assert res_detok.body["content"].strip() == content
+
+
+def test_tokenize_with_bos():
+    global server
+    server.start()
+    # tokenize
+    content = "What is the capital of France ?"
+    bosId = 1
+    res_tok = server.make_request("POST", "/tokenize", data={
+        "content": content,
+        "add_special": True,
+    })
+    assert res_tok.status_code == 200
+    assert res_tok.body["tokens"][0] == bosId
+
+
+def test_tokenize_with_pieces():
+    global server
+    server.start()
+    # tokenize
+    content = "This is a test string with unicode 媽 and emoji 🤗"
+    res_tok = server.make_request("POST", "/tokenize", data={
+        "content": content,
+        "with_pieces": True,
+    })
+    assert res_tok.status_code == 200
+    for token in res_tok.body["tokens"]:
+        assert "id" in token
+        assert token["id"] > 0
+        assert "piece" in token
+        assert len(token["piece"]) > 0
