summary | refs | log | tree | commit | diff
path: root/llama.cpp/tools/server/tests/unit/test_tokenize.py
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/tools/server/tests/unit/test_tokenize.py')
-rw-r--r-- llama.cpp/tools/server/tests/unit/test_tokenize.py 59
1 files changed, 59 insertions, 0 deletions
diff --git a/llama.cpp/tools/server/tests/unit/test_tokenize.py b/llama.cpp/tools/server/tests/unit/test_tokenize.py
new file mode 100644
index 0000000..424cac5
--- /dev/null
+++ b/llama.cpp/tools/server/tests/unit/test_tokenize.py
@@ -0,0 +1,59 @@
+import pytest
+from utils import *
+
+server = ServerPreset.tinyllama2()  # module-level handle that the tests below read as the global `server`
+
+
+@pytest.fixture(autouse=True)  # autouse: applied to every test in this module without being requested
+def create_server():
+ global server  # rebind the module-level name instead of creating a local
+ server = ServerPreset.tinyllama2()  # new tinyllama2 preset object; not started here, each test calls start()
+
+
+def test_tokenize_detokenize():
+ global server
+ server.start()
+ # tokenize
+ content = "What is the capital of France ?"
+ res_tok = server.make_request("POST", "/tokenize", data={
+ "content": content
+ })
+ assert res_tok.status_code == 200
+ assert len(res_tok.body["tokens"]) > 5
+ # detokenize
+ res_detok = server.make_request("POST", "/detokenize", data={
+ "tokens": res_tok.body["tokens"],
+ })
+ assert res_detok.status_code == 200
+ assert res_detok.body["content"].strip() == content
+
+
+def test_tokenize_with_bos():
+ global server
+ server.start()
+ # tokenize
+ content = "What is the capital of France ?"
+ bosId = 1
+ res_tok = server.make_request("POST", "/tokenize", data={
+ "content": content,
+ "add_special": True,
+ })
+ assert res_tok.status_code == 200
+ assert res_tok.body["tokens"][0] == bosId
+
+
+def test_tokenize_with_pieces():
+ global server
+ server.start()
+ # tokenize
+ content = "This is a test string with unicode 媽 and emoji 🤗"
+ res_tok = server.make_request("POST", "/tokenize", data={
+ "content": content,
+ "with_pieces": True,
+ })
+ assert res_tok.status_code == 200
+ for token in res_tok.body["tokens"]:
+ assert "id" in token
+ assert token["id"] > 0
+ assert "piece" in token
+ assert len(token["piece"]) > 0