1#!/usr/bin/env python3
2import pytest
3import base64
4import requests
5
6from utils import *
7
8server: ServerProcess
9
10
def get_test_image_base64() -> str:
    """Download the shared test image and return it base64-encoded.

    Uses the same truck picture as test_vision_api.py so results are
    comparable across suites.
    """
    IMG_URL = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/test/11_truck.png"
    resp = requests.get(IMG_URL)
    resp.raise_for_status()
    raw = resp.content
    return base64.b64encode(raw).decode("utf-8")
18
@pytest.fixture(autouse=True)
def create_server():
    """Provision a fresh tinyllama2 server preset before every test (autouse)."""
    global server
    server = ServerPreset.tinyllama2()
    # Dedicated alias/port plus generous context so this suite does not
    # clash with other test files running against the same presets.
    overrides = {
        "model_alias": "tinyllama-2-anthropic",
        "server_port": 8082,
        "n_slots": 1,
        "n_ctx": 8192,
        "n_batch": 2048,
    }
    for attr, value in overrides.items():
        setattr(server, attr, value)
28
29
@pytest.fixture
def vision_server():
    """Opt-in fixture providing a multimodal tinygemma3 server for vision tests."""
    global server
    server = ServerPreset.tinygemma3()
    overrides = {
        "offline": False,                      # allow downloading the model
        "model_alias": "tinygemma3-anthropic",
        "server_port": 8083,                   # different port to avoid conflicts
        "n_slots": 1,
    }
    for attr, value in overrides.items():
        setattr(server, attr, value)
    return server
40
41
42# Basic message tests
43
def test_anthropic_messages_basic():
    """Test basic Anthropic messages endpoint.

    Sends a minimal non-streaming request and validates the Anthropic
    response envelope: type/role fields, a non-empty content array whose
    first block is text, a valid stop_reason, and integer usage counts.
    """
    server.start()

    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 50,
        "messages": [
            {"role": "user", "content": "Say hello"}
        ]
    })

    # --- response envelope ---
    assert res.status_code == 200, f"Expected 200, got {res.status_code}"
    assert res.body["type"] == "message", f"Expected type 'message', got {res.body.get('type')}"
    assert res.body["role"] == "assistant", f"Expected role 'assistant', got {res.body.get('role')}"
    # --- content blocks ---
    assert "content" in res.body, "Missing 'content' field"
    assert isinstance(res.body["content"], list), "Content should be an array"
    assert len(res.body["content"]) > 0, "Content array should not be empty"
    assert res.body["content"][0]["type"] == "text", "First content block should be text"
    assert "text" in res.body["content"][0], "Text content block missing 'text' field"
    assert res.body["stop_reason"] in ["end_turn", "max_tokens"], f"Invalid stop_reason: {res.body.get('stop_reason')}"
    # --- usage accounting ---
    assert "usage" in res.body, "Missing 'usage' field"
    assert "input_tokens" in res.body["usage"], "Missing usage.input_tokens"
    assert "output_tokens" in res.body["usage"], "Missing usage.output_tokens"
    assert isinstance(res.body["usage"]["input_tokens"], int), "input_tokens should be integer"
    assert isinstance(res.body["usage"]["output_tokens"], int), "output_tokens should be integer"
    assert res.body["usage"]["output_tokens"] > 0, "Should have generated some tokens"
    # Anthropic API should NOT include timings
    assert "timings" not in res.body, "Anthropic API should not include timings field"
73
74
def test_anthropic_messages_with_system():
    """A top-level 'system' field should be accepted alongside messages."""
    server.start()

    payload = {
        "model": "test",
        "max_tokens": 50,
        "system": "You are a helpful assistant.",
        "messages": [{"role": "user", "content": "Hello"}],
    }
    res = server.make_request("POST", "/v1/messages", data=payload)

    assert res.status_code == 200
    assert res.body["type"] == "message"
    assert len(res.body["content"]) > 0
91
92
def test_anthropic_messages_multipart_content():
    """A user turn split across several text blocks should be accepted."""
    server.start()

    text_parts = [
        {"type": "text", "text": "What is"},
        {"type": "text", "text": " the answer?"},
    ]
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 50,
        "messages": [{"role": "user", "content": text_parts}],
    })

    assert res.status_code == 200
    assert res.body["type"] == "message"
113
114
def test_anthropic_messages_conversation():
    """A multi-turn user/assistant history should round-trip cleanly."""
    server.start()

    history = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "How are you?"},
    ]
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 50,
        "messages": history,
    })

    assert res.status_code == 200
    assert res.body["type"] == "message"
131
132
133# Streaming tests
134
def test_anthropic_messages_streaming():
    """Test streaming messages.

    Validates the Anthropic SSE event sequence for a plain text reply:
    message_start, content_block_start, one or more content_block_delta
    events, content_block_stop, message_delta, then message_stop.
    """
    server.start()

    res = server.make_stream_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 30,
        "messages": [
            {"role": "user", "content": "Say hello"}
        ],
        "stream": True
    })

    events = []
    for data in res:
        # Each event should have type and other fields
        assert "type" in data, f"Missing 'type' in event: {data}"
        events.append(data)

    # Verify event sequence
    event_types = [e["type"] for e in events]
    assert "message_start" in event_types, "Missing message_start event"
    assert "content_block_start" in event_types, "Missing content_block_start event"
    assert "content_block_delta" in event_types, "Missing content_block_delta event"
    assert "content_block_stop" in event_types, "Missing content_block_stop event"
    assert "message_delta" in event_types, "Missing message_delta event"
    assert "message_stop" in event_types, "Missing message_stop event"

    # Check message_start structure
    message_start = next(e for e in events if e["type"] == "message_start")
    assert "message" in message_start, "message_start missing 'message' field"
    assert message_start["message"]["type"] == "message"
    assert message_start["message"]["role"] == "assistant"
    # content starts empty in message_start; text arrives only via deltas
    assert message_start["message"]["content"] == []
    assert "usage" in message_start["message"]
    assert message_start["message"]["usage"]["input_tokens"] > 0

    # Check content_block_start
    block_start = next(e for e in events if e["type"] == "content_block_start")
    assert "index" in block_start, "content_block_start missing 'index'"
    assert block_start["index"] == 0, "First content block should be at index 0"
    assert "content_block" in block_start
    assert block_start["content_block"]["type"] == "text"

    # Check content_block_delta
    deltas = [e for e in events if e["type"] == "content_block_delta"]
    assert len(deltas) > 0, "Should have at least one content_block_delta"
    for delta in deltas:
        assert "index" in delta
        assert "delta" in delta
        assert delta["delta"]["type"] == "text_delta"
        assert "text" in delta["delta"]

    # Check content_block_stop
    block_stop = next(e for e in events if e["type"] == "content_block_stop")
    assert "index" in block_stop
    assert block_stop["index"] == 0

    # Check message_delta
    message_delta = next(e for e in events if e["type"] == "message_delta")
    assert "delta" in message_delta
    assert "stop_reason" in message_delta["delta"]
    assert message_delta["delta"]["stop_reason"] in ["end_turn", "max_tokens"]
    # final output-token usage rides on message_delta
    assert "usage" in message_delta
    assert message_delta["usage"]["output_tokens"] > 0

    # Check message_stop
    message_stop = next(e for e in events if e["type"] == "message_stop")
    # message_stop should NOT have timings for Anthropic API
    assert "timings" not in message_stop, "Anthropic streaming should not include timings"
205
206
207# Token counting tests
208
def test_anthropic_count_tokens():
    """POST /v1/messages/count_tokens returns only an input token count."""
    server.start()

    payload = {
        "model": "test",
        "messages": [{"role": "user", "content": "Hello world"}],
    }
    res = server.make_request("POST", "/v1/messages/count_tokens", data=payload)

    assert res.status_code == 200
    body = res.body
    assert "input_tokens" in body
    assert isinstance(body["input_tokens"], int)
    assert body["input_tokens"] > 0
    # count_tokens must not report any generation-side usage
    assert "output_tokens" not in body
226
227
def test_anthropic_count_tokens_with_system():
    """System prompts are included when counting input tokens."""
    server.start()

    payload = {
        "model": "test",
        "system": "You are a helpful assistant.",
        "messages": [{"role": "user", "content": "Hello"}],
    }
    res = server.make_request("POST", "/v1/messages/count_tokens", data=payload)

    assert res.status_code == 200
    assert res.body["input_tokens"] > 0
242
243
def test_anthropic_count_tokens_no_max_tokens():
    """count_tokens must work without the max_tokens field."""
    server.start()

    # max_tokens is NOT required for count_tokens
    res = server.make_request("POST", "/v1/messages/count_tokens", data={
        "model": "test",
        "messages": [{"role": "user", "content": "Hello"}],
    })

    assert res.status_code == 200
    assert "input_tokens" in res.body
258
259
260# Tool use tests
261
def test_anthropic_tool_use_basic():
    """The Anthropic tool definition format should be accepted end to end."""
    server.jinja = True
    server.start()

    weather_tool = {
        "name": "get_weather",
        "description": "Get the current weather in a location",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "City name",
                },
            },
            "required": ["location"],
        },
    }
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 200,
        "tools": [weather_tool],
        "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
    })

    assert res.status_code == 200
    assert res.body["type"] == "message"
    assert len(res.body["content"]) > 0

    # The model may or may not decide to call the tool; only validate the
    # tool_use block shape when it did.
    tool_blocks = [b for b in res.body["content"] if b.get("type") == "tool_use"]
    if tool_blocks:
        assert res.body["stop_reason"] == "tool_use"

        tool_block = tool_blocks[0]
        assert "id" in tool_block
        assert "name" in tool_block
        assert tool_block["name"] == "get_weather"
        assert "input" in tool_block
        assert isinstance(tool_block["input"], dict)
307
308
def test_anthropic_tool_result():
    """Test sending tool results back.

    This test verifies that tool_result blocks are properly converted to
    role="tool" messages internally. Without proper conversion, this would
    fail with a 500 error: "unsupported content[].type" because tool_result
    blocks would remain in the user message content array.
    """
    server.jinja = True
    server.start()

    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 100,
        "messages": [
            {"role": "user", "content": "What's the weather?"},
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "test123",
                        "name": "get_weather",
                        "input": {"location": "Paris"}
                    }
                ]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "test123",
                        # fixed mojibake: the degree sign was double-encoded
                        "content": "The weather is sunny, 25°C"
                    }
                ]
            }
        ]
    })

    # This would be 500 with the old bug where tool_result blocks weren't converted
    assert res.status_code == 200
    assert res.body["type"] == "message"
    # Model should respond to the tool result
    assert len(res.body["content"]) > 0
    assert res.body["content"][0]["type"] == "text"
355
356
def test_anthropic_tool_result_with_text():
    """Test tool result mixed with text content.

    This tests the edge case where a user message contains both text and
    tool_result blocks. The server must properly split these into separate
    messages: a user message with text, followed by tool messages.
    Without proper handling, this would fail with 500: "unsupported content[].type"
    """
    server.jinja = True
    server.start()

    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 100,
        "messages": [
            {"role": "user", "content": "What's the weather?"},
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "tool_1",
                        "name": "get_weather",
                        "input": {"location": "Paris"}
                    }
                ]
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Here are the results:"},
                    {
                        "type": "tool_result",
                        "tool_use_id": "tool_1",
                        # fixed mojibake: the degree sign was double-encoded
                        "content": "Sunny, 25°C"
                    }
                ]
            }
        ]
    })

    assert res.status_code == 200
    assert res.body["type"] == "message"
    assert len(res.body["content"]) > 0
401
402
def test_anthropic_tool_result_error():
    """A tool_result carrying is_error=True should be handled normally."""
    server.jinja = True
    server.start()

    assistant_turn = {
        "role": "assistant",
        "content": [
            {
                "type": "tool_use",
                "id": "test123",
                "name": "get_weather",
                "input": {"location": "InvalidCity"},
            }
        ],
    }
    error_result_turn = {
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": "test123",
                "is_error": True,
                "content": "City not found",
            }
        ],
    }
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 100,
        "messages": [
            {"role": "user", "content": "Get the weather"},
            assistant_turn,
            error_result_turn,
        ],
    })

    assert res.status_code == 200
    assert res.body["type"] == "message"
440
441
def test_anthropic_tool_streaming():
    """Test streaming with tool use.

    Streams a request that offers a calculator tool and validates the
    basic event envelope; when the model actually emits a tool call, the
    tool_use content_block_start event shape is validated as well.
    """
    server.jinja = True
    server.start()

    res = server.make_stream_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 200,
        "stream": True,
        "tools": [{
            "name": "calculator",
            "description": "Calculate math",
            "input_schema": {
                "type": "object",
                "properties": {
                    "expression": {"type": "string"}
                },
                "required": ["expression"]
            }
        }],
        "messages": [
            {"role": "user", "content": "Calculate 2+2"}
        ]
    })

    events = list(res)
    event_types = [e["type"] for e in events]

    # Basic envelope must be present whether or not a tool was called
    assert "message_start" in event_types
    assert "message_stop" in event_types

    # Collect tool_use block starts once (previously the same comprehension
    # ran twice — once in the guard, once in the body — followed by an
    # assert that the guard had already made vacuously true).
    tool_starts = [e for e in events
                   if e.get("type") == "content_block_start"
                   and e.get("content_block", {}).get("type") == "tool_use"]

    # If tool was used, check for proper tool streaming
    if tool_starts:
        tool_start = tool_starts[0]
        # blocks are addressed by index in subsequent delta events
        assert "index" in tool_start
        assert tool_start["content_block"]["type"] == "tool_use"
        assert "name" in tool_start["content_block"]
493
494
495# Vision/multimodal tests
496
def test_anthropic_vision_format_accepted():
    """Anthropic image blocks should parse even on a text-only model."""
    server.start()

    # Small 1x1 red PNG image in base64
    red_pixel_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="

    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": red_pixel_png,
        },
    }
    text_block = {"type": "text", "text": "What is this?"}
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 10,
        "messages": [{"role": "user", "content": [image_block, text_block]}],
    })

    # Server accepts the format but tinyllama doesn't support images
    # So it should return 500 with clear error message about missing mmproj
    assert res.status_code == 500
    error_message = res.body.get("error", {}).get("message", "")
    assert "image input is not supported" in error_message.lower()
532
533
def test_anthropic_vision_base64_with_multimodal_model(vision_server):
    """End-to-end vision request against the multimodal tinygemma3 preset."""
    global server
    server = vision_server
    server.start()

    # Get test image in base64 format
    image_base64 = get_test_image_base64()

    content_blocks = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": image_base64,
            },
        },
        {"type": "text", "text": "What is this:\n"},
    ]
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 10,
        "messages": [{"role": "user", "content": content_blocks}],
    })

    assert res.status_code == 200, f"Expected 200, got {res.status_code}: {res.body}"
    assert res.body["type"] == "message"
    assert len(res.body["content"]) > 0
    first_block = res.body["content"][0]
    assert first_block["type"] == "text"
    # The model should generate some response about the image
    assert len(first_block["text"]) > 0
573
574
575# Parameter tests
576
def test_anthropic_stop_sequences():
    """The stop_sequences sampling parameter should be accepted."""
    server.start()

    request = {
        "model": "test",
        "max_tokens": 100,
        "stop_sequences": ["\n", "END"],
        "messages": [{"role": "user", "content": "Count to 10"}],
    }
    res = server.make_request("POST", "/v1/messages", data=request)

    assert res.status_code == 200
    assert res.body["type"] == "message"
592
593
def test_anthropic_temperature():
    """The temperature sampling parameter should be accepted."""
    server.start()

    request = {
        "model": "test",
        "max_tokens": 50,
        "temperature": 0.5,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    res = server.make_request("POST", "/v1/messages", data=request)

    assert res.status_code == 200
    assert res.body["type"] == "message"
609
610
def test_anthropic_top_p():
    """The top_p sampling parameter should be accepted."""
    server.start()

    request = {
        "model": "test",
        "max_tokens": 50,
        "top_p": 0.9,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    res = server.make_request("POST", "/v1/messages", data=request)

    assert res.status_code == 200
    assert res.body["type"] == "message"
626
627
def test_anthropic_top_k():
    """The top_k sampling parameter (llama.cpp extension) should be accepted."""
    server.start()

    request = {
        "model": "test",
        "max_tokens": 50,
        "top_k": 40,
        "messages": [{"role": "user", "content": "Hello"}],
    }
    res = server.make_request("POST", "/v1/messages", data=request)

    assert res.status_code == 200
    assert res.body["type"] == "message"
643
644
645# Error handling tests
646
def test_anthropic_missing_messages():
    """A request without a messages field must be rejected."""
    server.start()

    # "messages" is deliberately omitted from the request body
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 50,
    })

    # Should return an error (400 or 500)
    assert res.status_code >= 400
659
660
def test_anthropic_empty_messages():
    """An empty messages array is accepted (permissive validation)."""
    server.start()

    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 50,
        "messages": [],
    })

    # Server is permissive and accepts empty messages (provides defaults)
    # This matches the permissive validation design choice
    assert res.status_code == 200
    assert res.body["type"] == "message"
675
676
677# Content block index tests
678
def test_anthropic_streaming_content_block_indices():
    """Content block indices must be sequential and paired start/stop in streams."""
    server.jinja = True
    server.start()

    # Request that might produce both text and tool use
    res = server.make_stream_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 400,
        "stream": True,
        "tools": [{
            "name": "test_tool",
            "description": "A test tool",
            "input_schema": {
                "type": "object",
                "properties": {
                    "param": {"type": "string"},
                },
                "required": ["param"],
            },
        }],
        "messages": [{"role": "user", "content": "Use the test tool"}],
    })

    events = list(res)

    # Partition the block lifecycle events by type.
    block_starts = [e for e in events if e.get("type") == "content_block_start"]
    block_stops = [e for e in events if e.get("type") == "content_block_stop"]

    if len(block_starts) > 1:
        # If there are multiple blocks, indices should be sequential
        indices = [e["index"] for e in block_starts]
        expected_indices = list(range(len(block_starts)))
        assert indices == expected_indices, f"Expected indices {expected_indices}, got {indices}"

    # Check content_block_stop events match the starts
    start_indices = {e["index"] for e in block_starts}
    stop_indices = {e["index"] for e in block_stops}
    assert start_indices == stop_indices, "content_block_stop indices should match content_block_start indices"
722
723
724# Extended features tests
725
def test_anthropic_thinking():
    """The extended 'thinking' parameter should be accepted without error."""
    server.jinja = True
    server.start()

    thinking_config = {"type": "enabled", "budget_tokens": 50}
    res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 100,
        "thinking": thinking_config,
        "messages": [{"role": "user", "content": "What is 2+2?"}],
    })

    assert res.status_code == 200
    assert res.body["type"] == "message"
745
746
def test_anthropic_metadata():
    """The metadata parameter should be accepted without affecting the reply."""
    server.start()

    request = {
        "model": "test",
        "max_tokens": 50,
        "metadata": {"user_id": "test_user_123"},
        "messages": [{"role": "user", "content": "Hello"}],
    }
    res = server.make_request("POST", "/v1/messages", data=request)

    assert res.status_code == 200
    assert res.body["type"] == "message"
764
765
766# Compatibility tests
767
def test_anthropic_vs_openai_different_response_format():
    """The OpenAI and Anthropic endpoints must keep their distinct envelopes."""
    server.start()

    # Make OpenAI request
    openai_res = server.make_request("POST", "/v1/chat/completions", data={
        "model": "test",
        "max_tokens": 50,
        "messages": [{"role": "user", "content": "Hello"}],
    })

    # Make Anthropic request
    anthropic_res = server.make_request("POST", "/v1/messages", data={
        "model": "test",
        "max_tokens": 50,
        "messages": [{"role": "user", "content": "Hello"}],
    })

    assert openai_res.status_code == 200
    assert anthropic_res.status_code == 200

    # OpenAI has "object", Anthropic has "type"
    assert "object" in openai_res.body
    assert "type" in anthropic_res.body
    assert openai_res.body["object"] == "chat.completion"
    assert anthropic_res.body["type"] == "message"

    # OpenAI has "choices", Anthropic has "content"
    assert "choices" in openai_res.body
    assert "content" in anthropic_res.body

    # Different usage field names
    openai_usage = openai_res.body["usage"]
    anthropic_usage = anthropic_res.body["usage"]
    assert "prompt_tokens" in openai_usage
    assert "input_tokens" in anthropic_usage
    assert "completion_tokens" in openai_usage
    assert "output_tokens" in anthropic_usage
808
809
810# Extended thinking tests with reasoning models
811
@pytest.mark.slow
@pytest.mark.parametrize("stream", [False, True])
def test_anthropic_thinking_with_reasoning_model(stream):
    """Test that thinking content blocks are properly returned for reasoning models.

    Downloads a DeepSeek-R1 distill model and verifies that, with thinking
    enabled, the response carries a thinking block (with signature) before
    the text block — in both streaming and non-streaming modes.
    """
    # replace the autouse tinyllama server with a real reasoning model
    global server
    server = ServerProcess()
    server.model_hf_repo = "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF"
    server.model_hf_file = "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf"
    server.reasoning_format = "deepseek"
    server.jinja = True
    server.n_ctx = 8192
    server.n_predict = 1024
    server.server_port = 8084
    server.start(timeout_seconds=600)  # large model needs time to download

    if stream:
        res = server.make_stream_request("POST", "/v1/messages", data={
            "model": "test",
            "max_tokens": 1024,
            "thinking": {
                "type": "enabled",
                "budget_tokens": 500
            },
            "messages": [
                {"role": "user", "content": "What is 2+2?"}
            ],
            "stream": True
        })

        events = list(res)

        # should have thinking content block events
        thinking_starts = [e for e in events if
                           e.get("type") == "content_block_start" and
                           e.get("content_block", {}).get("type") == "thinking"]
        assert len(thinking_starts) > 0, "Should have thinking content_block_start event"
        assert thinking_starts[0]["index"] == 0, "Thinking block should be at index 0"

        # should have thinking_delta events
        thinking_deltas = [e for e in events if
                           e.get("type") == "content_block_delta" and
                           e.get("delta", {}).get("type") == "thinking_delta"]
        assert len(thinking_deltas) > 0, "Should have thinking_delta events"

        # should have signature_delta event before thinking block closes (Anthropic API requirement)
        signature_deltas = [e for e in events if
                           e.get("type") == "content_block_delta" and
                           e.get("delta", {}).get("type") == "signature_delta"]
        assert len(signature_deltas) > 0, "Should have signature_delta event for thinking block"

        # should have text block after thinking
        text_starts = [e for e in events if
                       e.get("type") == "content_block_start" and
                       e.get("content_block", {}).get("type") == "text"]
        assert len(text_starts) > 0, "Should have text content_block_start event"
        assert text_starts[0]["index"] == 1, "Text block should be at index 1 (after thinking)"
    else:
        res = server.make_request("POST", "/v1/messages", data={
            "model": "test",
            "max_tokens": 1024,
            "thinking": {
                "type": "enabled",
                "budget_tokens": 500
            },
            "messages": [
                {"role": "user", "content": "What is 2+2?"}
            ]
        })

        assert res.status_code == 200
        assert res.body["type"] == "message"

        content = res.body["content"]
        assert len(content) >= 2, "Should have at least thinking and text blocks"

        # first block should be thinking
        thinking_blocks = [b for b in content if b.get("type") == "thinking"]
        assert len(thinking_blocks) > 0, "Should have thinking content block"
        assert "thinking" in thinking_blocks[0], "Thinking block should have 'thinking' field"
        assert len(thinking_blocks[0]["thinking"]) > 0, "Thinking content should not be empty"
        assert "signature" in thinking_blocks[0], "Thinking block should have 'signature' field (Anthropic API requirement)"

        # should also have text block
        text_blocks = [b for b in content if b.get("type") == "text"]
        assert len(text_blocks) > 0, "Should have text content block"