aboutsummaryrefslogtreecommitdiff
path: root/examples/redis-unstable/tests/unit/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/tests/unit/cluster')
-rw-r--r--examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl75
-rw-r--r--examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl3063
-rw-r--r--examples/redis-unstable/tests/unit/cluster/cli.tcl415
-rw-r--r--examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl110
-rw-r--r--examples/redis-unstable/tests/unit/cluster/failure-marking.tcl53
-rw-r--r--examples/redis-unstable/tests/unit/cluster/hostnames.tcl230
-rw-r--r--examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl29
-rw-r--r--examples/redis-unstable/tests/unit/cluster/internal-secret.tcl71
-rw-r--r--examples/redis-unstable/tests/unit/cluster/links.tcl292
-rw-r--r--examples/redis-unstable/tests/unit/cluster/misc.tcl36
-rw-r--r--examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl182
-rw-r--r--examples/redis-unstable/tests/unit/cluster/scripting.tcl91
-rw-r--r--examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl67
-rw-r--r--examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl61
-rw-r--r--examples/redis-unstable/tests/unit/cluster/slot-stats.tcl1169
15 files changed, 0 insertions, 5944 deletions
diff --git a/examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl b/examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl
deleted file mode 100644
index a37ca58..0000000
--- a/examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl
+++ /dev/null
@@ -1,75 +0,0 @@
1start_cluster 2 2 {tags {external:skip cluster}} {
2
3 test "Test change cluster-announce-port and cluster-announce-tls-port at runtime" {
4 if {$::tls} {
5 set baseport [lindex [R 0 config get tls-port] 1]
6 } else {
7 set baseport [lindex [R 0 config get port] 1]
8 }
9 set count [expr [llength $::servers] + 1]
10 set used_port [find_available_port $baseport $count]
11
12 R 0 config set cluster-announce-tls-port $used_port
13 R 0 config set cluster-announce-port $used_port
14
15 assert_match "*:$used_port@*" [R 0 CLUSTER NODES]
16 wait_for_condition 50 100 {
17 [string match "*:$used_port@*" [R 1 CLUSTER NODES]]
18 } else {
19 fail "Cluster announced port was not propagated via gossip"
20 }
21
22 R 0 config set cluster-announce-tls-port 0
23 R 0 config set cluster-announce-port 0
24 assert_match "*:$baseport@*" [R 0 CLUSTER NODES]
25 }
26
27 test "Test change cluster-announce-bus-port at runtime" {
28 if {$::tls} {
29 set baseport [lindex [R 0 config get tls-port] 1]
30 } else {
31 set baseport [lindex [R 0 config get port] 1]
32 }
33 set count [expr [llength $::servers] + 1]
34 set used_port [find_available_port $baseport $count]
35
36 # Verify config set cluster-announce-bus-port
37 R 0 config set cluster-announce-bus-port $used_port
38 assert_match "*@$used_port *" [R 0 CLUSTER NODES]
39 wait_for_condition 50 100 {
40 [string match "*@$used_port *" [R 1 CLUSTER NODES]]
41 } else {
42 fail "Cluster announced port was not propagated via gossip"
43 }
44
45 # Verify restore default cluster-announce-port
46 set base_bus_port [expr $baseport + 10000]
47 R 0 config set cluster-announce-bus-port 0
48 assert_match "*@$base_bus_port *" [R 0 CLUSTER NODES]
49 }
50
51 test "CONFIG SET port updates cluster-announced port" {
52 set count [expr [llength $::servers] + 1]
53 # Get the original port and change to new_port
54 if {$::tls} {
55 set orig_port [lindex [R 0 config get tls-port] 1]
56 } else {
57 set orig_port [lindex [R 0 config get port] 1]
58 }
59 assert {$orig_port != ""}
60 set new_port [find_available_port $orig_port $count]
61
62 if {$::tls} {
63 R 0 config set tls-port $new_port
64 } else {
65 R 0 config set port $new_port
66 }
67
68 # Verify that the new port appears in the output of cluster slots
69 wait_for_condition 50 100 {
70 [string match "*$new_port*" [R 0 cluster slots]]
71 } else {
72 fail "Cluster announced port was not updated in cluster slots"
73 }
74 }
75}
diff --git a/examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl b/examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl
deleted file mode 100644
index f04257f..0000000
--- a/examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl
+++ /dev/null
@@ -1,3063 +0,0 @@
1set ::slot_prefixes [dict create \
2 0 "{06S}" \
3 1 "{Qi}" \
4 2 "{5L5}" \
5 3 "{4Iu}" \
6 4 "{4gY}" \
7 5 "{460}" \
8 6 "{1Y7}" \
9 7 "{1LV}" \
10 101 "{1j2}" \
11 102 "{75V}" \
12 103 "{bno}" \
13 5462 "{450}"\
14 5463 "{4dY}"\
15 6000 "{4L7}" \
16 6001 "{4YV}" \
17 6002 "{0bx}" \
18 6003 "{AJ}" \
19 6004 "{of}" \
20 16383 "{6ZJ}" \
21]
22
23# Helper functions
24proc get_port {node_id} {
25 if {$::tls} {
26 return [lindex [R $node_id config get tls-port] 1]
27 } else {
28 return [lindex [R $node_id config get port] 1]
29 }
30}
31
32# return the prefix for the given slot
33proc slot_prefix {slot} {
34 return [dict get $::slot_prefixes $slot]
35}
36
37# return a key for the given slot
38proc slot_key {slot {suffix ""}} {
39 return "[slot_prefix $slot]$suffix"
40}
41
42# Populate a slot with keys
43# TODO: Consider merging with populate()
44proc populate_slot {num args} {
45 # Default values
46 set prefix "key:"
47 set size 3
48 set idx 0
49 set prints false
50 set expires 0
51 set slot -1
52
53 # Parse named arguments
54 foreach {key value} $args {
55 switch -- $key {
56 -prefix { set prefix $value }
57 -size { set size $value }
58 -idx { set idx $value }
59 -prints { set prints $value }
60 -expires { set expires $value }
61 -slot { set slot $value }
62 default { error "Unknown option: $key" }
63 }
64 }
65
66 # If slot is specified, use slot prefix from table
67 if {$slot >= 0} {
68 if {[dict exists $::slot_prefixes $slot]} {
69 set prefix [dict get $::slot_prefixes $slot]
70 } else {
71 error "Slot $slot not supported in slot_prefixes table, add it manually"
72 }
73 }
74
75 R $idx deferred 1
76 if {$num > 16} {set pipeline 16} else {set pipeline $num}
77 set val [string repeat A $size]
78 for {set j 0} {$j < $pipeline} {incr j} {
79 if {$expires > 0} {
80 R $idx set $prefix$j $val ex $expires
81 } else {
82 R $idx set $prefix$j $val
83 }
84 if {$prints} {puts $j}
85 }
86 for {} {$j < $num} {incr j} {
87 if {$expires > 0} {
88 R $idx set $prefix$j $val ex $expires
89 } else {
90 R $idx set $prefix$j $val
91 }
92 R $idx read
93 if {$prints} {puts $j}
94 }
95 for {set j 0} {$j < $pipeline} {incr j} {
96 R $idx read
97 if {$prints} {puts $j}
98 }
99 R $idx deferred 0
100}
101
102# Return 1 if all instances are idle
103proc asm_all_instances_idle {total} {
104 for {set i 0} {$i < $total} {incr i} {
105 if {[CI $i cluster_slot_migration_active_tasks] != 0} { return 0 }
106 if {[CI $i cluster_slot_migration_active_trim_running] != 0} { return 0 }
107 }
108 return 1
109}
110
111# Wait for all ASM tasks to complete in the cluster
112proc wait_for_asm_done {} {
113 set total_instances [expr {$::cluster_master_nodes + $::cluster_replica_nodes}]
114
115 wait_for_condition 1000 10 {
116 [asm_all_instances_idle $total_instances] == 1
117 } else {
118 # Print the number of active tasks on each instance
119 for {set i 0} {$i < $total_instances} {incr i} {
120 set migration_count [CI $i cluster_slot_migration_active_tasks]
121 set trim_count [CI $i cluster_slot_migration_active_trim_running]
122 puts "Instance $i: migration_tasks=$migration_count, trim_tasks=$trim_count"
123 }
124 fail "ASM tasks did not complete on all instances"
125 }
126 # wait all nodes to reach the same cluster config after ASM
127 wait_for_cluster_propagation
128}
129
130proc failover_and_wait_for_done {node_id {failover_arg ""}} {
131 set max_attempts 5
132 for {set attempt 1} {$attempt <= $max_attempts} {incr attempt} {
133 if {$failover_arg eq ""} {
134 R $node_id cluster failover
135 } else {
136 R $node_id cluster failover $failover_arg
137 }
138
139 set completed 1
140 wait_for_condition 1000 10 {
141 [string match "*master*" [R $node_id role]]
142 } else {
143 set completed 0
144 }
145
146 if {$completed} {
147 wait_for_cluster_propagation
148 return
149 }
150 }
151 fail "Failover did not complete after $max_attempts attempts for node $node_id"
152}
153
154proc migration_status {node_id task_id field} {
155 set status [R $node_id CLUSTER MIGRATION STATUS ID $task_id]
156
157 # STATUS ID returns single task, so get first element
158 if {[llength $status] == 0} {
159 return ""
160 }
161
162 set task_status [lindex $status 0]
163 set field_value ""
164
165 # Parse the key-value pairs in the task
166 for {set i 0} {$i < [llength $task_status]} {incr i 2} {
167 set key [lindex $task_status $i]
168 set value [lindex $task_status [expr $i + 1]]
169
170 if {$key eq $field} {
171 set field_value $value
172 break
173 }
174 }
175
176 return $field_value
177}
178
179# Setup slot migration test with keys and delay, then start migration
180# Returns the task_id for the migration
181proc setup_slot_migration_with_delay {src_node dst_node start_slot end_slot {keys 2} {delay 1000000}} {
182 # Two keys on the start slot
183 populate_slot $keys -idx $src_node -slot $start_slot
184
185 # we set a delay to ensure migration takes time for testing,
186 # with default parameters, two keys cost 2s to save
187 R $src_node config set rdb-key-save-delay $delay
188
189 # migrate slot range from src_node to dst_node
190 set task_id [R $dst_node CLUSTER MIGRATION IMPORT $start_slot $end_slot]
191 wait_for_condition 2000 10 {
192 [string match {*send-bulk-and-stream*} [migration_status $src_node $task_id state]]
193 } else {
194 fail "ASM task did not start"
195 }
196
197 return $task_id
198}
199
200# Helper function to clear module internal event logs
201proc clear_module_event_log {} {
202 for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} {
203 R $i asm.clear_event_log
204 }
205}
206
207proc reset_default_trim_method {} {
208 for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} {
209 R $i debug asm-trim-method default
210 }
211}
212
213start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no}} {
214 foreach trim_method {"active" "bg"} {
215 test "Simple slot migration (trim method: $trim_method)" {
216 R 0 debug asm-trim-method $trim_method
217 R 3 debug asm-trim-method $trim_method
218
219 set slot0_key [slot_key 0 mykey]
220 R 0 set $slot0_key "a"
221 set slot1_key [slot_key 1 mykey]
222 R 0 set $slot1_key "b"
223 set slot101_key [slot_key 101 mykey]
224 R 0 set $slot101_key "c"
225 # 3 keys cost 3s to save
226 R 0 config set rdb-key-save-delay 1000000
227
228 # load a function
229 R 0 function load {#!lua name=test1
230 redis.register_function('test1', function() return 'hello1' end)
231 }
232
233 # migrate slot 0-100 to R 1
234 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100]
235 # migration is start, and in accumulating buffer stage
236 wait_for_condition 1000 50 {
237 [string match {*send-bulk-and-stream*} [migration_status 0 $task_id state]] &&
238 [string match {*accumulate-buffer*} [migration_status 1 $task_id state]]
239 } else {
240 fail "ASM task did not start"
241 }
242
243 # append 99 times during migration
244 for {set i 0} {$i < 99} {incr i} {
245 R 0 multi
246 R 0 append $slot0_key "a"
247 R 0 exec
248 R 0 append $slot1_key "b"
249 R 0 append $slot101_key "c"
250 }
251
252 # wait until migration of 0-100 successful
253 wait_for_asm_done
254
255 # verify task state became completed
256 assert_equal "completed" [migration_status 0 $task_id state]
257 assert_equal "completed" [migration_status 1 $task_id state]
258
259 # the appended 99 times should also be migrated
260 assert_equal [string repeat a 100] [R 1 get $slot0_key]
261 assert_equal [string repeat b 100] [R 1 get $slot1_key]
262
263 # function should be migrated
264 assert_equal [R 0 function dump] [R 1 function dump]
265 # the slave should also get the data
266 wait_for_ofs_sync [Rn 1] [Rn 4]
267
268 R 4 readonly
269 assert_equal [string repeat a 100] [R 4 get $slot0_key]
270 assert_equal [string repeat b 100] [R 4 get $slot1_key]
271 assert_equal [R 0 function dump] [R 4 function dump]
272
273 # verify key that was not in the slot range is not migrated
274 assert_equal [string repeat c 100] [R 0 get $slot101_key]
275 # verify changes in replica
276 wait_for_ofs_sync [Rn 0] [Rn 3]
277 R 3 readonly
278 assert_equal [string repeat c 100] [R 3 get $slot101_key]
279
280 # cleanup
281 R 0 config set rdb-key-save-delay 0
282 R 0 flushall
283 R 0 function flush
284 R 1 flushall
285 R 1 function flush
286 R 0 CLUSTER MIGRATION IMPORT 0 100
287 wait_for_asm_done
288 }
289 }
290}
291
292# Skip most of the tests when running under valgrind since it is hard to
293# stabilize tests under valgrind.
294if {!$::valgrind} {
295start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no}} {
296 test "Test CLUSTER MIGRATION IMPORT input validation" {
297 # invalid arguments
298 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION}
299 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION IMPORT}
300 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION IMPORT 100}
301 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION IMPORT 100 200 300}
302 assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION UNKNOWN 1 2}
303
304 # invalid slot range
305 assert_error {*greater than end slot number*} {R 0 CLUSTER MIGRATION IMPORT 200 100}
306 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 17000 18000}
307 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 14000 18000}
308 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 0 16384}
309 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 0 -1}
310 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT -1 2}
311 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT -2 -1}
312 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 10 a}
313 assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT sd sd}
314 assert_error {*already the owner of the slot*} {R 0 CLUSTER MIGRATION IMPORT 100 200}
315 }
316
317 test "Test CLUSTER MIGRATION CANCEL input validation" {
318 # invalid arguments
319 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL}
320 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL ID}
321 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL ID 12345 EXTRAARG}
322 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL ALL EXTRAARG}
323 assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION CANCEL UNKNOWNARG}
324 assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION CANCEL abc def}
325 # empty string id should not cancel any task
326 assert_equal 0 [R 0 CLUSTER MIGRATION CANCEL ID ""]
327 }
328
329 test "Test CLUSTER MIGRATION STATUS input validation" {
330 # invalid arguments
331 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS}
332 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS ID}
333 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS ID id EXTRAARG}
334 assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS ALL EXTRAARG}
335 assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION STATUS ABC DEF}
336 assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION STATUS UNKNOWNARG}
337 # empty string id should not list any task
338 assert_equal {} [R 0 CLUSTER MIGRATION STATUS ID ""]
339 }
340
341 test "Test TRIMSLOTS input validation" {
342 # Wrong number of arguments
343 assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS}
344 assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES}
345 assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES 1}
346 assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES 2 100}
347 assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES 17000 1}
348 assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES abc}
349
350 # Missing ranges argument
351 assert_error {*missing ranges argument*} {R 0 TRIMSLOTS UNKNOWN 1 100 200}
352
353 # Invalid number of ranges
354 assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES 0 1 1}
355 assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES -1 2 2}
356 assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES 17000 1 2}
357 assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES 2 100 200 300}
358
359 # Invalid slot numbers
360 assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 -1 0}
361 assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 -2 -1}
362 assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 0 16384}
363 assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 abc def}
364 assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 100 abc}
365
366 # Start slot greater than end slot
367 assert_error {*greater than end slot number*} {R 0 TRIMSLOTS RANGES 1 200 100}
368 }
369
370 test "Test IMPORT not allowed on replica" {
371 assert_error {* not allowed on replica*} {R 4 CLUSTER MIGRATION IMPORT 100 200}
372 }
373
374 test "Test IMPORT not allowed during manual migration" {
375 set dst_id [R 1 CLUSTER MYID]
376
377 # Set a slot to IMPORTING
378 R 0 CLUSTER SETSLOT 15000 IMPORTING $dst_id
379 assert_error {*must be STABLE to start*slot migration*} {R 0 CLUSTER MIGRATION IMPORT 100 200}
380 # Revert the change
381 R 0 CLUSTER SETSLOT 15000 STABLE
382
383 # Same test with setting a slot to MIGRATING
384 R 0 CLUSTER SETSLOT 5000 MIGRATING $dst_id
385 assert_error {*must be STABLE to start*slot migration*} {R 0 CLUSTER MIGRATION IMPORT 100 200}
386 # Revert the change
387 R 0 CLUSTER SETSLOT 5000 STABLE
388 }
389
390 test "Test IMPORT not allowed if the node is already the owner" {
391 assert_error {*already the owner of the slot*} {R 0 CLUSTER MIGRATION IMPORT 100 100}
392 }
393
394 test "Test IMPORT not allowed for a slot without an owner" {
395 # Slot will have no owner
396 R 0 CLUSTER DELSLOTS 5000
397
398 assert_error {*slot has no owner: 5000*} {R 0 CLUSTER MIGRATION IMPORT 5000 5000}
399
400 # Revert the change
401 R 0 CLUSTER ADDSLOTS 5000
402 }
403
404 test "Test IMPORT not allowed if slot ranges belong to different nodes" {
405 assert_error {*slots belong to different source nodes*} {R 0 CLUSTER MIGRATION IMPORT 7000 15000}
406 assert_error {*slots belong to different source nodes*} {R 0 CLUSTER MIGRATION IMPORT 7000 8000 14000 15000}
407 }
408
409 test "Test IMPORT not allowed if slot is given multiple times" {
410 assert_error {*Slot*specified multiple times*} {R 0 CLUSTER MIGRATION IMPORT 7000 8000 8000 9000}
411 assert_error {*Slot*specified multiple times*} {R 0 CLUSTER MIGRATION IMPORT 7000 8000 7900 9000}
412 }
413
414 test "Test CLUSTER MIGRATION STATUS ALL lists all tasks" {
415 # Create 3 completed tasks
416 R 0 CLUSTER MIGRATION IMPORT 7000 7001
417 wait_for_asm_done
418 R 0 CLUSTER MIGRATION IMPORT 7002 7003
419 wait_for_asm_done
420 R 0 CLUSTER MIGRATION IMPORT 7004 7005
421 wait_for_asm_done
422
423 # Get node IDs for verification
424 set node0_id [R 0 cluster myid]
425 set node1_id [R 1 cluster myid]
426
427 # Verify CLUSTER MIGRATION STATUS ALL reply from both nodes
428 foreach node_idx {0 1} {
429 set tasks [R $node_idx CLUSTER MIGRATION STATUS ALL]
430 assert_equal 3 [llength $tasks]
431
432 for {set i 0} {$i < 3} {incr i} {
433 set task [lindex $tasks $i]
434
435 # Verify field order
436 set expected_fields {id slots source dest operation state
437 last_error retries create_time start_time
438 end_time write_pause_ms}
439 for {set j 0} {$j < [llength $expected_fields]} {incr j} {
440 set expected_field [lindex $expected_fields $j]
441 set actual_field [lindex $task [expr $j * 2]]
442 assert_equal $expected_field $actual_field
443 }
444
445 # Verify basic fields
446 assert_equal "completed" [dict get $task state]
447 assert_equal "" [dict get $task last_error]
448 assert_equal 0 [dict get $task retries]
449 assert {[dict get $task write_pause_ms] >= 0}
450
451 # Verify operation based on node
452 if {$node_idx == 0} {
453 assert_equal "import" [dict get $task operation]
454 } else {
455 assert_equal "migrate" [dict get $task operation]
456 }
457
458 # Verify node IDs (all tasks: node1 -> node0)
459 assert_equal $node1_id [dict get $task source]
460 assert_equal $node0_id [dict get $task dest]
461
462 # Verify timestamps exist and are reasonable
463 set create_time [dict get $task create_time]
464 set start_time [dict get $task start_time]
465 set end_time [dict get $task end_time]
466 assert {$create_time > 0}
467 assert {$start_time >= $create_time}
468 assert {$end_time >= $start_time}
469
470 # Verify specific slot ranges for each task
471 set slots [dict get $task slots]
472 if {$i == 0} {
473 assert_equal "7004-7005" $slots
474 } elseif {$i == 1} {
475 assert_equal "7002-7003" $slots
476 } elseif {$i == 2} {
477 assert_equal "7000-7001" $slots
478 }
479 }
480 }
481
482 # cleanup
483 R 1 CLUSTER MIGRATION IMPORT 7000 7005
484 wait_for_asm_done
485 }
486
487 test "Test IMPORT not allowed if there is an overlapping import" {
488 # Let slot migration take long time, so that we can test overlapping import
489 R 1 config set rdb-key-save-delay 1000000
490 R 1 set tag22273 tag22273 ;# slot hash is 7000
491 R 1 set tag9283 tag9283 ;# slot hash is 8000
492
493 set task_id [R 0 CLUSTER MIGRATION IMPORT 7000 8000]
494 assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 8000 9000}
495 assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 7500 8500}
496 assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 6000 7000}
497 assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 6500 7500}
498
499 wait_for_condition 1000 50 {
500 [string match {*completed*} [migration_status 0 $task_id state]] &&
501 [string match {*completed*} [migration_status 1 $task_id state]]
502 } else {
503 fail "ASM task did not start"
504 }
505 assert_equal "tag22273" [R 0 get tag22273]
506 assert_equal "tag9283" [R 0 get tag9283]
507 R 1 config set rdb-key-save-delay 0
508
509 # revert the migration
510 R 1 CLUSTER MIGRATION IMPORT 7000 8000
511 wait_for_asm_done
512 }
513
514 test "Test IMPORT with unsorted and adjacent ranges" {
515 # Redis should sort and merge adjacent ranges
516 # Adjacent means: prev.end + 1 == next.start
517 # e.g. 7000-7001 7002-7003 7004-7005 => 7000-7005
518
519 # Test with adjacent ranges
520 set task_id [R 0 CLUSTER MIGRATION IMPORT 7000 7001 7002 7100]
521 wait_for_asm_done
522 # verify migration is successfully completed on both nodes
523 assert_equal "completed" [migration_status 0 $task_id state]
524 assert_equal "completed" [migration_status 1 $task_id state]
525 # verify slot ranges are merged correctly
526 assert_equal "7000-7100" [migration_status 0 $task_id slots]
527 assert_equal "7000-7100" [migration_status 1 $task_id slots]
528
529 # Test with unsorted and adjacent ranges
530 set task_id [R 1 CLUSTER MIGRATION IMPORT 7050 7051 7010 7049 7000 7005]
531 wait_for_asm_done
532 # verify migration is successfully completed on both nodes
533 assert_equal "completed" [migration_status 0 $task_id state]
534 assert_equal "completed" [migration_status 1 $task_id state]
535 # verify slot ranges are merged correctly
536 assert_equal "7000-7005 7010-7051" [migration_status 0 $task_id slots]
537 assert_equal "7000-7005 7010-7051" [migration_status 1 $task_id slots]
538
539 # Another test with unsorted and adjacent ranges
540 set task_id [R 1 CLUSTER MIGRATION IMPORT 7007 7007 7008 7009 7006 7006]
541 wait_for_asm_done
542 # verify migration is successfully completed on both nodes
543 assert_equal "completed" [migration_status 0 $task_id state]
544 assert_equal "completed" [migration_status 1 $task_id state]
545 # verify slot ranges are merged correctly
546 assert_equal "7006-7009" [migration_status 0 $task_id slots]
547 assert_equal "7006-7009" [migration_status 1 $task_id slots]
548 }
549
550 test "Simple slot migration with write load" {
551 # Perform slot migration while traffic is on and verify data consistency.
552 # Trimming is disabled on source nodes so, we can compare the dbs after
553 # migration via DEBUG DIGEST to ensure no data loss during migration.
554 # Steps:
555 # 1. Disable trimming on both nodes
556 # 2. Populate slot 0 on node-0 and slot 6000 on node-1
557 # 2. Start write traffic on both nodes
558 # 3. Migrate slot 0 from node-0 to node-1
559 # 4. Migrate slot 6000 from node-1 to node-0
560 # 5. Stop write traffic, verify db's are identical.
561
562 # This test runs slowly under the thread sanitizer.
563 # 1. Increase the lag threshold from the default 1 MB to 10 MB to let the destination catch up easily.
564 # 2. Increase the write pause timeout from the default 10s to 60s so the source can wait longer.
565 set prev_config_lag [lindex [R 0 config get cluster-slot-migration-handoff-max-lag-bytes] 1]
566 R 0 config set cluster-slot-migration-handoff-max-lag-bytes 10mb
567 R 1 config set cluster-slot-migration-handoff-max-lag-bytes 10mb
568 set prev_config_timeout [lindex [R 0 config get cluster-slot-migration-write-pause-timeout] 1]
569 R 0 config set cluster-slot-migration-write-pause-timeout 60000
570 R 1 config set cluster-slot-migration-write-pause-timeout 60000
571
572 R 0 flushall
573 R 0 debug asm-trim-method none
574 populate_slot 10000 -idx 0 -slot 0
575
576 R 1 flushall
577 R 1 debug asm-trim-method none
578 populate_slot 10000 -idx 1 -slot 6000
579
580 # Start write traffic on node-0
581 # Throws -MOVED error once asm is completed, catch block will ignore it.
582 catch {
583 # Start the slot 0 write load on the R 0
584 set port [get_port 0]
585 set key [slot_key 0 mykey]
586 set load_handle0 [start_write_load "127.0.0.1" $port 100 $key 0 5]
587 }
588
589 # Start write traffic on node-1
590 # Throws -MOVED error once asm is completed, catch block will ignore it.
591 catch {
592 # Start the slot 6000 write load on the R 1
593 set port [get_port 1]
594 set key [slot_key 6000 mykey]
595 set load_handle1 [start_write_load "127.0.0.1" $port 100 $key 0 5]
596 }
597
598 # Migrate keys
599 R 1 CLUSTER MIGRATION IMPORT 0 100
600 wait_for_asm_done
601 R 0 CLUSTER MIGRATION IMPORT 6000 6100
602 wait_for_asm_done
603
604 stop_write_load $load_handle0
605 stop_write_load $load_handle1
606
607 # verify data
608 assert_morethan [R 0 dbsize] 0
609 assert_equal [R 0 debug digest] [R 1 debug digest]
610
611 # cleanup
612 R 0 config set cluster-slot-migration-handoff-max-lag-bytes $prev_config_lag
613 R 0 config set cluster-slot-migration-write-pause-timeout $prev_config_timeout
614 R 0 debug asm-trim-method default
615 R 0 flushall
616 R 1 config set cluster-slot-migration-handoff-max-lag-bytes $prev_config_lag
617 R 1 config set cluster-slot-migration-write-pause-timeout $prev_config_timeout
618 R 1 debug asm-trim-method default
619 R 1 flushall
620
621 R 1 CLUSTER MIGRATION IMPORT 6000 6100
622 wait_for_asm_done
623 }
624
625 test "Verify expire time is migrated correctly" {
626 R 0 flushall
627 R 1 flushall
628
629 set string_key [slot_key 0 string_key]
630 set list_key [slot_key 0 list_key]
631 set hash_key [slot_key 0 hash_key]
632 set stream_key [slot_key 0 stream_key]
633
634 for {set i 0} {$i < 20} {incr i} {
635 R 1 hset $hash_key $i $i
636 R 1 xadd $stream_key * item $i
637 }
638 for {set i 0} {$i < 2000} {incr i} {
639 R 1 lpush $list_key $i
640 }
641
642 # set expire time of some keys
643 R 1 set $string_key "a" EX 1000
644 R 1 EXPIRE $list_key 1000
645 R 1 EXPIRE $hash_key 1000
646
647 # migrate slot 0-100 to R 0
648 R 0 CLUSTER MIGRATION IMPORT 0 100
649 wait_for_asm_done
650
651 # check expire times are migrated correctly
652 assert_range [R 0 ttl $string_key] 900 1000
653 assert_range [R 0 ttl $list_key] 900 1000
654 assert_range [R 0 ttl $hash_key] 900 1000
655 assert_equal -1 [R 0 ttl $stream_key]
656
657 # cleanup
658 R 0 flushall
659 R 1 flushall
660 R 1 CLUSTER MIGRATION IMPORT 0 100
661 wait_for_asm_done
662 }
663
664 test "Slot migration with complex data types can work well" {
665 R 0 flushall
666 R 1 flushall
667
668 set list_key [slot_key 0 list_key]
669 set set_key [slot_key 0 set_key]
670 set zset_key [slot_key 0 zset_key]
671 set hash_key [slot_key 0 hash_key]
672 set stream_key [slot_key 0 stream_key]
673
674 # generate big keys for each data type
675 for {set i 0} {$i < 1000} {incr i} {
676 R 1 lpush $list_key $i
677 R 1 sadd $set_key $i
678 R 1 zadd $zset_key $i $i
679 R 1 hset $hash_key $i $i
680 R 1 xadd $stream_key * item $i
681 }
682
683 # migrate slot 0-100 to R 0
684 R 0 CLUSTER MIGRATION IMPORT 0 100
685 wait_for_asm_done
686 # check the data on destination node is correct
687 assert_equal 1000 [R 0 llen $list_key]
688 assert_equal 1000 [R 0 scard $set_key]
689 assert_equal 1000 [R 0 zcard $zset_key]
690 assert_equal 1000 [R 0 hlen $hash_key]
691 assert_equal 1000 [R 0 xlen $stream_key]
692 # migrate slot 0-100 to R 1
693 R 1 CLUSTER MIGRATION IMPORT 0 100
694 wait_for_asm_done
695 }
696
697 proc asm_basic_error_handling_test {operation channel all_states} {
698 foreach state $all_states {
699 if {$::verbose} { puts "Testing $operation $channel channel with state: $state"}
700
701 # For states that need incremental data streaming, set a longer delay
702 set streaming_states [list "streaming-buffer" "accumulate-buffer" "send-bulk-and-stream" "send-stream"]
703 if {$state in $streaming_states} {
704 R 1 config set rdb-key-save-delay 1000000
705 }
706
707 # Let the destination node take time to stream buffer, so the source node will handle
708 # slot snapshot child process exit, and then enter "send-stream" state.
709 if {$state == "send-stream"} {
710 R 0 config set key-load-delay 100000
711 }
712
713 # Start the slot 0 write load on the R 1
714 set slot0_key [slot_key 0 mykey]
715 set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key 500]
716
717 # clear old fail points and set the new fail point
718 assert_equal {OK} [R 0 debug asm-failpoint "" ""]
719 assert_equal {OK} [R 1 debug asm-failpoint "" ""]
720 if {$operation eq "import"} {
721 assert_equal {OK} [R 0 debug asm-failpoint "import-$channel-channel" $state]
722 } elseif {$operation eq "migrate"} {
723 assert_equal {OK} [R 1 debug asm-failpoint "migrate-$channel-channel" $state]
724 } else {
725 fail "Unknown operation: $operation"
726 }
727
728 # Start the migration
729 set task_id [R 0 CLUSTER MIGRATION IMPORT 0 100]
730
731 # The task should be failed due to the fail point
732 wait_for_condition 2000 10 {
733 [string match -nocase "*$channel*${state}*" [migration_status 0 $task_id last_error]] ||
734 [string match -nocase "*$channel*${state}*" [migration_status 1 $task_id last_error]]
735 } else {
736 fail "ASM task did not fail with expected error -
737 (dst: [migration_status 0 $task_id last_error]
738 src: [migration_status 1 $task_id last_error]
739 expected: $channel $state)"
740 }
741 stop_write_load $load_handle
742
743 # Cancel the task
744 R 0 CLUSTER MIGRATION CANCEL ID $task_id
745 R 1 CLUSTER MIGRATION CANCEL ID $task_id
746
747 R 1 config set rdb-key-save-delay 0
748 R 0 config set key-load-delay 0
749 }
750 }
751
# Drive the shared fail-point helper through every replication state of the
# destination's main channel: the helper injects a failure at each state and
# checks the task reports the matching channel/state error.
# Fix: removed trailing whitespace from two test titles ("tests " -> "tests")
# so all four sibling tests are named consistently.
752 test "Destination node main channel basic error-handling tests" {
753 set all_states [list \
754 "connecting" \
755 "auth-reply" \
756 "handshake-reply" \
757 "syncslots-reply" \
758 "accumulate-buffer" \
759 "streaming-buffer" \
760 "wait-stream-eof" \
761 ]
762 asm_basic_error_handling_test "import" "main" $all_states
763 }
764
# Same fail-point sweep for the destination's RDB channel states.
765 test "Destination node rdb channel basic error-handling tests" {
766 set all_states [list \
767 "connecting" \
768 "auth-reply" \
769 "rdbchannel-reply" \
770 "rdbchannel-transfer" \
771 ]
772 asm_basic_error_handling_test "import" "rdb" $all_states
773 }
774
# Same fail-point sweep for the source's main channel states.
775 test "Source node main channel basic error-handling tests" {
776 set all_states [list \
777 "wait-rdbchannel" \
778 "send-bulk-and-stream" \
779 "send-stream" \
780 "handoff" \
781 ]
782 asm_basic_error_handling_test "migrate" "main" $all_states
783 }
784
# Same fail-point sweep for the source's RDB channel states.
785 test "Source node rdb channel basic error-handling tests" {
786 set all_states [list \
787 "wait-bgsave-start" \
788 "send-bulk-and-stream" \
789 ]
790 asm_basic_error_handling_test "migrate" "rdb" $all_states
791 }
792
# Happy path: once all fail points are cleared, an import with concurrent
# write load on the source completes and the data (including incremental
# writes) lands on the destination.
793 test "Migration will be successful after fail points are cleared" {
794 R 0 flushall
795 R 1 flushall
796 set slot0_key [slot_key 0 mykey]
797 set slot1_key [slot_key 1 mykey]
798 R 1 set $slot0_key "a"
799 R 1 set $slot1_key "b"
800
801 # we set a delay to write incremental data
802 R 1 config set rdb-key-save-delay 1000000
803
804 # Start the slot 0 write load on the R 1
805 set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key]
806
807 # Clear all fail points
808 assert_equal {OK} [R 0 debug asm-failpoint "" ""]
809 assert_equal {OK} [R 1 debug asm-failpoint "" ""]
810
811 # Start the migration
812 set task_id [R 0 CLUSTER MIGRATION IMPORT 0 100]
813
814 # Wait for the migration to complete
815 wait_for_asm_done
816
817 stop_write_load $load_handle
818
819 # Verify the data is migrated: R 0 ran the import, so slots 0 and 1 now
820 # belong to R 0; the slot 0 key should have been changed by the write load
821 assert_not_equal "a" [R 0 get $slot0_key]
822 assert_equal "b" [R 0 get $slot1_key]
823 R 1 config set rdb-key-save-delay 0
824 }
825
# With a tiny replica output-buffer limit on the source, the migration link
# overflows: the source must close the connection and the task must fail in
# a send*stream state.
826 test "Client output buffer limit is reached on source side" {
827 R 0 flushall
828 R 1 flushall
829 set r1_pid [S 1 process_id]
830 R 1 debug repl-pause on-streaming-repl-buf
831
832 # Set a small output buffer limit to trigger the error
833 R 0 config set client-output-buffer-limit "replica 4mb 0 0"
834
835 set task_id [setup_slot_migration_with_delay 0 1 0 100]
836
837 # some write traffic so that the task has a chance to enter the streaming-buffer state
838 set slot0_key [slot_key 0 mykey]
839 R 0 set $slot0_key "a"
840
841 # after 3 seconds, the slots snapshot (costs 2s to generate) should be transferred,
842 # then start streaming buffer
843 after 3000
844
845 set loglines [count_log_lines 0]
846
847 # Start the slot 0 write load on the R 0
848 set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key 1000]
849
850 # verify the metric is accessible, it is transient, will be reset on disconnect
851 assert {[S 0 mem_cluster_slot_migration_output_buffer] >= 0}
852
853 # After some time, the client output buffer limit should be reached
854 wait_for_log_messages 0 {"*Client * closed * for overcoming of output buffer limits.*"} $loglines 1000 10
855 wait_for_condition 1000 10 {
856 [string match {*send*stream*} [migration_status 0 $task_id last_error]]
857 } else {
858 fail "ASM task did not fail as expected"
859 }
860
861 stop_write_load $load_handle
862
863 # Reset configurations
864 R 0 config set client-output-buffer-limit "replica 0 0 0"
865 R 0 config set rdb-key-save-delay 0
866
867 # resume server and clear pause point
868 resume_process $r1_pid
869 R 1 debug repl-pause clear
870
871 # Wait for the migration to complete
872 wait_for_asm_done
873 }
874
# Hitting replica-full-sync-buffer-limit on the destination must NOT fail the
# migration: after the limit is reached the stream accumulates on the source
# instead; only a log line and the peak metric record the event.
875 test "Full sync buffer limit is reached on destination side" {
876 # Set a small replication buffer limit to trigger the error
877 R 0 config set replica-full-sync-buffer-limit 1mb
878
879 # start migration from 1 to 0, cost 4s to transfer slots snapshot
880 set task_id [setup_slot_migration_with_delay 1 0 0 100 2 2000000]
881 set loglines [count_log_lines 0]
882
883 # Create some traffic on slot 0
884 populate_slot 100 -idx 1 -slot 0 -size 100000
885
886 # After some time, slots sync buffer limit should be reached, but migration would not fail
887 # since the buffer will be accumulated on source side from now.
888 wait_for_log_messages 0 {"*Slots sync buffer limit has been reached*"} $loglines 1000 10
889
890 # verify the peak value, should be greater than 1mb
891 assert {[S 0 mem_cluster_slot_migration_input_buffer_peak] > 1000000}
892 # verify the metric is accessible, it is transient, will be reset on disconnect
893 assert {[S 0 mem_cluster_slot_migration_input_buffer] >= 0}
894
895 wait_for_asm_done
896
897 # Reset configurations
898 R 0 config set replica-full-sync-buffer-limit 0
899 R 1 config set rdb-key-save-delay 0
900 R 1 cluster migration import 0 100
901 wait_for_asm_done
902 }
903
# While slots are still importing, keys received for those slots must be
# invisible (SCAN/KEYS/RANDOMKEY/GETKEYSINSLOT/DBSIZE all filter them) and
# must not be expired, even past their TTL; after the migration completes
# they become visible with the updated TTLs and expire normally.
904 test "Expired key is not deleted and SCAN/KEYS/RANDOMKEY/CLUSTER GETKEYSINSLOT filter keys in importing slots" {
905 set slot0_key [slot_key 0 mykey]
906 set slot1_key [slot_key 1 mykey]
907 set slot2_key [slot_key 2 mykey]
908 R 1 flushall
909 R 0 flushall
910
911 # we set a delay to write incremental data
912 R 1 config set rdb-key-save-delay 1000000
913
914 # set expire time 2s. Generating the slot snapshot will take 3s, so these
915 # three keys will be expired after slot snapshot is transferred
916 R 1 setex $slot0_key 2 "a"
917 R 1 setex $slot1_key 2 "b"
918 R 1 hset $slot2_key "f1" "1"
919 R 1 expire $slot2_key 2
920 R 1 hexpire $slot2_key 2 FIELDS 1 "f1"
921
922 set task_id [R 0 CLUSTER MIGRATION IMPORT 0 100]
923 wait_for_condition 2000 10 {
924 [string match {*send-bulk-and-stream*} [migration_status 1 $task_id state]]
925 } else {
926 fail "ASM task did not start"
927 }
928
929 # update expire time during migration
930 R 1 setex $slot0_key 100 "a"
931 R 1 expire $slot1_key 80
932 R 1 expire $slot2_key 60
933 R 1 hincrbyfloat $slot2_key "f1" 1
934 R 1 hexpire $slot2_key 60 FIELDS 1 "f1"
935
936 # after 2s, at least one key should be transferred, and it should not be deleted
937 # even though expired: neither active nor lazy expiration (SCAN) takes effect,
938 # and the SCAN/KEYS/RANDOMKEY/CLUSTER GETKEYSINSLOT commands cannot find them
939 after 2000
940 R 3 readonly
941 foreach id {0 3} { ;# 0 is the master, 3 is the replica
942 assert_equal {0 {}} [R $id scan 0 count 10]
943 assert_equal {} [R $id keys "*"]
944 assert_equal {} [R $id keys "{06S}*"]
945 assert_equal {} [R $id randomkey]
946 assert_equal {} [R $id cluster getkeysinslot 0 100]
947 assert_equal [R $id cluster countkeysinslot 0] 0
948 assert_equal [R $id dbsize] 0
949
950 # but we can see the number of keys is increased in INFO KEYSPACE
951 assert {[scan [regexp -inline {keys\=([\d]*)} [R $id info keyspace]] keys=%d] >= 1}
952 assert {[scan [regexp -inline {expires\=([\d]*)} [R $id info keyspace]] expires=%d] >= 1}
953 }
954
955 wait_for_asm_done
956
957 wait_for_ofs_sync [Rn 0] [Rn 3]
958
959 foreach id {0 3} { ;# 0 is the master, 3 is the replica
960 # verify the keys are valid
961 assert_range [R $id ttl $slot0_key] 90 100
962 assert_range [R $id ttl $slot1_key] 70 80
963 assert_range [R $id ttl $slot2_key] 50 60
964 assert_range [R $id httl $slot2_key FIELDS 1 "f1"] 50 60
965
966 # KEYS/SCAN/RANDOMKEY/CLUSTER GETKEYSINSLOT will find the keys after migration
967 assert_equal [list 0 [list $slot0_key $slot1_key $slot2_key]] [R $id scan 0 count 10]
968 assert_equal [list $slot0_key $slot1_key $slot2_key] [R $id keys "*"]
969 assert_equal [list $slot0_key] [R $id keys "{06S}*"]
970 assert_not_equal {} [R $id randomkey]
971 assert_equal [list $slot0_key] [R $id cluster getkeysinslot 0 100]
972
973 # INFO KEYSPACE/DBSIZE/CLUSTER COUNTKEYSINSLOT will also reflect the keys
974 assert_equal 3 [scan [regexp -inline {keys\=([\d]*)} [R $id info keyspace]] keys=%d]
975 assert_equal 3 [scan [regexp -inline {expires\=([\d]*)} [R $id info keyspace]] expires=%d]
976 assert_equal 1 [scan [regexp -inline {subexpiry\=([\d]*)} [R $id info keyspace]] subexpiry=%d]
977 assert_equal 3 [R $id dbsize]
978 assert_equal 1 [R $id cluster countkeysinslot 0]
979 }
980
981 # update expire time to 10ms, after some time, the keys should be deleted due to
982 # active expiration
983 R 0 pexpire $slot0_key 10
984 R 0 pexpire $slot1_key 10
985 R 0 hpexpire $slot2_key 10 FIELDS 1 "f1" ;# the last field is expired, the key will be deleted
986 wait_for_condition 100 50 {
987 [scan [regexp -inline {keys\=([\d]*)} [R 0 info keyspace]] keys=%d] == {} &&
988 [scan [regexp -inline {keys\=([\d]*)} [R 3 info keyspace]] keys=%d] == {}
989 } else {
990 fail "keys did not expire"
991 }
992
993 R 1 config set rdb-key-save-delay 0
994 }
995
# Under allkeys-lru with a tight maxmemory, keys received for importing slots
# must be exempt from eviction (even if that pushes used_memory above
# maxmemory); keys in other slots are evicted as usual, and the imported keys
# become evictable once the migration finishes.
996 test "Eviction does not evict keys in importing slots" {
997 set slot0_key [slot_key 0 mykey]
998 set slot1_key [slot_key 1 mykey]
999 set slot2_key [slot_key 2 mykey]
1000 set slot5462_key [slot_key 5462 mykey]
1001 set slot5463_key [slot_key 5463 mykey]
1002 R 1 flushall
1003 R 0 flushall
1004
1005 # we set a delay to write incremental data
1006 R 0 config set rdb-key-save-delay 1000000
1007
1008 set 1k_str [string repeat "a" 1024]
1009 set 1m_str [string repeat "a" 1048576]
1010
1011 # set two keys to be evicted
1012 R 1 set $slot5462_key $1k_str
1013 R 1 set $slot5463_key $1k_str
1014
1015 # set maxmemory to 200kb more than current used memory,
1016 # redis should evict some keys if importing some big keys
1017 set r1_mem_used [S 1 used_memory]
1018 set r1_max_mem [expr {$r1_mem_used + 200*1024}]
1019 R 1 config set maxmemory $r1_max_mem
1020 R 1 config set maxmemory-policy allkeys-lru
1021
1022 # set 3 keys to be migrated
1023 R 0 set $slot0_key $1m_str
1024 R 0 set $slot1_key $1m_str
1025 R 0 set $slot2_key $1m_str
1026
1027 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100]
1028 wait_for_condition 2000 10 {
1029 [string match {*send-bulk-and-stream*} [migration_status 0 $task_id state]]
1030 } else {
1031 fail "ASM task did not start"
1032 }
1033
1034 # after 2.2s, at least two keys should be transferred, they should not be evicted
1035 # but other keys (slot5462_key and slot5463_key) should be evicted
1036 after 2200
1037 for {set j 0} {$j < 100} {incr j} { R 1 ping } ;# trigger eviction
1038 assert_equal 0 [R 1 exists $slot5462_key]
1039 assert_equal 0 [R 1 exists $slot5463_key]
1040 assert {[scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 2}
1041
1042 # current used memory should be more than the maxmemory, since the big keys that
1043 # belong to importing slots cannot be evicted.
1044 set r1_mem_used [S 1 used_memory]
1045 assert {$r1_mem_used > $r1_max_mem + 1024*1024}
1046
1047 wait_for_asm_done
1048
1049 # after migration, these big keys should be evicted
1050 for {set j 0} {$j < 100} {incr j} { R 1 ping } ;# trigger eviction
1051 assert_equal {} [scan [regexp -inline {expires\=([\d]*)} [R 1 info keyspace]] expires=%d]
1052 }
1053
# A failover on either endpoint cancels the in-flight migration: the demoted
# master cancels its own task; the peer's task fails (and, when the failover
# was on the source side, the destination retries and eventually succeeds).
1054 test "Failover will cancel slot migration tasks" {
1055 # migrate slot 0-100 from 1 to 0
1056 set task_id [setup_slot_migration_with_delay 1 0 0 100]
1057
1058 # FAILOVER happens on the destination node, instance #3 become master, #0 become slave
1059 failover_and_wait_for_done 3
1060
1061 # the old master will cancel the importing task, and the migrating task on
1062 # the source node will be failed
1063 wait_for_condition 1000 50 {
1064 [string match {*canceled*} [migration_status 0 $task_id state]] &&
1065 [string match {*failover*} [migration_status 0 $task_id last_error]] &&
1066 [string match {*failed*} [migration_status 1 $task_id state]]
1067 } else {
1068 fail "ASM task did not cancel"
1069 }
1070
1071 # We can restart ASM tasks on new master, migrate slot 0-100 from 1 to 3
1072 R 1 config set rdb-key-save-delay 0
1073 set task_id [R 3 CLUSTER MIGRATION IMPORT 0 100]
1074 wait_for_asm_done
1075
1076 # migrate slot 0-100 from 3 to 1
1077 set task_id [setup_slot_migration_with_delay 3 1 0 100]
1078
1079 # FAILOVER happens on the source node, instance #3 become slave, #0 become master
1080 failover_and_wait_for_done 0
1081
1082 # the old master will cancel the migrating task, but the destination node will
1083 # retry the importing task, and then succeed.
1084 wait_for_condition 1000 50 {
1085 [string match {*canceled*} [migration_status 3 $task_id state]]
1086 } else {
1087 fail "ASM task did not cancel"
1088 }
1089 wait_for_asm_done
1090 }
1091
# FLUSHALL/FLUSHDB on the importing node must abort the import rather than
# leave half-received slot data behind.
1092 test "Flush-like command can cancel slot migration task" {
1093 # flushall, flushdb
1094 foreach flushcmd {flushall flushdb} {
1095 # start slot migration from 1 to 0
1096 set task_id [setup_slot_migration_with_delay 1 0 0 100]
1097
1098 if {$::verbose} { puts "Testing flush command: $flushcmd"}
1099 R 0 $flushcmd
1100
1101 # flush-like will cancel the task
1102 wait_for_condition 1000 50 {
1103 [string match {*canceled*} [migration_status 0 $task_id state]]
1104 } else {
1105 fail "ASM task did not cancel"
1106 }
1107 }
1108
1109 R 1 config set rdb-key-save-delay 0
1110 R 0 cluster migration import 0 100
1111 wait_for_asm_done
1112 }
1113
# Legacy CLUSTER SETSLOT is rejected on nodes that have an active atomic slot
# migration on the slot; a SETSLOT with a bumped epoch issued from a third
# node updates the slot config and thereby cancels the task on both ends.
1114 test "CLUSTER SETSLOT command when there is a slot migration task" {
1115 # Setup slot migration test from node 0 to node 1
1116 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1117
1118 # Cluster SETSLOT command is not allowed when there is a slot migration task
1119 # on the slot. #0 and #1 are having migration task now.
1120 foreach instance {0 1} {
1121 set node_id [R $instance cluster myid]
1122
1123 catch {R $instance cluster setslot 0 migrating $node_id} err
1124 assert_match {*in an active atomic slot migration*} $err
1125
1126 catch {R $instance cluster setslot 0 importing $node_id} err
1127 assert_match {*in an active atomic slot migration*} $err
1128
1129 catch {R $instance cluster setslot 0 stable} err
1130 assert_match {*in an active atomic slot migration*} $err
1131
1132 catch {R $instance cluster setslot 0 node $node_id} err
1133 assert_match {*in an active atomic slot migration*} $err
1134 }
1135
1136 # CLUSTER SETSLOT on other node will cancel the migration task, we update
1137 # the owner of slot 0 (that is migrating from #0 to #1) to #2 on #2, we
1138 # bump the config epoch to make sure the change can update #0 and #1
1139 # slot configuration, so #0 and #1 will cancel the migration task.
1140 # BTW, if config epoch is not bumped, the slot config of #2 may be
1141 # updated by #0 and #1.
1142 R 2 cluster bumpepoch
1143 R 2 cluster setslot 0 node [R 2 cluster myid]
1144 wait_for_condition 1000 50 {
1145 [string match {*canceled*} [migration_status 0 $task_id state]] &&
1146 [string match {*slots configuration updated*} [migration_status 0 $task_id last_error]] &&
1147 [string match {*canceled*} [migration_status 1 $task_id state]]
1148 } else {
1149 fail "ASM task did not cancel"
1150 }
1151
1152 # set slot 0 back to #0
1153 R 0 cluster bumpepoch
1154 R 0 cluster setslot 0 node [R 0 cluster myid]
1155 wait_for_cluster_propagation
1156 wait_for_cluster_state "ok"
1157 }
1158
# Removing the slot range from the source via DELSLOTSRANGE also counts as a
# slot-configuration update: source task canceled, destination task failed.
1159 test "CLUSTER DELSLOTSRANGE command cancels a slot migration task" {
1160 # start slot migration from 0 to 1
1161 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1162
1163 R 0 cluster delslotsrange 0 100
1164 wait_for_condition 1000 50 {
1165 [string match {*canceled*} [migration_status 0 $task_id state]] &&
1166 [string match {*slots configuration updated*} [migration_status 0 $task_id last_error]] &&
1167 [string match {*failed*} [migration_status 1 $task_id state]]
1168 } else {
1169 fail "ASM task did not cancel"
1170 }
1171 R 1 cluster migration cancel id $task_id
1172
1173 # add the slots back
1174 R 0 cluster addslotsrange 0 100
1175 wait_for_cluster_propagation
1176 wait_for_cluster_state "ok"
1177 }
1178
1179 # NOTE: this test needs more than 60s, maybe you can skip when testing
# Forgetting the destination node cancels the source's task ("node deleted");
# after MEET re-adds the node (post the 60s FORGET blacklist window) the
# destination's import retries and both tasks complete.
1180 test "CLUSTER FORGET command cancels a slot migration task" {
1181 R 0 config set rdb-key-save-delay 0
1182 # Migrate all slots on #0 to #1, so we can forget #0
1183 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 5461]
1184 wait_for_asm_done
1185
1186 # start slot migration from 1 to 0
1187 set task_id [setup_slot_migration_with_delay 1 0 0 5461]
1188
1189 # Forget #0 on #1, the migration task on #1 will be canceled due to node deleted,
1190 # and the importing task on #0 will be failed
1191 R 1 cluster forget [R 0 cluster myid]
1192 wait_for_condition 1000 50 {
1193 [string match {*canceled*} [migration_status 1 $task_id state]] &&
1194 [string match {*node deleted*} [migration_status 1 $task_id last_error]] &&
1195 [string match {*failed*} [migration_status 0 $task_id state]]
1196 } else {
1197 fail "ASM task did not cancel"
1198 }
1199
1200 # Add #0 back into cluster
1201 # NOTE: this will cost 60s to let #0 join the cluster since
1202 # other nodes add #0 into black list for 60s after FORGET.
1203 R 1 config set rdb-key-save-delay 0
1204 R 1 cluster meet "127.0.0.1" [lindex [R 0 config get port] 1]
1205
1206 # the importing task on #0 will be retried, and eventually succeed
1207 # since now #0 is back in the cluster
1208 wait_for_condition 3000 50 {
1209 [string match {*completed*} [migration_status 0 $task_id state]] &&
1210 [string match {*completed*} [migration_status 1 $task_id state]]
1211 } else {
1212 fail "ASM task did not finish"
1213 }
1214
1215 # make sure #0 is completely back to the cluster
1216 wait_for_cluster_propagation
1217 wait_for_cluster_state "ok"
1218 }
1219
# CLIENT PAUSE on the destination cancels an in-flight import, and a new task
# scheduled while paused stays in "none" until CLIENT UNPAUSE.
1220 test "CLIENT PAUSE can cancel slot migration task" {
1221 # start slot migration from 0 to 1
1222 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1223
1224 # CLIENT PAUSE happens on the destination node, #1 will cancel the importing task
1225 R 1 client pause 100000 write ;# pause 100s
1226 wait_for_condition 1000 50 {
1227 [string match {*canceled*} [migration_status 1 $task_id state]] &&
1228 [string match {*client pause*} [migration_status 1 $task_id last_error]]
1229 } else {
1230 fail "ASM task did not cancel"
1231 }
1232
1233 # start task again
1234 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100]
1235 after 200 ;# give some time to have chance to schedule the task
1236 # the task should not start since server is paused
1237 assert {[string match {*none*} [migration_status 1 $task_id state]]}
1238
1239 # unpause the server, the task should start
1240 R 1 client unpause
1241 wait_for_asm_done
1242
1243 # migrate back to original node #0
1244 R 0 config set rdb-key-save-delay 0
1245 R 1 config set rdb-key-save-delay 0
1246 R 0 CLUSTER MIGRATION IMPORT 0 100
1247 wait_for_asm_done
1248 }
1249
# A clean shutdown cancels the task (logged) and must still exit successfully.
1250 test "Server shutdown can cancel slot migration task, exit with success" {
1251 # start slot migration from 0 to 1
1252 setup_slot_migration_with_delay 0 1 0 100
1253
1254 set loglines [count_log_lines -1]
1255
1256 # Shutdown the server, it should cancel the migration task
1257 restart_server -1 true false true nosave
1258
1259 wait_for_log_messages -1 {"*Cancelled due to server shutdown*"} $loglines 100 100
1260
1261 wait_for_cluster_propagation
1262 wait_for_cluster_state "ok"
1263 }
1264
# Cancelling an import while the destination is busy streaming the
# accumulated buffer into the db must succeed and leave the node healthy.
1265 test "Cancel import task when streaming buffer into db" {
1266 # set a delay to have time to cancel import task that is streaming buf to db
1267 R 1 config set key-load-delay 50000
1268 # start slot migration from 0 to 1
1269 set task_id [setup_slot_migration_with_delay 0 1 0 100 5]
1270
1271 # start the slot 0 write load on the node 0
1272 set slot0_key [slot_key 0 mykey]
1273 set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key 500]
1274
1275 # wait for entering streaming buffer state
1276 wait_for_condition 1000 10 {
1277 [string match {*streaming-buffer*} [migration_status 1 $task_id state]]
1278 } else {
1279 fail "ASM task did not enter streaming buffer state"
1280 }
1281 stop_write_load $load_handle
1282
1283 # cancel the import task on #1, the destination node works fine
1284 R 1 cluster migration cancel id $task_id
1285 assert_match {*canceled*} [migration_status 1 $task_id state]
1286
1287 # reset config
1288 R 0 config set key-load-delay 0
1289 R 1 config set key-load-delay 0
1290 }
1291
# With the source process SIGSTOPped (but its fork still delivering the
# snapshot), the destination must time out waiting for the stream EOF.
1292 test "Destination node main channel timeout when waiting stream EOF" {
1293 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1294 R 1 config set repl-timeout 5
1295
1296 # pause the source node to make EOF wait timeout. Do not pause
1297 # the child process, so it can deliver slot snapshot to destination
1298 set r0_process_id [S 0 process_id]
1299 pause_process $r0_process_id
1300
1301 # the destination node will fail after 7s, 5s for EOF wait and 2s for slot snapshot
1302 wait_for_condition 1000 20 {
1303 [string match {*failed*} [migration_status 1 $task_id state]] &&
1304 [string match {*Main channel*Connection timeout*wait-stream-eof*} \
1305 [migration_status 1 $task_id last_error]]
1306 } else {
1307 fail "ASM task did not fail"
1308 }
1309
1310 # resume the source node
1311 resume_process $r0_process_id
1312
1313 # After the source node is resumed, the task on source node may receive
1314 # ACKs from destination and consider the task is stream-done. In this case,
1315 # the task on source node will be failed after several seconds
1316 if {[string match {*stream-done*} [migration_status 0 $task_id state]]} {
1317 wait_for_condition 1000 20 {
1318 [string match {*failed*} [migration_status 0 $task_id state]] &&
1319 [string match {*Server paused*} [migration_status 0 $task_id last_error]]
1320 } else {
1321 fail "ASM task did not fail"
1322 }
1323 }
1324
1325 R 1 config set repl-timeout 60
1326 R 0 cluster migration cancel id $task_id
1327 R 1 cluster migration cancel id $task_id
1328 }
1329
# A very slow snapshot (10s per key) against a 3s repl-timeout must fail the
# destination's RDB channel in the rdbchannel-transfer state.
1330 test "Destination node rdb channel timeout when transferring slots snapshot" {
1331 # cost 10s to transfer each key
1332 set task_id [setup_slot_migration_with_delay 0 1 0 100 2 10000000]
1333 R 1 config set repl-timeout 3
1334
1335 # the destination node will fail after 3s
1336 wait_for_condition 1000 20 {
1337 [string match {*failed*} [migration_status 1 $task_id state]] &&
1338 [string match {*RDB channel*Connection timeout*rdbchannel-transfer*} \
1339 [migration_status 1 $task_id last_error]]
1340 } else {
1341 fail "ASM task did not fail"
1342 }
1343
1344 R 1 config set repl-timeout 60
1345 R 0 cluster migration cancel id $task_id
1346 R 1 cluster migration cancel id $task_id
1347 }
1348
# Mirror case on the source side: pausing the destination blocks the socket
# so the source's snapshot-sending child times out.
1349 test "Source node rdb channel timeout when transferring slots snapshot" {
1350 set r1_pid [S 1 process_id]
1351 R 0 flushall
1352 R 0 config set save ""
1353 # generate several large keys, make sure the memory usage is more than
1354 # socket buffer size, so the rdb channel will block and timeout if
1355 # no data is received by destination.
1356 set val [string repeat "a" 102400] ;# 100kb
1357 for {set i 0} {$i < 1000} {incr i} {
1358 set key [slot_key 0 "key$i"]
1359 R 0 set $key $val
1360 }
1361 R 0 config set repl-timeout 3 ;# 3s for rdb channel timeout
1362 R 0 config set rdb-key-save-delay 10000 ;# 1000 keys cost 10s to save
1363
1364 # start migration from #0 to #1
1365 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100]
1366 wait_for_condition 1000 20 {
1367 [string match {*send-bulk-and-stream*} [migration_status 0 $task_id state]]
1368 } else {
1369 fail "ASM task did not start"
1370 }
1371
1372 # pause the destination node to make rdb channel timeout
1373 pause_process $r1_pid
1374
1375 # the source node will fail, the rdb child process can not
1376 # write data to destination, so it will timeout
1377 wait_for_condition 1000 30 {
1378 [string match {*failed*} [migration_status 0 $task_id state]] &&
1379 [string match {*RDB channel*Failed to send slots snapshot*} \
1380 [migration_status 0 $task_id last_error]]
1381 } else {
1382 fail "ASM task did not fail"
1383 }
1384 resume_process $r1_pid
1385
1386 R 0 config set repl-timeout 60
1387 R 0 cluster migration cancel id $task_id
1388 R 1 cluster migration cancel id $task_id
1389 }
1390
# Pausing a slow-loading destination while incremental stream traffic keeps
# flowing fills the source's socket buffer; the source's main channel must
# then fail with a connection timeout.
1391 test "Source node main channel timeout when sending incremental stream" {
1392 R 0 flushall
1393 R 0 config set repl-timeout 2 ;# 2s for main channel timeout
1394
1395 set r1_pid [S 1 process_id]
1396 # in order to have time to pause the destination node
1397 R 1 config set key-load-delay 50000 ;# 50ms each 16k data
1398
1399 # start migration from #0 to #1
1400 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1401
1402 # Create 200 keys of 16k size traffic on slot 0, streaming buffer need 10s (200*50ms)
1403 populate_slot 200 -idx 0 -slot 0 -size 16384
1404
1405 # wait for streaming buffer state, then pause the destination node
1406 wait_for_condition 1000 20 {
1407 [string match {*streaming-buffer*} [migration_status 1 $task_id state]]
1408 } else {
1409 fail "ASM task did not stream buffer, state: [migration_status 1 $task_id state]"
1410 }
1411 pause_process $r1_pid
1412
1413 # Start the slot 0 write load on the R 0
1414 set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 [slot_key 0 mykey] 500]
1415
1416 # the source node will fail after several seconds (including the time
1417 # to fill the socket buffer of source node), the main channel can not
1418 # write data to destination since the destination is paused
1419 wait_for_condition 1000 30 {
1420 [string match {*failed*} [migration_status 0 $task_id state]] &&
1421 [string match {*Main channel*Connection timeout*} \
1422 [migration_status 0 $task_id last_error]]
1423 } else {
1424 fail "ASM task did not fail"
1425 }
1426 stop_write_load $load_handle
1427 resume_process $r1_pid
1428
1429 R 0 config set repl-timeout 60
1430 R 1 config set key-load-delay 0
1431 R 0 cluster migration cancel id $task_id
1432 R 1 cluster migration cancel id $task_id
1433 R 0 flushall
1434 }
1435
# With the write-pause timeout forced to 0, the source fails the task as soon
# as it checks how long writes have been paused for the handoff.
1436 test "Source server paused timeout" {
1437 # set timeout to 0, so the task will fail immediately when checking timeout
1438 R 0 config set cluster-slot-migration-write-pause-timeout 0
1439
1440 # start migration from node 0 to 1
1441 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1442
1443 # start the slot 0 write load on the node 0
1444 set slot0_key [slot_key 0 mykey]
1445 set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key]
1446
1447 # node 0 will fail since server paused timeout
1448 wait_for_condition 2000 10 {
1449 [string match {*failed*} [migration_status 0 $task_id state]] &&
1450 [string match {*Server paused timeout*} \
1451 [migration_status 0 $task_id last_error]]
1452 } else {
1453 fail "ASM task did not fail"
1454 }
1455
1456 stop_write_load $load_handle
1457
1458 # reset config
1459 R 0 config set cluster-slot-migration-write-pause-timeout 10000
1460 R 0 cluster migration cancel id $task_id
1461 R 1 cluster migration cancel id $task_id
1462 }
1463
# Holding the source at the handoff-prep fail point while the (paused)
# destination cannot drain the stream must trip the configured 5s
# sync-buffer-drain timeout on the source.
1464 test "Sync buffer drain timeout" {
1465 # set a fail point to keep the source node from entering the handoff-prep
1466 # state, to test the sync buffer drain timeout
1467 R 0 debug asm-failpoint "migrate-main-channel" "handoff-prep"
1468 R 0 config set cluster-slot-migration-sync-buffer-drain-timeout 5000
1469
1470 set r1_pid [S 1 process_id]
1471
1472 # start migration from node 0 to 1
1473 set task_id [setup_slot_migration_with_delay 0 1 0 100]
1474
1475 # start the slot 0 write load on the node 0
1476 set slot0_key [slot_key 0 mykey]
1477 set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key]
1478
1479 # wait for entering the wait-stream-eof state
1480 wait_for_condition 1000 10 {
1481 [string match {*wait-stream-eof*} [migration_status 1 $task_id state]]
1482 } else {
1483 fail "ASM task did not enter wait-stream-eof state"
1484 }
1485
1486 pause_process $r1_pid ;# avoid the destination to apply commands
1487
1488 # node 0 will fail since sync buffer drain timeout
1489 wait_for_condition 2000 10 {
1490 [string match {*failed*} [migration_status 0 $task_id state]] &&
1491 [string match {*Sync buffer drain timeout*} \
1492 [migration_status 0 $task_id last_error]]
1493 } else {
1494 fail "ASM task did not fail"
1495 }
1496
1497 stop_write_load $load_handle
1498 resume_process $r1_pid
1499
1500 # reset config
1501 R 0 config set cluster-slot-migration-sync-buffer-drain-timeout 60000
1502 R 0 debug asm-failpoint "" ""
1503 R 0 cluster migration cancel id $task_id
1504 R 1 cluster migration cancel id $task_id
1505 }
1506
# A source that replies -NOTREADY to SYNCSLOTS makes the destination retry;
# once the fail point is cleared both sides complete.
1507 test "Cluster implementation cannot start migrate task temporarily" {
1508 # Inject a fail point to make the source node not ready
1509 R 0 debug asm-failpoint "migrate-main-channel" "none"
1510
1511 # start migration from node 0 to 1
1512 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100]
1513
1514 # verify source node replies SYNCSLOTS with -NOTREADY
1515 set loglines [count_log_lines -1]
1516 wait_for_log_messages -1 {"*Source node replied to SYNCSLOTS SYNC with -NOTREADY, will retry later*"} $loglines 100 100
1517
1518 # clear the fail point and verify the task is completed
1519 R 0 debug asm-failpoint "" ""
1520 wait_for_asm_done
1521 assert_equal "completed" [migration_status 0 $task_id state]
1522 assert_equal "completed" [migration_status 1 $task_id state]
1523
1524 # cleanup
1525 R 0 CLUSTER MIGRATION IMPORT 0 100
1526 wait_for_asm_done
1527 }
1528}
1529
1530start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no}} {
# With the bg trim method, a successful migration lazy-frees exactly the
# migrated keys (2001: slots 0-2) on the source master and its replica,
# while the non-migrated keys (slots 101-103) survive.
1531 test "Test bgtrim after a successful migration" {
1532 R 0 debug asm-trim-method bg
1533 R 3 debug asm-trim-method bg
1534 R 0 CONFIG RESETSTAT
1535 R 3 CONFIG RESETSTAT
1536
1537 R 0 flushall
1538 # Fill slot 0
1539 populate_slot 1000 -idx 0 -slot 0
1540 # Fill slot 1 with keys that have TTL
1541 populate_slot 1000 -idx 0 -slot 1 -prefix "expirekey" -expires 100
1542 # HFE key on slot 2
1543 set slot2_hfekey [slot_key 2 hfekey]
1544 R 0 HSETEX $slot2_hfekey EX 10 FIELDS 1 f1 v1
1545
1546 # Fill slot 101, these keys won't be migrated
1547 populate_slot 1000 -idx 0 -slot 101
1548 # Fill slot 102 with keys that have TTL
1549 populate_slot 1000 -idx 0 -slot 102 -prefix "expirekey" -expires 100
1550 # HFE key on slot 103
1551 set slot103_hfekey [slot_key 103 hfekey]
1552 R 0 HSETEX $slot103_hfekey EX 10 FIELDS 1 f1 v1
1553
1554 # migrate slots 0-100 to node-1
1555 R 1 CLUSTER MIGRATION IMPORT 0 100
1556 wait_for_asm_done
1557
1558 # Verify the data is migrated
1559 wait_for_ofs_sync [Rn 0] [Rn 3]
1560 assert_equal 2001 [R 0 dbsize]
1561 assert_equal 2001 [R 3 dbsize]
1562 wait_for_ofs_sync [Rn 1] [Rn 4]
1563 assert_equal 2001 [R 1 dbsize]
1564 assert_equal 2001 [R 4 dbsize]
1565
1566 # Verify the keys are trimmed lazily
1567 wait_for_condition 1000 10 {
1568 [S 0 lazyfreed_objects] == 2001 &&
1569 [S 3 lazyfreed_objects] == 2001
1570 } else {
1571 puts "lazyfreed_objects: [S 0 lazyfreed_objects] [S 3 lazyfreed_objects]"
1572 fail "Background trim did not happen"
1573 }
1574
1575 # Cleanup
1576 R 0 CLUSTER MIGRATION IMPORT 0 100
1577 wait_for_asm_done
1578 R 0 flushall
1579 R 0 debug asm-trim-method default
1580 R 3 debug asm-trim-method default
1581 }
1582
# After a canceled import, the destination (and its replica) must bg-trim the
# partially received keys back to an empty db, while the source keeps all of
# its data.
1583 test "Test bgtrim after a failed migration" {
1584 R 0 debug asm-trim-method bg
1585 R 3 debug asm-trim-method bg
1586 R 1 CONFIG RESETSTAT
1587 R 4 CONFIG RESETSTAT
1588
1589 # Fill slot 0 on node-0 and migrate it to node-1 (with some delay)
1590 R 0 flushall
1591 set task_id [setup_slot_migration_with_delay 0 1 0 100 10000 1000]
1592 after 1000 ;# wait some time so that some keys are moved
1593
1594 # Fail the migration
1595 R 1 CLUSTER MIGRATION CANCEL ID $task_id
1596 wait_for_asm_done
1597
1598 # Verify the data is not migrated
1599 assert_equal 10000 [R 0 dbsize]
1600 assert_equal 10000 [R 3 dbsize]
1601
1602 # Verify the keys are trimmed lazily after a failed import on dest side.
1603 wait_for_condition 1000 20 {
1604 [R 1 dbsize] == 0 &&
1605 [R 4 dbsize] == 0 &&
1606 [S 1 lazyfreed_objects] > 0 &&
1607 [S 4 lazyfreed_objects] > 0
1608 } else {
1609 fail "Background trim did not happen"
1610 }
1611
1612 # Cleanup
1613 wait_for_asm_done
1614 R 0 flushall
1615 R 0 debug asm-trim-method default
1616 R 3 debug asm-trim-method default
1617 }
1618
# Bg-trimming a migrated slot must wake only the clients blocked on keys in
# that slot (they get -MOVED); clients blocked on other slots stay blocked.
1619 test "Test bgtrim unblocks stream client" {
1620 # Two clients waiting for data on two different streams which are in
1621 # different slots. We are going to migrate one slot, which will unblock
1622 # the client. The other client should still be blocked.
1623 R 0 debug asm-trim-method bg
1624
1625 set key0 [slot_key 0 mystream]
1626 set key1 [slot_key 1 mystream]
1627
1628 # First client waits on slot-0 key
1629 R 0 DEL $key0
1630 R 0 XADD $key0 666 f v
1631 R 0 XGROUP CREATE $key0 mygroup $
1632 set rd0 [redis_deferring_client]
1633 $rd0 XREADGROUP GROUP mygroup Alice BLOCK 0 STREAMS $key0 ">"
1634 wait_for_blocked_clients_count 1
1635
1636 # Second client waits on slot-1 key
1637 R 0 DEL $key1
1638 R 0 XADD $key1 666 f v
1639 R 0 XGROUP CREATE $key1 mygroup $
1640 set rd1 [redis_deferring_client]
1641 $rd1 XREADGROUP GROUP mygroup Alice BLOCK 0 STREAMS $key1 ">"
1642 wait_for_blocked_clients_count 2
1643
1644 # Migrate slot 0
1645 R 1 CLUSTER MIGRATION IMPORT 0 0
1646 wait_for_asm_done
1647
1648 # First client should get MOVED error
1649 assert_error "*MOVED*" {$rd0 read}
1650 $rd0 close
1651
1652 # Second client should operate normally
1653 R 0 XADD $key1 667 f v
1654 set res [$rd1 read]
1655 assert_equal [lindex $res 0 1 0] {667-0 {f v}}
1656 $rd1 close
1657
1658 # cleanup
1659 wait_for_asm_done
1660 R 0 CLUSTER MIGRATION IMPORT 0 0
1661 wait_for_asm_done
1662 R 0 flushall
1663 R 0 debug asm-trim-method default
1664 }
1665
    test "Test bgtrim touches watched keys" {
        # Background trim of a migrated slot must invalidate WATCHed keys in
        # that slot (so a following EXEC aborts), but must leave keys in
        # unaffected slots untouched.
        R 0 debug asm-trim-method bg

        # bgtrim should touch watched keys on migrated slots
        set key0 [slot_key 0 key]
        R 0 set $key0 30
        R 0 watch $key0
        R 1 CLUSTER MIGRATION IMPORT 0 0
        wait_for_asm_done
        R 0 multi
        R 0 ping
        # Empty reply means the transaction was aborted by the touched key.
        assert_equal {} [R 0 exec]

        # bgtrim should not touch watched keys on other slots
        set key2 [slot_key 2 key]
        R 0 set $key2 30
        R 0 watch $key2
        R 1 CLUSTER MIGRATION IMPORT 1 1
        wait_for_asm_done
        R 0 multi
        R 0 ping
        # Watched slot-2 key untouched, so EXEC runs and PING replies PONG.
        assert_equal PONG [R 0 exec]

        # cleanup: bring slots 0-1 back to node-0
        wait_for_asm_done
        R 0 CLUSTER MIGRATION IMPORT 0 1
        wait_for_asm_done
        R 0 flushall
        R 0 debug asm-trim-method default
    }
1696
    test "Test bgtrim after a FAILOVER on destination side" {
        # A forced failover on the destination shard mid-import must leave the
        # new master with unowned keys, which it detects and trims lazily.
        R 1 debug asm-trim-method bg
        R 4 debug asm-trim-method bg

        set loglines [count_log_lines -4]

        # Fill slot 0 on node-0 and migrate it to node-1 (with some delay)
        R 0 flushall
        set task_id [setup_slot_migration_with_delay 0 1 0 100 10000 1000]
        after 1000 ;# wait some time so that some keys are moved

        # Trigger a failover with force to simulate unreachable master and
        # verify unowned keys are trimmed once replica becomes master.
        failover_and_wait_for_done 4 force
        wait_for_log_messages -4 {"*Detected keys in slots that do not belong*Scheduling trim*"} $loglines 1000 10
        wait_for_condition 1000 10 {
            [R 1 dbsize] == 0 &&
            [R 4 dbsize] == 0
        } else {
            fail "Background trim did not happen"
        }

        # cleanup: restore node-1 as master and reset trim method
        wait_for_cluster_propagation
        failover_and_wait_for_done 1
        # NOTE(review): presumably resets the delay installed by
        # setup_slot_migration_with_delay above — confirm against that helper.
        R 0 config set rdb-key-save-delay 0
        R 1 debug asm-trim-method default
        R 4 debug asm-trim-method default
        wait_for_asm_done
    }
1727
    test "CLUSTER SETSLOT is not allowed if there is a pending trim job" {
        # While a trim job is pending (here: deferred because writes are
        # paused), CLUSTER SETSLOT on an affected slot must be rejected.
        R 0 debug asm-trim-method bg
        R 3 debug asm-trim-method bg

        # Fill slot 0 on node-0 and migrate it to node-1 (with some delay)
        R 0 flushall
        # NOTE(review): task_id is never used afterwards; kept for symmetry
        # with the other tests.
        set task_id [setup_slot_migration_with_delay 0 1 0 100 10000 1000]

        # Pause will cancel the task and there will be a pending trim job
        # until writes are allowed again.
        R 1 client pause 100000 write ;# pause 100s
        wait_for_asm_done

        # CLUSTER SETSLOT is not allowed if there is a pending trim job.
        assert_error {*There is a pending trim job for slot 0*} {R 1 CLUSTER SETSLOT 0 STABLE}

        # Unpause the server, trim will be triggered and SETSLOT will be allowed
        R 1 client unpause
        R 1 CLUSTER SETSLOT 0 STABLE
    }
1748}
1749
1750start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no save ""}} {
    test "Test active trim after a successful migration" {
        # After a successful migration with the "active" trim method, the
        # source shard must synchronously trim exactly the migrated keys and
        # report them through the CLUSTER INFO trim counters.
        R 0 debug asm-trim-method active
        R 3 debug asm-trim-method active
        populate_slot 500 -slot 0
        populate_slot 500 -slot 1
        populate_slot 500 -slot 3
        populate_slot 500 -slot 4

        # Migrate 1500 keys (slots 0-1 and 3)
        R 1 CLUSTER MIGRATION IMPORT 0 1 3 3
        wait_for_asm_done

        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_current_job_trimmed] == 1500 &&
            [CI 3 cluster_slot_migration_active_trim_running] == 0 &&
            [CI 3 cluster_slot_migration_active_trim_current_job_trimmed] == 1500
        } else {
            fail "trim failed"
        }

        assert_equal 1500 [CI 0 cluster_slot_migration_active_trim_current_job_keys]
        assert_equal 1500 [CI 3 cluster_slot_migration_active_trim_current_job_keys]

        # Slot 4 (500 keys) stays on the source; 1500 keys now live on dest.
        assert_equal 500 [R 0 dbsize]
        assert_equal 500 [R 3 dbsize]
        assert_equal 1500 [R 1 dbsize]
        assert_equal 1500 [R 4 dbsize]
        assert_equal 0 [R 0 cluster countkeysinslot 0]
        assert_equal 0 [R 0 cluster countkeysinslot 1]
        assert_equal 0 [R 0 cluster countkeysinslot 3]
        assert_equal 500 [R 0 cluster countkeysinslot 4]

        # cleanup
        R 0 debug asm-trim-method default
        R 3 debug asm-trim-method default
        R 0 CLUSTER MIGRATION IMPORT 0 1 3 3
        wait_for_asm_done
        R 0 flushall
        R 1 flushall
    }
1793
    test "Test multiple active trim jobs can be scheduled" {
        # Active trim will be scheduled but it won't run
        # (negative delay parameter suspends execution of scheduled jobs).
        R 0 debug asm-trim-method active -1
        R 3 debug asm-trim-method active -1

        populate_slot 500 -slot 0
        populate_slot 500 -slot 1
        populate_slot 500 -slot 3
        populate_slot 500 -slot 4

        # Migrate 1500 keys
        R 1 CLUSTER MIGRATION IMPORT 0 1
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 1 &&
            [CI 3 cluster_slot_migration_active_trim_running] == 1
        } else {
            fail "migrate failed"
        }

        # Migrate another slot and verify there are two trim tasks on the source
        R 1 CLUSTER MIGRATION IMPORT 3 3
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 2 &&
            [CI 3 cluster_slot_migration_active_trim_running] == 2
        } else {
            fail "migrate failed"
        }

        # Enabled active trim and wait until it is completed.
        R 0 debug asm-trim-method active 0
        R 3 debug asm-trim-method active 0
        wait_for_asm_done

        # Only slot 4 (500 keys) remains on the source shard.
        assert_equal 500 [R 0 dbsize]
        assert_equal 500 [R 3 dbsize]
        assert_equal 0 [R 0 cluster countkeysinslot 0]
        assert_equal 0 [R 0 cluster countkeysinslot 1]
        assert_equal 0 [R 0 cluster countkeysinslot 3]
        assert_equal 500 [R 0 cluster countkeysinslot 4]

        # cleanup
        R 0 debug asm-trim-method default
        R 3 debug asm-trim-method default
        R 0 CLUSTER MIGRATION IMPORT 0 1 3 3
        wait_for_asm_done
        R 0 flushall
        R 1 flushall
    }
1844
    test "Test active-trim clears partially imported keys on cancel" {
        # Cancelling all migrations mid-transfer must trigger an active trim
        # on the destination that removes every partially imported key.
        R 1 debug asm-trim-method active
        R 4 debug asm-trim-method active

        # Rdb delivery will take 10 seconds
        R 0 config set rdb-key-save-delay 10000
        populate_slot 250 -slot 0
        populate_slot 250 -slot 1
        populate_slot 250 -slot 3
        populate_slot 250 -slot 4

        R 1 CLUSTER MIGRATION IMPORT 0 100
        after 2000
        R 1 CLUSTER MIGRATION CANCEL ALL
        wait_for_asm_done

        # Some keys must have arrived before the cancel for the test to be
        # meaningful; both counters must show a non-empty trim job.
        assert_morethan [CI 1 cluster_slot_migration_active_trim_current_job_keys] 0
        assert_morethan [CI 4 cluster_slot_migration_active_trim_current_job_trimmed] 0

        # Source keeps everything, destination is left empty.
        assert_equal 1000 [R 0 dbsize]
        assert_equal 1000 [R 3 dbsize]
        assert_equal 0 [R 1 dbsize]
        assert_equal 0 [R 4 dbsize]

        # Cleanup
        R 1 debug asm-trim-method default
        R 4 debug asm-trim-method default
        R 0 config set rdb-key-save-delay 0
    }
1874
    test "Test active-trim clears partially imported keys on failover" {
        # A failover on the destination shard mid-import must abort the task
        # and start an active trim job that clears partially imported keys.
        R 1 debug asm-trim-method active
        R 4 debug asm-trim-method active

        # Rdb delivery will take 10 seconds
        R 0 config set rdb-key-save-delay 10000

        populate_slot 250 -slot 0
        populate_slot 250 -slot 1
        populate_slot 250 -slot 3
        populate_slot 250 -slot 4

        # Snapshot trim-started counters to assert on the delta later.
        set prev_trim_started_1 [CI 1 cluster_slot_migration_stats_active_trim_started]
        set prev_trim_started_4 [CI 4 cluster_slot_migration_stats_active_trim_started]

        R 1 CLUSTER MIGRATION IMPORT 0 100
        after 2000
        failover_and_wait_for_done 4
        wait_for_asm_done

        # Verify there is at least one trim job started
        assert_morethan [CI 1 cluster_slot_migration_stats_active_trim_started] $prev_trim_started_1
        assert_morethan [CI 4 cluster_slot_migration_stats_active_trim_started] $prev_trim_started_4

        # Source keeps everything, destination is left empty.
        assert_equal 1000 [R 0 dbsize]
        assert_equal 1000 [R 3 dbsize]
        assert_equal 0 [R 1 dbsize]
        assert_equal 0 [R 4 dbsize]

        # Cleanup: restore node-1 as master and reset configs.
        failover_and_wait_for_done 1
        R 1 debug asm-trim-method default
        R 4 debug asm-trim-method default
        R 0 config set rdb-key-save-delay 0
        R 0 flushall
        R 1 flushall
    }
1912
    test "Test import task does not start if active trim is in progress for the same slots" {
        # Active trim will be scheduled but it won't run
        # (negative delay suspends the job), so a reverse import of the same
        # slots must be refused until the trim completes.
        R 0 flushall
        R 1 flushall
        R 0 debug asm-trim-method active -1

        populate_slot 500 -slot 0
        populate_slot 500 -slot 1

        # Migrate 1000 keys
        R 1 CLUSTER MIGRATION IMPORT 0 1
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 1
        } else {
            fail "migrate failed"
        }

        # Try to migrate slots back; node-0 must log a refusal.
        R 0 CLUSTER MIGRATION IMPORT 0 1
        wait_for_log_messages 0 {"*Can not start import task*trim in progress for some of the slots*"} 0 1000 10

        # Enabled active trim and verify slots are imported back
        R 0 debug asm-trim-method active 0
        wait_for_asm_done

        assert_equal 1000 [R 0 dbsize]
        assert_equal 500 [R 0 cluster countkeysinslot 0]
        assert_equal 500 [R 0 cluster countkeysinslot 1]

        # cleanup
        R 0 debug asm-trim-method default
        R 0 flushall
    }
1947
    test "Rdb save during active trim should skip keys in trimmed slots" {
        # A SAVE that runs while an active trim is in progress must not
        # persist keys belonging to the slots being trimmed, so a restart
        # loads only the still-owned slots.
        # Insert some delay to activate trim
        R 0 debug asm-trim-method active 1000
        R 0 config set repl-diskless-sync-delay 0
        R 0 flushall

        populate_slot 5000 -idx 0 -slot 0
        populate_slot 5000 -idx 0 -slot 1
        populate_slot 5000 -idx 0 -slot 2

        # Start migration and wait until trim is in progress
        R 1 CLUSTER MIGRATION IMPORT 0 1
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 1 &&
            [S 0 rdb_bgsave_in_progress] == 0
        } else {
            puts "[CI 0 cluster_slot_migration_active_tasks]"
            puts "[CI 0 cluster_slot_migration_active_trim_running]"
            fail "trim failed"
        }

        # Trigger save during active trim
        R 0 save
        # Wait until the log contains a "keys skipped" message with a non-zero value
        wait_for_log_messages 0 {"*BGSAVE done, 5000 keys saved, [1-9]* keys skipped*"} 0 1000 10

        # Restart from the saved RDB: only slot-2 keys should come back.
        restart_server 0 yes no yes nosave
        assert_equal 5000 [R 0 dbsize]
        assert_equal 0 [R 0 cluster countkeysinslot 0]
        assert_equal 0 [R 0 cluster countkeysinslot 1]
        assert_equal 5000 [R 0 cluster countkeysinslot 2]

        # Cleanup
        wait_for_cluster_propagation
        wait_for_cluster_state "ok"
        R 0 flushall
        R 1 flushall
        R 0 save
        R 0 CLUSTER MIGRATION IMPORT 0 1
        wait_for_asm_done
    }
1990
    test "AOF rewrite during active trim should skip keys in trimmed slots" {
        # Same invariant as the RDB-save test above, but for BGREWRITEAOF:
        # keys in slots being trimmed must not reach the rewritten AOF.
        R 0 debug asm-trim-method active 1000
        R 0 config set repl-diskless-sync-delay 0
        # Force a plain-AOF rewrite (no RDB preamble) so the skip path in the
        # AOF writer itself is exercised.
        R 0 config set aof-use-rdb-preamble no
        R 0 config set appendonly yes
        R 0 config rewrite
        R 0 flushall
        populate_slot 5000 -idx 0 -slot 0
        populate_slot 5000 -idx 0 -slot 1
        populate_slot 5000 -idx 0 -slot 2

        R 1 CLUSTER MIGRATION IMPORT 0 1
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 1
        } else {
            puts "[CI 0 cluster_slot_migration_active_tasks]"
            puts "[CI 0 cluster_slot_migration_active_trim_running]"
            fail "trim failed"
        }

        # BGREWRITEAOF cannot start while a bgsave is running; wait it out.
        wait_for_condition 50 100 {
            [S 0 rdb_bgsave_in_progress] == 0
        } else {
            fail "bgsave is in progress"
        }

        R 0 bgrewriteaof
        # Wait until the log contains a "keys skipped" message with a non-zero value
        wait_for_log_messages 0 {"*AOF rewrite done, [1-9]* keys saved, [1-9]* keys skipped*"} 0 1000 10

        # Restart from the AOF: only slot-2 keys should come back.
        restart_server 0 yes no yes nosave
        assert_equal 5000 [R 0 dbsize]
        assert_equal 0 [R 0 cluster countkeysinslot 0]
        assert_equal 0 [R 0 cluster countkeysinslot 1]
        assert_equal 5000 [R 0 cluster countkeysinslot 2]

        # cleanup
        R 0 config set appendonly no
        R 0 config rewrite
        restart_server 0 yes no yes nosave
        wait_for_cluster_propagation
        wait_for_cluster_state "ok"
        R 0 flushall
        R 1 flushall
        R 0 save
        R 0 CLUSTER MIGRATION IMPORT 0 1
        wait_for_asm_done
    }
2040
    test "Pause actions will stop active trimming" {
        # CLIENT PAUSE WRITE must also suspend the active trim job; the
        # trimmed-key counter must stay frozen while paused.
        R 0 debug asm-trim-method active 1000
        R 0 config set repl-diskless-sync-delay 0
        R 0 flushall
        populate_slot 10000 -idx 0 -slot 0

        R 1 CLUSTER MIGRATION IMPORT 0 100
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 1
        } else {
            puts "[CI 0 cluster_slot_migration_active_tasks]"
            puts "[CI 0 cluster_slot_migration_active_trim_running]"
            fail "trim failed"
        }

        # Pause the server and verify no keys are trimmed
        R 0 client pause 100000 write ;# pause 100s
        set prev [CI 0 cluster_slot_migration_active_trim_current_job_trimmed]
        after 1000 ; # wait some time to see if any keys are trimmed
        set curr [CI 0 cluster_slot_migration_active_trim_current_job_trimmed]
        assert_equal $prev $curr

        # Unpause: trim resumes and empties the source.
        R 0 client unpause
        R 0 debug asm-trim-method default
        wait_for_asm_done
        assert_equal 0 [R 0 dbsize]

        # revert: migrate the slots back to node-0
        R 0 CLUSTER MIGRATION IMPORT 0 100
        wait_for_asm_done
        assert_equal 10000 [R 0 dbsize]
    }
2074
    # Run the fullsync-vs-trim test under every repl-diskless-load mode.
    foreach diskless_load {"disabled" "swapdb" "on-empty-db"} {
        test "Test fullsync cancels active trim (repl-diskless-load $diskless_load)" {
            # A replica that needs a full sync must cancel its in-progress
            # active trim job (the fullsync replaces the dataset anyway).
            R 3 debug asm-trim-method active -10
            R 3 config set repl-diskless-load $diskless_load
            R 0 flushall

            R 0 config set repl-diskless-sync-delay 0
            populate_slot 10000 -idx 0 -slot 0

            R 1 CLUSTER MIGRATION IMPORT 0 0
            wait_for_condition 1000 10 {
                [CI 0 cluster_slot_migration_active_tasks] == 0 &&
                [CI 0 cluster_slot_migration_active_trim_running] == 0 &&
                [CI 3 cluster_slot_migration_active_trim_running] == 1
            } else {
                puts "[CI 0 cluster_slot_migration_active_tasks]"
                puts "[CI 0 cluster_slot_migration_active_trim_running]"
                puts "[CI 3 cluster_slot_migration_active_trim_running]"
                fail "trim failed"
            }

            set prev_cancelled [CI 3 cluster_slot_migration_stats_active_trim_cancelled]
            # Tiny output buffer limit forces the replica link to drop.
            R 0 config set client-output-buffer-limit "replica 1024 0 0"

            # Trigger a fullsync (a 2MB value overflows the replica buffer)
            populate_slot 1 -idx 0 -size 2000000 -slot 2

            wait_for_condition 1000 10 {
                [CI 3 cluster_slot_migration_active_trim_running] == 0 &&
                [CI 3 cluster_slot_migration_stats_active_trim_cancelled] == $prev_cancelled + 1
            } else {
                puts "[CI 3 cluster_slot_migration_active_trim_running]"
                puts "[CI 3 cluster_slot_migration_stats_active_trim_cancelled]"
                fail "trim failed"
            }

            # Restore configs and migrate the slot back.
            R 3 debug asm-trim-method active 0
            R 3 config set repl-diskless-load disabled
            R 0 CLUSTER MIGRATION IMPORT 0 0
            wait_for_asm_done
            wait_for_ofs_sync [Rn 0] [Rn 3]
            assert_equal 10001 [R 0 dbsize]
            assert_equal 10001 [R 3 dbsize]
            assert_equal 0 [R 1 dbsize]
            assert_equal 0 [R 4 dbsize]
            R 0 flushall
        }
    }
2123
    test "Test importing slots while active-trim is in progress for the same slots on replica" {
        # If the master re-imports slots that its replica is still trimming,
        # the replica must block the master client until the trim finishes.
        R 3 debug asm-trim-method active 10000
        R 0 flushall
        populate_slot 10000 -slot 0
        wait_for_ofs_sync [Rn 0] [Rn 3]

        # Wait until active trim is in progress on replica
        R 1 CLUSTER MIGRATION IMPORT 0 100
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 0 &&
            [CI 3 cluster_slot_migration_active_trim_running] == 1
        } else {
            puts "[CI 0 cluster_slot_migration_active_tasks]"
            puts "[CI 0 cluster_slot_migration_active_trim_running]"
            puts "[CI 3 cluster_slot_migration_active_trim_running]"
            fail "trim failed"
        }

        set loglines [count_log_lines -3]

        # Get slots back while the replica is still trimming them
        R 0 CLUSTER MIGRATION IMPORT 0 100
        wait_for_condition 1000 20 {
            [CI 0 cluster_slot_migration_active_tasks] == 1 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 0 &&
            [CI 3 cluster_slot_migration_active_trim_running] == 1
        } else {
            fail "trim failed"
        }

        # Verify replica blocks master until trim is done
        wait_for_log_messages -3 {"*Blocking master client until trim job is done*"} $loglines 1000 30
        R 3 debug asm-trim-method active 0
        wait_for_log_messages -3 {"*Unblocking master client after active trim*"} $loglines 1000 30

        # After everything settles the data is back on shard 0.
        wait_for_asm_done
        wait_for_ofs_sync [Rn 0] [Rn 3]
        assert_equal 10000 [R 0 dbsize]
        assert_equal 10000 [R 3 dbsize]
        assert_equal 0 [R 1 dbsize]
        assert_equal 0 [R 4 dbsize]
    }
2167
    test "TRIMSLOTS should not trim slots that this node is serving" {
        # Trimming a slot owned by the node itself must be refused on the
        # master, and TRIMSLOTS must fail with READONLY on a replica.
        assert_error {*the slot 0 is served by this node*} {R 0 trimslots ranges 1 0 0}
        assert_error {*READONLY*} {R 3 trimslots ranges 1 0 100}
        # Slot 16383 is not served by this shard, so trimming it is allowed
        # on the master but still rejected on the replica.
        assert_equal {OK} [R 0 trimslots ranges 1 16383 16383]
        assert_error {*READONLY*} {R 3 trimslots ranges 1 16383 16383}
    }
2174
    test "Trigger multiple active trim jobs at the same time" {
        # Issuing several TRIMSLOTS calls inside one MULTI must schedule one
        # trim job each; all three must eventually complete.
        R 1 debug asm-trim-method active 0
        R 1 flushall

        set prev_trim_done [CI 1 cluster_slot_migration_stats_active_trim_completed]

        # Create keys in three slots that node-1 does not serve.
        R 1 debug populate 1000 [slot_prefix 0] 100
        R 1 debug populate 1000 [slot_prefix 1] 100
        R 1 debug populate 1000 [slot_prefix 2] 100

        R 1 multi
        R 1 trimslots ranges 1 0 0
        R 1 trimslots ranges 1 1 1
        R 1 trimslots ranges 1 2 2
        R 1 exec

        wait_for_condition 1000 10 {
            [CI 1 cluster_slot_migration_stats_active_trim_completed] == $prev_trim_done + 3
        } else {
            fail "active trim failed"
        }

        R 1 flushall
        R 1 debug asm-trim-method default
    }
2200
    test "Restart will clean up unowned slot keys" {
        # Keys that belong to slots the node does not serve must be dropped
        # while loading the dataset at startup.
        R 1 flushall

        # generate 1000 keys belonging to slot 0 (not served by node-1)
        R 1 debug populate 1000 [slot_prefix 0] 100
        assert {[scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1000}

        # restart node-1
        restart_server -1 true false true save
        wait_for_cluster_propagation
        wait_for_cluster_state "ok"

        # Node-1 has no keys since unowned slot 0 keys were cleaned up during restart
        # (an empty keyspace yields no "keys=" match, hence the empty scan result).
        assert {[scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] == {}}

        R 1 flushall
    }
2218
    test "Test active trim is used when client tracking is used" {
        # With client-side caching enabled, trimming must go through the
        # active path (so invalidation messages are emitted for trimmed keys).
        R 0 flushall
        R 1 flushall
        R 0 debug asm-trim-method default
        R 1 debug asm-trim-method default

        set prev_active_trim [CI 0 cluster_slot_migration_stats_active_trim_completed]

        # Setup a tracking client that is redirected to a pubsub client
        set rd_redirection [redis_deferring_client]
        $rd_redirection client id
        set redir_id [$rd_redirection read]
        $rd_redirection subscribe __redis__:invalidate
        $rd_redirection read ; # Consume the SUBSCRIBE reply.

        # setup tracking: GET caches the key for this client
        set key0 [slot_key 0 key]
        R 0 CLIENT TRACKING on REDIRECT $redir_id
        R 0 SET $key0 1
        R 0 GET $key0
        R 1 CLUSTER MIGRATION IMPORT 0 0
        wait_for_asm_done

        # Trim of the migrated slot must have run as an active trim job.
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_stats_active_trim_completed] == [expr $prev_active_trim + 1]
        } else {
            fail "active trim did not happen"
        }

        # Verify the tracking client received the invalidation message
        set msg [$rd_redirection read]
        set head [lindex $msg 0]

        if {$head eq "message"} {
            # RESP 2
            set got_key [lindex [lindex $msg 2] 0]
        } elseif {$head eq "invalidate"} {
            # RESP 3
            set got_key [lindex $msg 1 0]
        } else {
            fail "unexpected invalidation message: $msg"
        }
        assert_equal $got_key $key0

        # cleanup: migrate slot 0 back and flush
        $rd_redirection close
        wait_for_asm_done
        R 0 CLUSTER MIGRATION IMPORT 0 0
        wait_for_asm_done
        R 0 flushall
    }
2270}
2271
2272set testmodule [file normalize tests/modules/atomicslotmigration.so]
2273
2274start_cluster 3 6 [list tags {external:skip cluster modules} config_lines [list loadmodule $testmodule cluster-node-timeout 60000 cluster-allow-replica-migration no]] {
2275 test "Module api sanity" {
2276 R 0 asm.sanity ;# on master
2277 R 3 asm.sanity ;# on replica
2278 }
2279
    test "Module replicate cross slot command" {
        # A module replicating a cross-slot command while its slot is being
        # migrated must abort the migration task with a "cross slot" error,
        # while normal replication to the replica still works.
        set task_id [setup_slot_migration_with_delay 0 1 0 100]
        set listkey [slot_key 0 "asmlist"]
        # replicate cross slot command during migrating
        R 0 asm.lpush_replicate_crossslot_command $listkey "item1"

        # node 0 will fail due to cross slot
        wait_for_condition 2000 10 {
            [string match {*canceled*} [migration_status 0 $task_id state]] &&
            [string match {*cross slot*} [migration_status 0 $task_id last_error]]
        } else {
            fail "ASM task did not fail"
        }
        R 1 CLUSTER MIGRATION CANCEL ID $task_id

        # sanity check if lpush replicated correctly to the replica
        wait_for_ofs_sync [Rn 0] [Rn 3]
        assert_equal {item1} [R 0 lrange $listkey 0 -1]
        R 3 readonly
        assert_equal {item1} [R 3 lrange $listkey 0 -1]
    }
2301
2302 test "Test RM_ClusterCanAccessKeysInSlot" {
2303 # Test invalid slots
2304 assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot -1]
2305 assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot 20000]
2306 assert_equal 0 [R 2 asm.cluster_can_access_keys_in_slot 16384]
2307 assert_equal 0 [R 5 asm.cluster_can_access_keys_in_slot 16384]
2308
2309 # Test on a master-replica pair
2310 assert_equal 1 [R 0 asm.cluster_can_access_keys_in_slot 0]
2311 assert_equal 1 [R 0 asm.cluster_can_access_keys_in_slot 100]
2312 assert_equal 1 [R 3 asm.cluster_can_access_keys_in_slot 0]
2313 assert_equal 1 [R 3 asm.cluster_can_access_keys_in_slot 100]
2314
2315 # Test on a master-replica pair
2316 assert_equal 1 [R 2 asm.cluster_can_access_keys_in_slot 16383]
2317 assert_equal 1 [R 5 asm.cluster_can_access_keys_in_slot 16383]
2318 }
2319
    test "Test RM_ClusterCanAccessKeysInSlot returns false for unowned slots" {
        # The module API must report slots as inaccessible while they are
        # being imported on the destination and while they are being trimmed
        # on the source.
        # Active trim will be scheduled but it won't run
        # (negative delay suspends scheduled trim jobs).
        R 0 debug asm-trim-method active -1
        R 3 debug asm-trim-method active -1

        setup_slot_migration_with_delay 0 1 0 100 3 1000000

        # Verify importing slots are not local
        assert_equal 0 [R 1 asm.cluster_can_access_keys_in_slot 0]
        assert_equal 0 [R 1 asm.cluster_can_access_keys_in_slot 100]
        assert_equal 0 [R 4 asm.cluster_can_access_keys_in_slot 0]
        assert_equal 0 [R 4 asm.cluster_can_access_keys_in_slot 100]

        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 0 cluster_slot_migration_active_trim_running] == 1 &&
            [CI 3 cluster_slot_migration_active_trim_running] == 1
        } else {
            fail "migrate failed"
        }

        # Wait for config propagation before checking the slot ownership on replica
        wait_for_cluster_propagation

        # Verify slots that are being trimmed are not local
        assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot 0]
        assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot 100]
        assert_equal 0 [R 3 asm.cluster_can_access_keys_in_slot 0]
        assert_equal 0 [R 3 asm.cluster_can_access_keys_in_slot 100]

        # Enabled active trim and wait until it is completed.
        R 0 debug asm-trim-method active 0
        R 3 debug asm-trim-method active 0
        wait_for_asm_done
        wait_for_ofs_sync [Rn 0] [Rn 3]

        # Verify slots are local after migration (on the new owner shard)
        assert_equal 1 [R 1 asm.cluster_can_access_keys_in_slot 0]
        assert_equal 1 [R 1 asm.cluster_can_access_keys_in_slot 100]
        assert_equal 1 [R 4 asm.cluster_can_access_keys_in_slot 0]
        assert_equal 1 [R 4 asm.cluster_can_access_keys_in_slot 100]

        # cleanup: migrate the slots back to node-0
        R 0 debug asm-trim-method default
        R 3 debug asm-trim-method default
        R 0 CLUSTER MIGRATION IMPORT 0 100
        wait_for_asm_done
        R 0 flushall
        R 1 flushall
    }
2370
2371 foreach trim_method {"active" "bg"} {
        test "Test cluster module notifications on a successful migration ($trim_method-trim)" {
            # Verify the module server-event log: migrate-started/-completed
            # on the source master only, import-started/-completed on the
            # whole destination shard, and the trim events that match the
            # configured trim method.
            clear_module_event_log
            R 0 debug asm-trim-method $trim_method
            R 3 debug asm-trim-method $trim_method
            R 6 debug asm-trim-method $trim_method

            # Set a key in the slot range
            set key [slot_key 0 mykey]
            R 0 set $key "value"

            # Migrate the slot ranges
            set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100 200 300]
            wait_for_asm_done

            set src_id [R 0 cluster myid]
            set dest_id [R 1 cluster myid]

            # Verify the events on source, both master and replica
            # (replicas of the source are expected to see no events).
            set migrate_event_log [list \
                "sub: cluster-slot-migration-migrate-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \
                "sub: cluster-slot-migration-migrate-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \
            ]
            assert_equal [R 0 asm.get_cluster_event_log] $migrate_event_log
            assert_equal [R 3 asm.get_cluster_event_log] {}
            assert_equal [R 6 asm.get_cluster_event_log] {}

            # Verify the events on destination, both master and replica
            set import_event_log [list \
                "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \
                "sub: cluster-slot-migration-import-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \
            ]
            wait_for_condition 500 20 {
                [R 1 asm.get_cluster_event_log] eq $import_event_log &&
                [R 4 asm.get_cluster_event_log] eq $import_event_log &&
                [R 7 asm.get_cluster_event_log] eq $import_event_log
            } else {
                puts "R1: [R 1 asm.get_cluster_event_log]"
                puts "R4: [R 4 asm.get_cluster_event_log]"
                puts "R7: [R 7 asm.get_cluster_event_log]"
                fail "ASM import event not received"
            }

            # Verify the trim events: active trim emits start/keyspace/complete,
            # bg trim emits a single background notification.
            if {$trim_method eq "active"} {
                set trim_event_log [list \
                    "sub: cluster-slot-migration-trim-started, slots:0-100,200-300" \
                    "keyspace: key_trimmed, key: $key" \
                    "sub: cluster-slot-migration-trim-completed, slots:0-100,200-300" \
                ]
            } else {
                set trim_event_log [list \
                    "sub: cluster-slot-migration-trim-background, slots:0-100,200-300" \
                ]
            }
            wait_for_condition 500 10 {
                [R 0 asm.get_cluster_trim_event_log] eq $trim_event_log &&
                [R 3 asm.get_cluster_trim_event_log] eq $trim_event_log &&
                [R 6 asm.get_cluster_trim_event_log] eq $trim_event_log
            } else {
                fail "ASM source trim event not received"
            }

            # cleanup: migrate the slots back and reset module/trim state
            R 0 CLUSTER MIGRATION IMPORT 0 100 200 300
            wait_for_asm_done
            clear_module_event_log
            reset_default_trim_method
            R 0 flushall
            R 1 flushall
        }
2442
        test "Test cluster module notifications on a failed migration ($trim_method-trim)" {
            # Cancelling a migration must produce -failed module events on
            # both sides, and trim events on the DESTINATION shard for the
            # partially imported keys.
            clear_module_event_log
            R 1 debug asm-trim-method $trim_method
            R 4 debug asm-trim-method $trim_method
            R 7 debug asm-trim-method $trim_method

            # Set a key in the slot range
            set key [slot_key 0 mykey]
            R 0 set $key "value"

            # Start migration and cancel it
            set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000]
            # Wait until at least one key is moved to destination
            wait_for_condition 1000 10 {
                [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1
            } else {
                fail "Key not moved to destination"
            }
            R 1 CLUSTER MIGRATION CANCEL ID $task_id
            wait_for_asm_done

            set src_id [R 0 cluster myid]
            set dest_id [R 1 cluster myid]

            # Verify the events on source, both master and replica
            # (source replicas are expected to see no events).
            set migrate_event_log [list \
                "sub: cluster-slot-migration-migrate-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
                "sub: cluster-slot-migration-migrate-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
            ]
            assert_equal [R 0 asm.get_cluster_event_log] $migrate_event_log
            assert_equal [R 3 asm.get_cluster_event_log] {}
            assert_equal [R 6 asm.get_cluster_event_log] {}

            # Verify the events on destination, both master and replica
            set import_event_log [list \
                "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
                "sub: cluster-slot-migration-import-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
            ]
            wait_for_condition 500 10 {
                [R 1 asm.get_cluster_event_log] eq $import_event_log &&
                [R 4 asm.get_cluster_event_log] eq $import_event_log &&
                [R 7 asm.get_cluster_event_log] eq $import_event_log
            } else {
                fail "ASM import event not received"
            }

            # Verify the trim events on destination (partially imported keys are trimmed)
            if {$trim_method eq "active"} {
                set trim_event_log [list \
                    "sub: cluster-slot-migration-trim-started, slots:0-100" \
                    "keyspace: key_trimmed, key: $key" \
                    "sub: cluster-slot-migration-trim-completed, slots:0-100" \
                ]
            } else {
                set trim_event_log [list \
                    "sub: cluster-slot-migration-trim-background, slots:0-100" \
                ]
            }
            wait_for_condition 500 10 {
                [R 1 asm.get_cluster_trim_event_log] eq $trim_event_log &&
                [R 4 asm.get_cluster_trim_event_log] eq $trim_event_log &&
                [R 7 asm.get_cluster_trim_event_log] eq $trim_event_log
            } else {
                fail "ASM destination trim event not received"
            }

            # cleanup
            clear_module_event_log
            reset_default_trim_method
            wait_for_asm_done
            R 0 flushall
            R 1 flushall
        }
2516
2517 test "Test cluster module notifications on failover ($trim_method-trim)" {
2518 # NOTE: cluster legacy may have a bug, multiple manual failover will fail,
2519 # so only perform one round of failover test, fix it later
2520 if {$trim_method eq "bg"} {
2521 clear_module_event_log
2522 R 1 debug asm-trim-method $trim_method
2523 R 4 debug asm-trim-method $trim_method
2524 R 7 debug asm-trim-method $trim_method
2525
2526 # Set a key in the slot range
2527 set key [slot_key 0 mykey]
2528 R 0 set $key "value"
2529
2530 # Start migration
2531 set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000]
2532 # Wait until at least one key is moved to destination
2533 wait_for_condition 1000 10 {
2534 [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1
2535 } else {
2536 fail "Key not moved to destination"
2537 }
2538
2539 failover_and_wait_for_done 4
2540 wait_for_asm_done
2541
2542 set src_id [R 0 cluster myid]
2543 set dest_id [R 1 cluster myid]
2544
2545 # Verify the events on source, both master and replica
2546 set migrate_event_log [list \
2547 "sub: cluster-slot-migration-migrate-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2548 "sub: cluster-slot-migration-migrate-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2549 ]
2550 assert_equal [R 0 asm.get_cluster_event_log] $migrate_event_log
2551 assert_equal [R 3 asm.get_cluster_event_log] {}
2552 assert_equal [R 6 asm.get_cluster_event_log] {}
2553
2554 # Verify the events on destination, both master and replica
2555 set import_event_log [list \
2556 "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2557 "sub: cluster-slot-migration-import-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2558 ]
2559 wait_for_condition 500 20 {
2560 [R 1 asm.get_cluster_event_log] eq $import_event_log &&
2561 [R 4 asm.get_cluster_event_log] eq $import_event_log &&
2562 [R 7 asm.get_cluster_event_log] eq $import_event_log
2563 } else {
2564 puts "R1: [R 1 asm.get_cluster_event_log]"
2565 puts "R4: [R 4 asm.get_cluster_event_log]"
2566 puts "R7: [R 7 asm.get_cluster_event_log]"
2567 fail "ASM import event not received"
2568 }
2569
2570 # Verify the trim events on destination (partially imported keys are trimmed)
2571 # NOTE: after failover, the new master will initiate the slot trimming,
2572 # and only slot 0 has data, so only slot 0 is trimmed
2573 if {$trim_method eq "active"} {
2574 set trim_event_log [list \
2575 "sub: cluster-slot-migration-trim-started, slots:0-0" \
2576 "keyspace: key_trimmed, key: $key" \
2577 "sub: cluster-slot-migration-trim-completed, slots:0-0" \
2578 ]
2579 } else {
2580 set trim_event_log [list \
2581 "sub: cluster-slot-migration-trim-background, slots:0-0" \
2582 ]
2583 }
2584 wait_for_condition 500 20 {
2585 [R 1 asm.get_cluster_trim_event_log] eq $trim_event_log &&
2586 [R 4 asm.get_cluster_trim_event_log] eq $trim_event_log &&
2587 [R 7 asm.get_cluster_trim_event_log] eq $trim_event_log
2588 } else {
2589 puts "R1: [R 1 asm.get_cluster_trim_event_log]"
2590 puts "R4: [R 4 asm.get_cluster_trim_event_log]"
2591 puts "R7: [R 7 asm.get_cluster_trim_event_log]"
2592 fail "ASM destination trim event not received"
2593 }
2594
2595 # cleanup
2596 failover_and_wait_for_done 1
2597 clear_module_event_log
2598 reset_default_trim_method
2599 R 0 flushall
2600 R 1 flushall
2601 }
2602 }
2603 }
2604
2605 foreach with_rdb {"with" "without"} {
2606 test "Test cluster module notifications when replica restart $with_rdb RDB during importing" {
2607 clear_module_event_log
2608 R 1 debug asm-trim-method $trim_method
2609 R 4 debug asm-trim-method $trim_method
2610 R 7 debug asm-trim-method $trim_method
2611 R 4 config set save ""
2612
2613 set src_id [R 0 cluster myid]
2614 set dest_id [R 1 cluster myid]
2615
2616 # Set a key in the slot range
2617 set key [slot_key 0 mykey]
2618 R 0 set $key "value"
2619
2620 # Start migration, 2s delay
2621 set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000]
2622 # Wait until at least one key is moved to destination
2623 wait_for_condition 1000 10 {
2624 [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1
2625 } else {
2626 fail "Key not moved to destination"
2627 }
2628 wait_for_ofs_sync [Rn 1] [Rn 4]
2629
2630 # restart node 4
2631 if {$with_rdb eq "with"} {
2632 restart_server -4 true false true save ;# rdb save
2633 } else {
2634 restart_server -4 true false true nosave ;# no rdb saved
2635 }
2636 wait_for_cluster_propagation
2637
2638 wait_for_asm_done
2639
2640 # started and completed are paired, and not duplicated
2641 set import_event_log [list \
2642 "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2643 "sub: cluster-slot-migration-import-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2644 ]
2645 wait_for_condition 500 10 {
2646 [R 1 asm.get_cluster_event_log] eq $import_event_log &&
2647 [R 4 asm.get_cluster_event_log] eq $import_event_log &&
2648 [R 7 asm.get_cluster_event_log] eq $import_event_log
2649 } else {
2650 fail "ASM import event not received"
2651 }
2652
2653 R 0 CLUSTER MIGRATION IMPORT 0 100
2654 wait_for_asm_done
2655 R 4 save ;# save an empty rdb to override previous one
2656 clear_module_event_log
2657 reset_default_trim_method
2658 R 0 flushall
2659 R 1 flushall
2660 }
2661 }
2662
2663 test "Test cluster module notifications when replica is disconnected and full resync after importing" {
2664 clear_module_event_log
2665 R 1 debug asm-trim-method $trim_method
2666 R 4 debug asm-trim-method $trim_method
2667 R 7 debug asm-trim-method $trim_method
2668
2669 set src_id [R 0 cluster myid]
2670 set dest_id [R 1 cluster myid]
2671
2672 # Set a key in the slot range
2673 set key [slot_key 0 mykey]
2674 R 0 set $key "value"
2675
2676 # Start migration, 2s delay
2677 set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000]
2678 # Wait until at least one key is moved to destination
2679 wait_for_condition 1000 10 {
2680 [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1
2681 } else {
2682 fail "Key not moved to destination"
2683 }
2684 wait_for_ofs_sync [Rn 1] [Rn 4]
2685
2686 # puase node-4
2687 set r4_pid [S 4 process_id]
2688 pause_process $r4_pid
2689
2690 # set a small repl-backlog-size and write some commands to make node-4
2691 # full resync when reconnecting after waking up
2692 set r1_full_sync [S 1 sync_full]
2693 R 1 config set repl-backlog-size 16kb
2694 R 1 client kill type replica
2695 set 1k_str [string repeat "a" 1024]
2696 for {set i 0} {$i < 2000} {incr i} {
2697 R 1 set [slot_key 6000] $1k_str
2698 }
2699
2700 # after ASM task is completed, wake up node-4
2701 wait_for_condition 1000 10 {
2702 [CI 1 cluster_slot_migration_active_tasks] == 0 &&
2703 [CI 1 cluster_slot_migration_active_trim_running] == 0
2704 } else {
2705 fail "ASM tasks did not completed"
2706 }
2707 resume_process $r4_pid
2708
2709 # make sure full resync happens
2710 wait_for_sync [Rn 4]
2711 wait_for_ofs_sync [Rn 1] [Rn 4]
2712 assert_morethan [S 1 sync_full] $r1_full_sync
2713
2714 # started and completed are paired, and not duplicated
2715 set import_event_log [list \
2716 "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2717 "sub: cluster-slot-migration-import-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \
2718 ]
2719 wait_for_condition 500 10 {
2720 [R 1 asm.get_cluster_event_log] eq $import_event_log &&
2721 [R 4 asm.get_cluster_event_log] eq $import_event_log &&
2722 [R 7 asm.get_cluster_event_log] eq $import_event_log
2723 } else {
2724 fail "ASM import event not received"
2725 }
2726
2727 # since ASM task is completed on node-1 before node-4 reconnects,
2728 # no trim event should be received on node-4
2729 assert_equal {} [R 4 asm.get_cluster_trim_event_log]
2730
2731 R 0 CLUSTER MIGRATION IMPORT 0 100
2732 wait_for_asm_done
2733 clear_module_event_log
2734 reset_default_trim_method
2735 R 0 flushall
2736 R 1 flushall
2737 }
2738
2739 test "Test new master can trim slots when migration is completed and failover occurs on source side" {
2740 R 0 asm.disable_trim ;# can not start slot trimming on source side
2741 set slot0_key [slot_key 0 mykey]
2742 R 0 set $slot0_key "value"
2743
2744 # migrate slot 0 from #0 to #1, and wait it completed, but not allow to trim slots
2745 # on source node
2746 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 0]
2747 wait_for_condition 1000 10 {
2748 [string match {*completed*} [migration_status 0 $task_id state]] &&
2749 [string match {*completed*} [migration_status 1 $task_id state]]
2750 } else {
2751 fail "ASM task did not complete"
2752 }
2753 # verify trim is not allowed on source node, and replica node doesn't have trim job either
2754 wait_for_ofs_sync [Rn 0] [Rn 3]
2755 assert_equal 1 [R 0 asm.trim_in_progress]
2756 assert_equal "value" [R 0 asm.read_pending_trim_key $slot0_key]
2757 assert_equal 0 [R 3 asm.trim_in_progress]
2758 assert_equal "value" [R 3 asm.read_pending_trim_key $slot0_key]
2759
2760 set loglines [count_log_lines 0]
2761
2762 # failover happens on source node, instance #3 become slave, #0 become master
2763 failover_and_wait_for_done 3
2764 R 0 asm.enable_trim ;# enable trim on old master
2765
2766 # old master should cancel the pending trim job
2767 wait_for_log_messages 0 {"*Cancelling the pending trim job*"} $loglines 1000 10
2768
2769 wait_for_ofs_sync [Rn 3] [Rn 0]
2770 # verify trim is allowed on new master, and the key is trimmed
2771 wait_for_condition 1000 10 {
2772 [R 3 asm.trim_in_progress] == 0 &&
2773 [R 3 asm.read_pending_trim_key $slot0_key] eq "" &&
2774 [R 0 asm.trim_in_progress] == 0 &&
2775 [R 0 asm.read_pending_trim_key $slot0_key] eq ""
2776 } else {
2777 fail "Trim did not complete"
2778 }
2779
2780 # verify the trim events, use active trim since module is subscribed to trimmed event
2781 set trim_event_log [list \
2782 "sub: cluster-slot-migration-trim-started, slots:0-0" \
2783 "keyspace: key_trimmed, key: $slot0_key" \
2784 "sub: cluster-slot-migration-trim-completed, slots:0-0" \
2785 ]
2786 wait_for_condition 500 20 {
2787 [R 0 asm.get_cluster_trim_event_log] eq $trim_event_log &&
2788 [R 3 asm.get_cluster_trim_event_log] eq $trim_event_log &&
2789 [R 6 asm.get_cluster_trim_event_log] eq $trim_event_log
2790 } else {
2791 fail "ASM destination trim event not received"
2792 }
2793
2794 # cleanup
2795 failover_and_wait_for_done 0
2796 R 0 CLUSTER MIGRATION IMPORT 0 0
2797 wait_for_asm_done
2798 clear_module_event_log
2799 reset_default_trim_method
2800 R 0 flushall
2801 R 1 flushall
2802 }
2803
2804 test "Test module replicates commands at the beginning of slot migration " {
2805 R 0 flushall
2806 R 1 flushall
2807
2808 # Sanity check
2809 assert_equal 0 [R 1 asm.read_keyless_cmd_val]
2810 assert_equal 0 [R 4 asm.read_keyless_cmd_val]
2811
2812 # Enable module command replication and set a key to be replicated
2813 # Module will replicate two commands:
2814 # 1- A keyless command: asm.keyless_cmd
2815 # 2- SET command for the given key and value
2816 set keyname [slot_key 0 modulekey]
2817 R 0 asm.replicate_module_command 1 $keyname "value"
2818
2819 setup_slot_migration_with_delay 0 1 0 100
2820 wait_for_asm_done
2821 wait_for_ofs_sync [Rn 1] [Rn 4]
2822
2823 # Verify the commands are replicated
2824 assert_equal 1 [R 1 asm.read_keyless_cmd_val]
2825 assert_equal value [R 1 get $keyname]
2826
2827 # Verify the commands are replicated to replica
2828 R 4 readonly
2829 assert_equal 1 [R 4 asm.read_keyless_cmd_val]
2830 assert_equal value [R 4 get $keyname]
2831
2832 # cleanup
2833 R 0 asm.replicate_module_command 0 "" ""
2834 R 0 CLUSTER MIGRATION IMPORT 0 100
2835 wait_for_asm_done
2836 R 0 flushall
2837 R 1 flushall
2838 }
2839
2840 test "Test subcommand propagation during slot migration" {
2841 R 0 flushall
2842 R 1 flushall
2843 set task_id [setup_slot_migration_with_delay 0 1 0 100]
2844
2845 set key [slot_key 0 mykey]
2846 R 0 asm.parent set $key "value" ;# execute a module subcommand
2847 wait_for_asm_done
2848 assert_equal "value" [R 1 GET $key]
2849
2850 # cleanup
2851 R 0 cluster migration import 0 100
2852 wait_for_asm_done
2853 }
2854
2855 test "Test trim method selection based on module keyspace subscription" {
2856 R 0 debug asm-trim-method default
2857 R 1 debug asm-trim-method default
2858
2859 R 0 flushall
2860 R 1 flushall
2861
2862 populate_slot 10 -idx 0 -slot 0
2863
2864 # Make sure module is subscribed to NOTIFY_KEY_TRIMMED event. In this
2865 # case, active trim must be used.
2866 R 0 asm.subscribe_trimmed_event 1
2867 set loglines [count_log_lines 0]
2868 R 1 CLUSTER MIGRATION IMPORT 0 15
2869 wait_for_asm_done
2870 wait_for_log_messages 0 {"*Active trim scheduled for slots: 0-15*"} $loglines 1000 10
2871
2872 # Move slots back to node-0. Make sure module is not subscribed to
2873 # NOTIFY_KEY_TRIMMED event. In this case, background trim must be used.
2874 R 1 asm.subscribe_trimmed_event 0
2875 set loglines [count_log_lines -1]
2876 R 0 CLUSTER MIGRATION IMPORT 0 15
2877 wait_for_asm_done
2878 wait_for_log_messages -1 {"*Background trim started for slots: 0-15*"} $loglines 1000 10
2879
2880 # cleanup
2881 wait_for_asm_done
2882 R 0 asm.subscribe_trimmed_event 1
2883 R 1 asm.subscribe_trimmed_event 1
2884 R 0 flushall
2885 R 1 flushall
2886 }
2887
2888 test "Verify trimmed key value can be read in the server event callback" {
2889 R 0 flushall
2890 set key [slot_key 0]
2891 set value "value123random"
2892 R 0 set $key $value
2893
2894 R 1 CLUSTER MIGRATION IMPORT 0 0
2895 wait_for_asm_done
2896 wait_for_condition 1000 10 {
2897 [R 0 asm.get_last_deleted_key] eq "keyevent: key: $key, value: $value"
2898 } else {
2899 fail "Last deleted key event not received"
2900 }
2901
2902 # cleanup
2903 R 0 CLUSTER MIGRATION IMPORT 0 0
2904 wait_for_asm_done
2905 }
2906
2907 test "Verify module cannot open a key in a slot that is being trimmed" {
2908 R 0 flushall
2909 R 0 debug asm-trim-method active -1 ;# disable active trim
2910
2911 set key [slot_key 0]
2912 R 0 set $key value
2913
2914 R 1 CLUSTER MIGRATION IMPORT 0 0
2915 wait_for_condition 1000 10 {
2916 [CI 0 cluster_slot_migration_active_tasks] == 0 &&
2917 [CI 1 cluster_slot_migration_active_tasks] == 0 &&
2918 [CI 0 cluster_slot_migration_active_trim_running] == 1
2919 } else {
2920 fail "migrate failed"
2921 }
2922
2923 # We cannot open the key since it is in a slot being trimmed
2924 assert_equal {} [R 0 asm.get $key]
2925
2926 # cleanup
2927 R 0 debug asm-trim-method default
2928 R 0 CLUSTER MIGRATION IMPORT 0 0
2929 wait_for_asm_done
2930 }
2931
2932 test "Test RM_ClusterGetLocalSlotRanges" {
2933 assert_equal [R 0 asm.cluster_get_local_slot_ranges] {{0 5461}}
2934 assert_equal [R 3 asm.cluster_get_local_slot_ranges] {{0 5461}}
2935
2936 R 0 cluster migration import 5463 6000
2937 wait_for_asm_done
2938 wait_for_cluster_propagation
2939 assert_equal [R 0 asm.cluster_get_local_slot_ranges] {{0 5461} {5463 6000}}
2940 assert_equal [R 3 asm.cluster_get_local_slot_ranges] {{0 5461} {5463 6000}}
2941
2942 R 0 cluster migration import 5462 5462 6001 10922
2943 wait_for_asm_done
2944 wait_for_cluster_propagation
2945 assert_equal [R 0 asm.cluster_get_local_slot_ranges] {{0 10922}}
2946 assert_equal [R 3 asm.cluster_get_local_slot_ranges] {{0 10922}}
2947 assert_equal [R 1 asm.cluster_get_local_slot_ranges] {}
2948 assert_equal [R 4 asm.cluster_get_local_slot_ranges] {}
2949 }
2950}
2951
2952set testmodule [file normalize tests/modules/atomicslotmigration.so]
2953
2954start_cluster 2 0 [list tags {external:skip cluster modules} config_lines [list loadmodule $testmodule cluster-node-timeout 60000 cluster-allow-replica-migration no appendonly yes]] {
2955 test "TRIMSLOTS in AOF will work synchronously on restart" {
2956 # When TRIMSLOTS is replayed from AOF during restart, it must execute
2957 # synchronously rather than using active trim. This prevents race
2958 # conditions where subsequent AOF commands might operate on keys
2959 # that should have been trimmed.
2960
2961 # Subscribe to key trimmed event to force active trim
2962 R 0 asm.subscribe_trimmed_event 1
2963 populate_slot 1000 -slot 0
2964 populate_slot 1000 -slot 1
2965 R 1 CLUSTER MIGRATION IMPORT 0 0
2966 wait_for_asm_done
2967
2968 # verify active trim is used
2969 assert_equal 1 [CI 0 cluster_slot_migration_stats_active_trim_completed]
2970
2971 # restart server and verify aof is loaded
2972 restart_server 0 yes no yes nosave
2973 assert {[scan [regexp -inline {aof_current_size:([\d]*)} [R 0 info persistence]] aof_current_size=%d] > 0}
2974 wait_for_cluster_state "ok"
2975
2976 # verify TRIMSLOTS in AOF is executed synchronously
2977 assert_equal 0 [CI 0 cluster_slot_migration_stats_active_trim_completed]
2978 assert_equal 1000 [R 0 dbsize]
2979
2980 # cleanup
2981 R 0 CLUSTER MIGRATION IMPORT 0 0
2982 wait_for_asm_done
2983 assert_equal 2000 [R 0 dbsize]
2984 R 0 flushall
2985 R 1 flushall
2986 clear_module_event_log
2987
2988 }
2989
2990 test "Test trim is disabled when module requests it" {
2991 R 0 asm.disable_trim
2992
2993 set slot0_key [slot_key 0 mykey]
2994 R 0 set $slot0_key "value"
2995 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 0]
2996 wait_for_condition 1000 10 {
2997 [string match {*completed*} [migration_status 0 $task_id state]]
2998 } else {
2999 fail "ASM task did not complete"
3000 }
3001 # since we disable trim, the key should still exist on source,
3002 # we can read it with REDISMODULE_OPEN_KEY_ACCESS_TRIMMED flag
3003 assert_equal "value" [R 0 asm.read_pending_trim_key $slot0_key]
3004 assert_equal 1 [R 0 asm.trim_in_progress]
3005
3006 # enable trim and verify the key is trimmed
3007 R 0 asm.enable_trim
3008 wait_for_condition 1000 10 {
3009 [R 0 asm.read_pending_trim_key $slot0_key] eq "" &&
3010 [R 0 asm.trim_in_progress] == 0
3011 } else {
3012 fail "Trim did not complete"
3013 }
3014 wait_for_asm_done
3015 R 0 CLUSTER MIGRATION IMPORT 0 0
3016 wait_for_asm_done
3017 clear_module_event_log
3018 }
3019
3020 test "Can not start new asm task when trim is not allowed" {
3021 # start a migration task, wait it completed but not allow to trim slots
3022 R 0 asm.disable_trim
3023 set task_id [R 1 CLUSTER MIGRATION IMPORT 0 0]
3024 wait_for_condition 1000 10 {
3025 [string match {*completed*} [migration_status 0 $task_id state]]
3026 } else {
3027 fail "ASM task did not complete"
3028 }
3029 # Can not start new migrating task since trim is disabled
3030 set task_id [R 1 CLUSTER MIGRATION IMPORT 1 1]
3031 wait_for_condition 1000 10 {
3032 [string match {*fail*} [migration_status 1 $task_id state]] &&
3033 [string match {*Trim is disabled by module*} [migration_status 1 $task_id last_error]]
3034 } else {
3035 fail "ASM task did not fail"
3036 }
3037 R 0 asm.enable_trim
3038 wait_for_asm_done
3039
3040 # start a migration task, wait it completed but not allow to trim slots
3041 R 0 asm.disable_trim
3042 set task_id [R 1 CLUSTER MIGRATION IMPORT 2 2]
3043 wait_for_condition 1000 10 {
3044 [string match {*completed*} [migration_status 0 $task_id state]]
3045 } else {
3046 fail "ASM task did not complete"
3047 }
3048 set logline [count_log_lines 0]
3049 # Can not start new importing task since trim is disabled
3050 set task_id [R 0 CLUSTER MIGRATION IMPORT 0 1]
3051 wait_for_log_messages 0 {"*Can not start import task*trim is disabled by module*"} $logline 1000 10
3052 R 0 asm.enable_trim
3053 wait_for_asm_done
3054 }
3055}
3056
3057start_server {tags "cluster external:skip"} {
3058 test "Test RM_ClusterGetLocalSlotRanges without cluster" {
3059 r module load $testmodule
3060 assert_equal [r asm.cluster_get_local_slot_ranges] {{0 16383}}
3061 }
3062}
3063}
diff --git a/examples/redis-unstable/tests/unit/cluster/cli.tcl b/examples/redis-unstable/tests/unit/cluster/cli.tcl
deleted file mode 100644
index ce4629e..0000000
--- a/examples/redis-unstable/tests/unit/cluster/cli.tcl
+++ /dev/null
@@ -1,415 +0,0 @@
1# Primitive tests on cluster-enabled redis using redis-cli
2
3source tests/support/cli.tcl
4
5# make sure the test infra won't use SELECT
6set old_singledb $::singledb
7set ::singledb 1
8
9# cluster creation is complicated with TLS, and the current tests don't really need that coverage
10tags {tls:skip external:skip cluster} {
11
12# start three servers
13set base_conf [list cluster-enabled yes cluster-node-timeout 1000]
14start_multiple_servers 3 [list overrides $base_conf] {
15
16 set node1 [srv 0 client]
17 set node2 [srv -1 client]
18 set node3 [srv -2 client]
19 set node3_pid [srv -2 pid]
20 set node3_rd [redis_deferring_client -2]
21
22 test {Create 3 node cluster} {
23 exec src/redis-cli --cluster-yes --cluster create \
24 127.0.0.1:[srv 0 port] \
25 127.0.0.1:[srv -1 port] \
26 127.0.0.1:[srv -2 port]
27
28 wait_for_condition 1000 50 {
29 [CI 0 cluster_state] eq {ok} &&
30 [CI 1 cluster_state] eq {ok} &&
31 [CI 2 cluster_state] eq {ok}
32 } else {
33 fail "Cluster doesn't stabilize"
34 }
35 }
36
37 test "Run blocking command on cluster node3" {
38 # key9184688 is mapped to slot 10923 (first slot of node 3)
39 $node3_rd brpop key9184688 0
40 $node3_rd flush
41
42 wait_for_condition 50 100 {
43 [s -2 blocked_clients] eq {1}
44 } else {
45 fail "Client not blocked"
46 }
47 }
48
49 test "Perform a Resharding" {
50 exec src/redis-cli --cluster-yes --cluster reshard 127.0.0.1:[srv -2 port] \
51 --cluster-to [$node1 cluster myid] \
52 --cluster-from [$node3 cluster myid] \
53 --cluster-slots 1
54 }
55
56 test "Verify command got unblocked after resharding" {
57 # this (read) will wait for the node3 to realize the new topology
58 assert_error {*MOVED*} {$node3_rd read}
59
60 # verify there are no blocked clients
61 assert_equal [s 0 blocked_clients] {0}
62 assert_equal [s -1 blocked_clients] {0}
63 assert_equal [s -2 blocked_clients] {0}
64 }
65
66 test "Wait for cluster to be stable" {
67 # Cluster check just verifies the config state is self-consistent,
68 # waiting for cluster_state to be okay is an independent check that all the
69 # nodes actually believe each other are healthy, prevent cluster down error.
70 wait_for_condition 1000 50 {
71 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv 0 port]}] == 0 &&
72 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -1 port]}] == 0 &&
73 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -2 port]}] == 0 &&
74 [CI 0 cluster_state] eq {ok} &&
75 [CI 1 cluster_state] eq {ok} &&
76 [CI 2 cluster_state] eq {ok}
77 } else {
78 fail "Cluster doesn't stabilize"
79 }
80 }
81
82 set node1_rd [redis_deferring_client 0]
83
84 test "use previous hostip in \"cluster-preferred-endpoint-type unknown-endpoint\" mode" {
85
86 # backup and set cluster-preferred-endpoint-type unknown-endpoint
87 set endpoint_type_before_set [lindex [split [$node1 CONFIG GET cluster-preferred-endpoint-type] " "] 1]
88 $node1 CONFIG SET cluster-preferred-endpoint-type unknown-endpoint
89
90 # when redis-cli not in cluster mode, return MOVE with empty host
91 set slot_for_foo [$node1 CLUSTER KEYSLOT foo]
92 assert_error "*MOVED $slot_for_foo :*" {$node1 set foo bar}
93
94 # when in cluster mode, redirect using previous hostip
95 assert_equal "[exec src/redis-cli -h 127.0.0.1 -p [srv 0 port] -c set foo bar]" {OK}
96 assert_match "[exec src/redis-cli -h 127.0.0.1 -p [srv 0 port] -c get foo]" {bar}
97
98 assert_equal [$node1 CONFIG SET cluster-preferred-endpoint-type "$endpoint_type_before_set"] {OK}
99 }
100
101 test "Sanity test push cmd after resharding" {
102 assert_error {*MOVED*} {$node3 lpush key9184688 v1}
103
104 $node1_rd brpop key9184688 0
105 $node1_rd flush
106
107 wait_for_condition 50 100 {
108 [s 0 blocked_clients] eq {1}
109 } else {
110 puts "Client not blocked"
111 puts "read from blocked client: [$node1_rd read]"
112 fail "Client not blocked"
113 }
114
115 $node1 lpush key9184688 v2
116 assert_equal {key9184688 v2} [$node1_rd read]
117 }
118
119 $node3_rd close
120
121 test "Run blocking command again on cluster node1" {
122 $node1 del key9184688
123 # key9184688 is mapped to slot 10923 which has been moved to node1
124 $node1_rd brpop key9184688 0
125 $node1_rd flush
126
127 wait_for_condition 50 100 {
128 [s 0 blocked_clients] eq {1}
129 } else {
130 fail "Client not blocked"
131 }
132 }
133
134 test "Kill a cluster node and wait for fail state" {
135 # kill node3 in cluster
136 pause_process $node3_pid
137
138 wait_for_condition 1000 50 {
139 [CI 0 cluster_state] eq {fail} &&
140 [CI 1 cluster_state] eq {fail}
141 } else {
142 fail "Cluster doesn't fail"
143 }
144 }
145
146 test "Verify command got unblocked after cluster failure" {
147 assert_error {*CLUSTERDOWN*} {$node1_rd read}
148
149 # verify there are no blocked clients
150 assert_equal [s 0 blocked_clients] {0}
151 assert_equal [s -1 blocked_clients] {0}
152 }
153
154 resume_process $node3_pid
155 $node1_rd close
156
157} ;# stop servers
158
159# Test redis-cli -- cluster create, add-node, call.
160# Test that functions are propagated on add-node
161start_multiple_servers 5 [list overrides $base_conf] {
162
163 set node4_rd [redis_client -3]
164 set node5_rd [redis_client -4]
165
166 test {Functions are added to new node on redis-cli cluster add-node} {
167 exec src/redis-cli --cluster-yes --cluster create \
168 127.0.0.1:[srv 0 port] \
169 127.0.0.1:[srv -1 port] \
170 127.0.0.1:[srv -2 port]
171
172
173 wait_for_condition 1000 50 {
174 [CI 0 cluster_state] eq {ok} &&
175 [CI 1 cluster_state] eq {ok} &&
176 [CI 2 cluster_state] eq {ok}
177 } else {
178 fail "Cluster doesn't stabilize"
179 }
180
181 # upload a function to all the cluster
182 exec src/redis-cli --cluster-yes --cluster call 127.0.0.1:[srv 0 port] \
183 FUNCTION LOAD {#!lua name=TEST
184 redis.register_function('test', function() return 'hello' end)
185 }
186
187 # adding node to the cluster
188 exec src/redis-cli --cluster-yes --cluster add-node \
189 127.0.0.1:[srv -3 port] \
190 127.0.0.1:[srv 0 port]
191
192 wait_for_cluster_size 4
193
194 wait_for_condition 1000 50 {
195 [CI 0 cluster_state] eq {ok} &&
196 [CI 1 cluster_state] eq {ok} &&
197 [CI 2 cluster_state] eq {ok} &&
198 [CI 3 cluster_state] eq {ok}
199 } else {
200 fail "Cluster doesn't stabilize"
201 }
202
203 # make sure 'test' function was added to the new node
204 assert_equal {{library_name TEST engine LUA functions {{name test description {} flags {}}}}} [$node4_rd FUNCTION LIST]
205
206 # add function to node 5
207 assert_equal {TEST} [$node5_rd FUNCTION LOAD {#!lua name=TEST
208 redis.register_function('test', function() return 'hello' end)
209 }]
210
211 # make sure functions was added to node 5
212 assert_equal {{library_name TEST engine LUA functions {{name test description {} flags {}}}}} [$node5_rd FUNCTION LIST]
213
214 # adding node 5 to the cluster should failed because it already contains the 'test' function
215 catch {
216 exec src/redis-cli --cluster-yes --cluster add-node \
217 127.0.0.1:[srv -4 port] \
218 127.0.0.1:[srv 0 port]
219 } e
220 assert_match {*node already contains functions*} $e
221 }
222} ;# stop servers
223
224# Test redis-cli --cluster create, add-node.
225# Test that one slot can be migrated to and then away from the new node.
226test {Migrate the last slot away from a node using redis-cli} {
227 start_multiple_servers 4 [list overrides $base_conf] {
228
229 # Create a cluster of 3 nodes
230 exec src/redis-cli --cluster-yes --cluster create \
231 127.0.0.1:[srv 0 port] \
232 127.0.0.1:[srv -1 port] \
233 127.0.0.1:[srv -2 port]
234
235 wait_for_condition 1000 50 {
236 [CI 0 cluster_state] eq {ok} &&
237 [CI 1 cluster_state] eq {ok} &&
238 [CI 2 cluster_state] eq {ok}
239 } else {
240 fail "Cluster doesn't stabilize"
241 }
242
243 # Insert some data
244 assert_equal OK [exec src/redis-cli -c -p [srv 0 port] SET foo bar]
245 set slot [exec src/redis-cli -c -p [srv 0 port] CLUSTER KEYSLOT foo]
246
247 # Add new node to the cluster
248 exec src/redis-cli --cluster-yes --cluster add-node \
249 127.0.0.1:[srv -3 port] \
250 127.0.0.1:[srv 0 port]
251
252 # First we wait for new node to be recognized by entire cluster
253 wait_for_cluster_size 4
254
255 wait_for_condition 1000 50 {
256 [CI 0 cluster_state] eq {ok} &&
257 [CI 1 cluster_state] eq {ok} &&
258 [CI 2 cluster_state] eq {ok} &&
259 [CI 3 cluster_state] eq {ok}
260 } else {
261 fail "Cluster doesn't stabilize"
262 }
263
264 set newnode_r [redis_client -3]
265 set newnode_id [$newnode_r CLUSTER MYID]
266
267 # Find out which node has the key "foo" by asking the new node for a
268 # redirect.
269 catch { $newnode_r get foo } e
270 assert_match "MOVED $slot *" $e
271 lassign [split [lindex $e 2] :] owner_host owner_port
272 set owner_r [redis $owner_host $owner_port 0 $::tls]
273 set owner_id [$owner_r CLUSTER MYID]
274
275 # Move slot to new node using plain Redis commands
276 assert_equal OK [$newnode_r CLUSTER SETSLOT $slot IMPORTING $owner_id]
277 assert_equal OK [$owner_r CLUSTER SETSLOT $slot MIGRATING $newnode_id]
278 assert_equal {foo} [$owner_r CLUSTER GETKEYSINSLOT $slot 10]
279 assert_equal OK [$owner_r MIGRATE 127.0.0.1 [srv -3 port] "" 0 5000 KEYS foo]
280 assert_equal OK [$newnode_r CLUSTER SETSLOT $slot NODE $newnode_id]
281 assert_equal OK [$owner_r CLUSTER SETSLOT $slot NODE $newnode_id]
282
283 # Using --cluster check make sure we won't get `Not all slots are covered by nodes`.
284 # Wait for the cluster to become stable make sure the cluster is up during MIGRATE.
285 wait_for_condition 1000 50 {
286 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv 0 port]}] == 0 &&
287 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -1 port]}] == 0 &&
288 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -2 port]}] == 0 &&
289 [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -3 port]}] == 0 &&
290 [CI 0 cluster_state] eq {ok} &&
291 [CI 1 cluster_state] eq {ok} &&
292 [CI 2 cluster_state] eq {ok} &&
293 [CI 3 cluster_state] eq {ok}
294 } else {
295 fail "Cluster doesn't stabilize"
296 }
297
298 # Move the only slot back to original node using redis-cli
299 exec src/redis-cli --cluster reshard 127.0.0.1:[srv -3 port] \
300 --cluster-from $newnode_id \
301 --cluster-to $owner_id \
302 --cluster-slots 1 \
303 --cluster-yes
304
305 # The empty node will become a replica of the new owner before the
306 # `MOVED` check, so let's wait for the cluster to become stable.
307 wait_for_condition 1000 50 {
308 [CI 0 cluster_state] eq {ok} &&
309 [CI 1 cluster_state] eq {ok} &&
310 [CI 2 cluster_state] eq {ok} &&
311 [CI 3 cluster_state] eq {ok}
312 } else {
313 fail "Cluster doesn't stabilize"
314 }
315
316 # Check that the key foo has been migrated back to the original owner.
317 catch { $newnode_r get foo } e
318 assert_equal "MOVED $slot $owner_host:$owner_port" $e
319
320 # Check that the empty node has turned itself into a replica of the new
321 # owner and that the new owner knows that.
322 wait_for_condition 1000 50 {
323 [string match "*slave*" [$owner_r CLUSTER REPLICAS $owner_id]]
324 } else {
325 fail "Empty node didn't turn itself into a replica."
326 }
327 }
328}
329
330foreach ip_or_localhost {127.0.0.1 localhost} {
331
332# Test redis-cli --cluster create, add-node with cluster-port.
333# Create five nodes, three with custom cluster_port and two with default values.
334start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1 cluster-port [find_available_port $::baseport $::portcount]]] {
335start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1]] {
336start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1 cluster-port [find_available_port $::baseport $::portcount]]] {
337start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1]] {
338start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1 cluster-port [find_available_port $::baseport $::portcount]]] {
339
340 # The first three are used to test --cluster create.
341 # The last two are used to test --cluster add-node
342
343 test "redis-cli -4 --cluster create using $ip_or_localhost with cluster-port" {
344 exec src/redis-cli -4 --cluster-yes --cluster create \
345 $ip_or_localhost:[srv 0 port] \
346 $ip_or_localhost:[srv -1 port] \
347 $ip_or_localhost:[srv -2 port]
348
349 wait_for_condition 1000 50 {
350 [CI 0 cluster_state] eq {ok} &&
351 [CI 1 cluster_state] eq {ok} &&
352 [CI 2 cluster_state] eq {ok}
353 } else {
354 fail "Cluster doesn't stabilize"
355 }
356
357 # Make sure each node can meet other nodes
358 assert_equal 3 [CI 0 cluster_known_nodes]
359 assert_equal 3 [CI 1 cluster_known_nodes]
360 assert_equal 3 [CI 2 cluster_known_nodes]
361 }
362
363 test "redis-cli -4 --cluster add-node using $ip_or_localhost with cluster-port" {
364 # Adding node to the cluster (without cluster-port)
365 exec src/redis-cli -4 --cluster-yes --cluster add-node \
366 $ip_or_localhost:[srv -3 port] \
367 $ip_or_localhost:[srv 0 port]
368
369 wait_for_cluster_size 4
370
371 wait_for_condition 1000 50 {
372 [CI 0 cluster_state] eq {ok} &&
373 [CI 1 cluster_state] eq {ok} &&
374 [CI 2 cluster_state] eq {ok} &&
375 [CI 3 cluster_state] eq {ok}
376 } else {
377 fail "Cluster doesn't stabilize"
378 }
379
380 # Adding node to the cluster (with cluster-port)
381 exec src/redis-cli -4 --cluster-yes --cluster add-node \
382 $ip_or_localhost:[srv -4 port] \
383 $ip_or_localhost:[srv 0 port]
384
385 wait_for_cluster_size 5
386
387 wait_for_condition 1000 50 {
388 [CI 0 cluster_state] eq {ok} &&
389 [CI 1 cluster_state] eq {ok} &&
390 [CI 2 cluster_state] eq {ok} &&
391 [CI 3 cluster_state] eq {ok} &&
392 [CI 4 cluster_state] eq {ok}
393 } else {
394 fail "Cluster doesn't stabilize"
395 }
396
397 # Make sure each node can meet other nodes
398 assert_equal 5 [CI 0 cluster_known_nodes]
399 assert_equal 5 [CI 1 cluster_known_nodes]
400 assert_equal 5 [CI 2 cluster_known_nodes]
401 assert_equal 5 [CI 3 cluster_known_nodes]
402 assert_equal 5 [CI 4 cluster_known_nodes]
403 }
404# stop 5 servers
405}
406}
407}
408}
409}
410
411} ;# foreach ip_or_localhost
412
413} ;# tags
414
415set ::singledb $old_singledb
diff --git a/examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl b/examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl
deleted file mode 100644
index a099fa7..0000000
--- a/examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl
+++ /dev/null
@@ -1,110 +0,0 @@
1source tests/support/cluster.tcl
2
3proc get_port_from_moved_error {e} {
4 set ip_port [lindex [split $e " "] 2]
5 return [lindex [split $ip_port ":"] 1]
6}
7
8proc get_pport_by_port {port} {
9 foreach srv $::servers {
10 set srv_port [dict get $srv port]
11 if {$port == $srv_port} {
12 return [dict get $srv pport]
13 }
14 }
15 return 0
16}
17
18proc get_port_from_node_info {line} {
19 set fields [split $line " "]
20 set addr [lindex $fields 1]
21 set ip_port [lindex [split $addr "@"] 0]
22 return [lindex [split $ip_port ":"] 1]
23}
24
25proc cluster_response_tls {tls_cluster} {
26
27 test "CLUSTER SLOTS with different connection type -- tls-cluster $tls_cluster" {
28 set slots1 [R 0 cluster slots]
29 set pport [srv 0 pport]
30 set cluster_client [redis_cluster 127.0.0.1:$pport 0]
31 set slots2 [$cluster_client cluster slots]
32 $cluster_client close
33 # Compare the ports in the first row
34 assert_no_match [lindex $slots1 0 2 1] [lindex $slots2 0 2 1]
35 }
36
37 test "CLUSTER NODES return port according to connection type -- tls-cluster $tls_cluster" {
38 set nodes [R 0 cluster nodes]
39 set port1 [get_port_from_node_info [lindex [split $nodes "\r\n"] 0]]
40 set pport [srv 0 pport]
41 set cluster_client [redis_cluster 127.0.0.1:$pport 0]
42 set nodes [$cluster_client cluster nodes]
43 set port2 [get_port_from_node_info [lindex [split $nodes "\r\n"] 0]]
44 $cluster_client close
45 assert_not_equal $port1 $port2
46 }
47
48 set cluster [redis_cluster 127.0.0.1:[srv 0 port]]
49 set cluster_pport [redis_cluster 127.0.0.1:[srv 0 pport] 0]
50 $cluster refresh_nodes_map
51
52 test "Set many keys in the cluster -- tls-cluster $tls_cluster" {
53 for {set i 0} {$i < 5000} {incr i} {
54 $cluster set $i $i
55 assert { [$cluster get $i] eq $i }
56 }
57 }
58
59 test "Test cluster responses during migration of slot x -- tls-cluster $tls_cluster" {
60 set slot 10
61 array set nodefrom [$cluster masternode_for_slot $slot]
62 array set nodeto [$cluster masternode_notfor_slot $slot]
63 $nodeto(link) cluster setslot $slot importing $nodefrom(id)
64 $nodefrom(link) cluster setslot $slot migrating $nodeto(id)
65
66 # Get a key from that slot
67 set key [$nodefrom(link) cluster GETKEYSINSLOT $slot "1"]
68 # MOVED REPLY
69 catch {$nodeto(link) set $key "newVal"} e_moved1
70 assert_match "*MOVED*" $e_moved1
71 # ASK REPLY
72 catch {$nodefrom(link) set "abc{$key}" "newVal"} e_ask1
73 assert_match "*ASK*" $e_ask1
74
75 # UNSTABLE REPLY
76 assert_error "*TRYAGAIN*" {$nodefrom(link) mset "a{$key}" "newVal" $key "newVal2"}
77
78 # Connecting using another protocol
79 array set nodefrom_pport [$cluster_pport masternode_for_slot $slot]
80 array set nodeto_pport [$cluster_pport masternode_notfor_slot $slot]
81
82 # MOVED REPLY
83 catch {$nodeto_pport(link) set $key "newVal"} e_moved2
84 assert_match "*MOVED*" $e_moved2
85 # ASK REPLY
86 catch {$nodefrom_pport(link) set "abc{$key}" "newVal"} e_ask2
87 assert_match "*ASK*" $e_ask2
88 # Compare MOVED error's port
89 set port1 [get_port_from_moved_error $e_moved1]
90 set port2 [get_port_from_moved_error $e_moved2]
91 assert_not_equal $port1 $port2
92 assert_equal $port1 $nodefrom(port)
93 assert_equal $port2 [get_pport_by_port $nodefrom(port)]
94 # Compare ASK error's port
95 set port1 [get_port_from_moved_error $e_ask1]
96 set port2 [get_port_from_moved_error $e_ask2]
97 assert_not_equal $port1 $port2
98 assert_equal $port1 $nodeto(port)
99 assert_equal $port2 [get_pport_by_port $nodeto(port)]
100 }
101}
102
103if {$::tls} {
104 start_cluster 3 3 {tags {external:skip cluster tls} overrides {tls-cluster yes tls-replication yes}} {
105 cluster_response_tls yes
106 }
107 start_cluster 3 3 {tags {external:skip cluster tls} overrides {tls-cluster no tls-replication no}} {
108 cluster_response_tls no
109 }
110}
diff --git a/examples/redis-unstable/tests/unit/cluster/failure-marking.tcl b/examples/redis-unstable/tests/unit/cluster/failure-marking.tcl
deleted file mode 100644
index c4746c8..0000000
--- a/examples/redis-unstable/tests/unit/cluster/failure-marking.tcl
+++ /dev/null
@@ -1,53 +0,0 @@
1# Test a single primary can mark replica as `fail`
2start_cluster 1 1 {tags {external:skip cluster}} {
3
4 test "Verify that single primary marks replica as failed" {
5 set primary [srv -0 client]
6
7 set replica1 [srv -1 client]
8 set replica1_pid [srv -1 pid]
9 set replica1_instance_id [dict get [cluster_get_myself 1] id]
10
11 assert {[lindex [$primary role] 0] eq {master}}
12 assert {[lindex [$replica1 role] 0] eq {slave}}
13
14 wait_for_sync $replica1
15
16 pause_process $replica1_pid
17
18 wait_node_marked_fail 0 $replica1_instance_id
19 }
20}
21
22# Test multiple primaries wait for a quorum and then mark a replica as `fail`
23start_cluster 2 1 {tags {external:skip cluster}} {
24
25 test "Verify that multiple primaries mark replica as failed" {
26 set primary1 [srv -0 client]
27
28 set primary2 [srv -1 client]
29 set primary2_pid [srv -1 pid]
30
31 set replica1 [srv -2 client]
32 set replica1_pid [srv -2 pid]
33 set replica1_instance_id [dict get [cluster_get_myself 2] id]
34
35 assert {[lindex [$primary1 role] 0] eq {master}}
36 assert {[lindex [$primary2 role] 0] eq {master}}
37 assert {[lindex [$replica1 role] 0] eq {slave}}
38
39 wait_for_sync $replica1
40
41 pause_process $replica1_pid
42
43 # Pause other primary to allow time for pfail flag to appear
44 pause_process $primary2_pid
45
46 wait_node_marked_pfail 0 $replica1_instance_id
47
48 # Resume other primary and wait for to show replica as failed
49 resume_process $primary2_pid
50
51 wait_node_marked_fail 0 $replica1_instance_id
52 }
53}
diff --git a/examples/redis-unstable/tests/unit/cluster/hostnames.tcl b/examples/redis-unstable/tests/unit/cluster/hostnames.tcl
deleted file mode 100644
index 2236228..0000000
--- a/examples/redis-unstable/tests/unit/cluster/hostnames.tcl
+++ /dev/null
@@ -1,230 +0,0 @@
1#
2# Copyright (c) 2009-Present, Redis Ltd.
3# All rights reserved.
4#
5# Copyright (c) 2024-present, Valkey contributors.
6# All rights reserved.
7#
8# Licensed under your choice of (a) the Redis Source Available License 2.0
9# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
10# GNU Affero General Public License v3 (AGPLv3).
11#
12# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
13#
14
15proc get_slot_field {slot_output shard_id node_id attrib_id} {
16 return [lindex [lindex [lindex $slot_output $shard_id] $node_id] $attrib_id]
17}
18
19# Start a cluster with 3 masters and 4 replicas.
20# These tests rely on specific node ordering, so make sure no node fails over.
21start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-replica-no-failover yes}} {
22test "Set cluster hostnames and verify they are propagated" {
23 for {set j 0} {$j < [llength $::servers]} {incr j} {
24 R $j config set cluster-announce-hostname "host-$j.com"
25 }
26
27 wait_for_condition 50 100 {
28 [are_hostnames_propagated "host-*.com"] eq 1
29 } else {
30 fail "cluster hostnames were not propagated"
31 }
32
33 # Now that everything is propagated, assert everyone agrees
34 wait_for_cluster_propagation
35}
36
37test "Update hostnames and make sure they are all eventually propagated" {
38 for {set j 0} {$j < [llength $::servers]} {incr j} {
39 R $j config set cluster-announce-hostname "host-updated-$j.com"
40 }
41
42 wait_for_condition 50 100 {
43 [are_hostnames_propagated "host-updated-*.com"] eq 1
44 } else {
45 fail "cluster hostnames were not propagated"
46 }
47
48 # Now that everything is propagated, assert everyone agrees
49 wait_for_cluster_propagation
50}
51
52test "Remove hostnames and make sure they are all eventually propagated" {
53 for {set j 0} {$j < [llength $::servers]} {incr j} {
54 R $j config set cluster-announce-hostname ""
55 }
56
57 wait_for_condition 50 100 {
58 [are_hostnames_propagated ""] eq 1
59 } else {
60 fail "cluster hostnames were not propagated"
61 }
62
63 # Now that everything is propagated, assert everyone agrees
64 wait_for_cluster_propagation
65}
66
67test "Verify cluster-preferred-endpoint-type behavior for redirects and info" {
68 R 0 config set cluster-announce-hostname "me.com"
69 R 1 config set cluster-announce-hostname ""
70 R 2 config set cluster-announce-hostname "them.com"
71
72 wait_for_cluster_propagation
73
74 # Verify default behavior
75 set slot_result [R 0 cluster slots]
76 assert_equal "" [lindex [get_slot_field $slot_result 0 2 0] 1]
77 assert_equal "" [lindex [get_slot_field $slot_result 2 2 0] 1]
78 assert_equal "hostname" [lindex [get_slot_field $slot_result 0 2 3] 0]
79 assert_equal "me.com" [lindex [get_slot_field $slot_result 0 2 3] 1]
80 assert_equal "hostname" [lindex [get_slot_field $slot_result 2 2 3] 0]
81 assert_equal "them.com" [lindex [get_slot_field $slot_result 2 2 3] 1]
82
83 # Redirect will use the IP address
84 catch {R 0 set foo foo} redir_err
85 assert_match "MOVED * 127.0.0.1:*" $redir_err
86
87 # Verify prefer hostname behavior
88 R 0 config set cluster-preferred-endpoint-type hostname
89
90 set slot_result [R 0 cluster slots]
91 assert_equal "me.com" [get_slot_field $slot_result 0 2 0]
92 assert_equal "them.com" [get_slot_field $slot_result 2 2 0]
93
94 # Redirect should use hostname
95 catch {R 0 set foo foo} redir_err
96 assert_match "MOVED * them.com:*" $redir_err
97
98 # Redirect to an unknown hostname returns ?
99 catch {R 0 set barfoo bar} redir_err
100 assert_match "MOVED * ?:*" $redir_err
101
102 # Verify unknown hostname behavior
103 R 0 config set cluster-preferred-endpoint-type unknown-endpoint
104
105 # Verify default behavior
106 set slot_result [R 0 cluster slots]
107 assert_equal "ip" [lindex [get_slot_field $slot_result 0 2 3] 0]
108 assert_equal "127.0.0.1" [lindex [get_slot_field $slot_result 0 2 3] 1]
109 assert_equal "ip" [lindex [get_slot_field $slot_result 2 2 3] 0]
110 assert_equal "127.0.0.1" [lindex [get_slot_field $slot_result 2 2 3] 1]
111 assert_equal "ip" [lindex [get_slot_field $slot_result 1 2 3] 0]
112 assert_equal "127.0.0.1" [lindex [get_slot_field $slot_result 1 2 3] 1]
113 # Not required by the protocol, but IP comes before hostname
114 assert_equal "hostname" [lindex [get_slot_field $slot_result 0 2 3] 2]
115 assert_equal "me.com" [lindex [get_slot_field $slot_result 0 2 3] 3]
116 assert_equal "hostname" [lindex [get_slot_field $slot_result 2 2 3] 2]
117 assert_equal "them.com" [lindex [get_slot_field $slot_result 2 2 3] 3]
118
119 # This node doesn't have a hostname
120 assert_equal 2 [llength [get_slot_field $slot_result 1 2 3]]
121
122 # Redirect should use empty string
123 catch {R 0 set foo foo} redir_err
124 assert_match "MOVED * :*" $redir_err
125
126 R 0 config set cluster-preferred-endpoint-type ip
127}
128
129test "Verify the nodes configured with prefer hostname only show hostname for new nodes" {
130 # Have everyone forget node 6 and isolate it from the cluster.
131 isolate_node 6
132
133 set primaries 3
134 for {set j 0} {$j < $primaries} {incr j} {
135 # Set hostnames for the masters, now that the node is isolated
136 R $j config set cluster-announce-hostname "shard-$j.com"
137 }
138
139 # Prevent Node 0 and Node 6 from properly meeting,
140 # they'll hang in the handshake phase. This allows us to
141 # test the case where we "know" about it but haven't
142 # successfully retrieved information about it yet.
143 R 0 DEBUG DROP-CLUSTER-PACKET-FILTER 0
144 R 6 DEBUG DROP-CLUSTER-PACKET-FILTER 0
145
146 # Have a replica meet the isolated node
147 R 3 cluster meet 127.0.0.1 [srv -6 port]
148
149 # Wait for the isolated node to learn about the rest of the cluster,
150 # which correspond to a single entry in cluster nodes. Note this
151 # doesn't mean the isolated node has successfully contacted each
152 # node.
153 wait_for_condition 50 100 {
154 [llength [split [R 6 CLUSTER NODES] "\n"]] eq [expr [llength $::servers] + 1]
155 } else {
156 fail "Isolated node didn't learn about the rest of the cluster *"
157 }
158
159 # Now, we wait until the two nodes that aren't filtering packets
160 # to accept our isolated nodes connections. At this point they will
161 # start showing up in cluster slots.
162 wait_for_condition 50 100 {
163 [llength [R 6 CLUSTER SLOTS]] eq 2
164 } else {
165 fail "Node did not learn about the 2 shards it can talk to"
166 }
167 wait_for_condition 50 100 {
168 [lindex [get_slot_field [R 6 CLUSTER SLOTS] 0 2 3] 1] eq "shard-1.com"
169 } else {
170 fail "hostname for shard-1 didn't reach node 6"
171 }
172
173 wait_for_condition 50 100 {
174 [lindex [get_slot_field [R 6 CLUSTER SLOTS] 1 2 3] 1] eq "shard-2.com"
175 } else {
176 fail "hostname for shard-2 didn't reach node 6"
177 }
178
179 # Also make sure we know about the isolated master, we
180 # just can't reach it.
181 set master_id [R 0 CLUSTER MYID]
182 assert_match "*$master_id*" [R 6 CLUSTER NODES]
183
184 # Stop dropping cluster packets, and make sure everything
185 # stabilizes
186 R 0 DEBUG DROP-CLUSTER-PACKET-FILTER -1
187 R 6 DEBUG DROP-CLUSTER-PACKET-FILTER -1
188
189 # This operation sometimes spikes to around 5 seconds to resolve the state,
190 # so it has a higher timeout.
191 wait_for_condition 50 500 {
192 [llength [R 6 CLUSTER SLOTS]] eq 3
193 } else {
194 fail "Node did not learn about the 2 shards it can talk to"
195 }
196
197 for {set j 0} {$j < $primaries} {incr j} {
198 wait_for_condition 50 100 {
199 [lindex [get_slot_field [R 6 CLUSTER SLOTS] $j 2 3] 1] eq "shard-$j.com"
200 } else {
201 fail "hostname information for shard-$j didn't reach node 6"
202 }
203 }
204}
205
206test "Test restart will keep hostname information" {
207 # Set a new hostname, reboot and make sure it sticks
208 R 0 config set cluster-announce-hostname "restart-1.com"
209
210 # Store the hostname in the config
211 R 0 config rewrite
212
213 restart_server 0 true false
214 set slot_result [R 0 CLUSTER SLOTS]
215 assert_equal [lindex [get_slot_field $slot_result 0 2 3] 1] "restart-1.com"
216
217 # As a sanity check, make sure everyone eventually agrees
218 wait_for_cluster_propagation
219}
220
221test "Test hostname validation" {
222 catch {R 0 config set cluster-announce-hostname [string repeat x 256]} err
223 assert_match "*Hostnames must be less than 256 characters*" $err
224 catch {R 0 config set cluster-announce-hostname "?.com"} err
225 assert_match "*Hostnames may only contain alphanumeric characters, hyphens or dots*" $err
226
227 # Note this isn't a valid hostname, but it passes our internal validation
228 R 0 config set cluster-announce-hostname "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-."
229}
230}
diff --git a/examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl b/examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl
deleted file mode 100644
index a595ca6..0000000
--- a/examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl
+++ /dev/null
@@ -1,29 +0,0 @@
1# Check if cluster's view of human announced nodename is reported in logs
2start_cluster 3 0 {tags {external:skip cluster}} {
3 test "Set cluster human announced nodename and let it propagate" {
4 for {set j 0} {$j < [llength $::servers]} {incr j} {
5 R $j config set cluster-announce-hostname "host-$j.com"
6 R $j config set cluster-announce-human-nodename "nodename-$j"
7 }
8
9 # We wait for everyone to agree on the hostnames. Since they are gossiped
10 # the same way as nodenames, it implies everyone knows the nodenames too.
11 wait_for_condition 50 100 {
12 [are_hostnames_propagated "host-*.com"] eq 1
13 } else {
14 fail "cluster hostnames were not propagated"
15 }
16 }
17
18 test "Human nodenames are visible in log messages" {
19 # Pause instance 0, so everyone thinks it is dead
20 pause_process [srv 0 pid]
21
22 # We're going to use a message we will know will be sent, node unreachable,
23 # since it includes the other node gossiping.
24 wait_for_log_messages -1 {"*Node * (nodename-2) reported node * (nodename-0) as not reachable*"} 0 20 500
25 wait_for_log_messages -2 {"*Node * (nodename-1) reported node * (nodename-0) as not reachable*"} 0 20 500
26
27 resume_process [srv 0 pid]
28 }
29}
diff --git a/examples/redis-unstable/tests/unit/cluster/internal-secret.tcl b/examples/redis-unstable/tests/unit/cluster/internal-secret.tcl
deleted file mode 100644
index f310b74..0000000
--- a/examples/redis-unstable/tests/unit/cluster/internal-secret.tcl
+++ /dev/null
@@ -1,71 +0,0 @@
1proc num_unique_secrets {num_nodes} {
2 set secrets [list]
3 for {set i 0} {$i < $num_nodes} {incr i} {
4 lappend secrets [R $i debug internal_secret]
5 }
6 set num_secrets [llength [lsort -unique $secrets]]
7 return $num_secrets
8}
9
10proc wait_for_secret_sync {maxtries delay num_nodes} {
11 wait_for_condition $maxtries $delay {
12 [num_unique_secrets $num_nodes] eq 1
13 } else {
14 fail "Failed waiting for secrets to sync"
15 }
16}
17
18start_cluster 3 3 {tags {external:skip cluster}} {
19 test "Test internal secret sync" {
20 wait_for_secret_sync 50 100 6
21 }
22
23
24 set first_shard_host [srv 0 host]
25 set first_shard_port [srv 0 port]
26
27 if {$::verbose} {
28 puts {cluster internal secret:}
29 puts [R 1 debug internal_secret]
30 }
31
32 test "Join a node to the cluster and make sure it gets the same secret" {
33 start_server {tags {"external:skip"} overrides {cluster-enabled {yes}}} {
34 r cluster meet $first_shard_host $first_shard_port
35 wait_for_condition 50 100 {
36 [r debug internal_secret] eq [R 1 debug internal_secret]
37 } else {
38 puts [r debug internal_secret]
39 puts [R 1 debug internal_secret]
40 fail "Secrets not match"
41 }
42 }
43 }
44
45 test "Join another cluster, make sure clusters sync on the internal secret" {
46 start_server {tags {"external:skip"} overrides {cluster-enabled {yes}}} {
47 set new_shard_host [srv 0 host]
48 set new_shard_port [srv 0 port]
49 start_server {tags {"external:skip"} overrides {cluster-enabled {yes}}} {
50 r cluster meet $new_shard_host $new_shard_port
51 wait_for_condition 50 100 {
52 [r debug internal_secret] eq [r -1 debug internal_secret]
53 } else {
54 puts [r debug internal_secret]
55 puts [r -1 debug internal_secret]
56 fail "Secrets not match"
57 }
58 if {$::verbose} {
59 puts {new cluster internal secret:}
60 puts [r -1 debug internal_secret]
61 }
62 r cluster meet $first_shard_host $first_shard_port
63 wait_for_secret_sync 50 100 8
64 if {$::verbose} {
65 puts {internal secret after join to bigger cluster:}
66 puts [r -1 debug internal_secret]
67 }
68 }
69 }
70 }
71}
diff --git a/examples/redis-unstable/tests/unit/cluster/links.tcl b/examples/redis-unstable/tests/unit/cluster/links.tcl
deleted file mode 100644
index a202c37..0000000
--- a/examples/redis-unstable/tests/unit/cluster/links.tcl
+++ /dev/null
@@ -1,292 +0,0 @@
1proc get_links_with_peer {this_instance_id peer_nodename} {
2 set links [R $this_instance_id cluster links]
3 set links_with_peer {}
4 foreach l $links {
5 if {[dict get $l node] eq $peer_nodename} {
6 lappend links_with_peer $l
7 }
8 }
9 return $links_with_peer
10}
11
12# Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that
13# corresponds to the link established toward a peer identified by `peer_nodename`
14proc get_link_to_peer {this_instance_id peer_nodename} {
15 set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename]
16 foreach l $links_with_peer {
17 if {[dict get $l direction] eq "to"} {
18 return $l
19 }
20 }
21 return {}
22}
23
24# Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that
25# corresponds to the link accepted from a peer identified by `peer_nodename`
26proc get_link_from_peer {this_instance_id peer_nodename} {
27 set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename]
28 foreach l $links_with_peer {
29 if {[dict get $l direction] eq "from"} {
30 return $l
31 }
32 }
33 return {}
34}
35
36# Reset cluster links to their original state
37proc reset_links {id} {
38 set limit [lindex [R $id CONFIG get cluster-link-sendbuf-limit] 1]
39
40 # Set a 1 byte limit and wait for cluster cron to run
41 # (executes every 100ms) and terminate links
42 R $id CONFIG SET cluster-link-sendbuf-limit 1
43 after 150
44
45 # Reset limit
46 R $id CONFIG SET cluster-link-sendbuf-limit $limit
47
48 # Wait until the cluster links come back up for each node
49 wait_for_condition 50 100 {
50 [number_of_links $id] == [expr [number_of_peers $id] * 2]
51 } else {
52 fail "Cluster links did not come back up"
53 }
54}
55
56proc number_of_peers {id} {
57 expr [llength $::servers] - 1
58}
59
60proc number_of_links {id} {
61 llength [R $id cluster links]
62}
63
64proc publish_messages {server num_msgs msg_size} {
65 for {set i 0} {$i < $num_msgs} {incr i} {
66 $server PUBLISH channel [string repeat "x" $msg_size]
67 }
68}
69
70start_cluster 1 2 {tags {external:skip cluster}} {
71 set primary_id 0
72 set replica1_id 1
73
74 set primary [Rn $primary_id]
75 set replica1 [Rn $replica1_id]
76
77 test "Broadcast message across a cluster shard while a cluster link is down" {
78 set replica1_node_id [$replica1 CLUSTER MYID]
79
80 set channelname ch3
81
82 # subscribe on replica1
83 set subscribeclient1 [redis_deferring_client -1]
84 $subscribeclient1 deferred 1
85 $subscribeclient1 SSUBSCRIBE $channelname
86 $subscribeclient1 read
87
88 # subscribe on replica2
89 set subscribeclient2 [redis_deferring_client -2]
90 $subscribeclient2 deferred 1
91 $subscribeclient2 SSUBSCRIBE $channelname
92 $subscribeclient2 read
93
94 # Verify number of links with cluster stable state
95 assert_equal [expr [number_of_peers $primary_id]*2] [number_of_links $primary_id]
96
97 # Disconnect the cluster between primary and replica1 and publish a message.
98 $primary MULTI
99 $primary DEBUG CLUSTERLINK KILL TO $replica1_node_id
100 $primary SPUBLISH $channelname hello
101 set res [$primary EXEC]
102
103 # Verify no client exists on the primary to receive the published message.
104 assert_equal $res {OK 0}
105
106 # Wait for all the cluster links are healthy
107 wait_for_condition 50 100 {
108 [number_of_peers $primary_id]*2 == [number_of_links $primary_id]
109 } else {
110 fail "All peer links couldn't be established"
111 }
112
113 # Publish a message afterwards.
114 $primary SPUBLISH $channelname world
115
116 # Verify replica1 has received only (world) / hello is lost.
117 assert_equal "smessage ch3 world" [$subscribeclient1 read]
118
119 # Verify replica2 has received both messages (hello/world)
120 assert_equal "smessage ch3 hello" [$subscribeclient2 read]
121 assert_equal "smessage ch3 world" [$subscribeclient2 read]
122 } {} {needs:debug}
123}
124
125start_cluster 3 0 {tags {external:skip cluster}} {
126 test "Each node has two links with each peer" {
127 for {set id 0} {$id < [llength $::servers]} {incr id} {
128 # Assert that from point of view of each node, there are two links for
129 # each peer. It might take a while for cluster to stabilize so wait up
130 # to 5 seconds.
131 wait_for_condition 50 100 {
132 [number_of_peers $id]*2 == [number_of_links $id]
133 } else {
134 assert_equal [expr [number_of_peers $id]*2] [number_of_links $id]
135 }
136
137 set nodes [get_cluster_nodes $id]
138 set links [R $id cluster links]
139
140 # For each peer there should be exactly one
141 # link "to" it and one link "from" it.
142 foreach n $nodes {
143 if {[cluster_has_flag $n myself]} continue
144 set peer [dict get $n id]
145 set to 0
146 set from 0
147 foreach l $links {
148 if {[dict get $l node] eq $peer} {
149 if {[dict get $l direction] eq "to"} {
150 incr to
151 } elseif {[dict get $l direction] eq "from"} {
152 incr from
153 }
154 }
155 }
156 assert {$to eq 1}
157 assert {$from eq 1}
158 }
159 }
160 }
161
162 test {Validate cluster links format} {
163 set lines [R 0 cluster links]
164 foreach l $lines {
165 if {$l eq {}} continue
166 assert_equal [llength $l] 12
167 assert_equal 1 [dict exists $l "direction"]
168 assert_equal 1 [dict exists $l "node"]
169 assert_equal 1 [dict exists $l "create-time"]
170 assert_equal 1 [dict exists $l "events"]
171 assert_equal 1 [dict exists $l "send-buffer-allocated"]
172 assert_equal 1 [dict exists $l "send-buffer-used"]
173 }
174 }
175
176 set primary1_id 0
177 set primary2_id 1
178
179 set primary1 [Rn $primary1_id]
180 set primary2 [Rn $primary2_id]
181
182 test "Disconnect link when send buffer limit reached" {
183 # On primary1, set timeout to 1 hour so links won't get disconnected due to timeouts
184 set oldtimeout [lindex [$primary1 CONFIG get cluster-node-timeout] 1]
185 $primary1 CONFIG set cluster-node-timeout [expr 60*60*1000]
186
187 # Get primary1's links with primary2
188 set primary2_name [dict get [cluster_get_myself $primary2_id] id]
189 set orig_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
190 set orig_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
191
192 # On primary1, set cluster link send buffer limit to 256KB, which is large enough to not be
193 # overflowed by regular gossip messages but also small enough that it doesn't take too much
194 # memory to overflow it. If it is set too high, Redis may get OOM killed by kernel before this
195 # limit is overflowed in some RAM-limited test environments.
196 set oldlimit [lindex [$primary1 CONFIG get cluster-link-sendbuf-limit] 1]
197 $primary1 CONFIG set cluster-link-sendbuf-limit [expr 256*1024]
198 assert {[CI $primary1_id total_cluster_links_buffer_limit_exceeded] eq 0}
199
200 # To manufacture an ever-growing send buffer from primary1 to primary2,
201 # make primary2 unresponsive.
202 set primary2_pid [srv [expr -1*$primary2_id] pid]
203 pause_process $primary2_pid
204
205 # On primary1, send 128KB Pubsub messages in a loop until the send buffer of the link from
206 # primary1 to primary2 exceeds buffer limit therefore be dropped.
207 # For the send buffer to grow, we need to first exhaust TCP send buffer of primary1 and TCP
208 # receive buffer of primary2 first. The sizes of these two buffers vary by OS, but 100 128KB
209 # messages should be sufficient.
210 set i 0
211 wait_for_condition 100 0 {
212 [catch {incr i} e] == 0 &&
213 [catch {$primary1 publish channel [prepare_value [expr 128*1024]]} e] == 0 &&
214 [catch {after 500} e] == 0 &&
215 [CI $primary1_id total_cluster_links_buffer_limit_exceeded] >= 1
216 } else {
217 fail "Cluster link not freed as expected"
218 }
219
220 # A new link to primary2 should have been recreated
221 set new_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
222 assert {[dict get $new_link_p1_to_p2 create-time] > [dict get $orig_link_p1_to_p2 create-time]}
223
224 # Link from primary2 should not be affected
225 set same_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
226 assert {[dict get $same_link_p1_from_p2 create-time] eq [dict get $orig_link_p1_from_p2 create-time]}
227
228 # Revive primary2
229 resume_process $primary2_pid
230
231 # Reset configs on primary1 so config changes don't leak out to other tests
232 $primary1 CONFIG set cluster-node-timeout $oldtimeout
233 $primary1 CONFIG set cluster-link-sendbuf-limit $oldlimit
234
235 reset_links $primary1_id
236 }
237
238 test "Link memory increases with publishes" {
239 set server_id 0
240 set server [Rn $server_id]
241 set msg_size 10000
242 set num_msgs 10
243
244 # Remove any sendbuf limit
245 $primary1 CONFIG set cluster-link-sendbuf-limit 0
246
247 # Publish ~100KB to one of the servers
248 $server MULTI
249 $server INFO memory
250 publish_messages $server $num_msgs $msg_size
251 $server INFO memory
252 set res [$server EXEC]
253
254 set link_mem_before_pubs [getInfoProperty $res mem_cluster_links]
255
256 # Remove the first half of the response string which contains the
257 # first "INFO memory" results and search for the property again
258 set res [string range $res [expr [string length $res] / 2] end]
259 set link_mem_after_pubs [getInfoProperty $res mem_cluster_links]
260
261 # We expect the memory to have increased by more than
262 # the culmulative size of the publish messages
263 set mem_diff_floor [expr $msg_size * $num_msgs]
264 set mem_diff [expr $link_mem_after_pubs - $link_mem_before_pubs]
265 assert {$mem_diff > $mem_diff_floor}
266
267 # Reset links to ensure no leftover data for the next test
268 reset_links $server_id
269 }
270
271 test "Link memory resets after publish messages flush" {
272 set server [Rn 0]
273 set msg_size 100000
274 set num_msgs 10
275
276 set link_mem_before [status $server mem_cluster_links]
277
278 # Publish ~1MB to one of the servers
279 $server MULTI
280 publish_messages $server $num_msgs $msg_size
281 $server EXEC
282
283 # Wait until the cluster link memory has returned to below the pre-publish value.
284 # We can't guarantee it returns to the exact same value since gossip messages
285 # can cause the values to fluctuate.
286 wait_for_condition 1000 500 {
287 [status $server mem_cluster_links] <= $link_mem_before
288 } else {
289 fail "Cluster link memory did not settle back to expected range"
290 }
291 }
292}
diff --git a/examples/redis-unstable/tests/unit/cluster/misc.tcl b/examples/redis-unstable/tests/unit/cluster/misc.tcl
deleted file mode 100644
index 62bdcf7..0000000
--- a/examples/redis-unstable/tests/unit/cluster/misc.tcl
+++ /dev/null
@@ -1,36 +0,0 @@
1start_cluster 2 2 {tags {external:skip cluster}} {
2 test {Key lazy expires during key migration} {
3 R 0 DEBUG SET-ACTIVE-EXPIRE 0
4
5 set key_slot [R 0 CLUSTER KEYSLOT FOO]
6 R 0 set FOO BAR PX 10
7 set src_id [R 0 CLUSTER MYID]
8 set trg_id [R 1 CLUSTER MYID]
9 R 0 CLUSTER SETSLOT $key_slot MIGRATING $trg_id
10 R 1 CLUSTER SETSLOT $key_slot IMPORTING $src_id
11 after 11
12 assert_error {ASK*} {R 0 GET FOO}
13 R 0 ping
14 } {PONG}
15
16 test "Coverage: Basic cluster commands" {
17 assert_equal {OK} [R 0 CLUSTER saveconfig]
18
19 set id [R 0 CLUSTER MYID]
20 assert_equal {0} [R 0 CLUSTER count-failure-reports $id]
21
22 R 0 flushall
23 assert_equal {OK} [R 0 CLUSTER flushslots]
24 }
25
26 test "CROSSSLOT error for keys in different slots" {
27 # Test MSET with keys in different slots
28 assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 MSET foo bar baz qux}
29
30 # Test DEL with keys in different slots
31 assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 DEL foo bar}
32
33 # Test MGET with keys in different slots
34 assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 MGET foo bar}
35 }
36}
diff --git a/examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl b/examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl
deleted file mode 100644
index 5d2d03e..0000000
--- a/examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl
+++ /dev/null
@@ -1,182 +0,0 @@
1# This test uses a custom slot allocation for testing
2proc cluster_allocate_with_continuous_slots_local {n} {
3 R 0 cluster ADDSLOTSRANGE 0 3276
4 R 1 cluster ADDSLOTSRANGE 3277 6552
5 R 2 cluster ADDSLOTSRANGE 6553 9828
6 R 3 cluster ADDSLOTSRANGE 9829 13104
7 R 4 cluster ADDSLOTSRANGE 13105 16383
8}
9
10start_cluster 5 0 {tags {external:skip cluster}} {
11
12set master1 [srv 0 "client"]
13set master2 [srv -1 "client"]
14set master3 [srv -2 "client"]
15set master4 [srv -3 "client"]
16set master5 [srv -4 "client"]
17
18test "Continuous slots distribution" {
19 assert_match "* 0-3276*" [$master1 CLUSTER NODES]
20 assert_match "* 3277-6552*" [$master2 CLUSTER NODES]
21 assert_match "* 6553-9828*" [$master3 CLUSTER NODES]
22 assert_match "* 9829-13104*" [$master4 CLUSTER NODES]
23 assert_match "* 13105-16383*" [$master5 CLUSTER NODES]
24 assert_match "*0 3276*" [$master1 CLUSTER SLOTS]
25 assert_match "*3277 6552*" [$master2 CLUSTER SLOTS]
26 assert_match "*6553 9828*" [$master3 CLUSTER SLOTS]
27 assert_match "*9829 13104*" [$master4 CLUSTER SLOTS]
28 assert_match "*13105 16383*" [$master5 CLUSTER SLOTS]
29
30 $master1 CLUSTER DELSLOTSRANGE 3001 3050
31 assert_match "* 0-3000 3051-3276*" [$master1 CLUSTER NODES]
32 assert_match "*0 3000*3051 3276*" [$master1 CLUSTER SLOTS]
33
34 $master2 CLUSTER DELSLOTSRANGE 5001 5500
35 assert_match "* 3277-5000 5501-6552*" [$master2 CLUSTER NODES]
36 assert_match "*3277 5000*5501 6552*" [$master2 CLUSTER SLOTS]
37
38 $master3 CLUSTER DELSLOTSRANGE 7001 7100 8001 8500
39 assert_match "* 6553-7000 7101-8000 8501-9828*" [$master3 CLUSTER NODES]
40 assert_match "*6553 7000*7101 8000*8501 9828*" [$master3 CLUSTER SLOTS]
41
42 $master4 CLUSTER DELSLOTSRANGE 11001 12000 12101 12200
43 assert_match "* 9829-11000 12001-12100 12201-13104*" [$master4 CLUSTER NODES]
44 assert_match "*9829 11000*12001 12100*12201 13104*" [$master4 CLUSTER SLOTS]
45
46 $master5 CLUSTER DELSLOTSRANGE 13501 14000 15001 16000
47 assert_match "* 13105-13500 14001-15000 16001-16383*" [$master5 CLUSTER NODES]
48 assert_match "*13105 13500*14001 15000*16001 16383*" [$master5 CLUSTER SLOTS]
49}
50
51test "ADDSLOTS command with several boundary conditions test suite" {
52 assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTS 3001 aaa}
53 assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTS 3001 -1000}
54 assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTS 3001 30003}
55
56 assert_error "ERR Slot 3200 is already busy" {R 0 cluster ADDSLOTS 3200}
57 assert_error "ERR Slot 8501 is already busy" {R 0 cluster ADDSLOTS 8501}
58
59 assert_error "ERR Slot 3001 specified multiple times" {R 0 cluster ADDSLOTS 3001 3002 3001}
60}
61
62test "ADDSLOTSRANGE command with several boundary conditions test suite" {
63 # Add multiple slots with incorrect argument number
64 assert_error "ERR wrong number of arguments for 'cluster|addslotsrange' command" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030}
65
66 # Add multiple slots with invalid input slot
67 assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030 aaa}
68 assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030 70000}
69 assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTSRANGE 3001 3020 -1000 3030}
70
71 # Add multiple slots when start slot number is greater than the end slot
72 assert_error "ERR start slot number 3030 is greater than end slot number 3025" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030 3025}
73
74 # Add multiple slots with busy slot
75 assert_error "ERR Slot 3200 is already busy" {R 0 cluster ADDSLOTSRANGE 3001 3020 3200 3250}
76
77 # Add multiple slots with assigned multiple times
78 assert_error "ERR Slot 3001 specified multiple times" {R 0 cluster ADDSLOTSRANGE 3001 3020 3001 3020}
79}
80
81test "DELSLOTSRANGE command with several boundary conditions test suite" {
82 # Delete multiple slots with incorrect argument number
83 assert_error "ERR wrong number of arguments for 'cluster|delslotsrange' command" {R 0 cluster DELSLOTSRANGE 1000 2000 2100}
84 assert_match "* 0-3000 3051-3276*" [$master1 CLUSTER NODES]
85 assert_match "*0 3000*3051 3276*" [$master1 CLUSTER SLOTS]
86
87 # Delete multiple slots with invalid input slot
88 assert_error "ERR Invalid or out of range slot" {R 0 cluster DELSLOTSRANGE 1000 2000 2100 aaa}
89 assert_error "ERR Invalid or out of range slot" {R 0 cluster DELSLOTSRANGE 1000 2000 2100 70000}
90 assert_error "ERR Invalid or out of range slot" {R 0 cluster DELSLOTSRANGE 1000 2000 -2100 2200}
91 assert_match "* 0-3000 3051-3276*" [$master1 CLUSTER NODES]
92 assert_match "*0 3000*3051 3276*" [$master1 CLUSTER SLOTS]
93
94 # Delete multiple slots when start slot number is greater than the end slot
95 assert_error "ERR start slot number 5800 is greater than end slot number 5750" {R 1 cluster DELSLOTSRANGE 5600 5700 5800 5750}
96 assert_match "* 3277-5000 5501-6552*" [$master2 CLUSTER NODES]
97 assert_match "*3277 5000*5501 6552*" [$master2 CLUSTER SLOTS]
98
99 # Delete multiple slots with already unassigned
100 assert_error "ERR Slot 7001 is already unassigned" {R 2 cluster DELSLOTSRANGE 7001 7100 9000 9200}
101 assert_match "* 6553-7000 7101-8000 8501-9828*" [$master3 CLUSTER NODES]
102 assert_match "*6553 7000*7101 8000*8501 9828*" [$master3 CLUSTER SLOTS]
103
104 # Delete multiple slots with assigned multiple times
105 assert_error "ERR Slot 12500 specified multiple times" {R 3 cluster DELSLOTSRANGE 12500 12600 12500 12600}
106 assert_match "* 9829-11000 12001-12100 12201-13104*" [$master4 CLUSTER NODES]
107 assert_match "*9829 11000*12001 12100*12201 13104*" [$master4 CLUSTER SLOTS]
108}
109} cluster_allocate_with_continuous_slots_local
110
111start_cluster 2 0 {tags {external:skip cluster experimental}} {
112
113set master1 [srv 0 "client"]
114set master2 [srv -1 "client"]
115
116test "SFLUSH - Errors and output validation" {
117 assert_match "* 0-8191*" [$master1 CLUSTER NODES]
118 assert_match "* 8192-16383*" [$master2 CLUSTER NODES]
119 assert_match "*0 8191*" [$master1 CLUSTER SLOTS]
120 assert_match "*8192 16383*" [$master2 CLUSTER SLOTS]
121
122 # make master1 non-continuous slots
123 $master1 cluster DELSLOTSRANGE 1000 2000
124
125 # Test SFLUSH errors validation
126 assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 4}
127 assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 4 SYNC}
128 assert_error {ERR Invalid or out of range slot} {$master1 SFLUSH x 4}
129 assert_error {ERR Invalid or out of range slot} {$master1 SFLUSH 0 12x}
130 assert_error {ERR Slot 3 specified multiple times} {$master1 SFLUSH 2 4 3 5}
131 assert_error {ERR start slot number 8 is greater than*} {$master1 SFLUSH 8 4}
132 assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 4 8 10}
133 assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 0 999 2001 8191 ASYNCX}
134
135 # Test SFLUSH output validation
136 assert_match "" [$master1 SFLUSH 2 4]
137 assert_match "" [$master1 SFLUSH 0 4]
138 assert_match "" [$master2 SFLUSH 0 4]
139 assert_match "" [$master1 SFLUSH 1 8191]
140 assert_match "" [$master1 SFLUSH 0 8190]
141 assert_match "" [$master1 SFLUSH 0 998 2001 8191]
142 assert_match "" [$master1 SFLUSH 1 999 2001 8191]
143 assert_match "" [$master1 SFLUSH 0 999 2001 8190]
144 assert_match "" [$master1 SFLUSH 0 999 2002 8191]
145 assert_match "{0 999} {2001 8191}" [$master1 SFLUSH 0 999 2001 8191]
146 assert_match "{0 999} {2001 8191}" [$master1 SFLUSH 0 8191]
147 assert_match "{0 999} {2001 8191}" [$master1 SFLUSH 0 4000 4001 8191]
148 assert_match "" [$master2 SFLUSH 8193 16383]
149 assert_match "" [$master2 SFLUSH 8192 16382]
150 assert_match "{8192 16383}" [$master2 SFLUSH 8192 16383]
151 assert_match "{8192 16383}" [$master2 SFLUSH 8192 16383 SYNC]
152 assert_match "{8192 16383}" [$master2 SFLUSH 8192 16383 ASYNC]
153 assert_match "{8192 16383}" [$master2 SFLUSH 8192 9000 9001 16383]
154 assert_match "{8192 16383}" [$master2 SFLUSH 8192 9000 9001 16383 SYNC]
155 assert_match "{8192 16383}" [$master2 SFLUSH 8192 9000 9001 16383 ASYNC]
156
157 # restore master1 continuous slots
158 $master1 cluster ADDSLOTSRANGE 1000 2000
159}
160
161test "SFLUSH - Deletes the keys with argument <NONE>/SYNC/ASYNC" {
162 foreach op {"" "SYNC" "ASYNC"} {
163 for {set i 0} {$i < 100} {incr i} {
164 catch {$master1 SET key$i val$i}
165 catch {$master2 SET key$i val$i}
166 }
167
168 assert {[$master1 DBSIZE] > 0}
169 assert {[$master2 DBSIZE] > 0}
170 if {$op eq ""} {
171 assert_match "{0 8191}" [ $master1 SFLUSH 0 8191]
172 } else {
173 assert_match "{0 8191}" [ $master1 SFLUSH 0 8191 $op]
174 }
175 assert {[$master1 DBSIZE] == 0}
176 assert {[$master2 DBSIZE] > 0}
177 assert_match "{8192 16383}" [ $master2 SFLUSH 8192 16383]
178 assert {[$master2 DBSIZE] == 0}
179 }
180}
181
182}
diff --git a/examples/redis-unstable/tests/unit/cluster/scripting.tcl b/examples/redis-unstable/tests/unit/cluster/scripting.tcl
deleted file mode 100644
index 76aa882..0000000
--- a/examples/redis-unstable/tests/unit/cluster/scripting.tcl
+++ /dev/null
@@ -1,91 +0,0 @@
1start_cluster 1 0 {tags {external:skip cluster}} {
2
3 test {Eval scripts with shebangs and functions default to no cross slots} {
4 # Test that scripts with shebang block cross slot operations
5 assert_error "ERR Script attempted to access keys that do not hash to the same slot*" {
6 r 0 eval {#!lua
7 redis.call('set', 'foo', 'bar')
8 redis.call('set', 'bar', 'foo')
9 return 'OK'
10 } 0}
11
12 # Test the functions by default block cross slot operations
13 r 0 function load REPLACE {#!lua name=crossslot
14 local function test_cross_slot(keys, args)
15 redis.call('set', 'foo', 'bar')
16 redis.call('set', 'bar', 'foo')
17 return 'OK'
18 end
19
20 redis.register_function('test_cross_slot', test_cross_slot)}
21 assert_error "ERR Script attempted to access keys that do not hash to the same slot*" {r FCALL test_cross_slot 0}
22 }
23
24 test {Cross slot commands are allowed by default for eval scripts and with allow-cross-slot-keys flag} {
25 # Old style lua scripts are allowed to access cross slot operations
26 r 0 eval "redis.call('set', 'foo', 'bar'); redis.call('set', 'bar', 'foo')" 0
27
28 # scripts with allow-cross-slot-keys flag are allowed
29 r 0 eval {#!lua flags=allow-cross-slot-keys
30 redis.call('set', 'foo', 'bar'); redis.call('set', 'bar', 'foo')
31 } 0
32
33 # Retrieve data from different slot to verify data has been stored in the correct dictionary in cluster-enabled setup
34 # during cross-slot operation from the above lua script.
35 assert_equal "bar" [r 0 get foo]
36 assert_equal "foo" [r 0 get bar]
37 r 0 del foo
38 r 0 del bar
39
40 # Functions with allow-cross-slot-keys flag are allowed
41 r 0 function load REPLACE {#!lua name=crossslot
42 local function test_cross_slot(keys, args)
43 redis.call('set', 'foo', 'bar')
44 redis.call('set', 'bar', 'foo')
45 return 'OK'
46 end
47
48 redis.register_function{function_name='test_cross_slot', callback=test_cross_slot, flags={ 'allow-cross-slot-keys' }}}
49 r FCALL test_cross_slot 0
50
51 # Retrieve data from different slot to verify data has been stored in the correct dictionary in cluster-enabled setup
52 # during cross-slot operation from the above lua function.
53 assert_equal "bar" [r 0 get foo]
54 assert_equal "foo" [r 0 get bar]
55 }
56
57 test {Cross slot commands are also blocked if they disagree with pre-declared keys} {
58 assert_error "ERR Script attempted to access keys that do not hash to the same slot*" {
59 r 0 eval {#!lua
60 redis.call('set', 'foo', 'bar')
61 return 'OK'
62 } 1 bar}
63 }
64
65 test {Cross slot commands are allowed by default if they disagree with pre-declared keys} {
66 r 0 flushall
67 r 0 eval "redis.call('set', 'foo', 'bar')" 1 bar
68
69 # Make sure the script writes to the right slot
70 assert_equal 1 [r 0 cluster COUNTKEYSINSLOT 12182] ;# foo slot
71 assert_equal 0 [r 0 cluster COUNTKEYSINSLOT 5061] ;# bar slot
72 }
73
74 test "Function no-cluster flag" {
75 R 0 function load {#!lua name=test
76 redis.register_function{function_name='f1', callback=function() return 'hello' end, flags={'no-cluster'}}
77 }
78 catch {R 0 fcall f1 0} e
79 assert_match {*Can not run script on cluster, 'no-cluster' flag is set*} $e
80 }
81
82 test "Script no-cluster flag" {
83 catch {
84 R 0 eval {#!lua flags=no-cluster
85 return 1
86 } 0
87 } e
88
89 assert_match {*Can not run script on cluster, 'no-cluster' flag is set*} $e
90 }
91}
diff --git a/examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl b/examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl
deleted file mode 100644
index 57b550a..0000000
--- a/examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl
+++ /dev/null
@@ -1,67 +0,0 @@
1#
2# Copyright (c) 2009-Present, Redis Ltd.
3# All rights reserved.
4#
5# Licensed under your choice of (a) the Redis Source Available License 2.0
6# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
7# GNU Affero General Public License v3 (AGPLv3).
8#
9# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
10#
11
12start_cluster 1 1 {tags {external:skip cluster}} {
13 set primary_id 0
14 set replica1_id 1
15
16 set primary [Rn $primary_id]
17 set replica [Rn $replica1_id]
18
19 test "Sharded pubsub publish behavior within multi/exec" {
20 foreach {node} {primary replica} {
21 set node [set $node]
22 $node MULTI
23 $node SPUBLISH ch1 "hello"
24 $node EXEC
25 }
26 }
27
28 test "Sharded pubsub within multi/exec with cross slot operation" {
29 $primary MULTI
30 $primary SPUBLISH ch1 "hello"
31 $primary GET foo
32 catch {$primary EXEC} err
33 assert_match {CROSSSLOT*} $err
34 }
35
36 test "Sharded pubsub publish behavior within multi/exec with read operation on primary" {
37 $primary MULTI
38 $primary SPUBLISH foo "hello"
39 $primary GET foo
40 $primary EXEC
41 } {0 {}}
42
43 test "Sharded pubsub publish behavior within multi/exec with read operation on replica" {
44 $replica MULTI
45 $replica SPUBLISH foo "hello"
46 catch {[$replica GET foo]} err
47 assert_match {MOVED*} $err
48 catch {[$replica EXEC]} err
49 assert_match {EXECABORT*} $err
50 }
51
52 test "Sharded pubsub publish behavior within multi/exec with write operation on primary" {
53 $primary MULTI
54 $primary SPUBLISH foo "hello"
55 $primary SET foo bar
56 $primary EXEC
57 } {0 OK}
58
59 test "Sharded pubsub publish behavior within multi/exec with write operation on replica" {
60 $replica MULTI
61 $replica SPUBLISH foo "hello"
62 catch {[$replica SET foo bar]} err
63 assert_match {MOVED*} $err
64 catch {[$replica EXEC]} err
65 assert_match {EXECABORT*} $err
66 }
67}
diff --git a/examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl b/examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl
deleted file mode 100644
index 0f3e3cc..0000000
--- a/examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl
+++ /dev/null
@@ -1,61 +0,0 @@
1start_cluster 2 2 {tags {external:skip cluster}} {
2
3 test "Verify that slot ownership transfer through gossip propagates deletes to replicas" {
4 assert {[s -2 role] eq {slave}}
5 wait_for_condition 1000 50 {
6 [s -2 master_link_status] eq {up}
7 } else {
8 fail "Instance #2 master link status is not up"
9 }
10
11 assert {[s -3 role] eq {slave}}
12 wait_for_condition 1000 50 {
13 [s -3 master_link_status] eq {up}
14 } else {
15 fail "Instance #3 master link status is not up"
16 }
17
18 # Set a single key that will be used to test deletion
19 set key "FOO"
20 R 0 SET $key TEST
21 set key_slot [R 0 cluster keyslot $key]
22 set slot_keys_num [R 0 cluster countkeysinslot $key_slot]
23 assert {$slot_keys_num > 0}
24
25 # Wait for replica to have the key
26 R 2 readonly
27 wait_for_condition 1000 50 {
28 [R 2 exists $key] eq "1"
29 } else {
30 fail "Test key was not replicated"
31 }
32
33 assert_equal [R 2 cluster countkeysinslot $key_slot] $slot_keys_num
34
35 # Assert other shards in cluster doesn't have the key
36 assert_equal [R 1 cluster countkeysinslot $key_slot] "0"
37 assert_equal [R 3 cluster countkeysinslot $key_slot] "0"
38
39 set nodeid [R 1 cluster myid]
40
41 R 1 cluster bumpepoch
42 # Move $key_slot to node 1
43 assert_equal [R 1 cluster setslot $key_slot node $nodeid] "OK"
44
45 wait_for_cluster_propagation
46
47 # src master will delete keys in the slot
48 wait_for_condition 50 100 {
49 [R 0 cluster countkeysinslot $key_slot] eq 0
50 } else {
51 fail "master 'countkeysinslot $key_slot' did not eq 0"
52 }
53
54 # src replica will delete keys in the slot
55 wait_for_condition 50 100 {
56 [R 2 cluster countkeysinslot $key_slot] eq 0
57 } else {
58 fail "replica 'countkeysinslot $key_slot' did not eq 0"
59 }
60 }
61}
diff --git a/examples/redis-unstable/tests/unit/cluster/slot-stats.tcl b/examples/redis-unstable/tests/unit/cluster/slot-stats.tcl
deleted file mode 100644
index 1123731..0000000
--- a/examples/redis-unstable/tests/unit/cluster/slot-stats.tcl
+++ /dev/null
@@ -1,1169 +0,0 @@
1#
2# Copyright (c) 2009-Present, Redis Ltd.
3# All rights reserved.
4#
5# Copyright (c) 2024-present, Valkey contributors.
6# All rights reserved.
7#
8# Licensed under your choice of (a) the Redis Source Available License 2.0
9# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
10# GNU Affero General Public License v3 (AGPLv3).
11#
12# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
13#
14
15# Integration tests for CLUSTER SLOT-STATS command.
16
17# -----------------------------------------------------------------------------
18# Helper functions for CLUSTER SLOT-STATS test cases.
19# -----------------------------------------------------------------------------
20
21# Converts array RESP response into a dict.
22# This is useful for many test cases, where unnecessary nesting is removed.
23proc convert_array_into_dict {slot_stats} {
24 set res [dict create]
25 foreach slot_stat $slot_stats {
26 # slot_stat is an array of size 2, where 0th index represents (int) slot,
27 # and 1st index represents (map) usage statistics.
28 dict set res [lindex $slot_stat 0] [lindex $slot_stat 1]
29 }
30 return $res
31}
32
33proc get_cmdstat_usec {cmd r} {
34 set cmdstatline [cmdrstat $cmd r]
35 regexp "usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec _
36 return $usec
37}
38
39proc initialize_expected_slots_dict {} {
40 set expected_slots [dict create]
41 for {set i 0} {$i < 16384} {incr i 1} {
42 dict set expected_slots $i 0
43 }
44 return $expected_slots
45}
46
47proc initialize_expected_slots_dict_with_range {start_slot end_slot} {
48 assert {$start_slot <= $end_slot}
49 set expected_slots [dict create]
50 for {set i $start_slot} {$i <= $end_slot} {incr i 1} {
51 dict set expected_slots $i 0
52 }
53 return $expected_slots
54}
55
56proc assert_empty_slot_stats {slot_stats metrics_to_assert} {
57 set slot_stats [convert_array_into_dict $slot_stats]
58 dict for {slot stats} $slot_stats {
59 foreach metric_name $metrics_to_assert {
60 set metric_value [dict get $stats $metric_name]
61 assert {$metric_value == 0}
62 }
63 }
64}
65
66proc assert_empty_slot_stats_with_exception {slot_stats exception_slots metrics_to_assert} {
67 set slot_stats [convert_array_into_dict $slot_stats]
68 dict for {slot stats} $exception_slots {
69 assert {[dict exists $slot_stats $slot]} ;# slot_stats must contain the expected slots.
70 }
71 dict for {slot stats} $slot_stats {
72 if {[dict exists $exception_slots $slot]} {
73 foreach metric_name $metrics_to_assert {
74 set metric_value [dict get $exception_slots $slot $metric_name]
75 assert {[dict get $stats $metric_name] == $metric_value}
76 }
77 } else {
78 dict for {metric value} $stats {
79 assert {$value == 0}
80 }
81 }
82 }
83}
84
85proc assert_equal_slot_stats {slot_stats_1 slot_stats_2 deterministic_metrics non_deterministic_metrics} {
86 set slot_stats_1 [convert_array_into_dict $slot_stats_1]
87 set slot_stats_2 [convert_array_into_dict $slot_stats_2]
88 assert {[dict size $slot_stats_1] == [dict size $slot_stats_2]}
89
90 dict for {slot stats_1} $slot_stats_1 {
91 assert {[dict exists $slot_stats_2 $slot]}
92 set stats_2 [dict get $slot_stats_2 $slot]
93
94 # For deterministic metrics, we assert their equality.
95 foreach metric $deterministic_metrics {
96 assert {[dict get $stats_1 $metric] == [dict get $stats_2 $metric]}
97 }
98 # For non-deterministic metrics, we assert their non-zeroness as a best-effort.
99 foreach metric $non_deterministic_metrics {
100 assert {([dict get $stats_1 $metric] == 0 && [dict get $stats_2 $metric] == 0) || \
101 ([dict get $stats_1 $metric] != 0 && [dict get $stats_2 $metric] != 0)}
102 }
103 }
104}
105
106proc assert_all_slots_have_been_seen {expected_slots} {
107 dict for {k v} $expected_slots {
108 assert {$v == 1}
109 }
110}
111
112proc assert_slot_visibility {slot_stats expected_slots} {
113 set slot_stats [convert_array_into_dict $slot_stats]
114 dict for {slot _} $slot_stats {
115 assert {[dict exists $expected_slots $slot]}
116 dict set expected_slots $slot 1
117 }
118
119 assert_all_slots_have_been_seen $expected_slots
120}
121
122proc assert_slot_stats_monotonic_order {slot_stats orderby is_desc} {
123 # For Tcl dict, the order of iteration is the order in which the keys were inserted into the dictionary
124 # Thus, the response ordering is preserved upon calling 'convert_array_into_dict()'.
125 # Source: https://www.tcl.tk/man/tcl8.6.11/TclCmd/dict.htm
126 set slot_stats [convert_array_into_dict $slot_stats]
127 set prev_metric -1
128 dict for {_ stats} $slot_stats {
129 set curr_metric [dict get $stats $orderby]
130 if {$prev_metric != -1} {
131 if {$is_desc == 1} {
132 assert {$prev_metric >= $curr_metric}
133 } else {
134 assert {$prev_metric <= $curr_metric}
135 }
136 }
137 set prev_metric $curr_metric
138 }
139}
140
141proc assert_slot_stats_monotonic_descent {slot_stats orderby} {
142 assert_slot_stats_monotonic_order $slot_stats $orderby 1
143}
144
145proc assert_slot_stats_monotonic_ascent {slot_stats orderby} {
146 assert_slot_stats_monotonic_order $slot_stats $orderby 0
147}
148
149proc wait_for_replica_key_exists {key key_count} {
150 wait_for_condition 1000 50 {
151 [R 1 exists $key] eq "$key_count"
152 } else {
153 fail "Test key was not replicated"
154 }
155}
156
157# -----------------------------------------------------------------------------
158# Test cases for CLUSTER SLOT-STATS cpu-usec metric correctness.
159# -----------------------------------------------------------------------------
160
161start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
162
163 # Define shared variables.
164 set key "FOO"
165 set key_slot [R 0 cluster keyslot $key]
166 set key_secondary "FOO2"
167 set key_secondary_slot [R 0 cluster keyslot $key_secondary]
168 set metrics_to_assert [list cpu-usec]
169
170 test "CLUSTER SLOT-STATS cpu-usec reset upon CONFIG RESETSTAT." {
171 R 0 SET $key VALUE
172 R 0 DEL $key
173 R 0 CONFIG RESETSTAT
174 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
175 assert_empty_slot_stats $slot_stats $metrics_to_assert
176 }
177 R 0 CONFIG RESETSTAT
178 R 0 FLUSHALL
179
180 test "CLUSTER SLOT-STATS cpu-usec reset upon slot migration." {
181 R 0 SET $key VALUE
182
183 R 0 CLUSTER DELSLOTS $key_slot
184 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
185 assert_empty_slot_stats $slot_stats $metrics_to_assert
186
187 R 0 CLUSTER ADDSLOTS $key_slot
188 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
189 assert_empty_slot_stats $slot_stats $metrics_to_assert
190 }
191 R 0 CONFIG RESETSTAT
192 R 0 FLUSHALL
193
194 test "CLUSTER SLOT-STATS cpu-usec for non-slot specific commands." {
195 R 0 INFO
196 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
197 assert_empty_slot_stats $slot_stats $metrics_to_assert
198 }
199 R 0 CONFIG RESETSTAT
200 R 0 FLUSHALL
201
202 test "CLUSTER SLOT-STATS cpu-usec for slot specific commands." {
203 R 0 SET $key VALUE
204 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
205 set usec [get_cmdstat_usec set r]
206 set expected_slot_stats [
207 dict create $key_slot [
208 dict create cpu-usec $usec
209 ]
210 ]
211 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
212 }
213 R 0 CONFIG RESETSTAT
214 R 0 FLUSHALL
215
216 test "CLUSTER SLOT-STATS cpu-usec for blocking commands, unblocked on keyspace update." {
217 # Blocking command with no timeout. Only keyspace update can unblock this client.
218 set rd [redis_deferring_client]
219 $rd BLPOP $key 0
220 wait_for_blocked_clients_count 1
221 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
222 # When the client is blocked, no accumulation is made. This behaviour is identical to INFO COMMANDSTATS.
223 assert_empty_slot_stats $slot_stats $metrics_to_assert
224
225 # Unblocking command.
226 R 0 LPUSH $key value
227 wait_for_blocked_clients_count 0
228
229 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
230 set lpush_usec [get_cmdstat_usec lpush r]
231 set blpop_usec [get_cmdstat_usec blpop r]
232
233 # Assert that both blocking and non-blocking command times have been accumulated.
234 set expected_slot_stats [
235 dict create $key_slot [
236 dict create cpu-usec [expr $lpush_usec + $blpop_usec]
237 ]
238 ]
239 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
240 }
241 R 0 CONFIG RESETSTAT
242 R 0 FLUSHALL
243
244 test "CLUSTER SLOT-STATS cpu-usec for blocking commands, unblocked on timeout." {
245 # Blocking command with 0.5 seconds timeout.
246 set rd [redis_deferring_client]
247 $rd BLPOP $key 0.5
248
249 # Confirm that the client is blocked, then unblocked within 1 second.
250 wait_for_blocked_clients_count 1
251 wait_for_blocked_clients_count 0
252
253 # Assert that the blocking command time has been accumulated.
254 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
255 set blpop_usec [get_cmdstat_usec blpop r]
256 set expected_slot_stats [
257 dict create $key_slot [
258 dict create cpu-usec $blpop_usec
259 ]
260 ]
261 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
262 }
263 R 0 CONFIG RESETSTAT
264 R 0 FLUSHALL
265
266 test "CLUSTER SLOT-STATS cpu-usec for transactions." {
267 set r1 [redis_client]
268 $r1 MULTI
269 $r1 SET $key value
270 $r1 GET $key
271
272 # CPU metric is not accumulated until EXEC is reached. This behaviour is identical to INFO COMMANDSTATS.
273 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
274 assert_empty_slot_stats $slot_stats $metrics_to_assert
275
276 # Execute transaction, and assert that all nested command times have been accumulated.
277 $r1 EXEC
278 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
279 set exec_usec [get_cmdstat_usec exec r]
280 set expected_slot_stats [
281 dict create $key_slot [
282 dict create cpu-usec $exec_usec
283 ]
284 ]
285 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
286 }
287 R 0 CONFIG RESETSTAT
288 R 0 FLUSHALL
289
290 test "CLUSTER SLOT-STATS cpu-usec for lua-scripts, without cross-slot keys." {
291 R 0 eval {#!lua
292 redis.call('set', KEYS[1], 'bar') redis.call('get', KEYS[2])
293 } 2 $key $key
294
295 set eval_usec [get_cmdstat_usec eval r]
296 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
297
298 set expected_slot_stats [
299 dict create $key_slot [
300 dict create cpu-usec $eval_usec
301 ]
302 ]
303 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
304 }
305 R 0 CONFIG RESETSTAT
306 R 0 FLUSHALL
307
308 test "CLUSTER SLOT-STATS cpu-usec for lua-scripts, with cross-slot keys." {
309 R 0 eval {#!lua flags=allow-cross-slot-keys
310 redis.call('set', KEYS[1], 'bar') redis.call('get', ARGV[1])
311 } 1 $key $key_secondary
312
313 # For cross-slot, we do not accumulate at all.
314 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
315 assert_empty_slot_stats $slot_stats $metrics_to_assert
316 }
317 R 0 CONFIG RESETSTAT
318 R 0 FLUSHALL
319
320 test "CLUSTER SLOT-STATS cpu-usec for functions, without cross-slot keys." {
321 R 0 function load replace {#!lua name=f1
322 redis.register_function{
323 function_name='f1',
324 callback=function(keys, args) redis.call('set', keys[1], '1') redis.call('get', keys[2]) end
325 }
326 }
327 R 0 fcall f1 2 $key $key
328
329 set fcall_usec [get_cmdstat_usec fcall r]
330 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
331
332 set expected_slot_stats [
333 dict create $key_slot [
334 dict create cpu-usec $fcall_usec
335 ]
336 ]
337 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
338 }
339 R 0 CONFIG RESETSTAT
340 R 0 FLUSHALL
341
342 test "CLUSTER SLOT-STATS cpu-usec for functions, with cross-slot keys." {
343 R 0 function load replace {#!lua name=f1
344 redis.register_function{
345 function_name='f1',
346 callback=function(keys, args) redis.call('set', keys[1], '1') redis.call('get', args[1]) end,
347 flags={'allow-cross-slot-keys'}
348 }
349 }
350 R 0 fcall f1 1 $key $key_secondary
351
352 # For cross-slot, we do not accumulate at all.
353 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
354 assert_empty_slot_stats $slot_stats $metrics_to_assert
355 }
356 R 0 CONFIG RESETSTAT
357 R 0 FLUSHALL
358}
359
360# -----------------------------------------------------------------------------
361# Test cases for CLUSTER SLOT-STATS network-bytes-in.
362# -----------------------------------------------------------------------------
363
364start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
365
366 # Define shared variables.
367 set key "key"
368 set key_slot [R 0 cluster keyslot $key]
369 set metrics_to_assert [list network-bytes-in]
370
371 test "CLUSTER SLOT-STATS network-bytes-in, multi bulk buffer processing." {
372 # *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes.
373 R 0 SET $key value
374
375 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
376 set expected_slot_stats [
377 dict create $key_slot [
378 dict create network-bytes-in 33
379 ]
380 ]
381 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
382 }
383 R 0 CONFIG RESETSTAT
384 R 0 FLUSHALL
385
386 test "CLUSTER SLOT-STATS network-bytes-in, in-line buffer processing." {
387 set rd [redis_deferring_client]
388 # SET key value\r\n --> 15 bytes.
389 $rd write "SET $key value\r\n"
390 $rd flush
391
392 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
393 set expected_slot_stats [
394 dict create $key_slot [
395 dict create network-bytes-in 15
396 ]
397 ]
398
399 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
400 }
401 R 0 CONFIG RESETSTAT
402 R 0 FLUSHALL
403
404 test "CLUSTER SLOT-STATS network-bytes-in, blocking command." {
405 set rd [redis_deferring_client]
406 # *3\r\n$5\r\nblpop\r\n$3\r\nkey\r\n$1\r\n0\r\n --> 31 bytes.
407 $rd BLPOP $key 0
408 wait_for_blocked_clients_count 1
409
410 # Slot-stats must be empty here, as the client is yet to be unblocked.
411 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
412 assert_empty_slot_stats $slot_stats $metrics_to_assert
413
414 # *3\r\n$5\r\nlpush\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 35 bytes.
415 R 0 LPUSH $key value
416 wait_for_blocked_clients_count 0
417
418 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
419 set expected_slot_stats [
420 dict create $key_slot [
421 dict create network-bytes-in 66 ;# 31 + 35 bytes.
422 ]
423 ]
424
425 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
426 }
427 R 0 CONFIG RESETSTAT
428 R 0 FLUSHALL
429
430 test "CLUSTER SLOT-STATS network-bytes-in, multi-exec transaction." {
431 set r [redis_client]
432 # *1\r\n$5\r\nmulti\r\n --> 15 bytes.
433 $r MULTI
434 # *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes.
435 assert {[$r SET $key value] eq {QUEUED}}
436 # *1\r\n$4\r\nexec\r\n --> 14 bytes.
437 assert {[$r EXEC] eq {OK}}
438
439 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
440 set expected_slot_stats [
441 dict create $key_slot [
442 dict create network-bytes-in 62 ;# 15 + 33 + 14 bytes.
443 ]
444 ]
445
446 assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
447 }
448 R 0 CONFIG RESETSTAT
449 R 0 FLUSHALL
450
451 test "CLUSTER SLOT-STATS network-bytes-in, non slot specific command." {
452 R 0 INFO
453
454 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
455 assert_empty_slot_stats $slot_stats $metrics_to_assert
456 }
457 R 0 CONFIG RESETSTAT
458 R 0 FLUSHALL
459
460 test "CLUSTER SLOT-STATS network-bytes-in, pub/sub." {
461 # PUB/SUB does not get accumulated at per-slot basis,
462 # as it is cluster-wide and is not slot specific.
463 set rd [redis_deferring_client]
464 $rd subscribe channel
465 R 0 publish channel message
466
467 set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
468 assert_empty_slot_stats $slot_stats $metrics_to_assert
469 }
470 R 0 CONFIG RESETSTAT
471 R 0 FLUSHALL
472}
473
# Sharded pub/sub IS slot-specific (unlike plain pub/sub): ingress is charged
# to the channel's slot, on both the primary and the replica.
start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
    set channel "channel"
    set key_slot [R 0 cluster keyslot $channel]
    set metrics_to_assert [list network-bytes-in]

    # Setup replication.
    assert {[s -1 role] eq {slave}}
    wait_for_condition 1000 50 {
        [s -1 master_link_status] eq {up}
    } else {
        fail "Instance #1 master link status is not up"
    }
    R 1 readonly

    test "CLUSTER SLOT-STATS network-bytes-in, sharded pub/sub." {
        set slot [R 0 cluster keyslot $channel]
        set primary [Rn 0]
        set replica [Rn 1]
        set replica_subscriber [redis_deferring_client -1]
        $replica_subscriber SSUBSCRIBE $channel
        # *2\r\n$10\r\nssubscribe\r\n$7\r\nchannel\r\n --> 34 bytes.
        $primary SPUBLISH $channel hello
        # *3\r\n$8\r\nspublish\r\n$7\r\nchannel\r\n$5\r\nhello\r\n --> 42 bytes.

        # Primary saw the SPUBLISH (42 bytes) against the channel's slot.
        set slot_stats [$primary CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-in 42
            ]
        ]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert

        # Replica saw the SSUBSCRIBE (34 bytes) against the same slot.
        set slot_stats [$replica CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-in 34
            ]
        ]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
    R 0 CONFIG RESETSTAT
    R 0 FLUSHALL
}
517
# -----------------------------------------------------------------------------
# Test cases for CLUSTER SLOT-STATS network-bytes-out correctness.
# -----------------------------------------------------------------------------
521
# Egress (network-bytes-out) accounting: replies are charged to the slot of
# the command's key; key-less commands and still-blocked clients charge nothing.
start_cluster 1 0 {tags {external:skip cluster}} {
    # Define shared variables.
    set key "FOO"
    set key_slot [R 0 cluster keyslot $key]
    set expected_slots_to_key_count [dict create $key_slot 1]
    set metrics_to_assert [list network-bytes-out]
    R 0 CONFIG SET cluster-slot-stats-enabled yes

    test "CLUSTER SLOT-STATS network-bytes-out, for non-slot specific commands." {
        R 0 INFO
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats $slot_stats $metrics_to_assert
    }
    R 0 CONFIG RESETSTAT
    R 0 FLUSHALL

    test "CLUSTER SLOT-STATS network-bytes-out, for slot specific commands." {
        R 0 SET $key value
        # +OK\r\n --> 5 bytes

        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-out 5
            ]
        ]
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
    R 0 CONFIG RESETSTAT
    R 0 FLUSHALL

    test "CLUSTER SLOT-STATS network-bytes-out, blocking commands." {
        set rd [redis_deferring_client]
        $rd BLPOP $key 0
        wait_for_blocked_clients_count 1

        # Assert empty slot stats here, since COB is yet to be flushed due to the block.
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats $slot_stats $metrics_to_assert

        # Unblock the command.
        # LPUSH client) :1\r\n --> 4 bytes.
        # BLPOP client) *2\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 24 bytes, upon unblocking.
        R 0 LPUSH $key value
        wait_for_blocked_clients_count 0

        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-out 28 ;# 4 + 24 bytes.
            ]
        ]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
    R 0 CONFIG RESETSTAT
    R 0 FLUSHALL
}
579
# Egress towards the replication stream is also charged to the key's slot,
# on top of the local client's reply bytes.
start_cluster 1 1 {tags {external:skip cluster}} {

    # Define shared variables.
    set key "FOO"
    set key_slot [R 0 CLUSTER KEYSLOT $key]
    set metrics_to_assert [list network-bytes-out]
    R 0 CONFIG SET cluster-slot-stats-enabled yes

    # Setup replication.
    assert {[s -1 role] eq {slave}}
    wait_for_condition 1000 50 {
        [s -1 master_link_status] eq {up}
    } else {
        fail "Instance #1 master link status is not up"
    }
    R 1 readonly

    test "CLUSTER SLOT-STATS network-bytes-out, replication stream egress." {
        assert_equal [R 0 SET $key VALUE] {OK}
        # Local client) +OK\r\n --> 5 bytes.
        # Replication stream) *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes.
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-out 38 ;# 5 + 33 bytes.
            ]
        ]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
}
610
# Egress accounting for sharded pub/sub: subscribe acks and delivered messages
# are charged to each channel's slot, including the cross-slot case where one
# subscriber holds subscriptions in two different slots.
start_cluster 1 1 {tags {external:skip cluster}} {

    # Define shared variables.
    set channel "channel"
    set key_slot [R 0 cluster keyslot $channel]
    set channel_secondary "channel2"
    set key_slot_secondary [R 0 cluster keyslot $channel_secondary]
    set metrics_to_assert [list network-bytes-out]
    R 0 CONFIG SET cluster-slot-stats-enabled yes

    test "CLUSTER SLOT-STATS network-bytes-out, sharded pub/sub, single channel." {
        set slot [R 0 cluster keyslot $channel]
        set publisher [Rn 0]
        set subscriber [redis_client]
        set replica [redis_deferring_client -1]

        # Subscriber client) *3\r\n$10\r\nssubscribe\r\n$7\r\nchannel\r\n:1\r\n --> 38 bytes
        $subscriber SSUBSCRIBE $channel
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-out 38
            ]
        ]
        R 0 CONFIG RESETSTAT

        # Publisher client) :1\r\n --> 4 bytes.
        # Subscriber client) *3\r\n$8\r\nsmessage\r\n$7\r\nchannel\r\n$5\r\nhello\r\n --> 42 bytes.
        assert_equal 1 [$publisher SPUBLISH $channel hello]
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create $key_slot [
                dict create network-bytes-out 46 ;# 4 + 42 bytes.
            ]
        ]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
    $subscriber QUIT
    R 0 FLUSHALL
    R 0 CONFIG RESETSTAT

    test "CLUSTER SLOT-STATS network-bytes-out, sharded pub/sub, cross-slot channels." {
        set slot [R 0 cluster keyslot $channel]
        set publisher [Rn 0]
        set subscriber [redis_client]
        set replica [redis_deferring_client -1]

        # Stack multi-slot subscriptions against a single client.
        # For primary channel;
        # Subscriber client) *3\r\n$10\r\nssubscribe\r\n$7\r\nchannel\r\n:1\r\n --> 38 bytes
        # For secondary channel;
        # Subscriber client) *3\r\n$10\r\nssubscribe\r\n$8\r\nchannel2\r\n:1\r\n --> 39 bytes
        $subscriber SSUBSCRIBE $channel
        $subscriber SSUBSCRIBE $channel_secondary
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create \
                $key_slot [ \
                    dict create network-bytes-out 38
                ] \
                $key_slot_secondary [ \
                    dict create network-bytes-out 39
                ]
        ]
        R 0 CONFIG RESETSTAT

        # For primary channel;
        # Publisher client) :1\r\n --> 4 bytes.
        # Subscriber client) *3\r\n$8\r\nsmessage\r\n$7\r\nchannel\r\n$5\r\nhello\r\n --> 42 bytes.
        # For secondary channel;
        # Publisher client) :1\r\n --> 4 bytes.
        # Subscriber client) *3\r\n$8\r\nsmessage\r\n$8\r\nchannel2\r\n$5\r\nhello\r\n --> 43 bytes.
        assert_equal 1 [$publisher SPUBLISH $channel hello]
        assert_equal 1 [$publisher SPUBLISH $channel_secondary hello]
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set expected_slot_stats [
            dict create \
                $key_slot [ \
                    dict create network-bytes-out 46 ;# 4 + 42 bytes.
                ] \
                $key_slot_secondary [ \
                    dict create network-bytes-out 47 ;# 4 + 43 bytes.
                ]
        ]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
}
698
# -----------------------------------------------------------------------------
# Test cases for CLUSTER SLOT-STATS key-count metric correctness.
# -----------------------------------------------------------------------------
702
# key-count metric: tracks key introduction, mutation (no change), deletion,
# and slot visibility under ADDSLOTS/DELSLOTS ownership changes.
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

    # Define shared variables.
    set key "FOO"
    set key_slot [R 0 cluster keyslot $key]
    set metrics_to_assert [list key-count]
    set expected_slot_stats [
        dict create $key_slot [
            dict create key-count 1
        ]
    ]

    test "CLUSTER SLOT-STATS contains default value upon redis-server startup" {
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats $slot_stats $metrics_to_assert
    }

    test "CLUSTER SLOT-STATS contains correct metrics upon key introduction" {
        R 0 SET $key TEST
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }

    test "CLUSTER SLOT-STATS contains correct metrics upon key mutation" {
        # Overwriting an existing key must not change key-count.
        R 0 SET $key NEW_VALUE
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }

    test "CLUSTER SLOT-STATS contains correct metrics upon key deletion" {
        R 0 DEL $key
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats $slot_stats $metrics_to_assert
    }

    test "CLUSTER SLOT-STATS slot visibility based on slot ownership changes" {
        R 0 CONFIG SET cluster-require-full-coverage no

        R 0 CLUSTER DELSLOTS $key_slot
        set expected_slots [initialize_expected_slots_dict]
        dict unset expected_slots $key_slot
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert {[dict size $expected_slots] == 16383}
        assert_slot_visibility $slot_stats $expected_slots

        R 0 CLUSTER ADDSLOTS $key_slot
        set expected_slots [initialize_expected_slots_dict]
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert {[dict size $expected_slots] == 16384}
        assert_slot_visibility $slot_stats $expected_slots
    }
}
755
# -----------------------------------------------------------------------------
# Test cases for CLUSTER SLOT-STATS SLOTSRANGE sub-argument.
# -----------------------------------------------------------------------------
759
# SLOTSRANGE sub-argument: the reply must cover exactly the owned slots within
# the requested [start, end] range.
start_cluster 1 0 {tags {external:skip cluster}} {

    test "CLUSTER SLOT-STATS SLOTSRANGE all slots present" {
        set start_slot 100
        set end_slot 102
        set expected_slots [initialize_expected_slots_dict_with_range $start_slot $end_slot]

        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $start_slot $end_slot]
        assert_slot_visibility $slot_stats $expected_slots
    }

    test "CLUSTER SLOT-STATS SLOTSRANGE some slots missing" {
        set start_slot 100
        set end_slot 102
        set expected_slots [initialize_expected_slots_dict_with_range $start_slot $end_slot]

        # Un-owned slots inside the range must be omitted from the reply.
        R 0 CLUSTER DELSLOTS $start_slot
        dict unset expected_slots $start_slot

        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $start_slot $end_slot]
        assert_slot_visibility $slot_stats $expected_slots
    }
}
783
# -----------------------------------------------------------------------------
# Test cases for CLUSTER SLOT-STATS ORDERBY sub-argument.
# -----------------------------------------------------------------------------
787
# ORDERBY sub-argument: correct ASC/DESC ordering per metric, LIMIT pagination,
# and argument validation (including metrics disabled by config).
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

    set metrics [list "key-count" "memory-bytes" "cpu-usec" "network-bytes-in" "network-bytes-out"]

    # SET keys for target hashslots, to encourage ordering.
    # Hash-tag {0}..{4} pins keys to 5 known slots, with an increasing
    # number of keys per slot (1, 2, 3, 4, 5).
    set hash_tags [list 0 1 2 3 4]
    set num_keys 1
    foreach hash_tag $hash_tags {
        for {set i 0} {$i < $num_keys} {incr i 1} {
            R 0 SET "$i{$hash_tag}" VALUE
        }
        incr num_keys 1
    }

    # SET keys for random hashslots, for random noise.
    set num_keys 0
    while {$num_keys < 1000} {
        set random_key [randomInt 16384]
        R 0 SET $random_key VALUE
        incr num_keys 1
    }

    test "CLUSTER SLOT-STATS ORDERBY DESC correct ordering" {
        foreach orderby $metrics {
            set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby DESC]
            assert_slot_stats_monotonic_descent $slot_stats $orderby
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY ASC correct ordering" {
        foreach orderby $metrics {
            set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby ASC]
            assert_slot_stats_monotonic_ascent $slot_stats $orderby
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is less than number of assigned slots" {
        R 0 FLUSHALL SYNC
        R 0 CONFIG RESETSTAT

        foreach orderby $metrics {
            set limit 5
            set slot_stats_desc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit DESC]
            set slot_stats_asc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit ASC]
            set slot_stats_desc_length [llength $slot_stats_desc]
            set slot_stats_asc_length [llength $slot_stats_asc]
            assert {$limit == $slot_stats_desc_length && $limit == $slot_stats_asc_length}

            # All slot statistics have been reset to 0, so we will order by slot in ascending order.
            set expected_slots [dict create 0 0 1 0 2 0 3 0 4 0]
            assert_slot_visibility $slot_stats_desc $expected_slots
            assert_slot_visibility $slot_stats_asc $expected_slots
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is greater than number of assigned slots" {
        R 0 CONFIG SET cluster-require-full-coverage no
        R 0 FLUSHALL SYNC
        R 0 CLUSTER FLUSHSLOTS
        R 0 CLUSTER ADDSLOTS 100 101

        foreach orderby $metrics {
            set num_assigned_slots 2
            set limit 5
            set slot_stats_desc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit DESC]
            set slot_stats_asc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit ASC]
            set slot_stats_desc_length [llength $slot_stats_desc]
            set slot_stats_asc_length [llength $slot_stats_asc]
            set expected_response_length [expr min($num_assigned_slots, $limit)]
            assert {$expected_response_length == $slot_stats_desc_length && $expected_response_length == $slot_stats_asc_length}

            set expected_slots [dict create 100 0 101 0]
            assert_slot_visibility $slot_stats_desc $expected_slots
            assert_slot_visibility $slot_stats_asc $expected_slots
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY arg sanity check." {
        # Non-existent argument.
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY key-count non-existent-arg}
        # Negative LIMIT.
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY key-count DESC LIMIT -1}
        # Non-existent ORDERBY metric.
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY non-existent-metric}
        # When cluster-slot-stats-enabled config is disabled, you cannot sort using advanced metrics.
        R 0 CONFIG SET cluster-slot-stats-enabled no
        set orderby "cpu-usec"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}
        set orderby "network-bytes-in"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}
        set orderby "network-bytes-out"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}
        set orderby "memory-bytes"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}

        # When only cpu net is enabled, memory-bytes ORDERBY should fail
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu net"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY memory-bytes}
    }

}
889
# -----------------------------------------------------------------------------
# Test cases for CLUSTER SLOT-STATS replication.
# -----------------------------------------------------------------------------
893
# Replication of slot-stats metrics between a primary and its replica.
start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

    # Define shared variables.
    set key "key"
    set key_slot [R 0 CLUSTER KEYSLOT $key]
    set primary [Rn 0]
    set replica [Rn 1]

    # For replication, assertions are split between deterministic and non-deterministic metrics.
    # * For deterministic metrics, strict equality assertions are made.
    # * For non-deterministic metrics, non-zeroness assertions are made.
    #   Non-zeroness as in, both primary and replica should either have some value, or no value at all.
    #
    # * key-count is deterministic between primary and its replica.
    # * cpu-usec is non-deterministic between primary and its replica.
    # * network-bytes-in is deterministic between primary and its replica.
    # * network-bytes-out will remain empty in the replica, since primary client do not receive replies, unless for replicationSendAck().
    set deterministic_metrics [list key-count network-bytes-in]
    set non_deterministic_metrics [list cpu-usec]
    set empty_metrics [list network-bytes-out]

    # Setup replication.
    assert {[s -1 role] eq {slave}}
    wait_for_condition 1000 50 {
        [s -1 master_link_status] eq {up}
    } else {
        fail "Instance #1 master link status is not up"
    }
    R 1 readonly

    test "CLUSTER SLOT-STATS metrics replication for new keys" {
        # *3\r\n$3\r\nset\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes.
        R 0 SET $key VALUE

        set expected_slot_stats [
            dict create $key_slot [
                dict create key-count 1 network-bytes-in 33
            ]
        ]
        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats_master $expected_slot_stats $deterministic_metrics

        wait_for_condition 500 10 {
            [string match {*calls=1,*} [cmdrstat set $replica]]
        } else {
            fail "Replica did not receive the command."
        }
        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_equal_slot_stats $slot_stats_master $slot_stats_replica $deterministic_metrics $non_deterministic_metrics
        assert_empty_slot_stats $slot_stats_replica $empty_metrics
    }
    R 0 CONFIG RESETSTAT
    R 1 CONFIG RESETSTAT

    test "CLUSTER SLOT-STATS metrics replication for existing keys" {
        # *3\r\n$3\r\nset\r\n$3\r\nkey\r\n$13\r\nvalue_updated\r\n --> 42 bytes.
        R 0 SET $key VALUE_UPDATED

        set expected_slot_stats [
            dict create $key_slot [
                dict create key-count 1 network-bytes-in 42
            ]
        ]
        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats_master $expected_slot_stats $deterministic_metrics

        wait_for_condition 500 10 {
            [string match {*calls=1,*} [cmdrstat set $replica]]
        } else {
            fail "Replica did not receive the command."
        }
        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_equal_slot_stats $slot_stats_master $slot_stats_replica $deterministic_metrics $non_deterministic_metrics
        assert_empty_slot_stats $slot_stats_replica $empty_metrics
    }
    R 0 CONFIG RESETSTAT
    R 1 CONFIG RESETSTAT

    test "CLUSTER SLOT-STATS metrics replication for deleting keys" {
        # *2\r\n$3\r\ndel\r\n$3\r\nkey\r\n --> 22 bytes.
        R 0 DEL $key

        set expected_slot_stats [
            dict create $key_slot [
                dict create key-count 0 network-bytes-in 22
            ]
        ]
        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats_master $expected_slot_stats $deterministic_metrics

        wait_for_condition 500 10 {
            [string match {*calls=1,*} [cmdrstat del $replica]]
        } else {
            fail "Replica did not receive the command."
        }
        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_equal_slot_stats $slot_stats_master $slot_stats_replica $deterministic_metrics $non_deterministic_metrics
        assert_empty_slot_stats $slot_stats_replica $empty_metrics
    }
    R 0 CONFIG RESETSTAT
    R 1 CONFIG RESETSTAT
}
996
# After atomic slot migration (ASM), all per-slot metrics except key-count
# must be reset on the new owner, in both migration directions.
start_cluster 2 2 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
    test "CLUSTER SLOT-STATS reset upon atomic slot migration" {
        # key on slot-0
        set key0 "{06S}mykey0"
        set key0_slot [R 0 CLUSTER KEYSLOT $key0]
        R 0 SET $key0 VALUE

        # Migrate slot-0 to node-1
        R 1 CLUSTER MIGRATION IMPORT 0 0
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 1 cluster_slot_migration_active_tasks] == 0
        } else {
            fail "ASM tasks did not complete"
        }

        set expected_slot_stats [
            dict create \
                $key0_slot [ \
                    dict create key-count 1 \
                    dict create cpu-usec 0 \
                    dict create network-bytes-in 0 \
                    dict create network-bytes-out 0 \
                ]
        ]
        set metrics_to_assert [list key-count cpu-usec network-bytes-in network-bytes-out]

        # Verify metrics are reset except key-count
        set slot_stats [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 0]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert

        # Migrate slot-0 back to node-0
        R 0 CLUSTER MIGRATION IMPORT 0 0
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 1 cluster_slot_migration_active_tasks] == 0
        } else {
            fail "ASM tasks did not complete"
        }

        # Verify metrics are reset except key-count
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 0]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
}
1042
# -----------------------------------------------------------------------------
# Test cases for CLUSTER SLOT-STATS memory-bytes field presence.
# -----------------------------------------------------------------------------
1046
# memory-bytes field presence when slot-stats is enabled on startup, plus the
# runtime config transitions between metric combinations (net/mem/cpu); memory
# tracking, once disabled at runtime, cannot be re-enabled.
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
    # Define shared variables.
    set key "FOO"
    set key_slot [R 0 cluster keyslot $key]

    test "CLUSTER SLOT-STATS memory-bytes field present when cluster-slot-stats-enabled set on startup" {
        R 0 SET $key VALUE
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Verify memory-bytes field is present
        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {[dict exists $stats memory-bytes]}
        assert {[dict get $stats memory-bytes] > 0}
    }

    test "CLUSTER SLOT-STATS net mem combination shows only net and mem stats" {
        R 0 CONFIG SET cluster-slot-stats-enabled "net mem"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        set stats [dict get $slot_stats $key_slot]
        assert {[dict exists $stats memory-bytes]}
        assert {[dict exists $stats network-bytes-in]}
        assert {[dict exists $stats network-bytes-out]}
        assert {![dict exists $stats cpu-usec]}
    }

    test "CLUSTER SLOT-STATS cpu mem combination shows only cpu and mem stats" {
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu mem"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        set stats [dict get $slot_stats $key_slot]
        assert {[dict exists $stats memory-bytes]}
        assert {[dict exists $stats cpu-usec]}
        assert {![dict exists $stats network-bytes-in]}
        assert {![dict exists $stats network-bytes-out]}

        # Restore to yes for subsequent tests
        R 0 CONFIG SET cluster-slot-stats-enabled yes
    }

    test "CLUSTER SLOT-STATS memory-bytes field not present after disabling cluster-slot-stats-enabled" {
        R 0 CONFIG SET cluster-slot-stats-enabled no
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Verify memory-bytes field is not present after disabling config
        # (memory tracking is disabled when MEM flag is removed)
        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {![dict exists $stats memory-bytes]}

        # Verify other stats fields are not present
        assert {![dict exists $stats cpu-usec]}
        assert {![dict exists $stats network-bytes-in]}
        assert {![dict exists $stats network-bytes-out]}
    }

    test "CLUSTER SLOT-STATS memory tracking cannot be re-enabled after being disabled" {
        # Once memory tracking is disabled, it cannot be re-enabled at runtime
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled yes}
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled mem}

        # But cpu and net can still be enabled
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu net"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {![dict exists $stats memory-bytes]}
        assert {[dict exists $stats cpu-usec]}
        assert {[dict exists $stats network-bytes-in]}
        assert {[dict exists $stats network-bytes-out]}
    }
}
1126
# memory-bytes when slot-stats is NOT enabled on startup: memory tracking can
# never be switched on at runtime, while cpu/net tracking can.
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled no}} {
    # Define shared variables.
    set key "FOO"
    set key_slot [R 0 cluster keyslot $key]

    test "CLUSTER SLOT-STATS memory-bytes field not present when cluster-slot-stats-enabled not set on startup" {
        R 0 SET $key VALUE
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Verify memory-bytes field is not present
        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {![dict exists $stats memory-bytes]}

        # Only key-count should be present
        assert {[dict exists $stats key-count]}
        assert {[dict get $stats key-count] == 1}
    }

    test "CLUSTER SLOT-STATS enabling mem at runtime fails when not enabled at startup" {
        # Trying to enable memory tracking at runtime should fail
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled mem}
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled yes}
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled "cpu net mem"}
    }

    test "CLUSTER SLOT-STATS enabling cpu and net at runtime works" {
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu net"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Verify memory-bytes field is still not present
        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {![dict exists $stats memory-bytes]}

        # Other stats fields should now be present
        assert {[dict exists $stats cpu-usec]}
        assert {[dict exists $stats network-bytes-in]}
        assert {[dict exists $stats network-bytes-out]}
    }
}