diff options
Diffstat (limited to 'examples/redis-unstable/tests/unit/cluster')
15 files changed, 0 insertions, 5944 deletions
diff --git a/examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl b/examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl deleted file mode 100644 index a37ca58..0000000 --- a/examples/redis-unstable/tests/unit/cluster/announced-endpoints.tcl +++ /dev/null | |||
| @@ -1,75 +0,0 @@ | |||
| 1 | start_cluster 2 2 {tags {external:skip cluster}} { | ||
| 2 | |||
| 3 | test "Test change cluster-announce-port and cluster-announce-tls-port at runtime" { | ||
| 4 | if {$::tls} { | ||
| 5 | set baseport [lindex [R 0 config get tls-port] 1] | ||
| 6 | } else { | ||
| 7 | set baseport [lindex [R 0 config get port] 1] | ||
| 8 | } | ||
| 9 | set count [expr [llength $::servers] + 1] | ||
| 10 | set used_port [find_available_port $baseport $count] | ||
| 11 | |||
| 12 | R 0 config set cluster-announce-tls-port $used_port | ||
| 13 | R 0 config set cluster-announce-port $used_port | ||
| 14 | |||
| 15 | assert_match "*:$used_port@*" [R 0 CLUSTER NODES] | ||
| 16 | wait_for_condition 50 100 { | ||
| 17 | [string match "*:$used_port@*" [R 1 CLUSTER NODES]] | ||
| 18 | } else { | ||
| 19 | fail "Cluster announced port was not propagated via gossip" | ||
| 20 | } | ||
| 21 | |||
| 22 | R 0 config set cluster-announce-tls-port 0 | ||
| 23 | R 0 config set cluster-announce-port 0 | ||
| 24 | assert_match "*:$baseport@*" [R 0 CLUSTER NODES] | ||
| 25 | } | ||
| 26 | |||
| 27 | test "Test change cluster-announce-bus-port at runtime" { | ||
| 28 | if {$::tls} { | ||
| 29 | set baseport [lindex [R 0 config get tls-port] 1] | ||
| 30 | } else { | ||
| 31 | set baseport [lindex [R 0 config get port] 1] | ||
| 32 | } | ||
| 33 | set count [expr [llength $::servers] + 1] | ||
| 34 | set used_port [find_available_port $baseport $count] | ||
| 35 | |||
| 36 | # Verify config set cluster-announce-bus-port | ||
| 37 | R 0 config set cluster-announce-bus-port $used_port | ||
| 38 | assert_match "*@$used_port *" [R 0 CLUSTER NODES] | ||
| 39 | wait_for_condition 50 100 { | ||
| 40 | [string match "*@$used_port *" [R 1 CLUSTER NODES]] | ||
| 41 | } else { | ||
| 42 | fail "Cluster announced port was not propagated via gossip" | ||
| 43 | } | ||
| 44 | |||
| 45 | # Verify restore default cluster-announce-port | ||
| 46 | set base_bus_port [expr $baseport + 10000] | ||
| 47 | R 0 config set cluster-announce-bus-port 0 | ||
| 48 | assert_match "*@$base_bus_port *" [R 0 CLUSTER NODES] | ||
| 49 | } | ||
| 50 | |||
| 51 | test "CONFIG SET port updates cluster-announced port" { | ||
| 52 | set count [expr [llength $::servers] + 1] | ||
| 53 | # Get the original port and change to new_port | ||
| 54 | if {$::tls} { | ||
| 55 | set orig_port [lindex [R 0 config get tls-port] 1] | ||
| 56 | } else { | ||
| 57 | set orig_port [lindex [R 0 config get port] 1] | ||
| 58 | } | ||
| 59 | assert {$orig_port != ""} | ||
| 60 | set new_port [find_available_port $orig_port $count] | ||
| 61 | |||
| 62 | if {$::tls} { | ||
| 63 | R 0 config set tls-port $new_port | ||
| 64 | } else { | ||
| 65 | R 0 config set port $new_port | ||
| 66 | } | ||
| 67 | |||
| 68 | # Verify that the new port appears in the output of cluster slots | ||
| 69 | wait_for_condition 50 100 { | ||
| 70 | [string match "*$new_port*" [R 0 cluster slots]] | ||
| 71 | } else { | ||
| 72 | fail "Cluster announced port was not updated in cluster slots" | ||
| 73 | } | ||
| 74 | } | ||
| 75 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl b/examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl deleted file mode 100644 index f04257f..0000000 --- a/examples/redis-unstable/tests/unit/cluster/atomic-slot-migration.tcl +++ /dev/null | |||
| @@ -1,3063 +0,0 @@ | |||
| 1 | set ::slot_prefixes [dict create \ | ||
| 2 | 0 "{06S}" \ | ||
| 3 | 1 "{Qi}" \ | ||
| 4 | 2 "{5L5}" \ | ||
| 5 | 3 "{4Iu}" \ | ||
| 6 | 4 "{4gY}" \ | ||
| 7 | 5 "{460}" \ | ||
| 8 | 6 "{1Y7}" \ | ||
| 9 | 7 "{1LV}" \ | ||
| 10 | 101 "{1j2}" \ | ||
| 11 | 102 "{75V}" \ | ||
| 12 | 103 "{bno}" \ | ||
| 13 | 5462 "{450}"\ | ||
| 14 | 5463 "{4dY}"\ | ||
| 15 | 6000 "{4L7}" \ | ||
| 16 | 6001 "{4YV}" \ | ||
| 17 | 6002 "{0bx}" \ | ||
| 18 | 6003 "{AJ}" \ | ||
| 19 | 6004 "{of}" \ | ||
| 20 | 16383 "{6ZJ}" \ | ||
| 21 | ] | ||
| 22 | |||
| 23 | # Helper functions | ||
| 24 | proc get_port {node_id} { | ||
| 25 | if {$::tls} { | ||
| 26 | return [lindex [R $node_id config get tls-port] 1] | ||
| 27 | } else { | ||
| 28 | return [lindex [R $node_id config get port] 1] | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | # return the prefix for the given slot | ||
| 33 | proc slot_prefix {slot} { | ||
| 34 | return [dict get $::slot_prefixes $slot] | ||
| 35 | } | ||
| 36 | |||
| 37 | # return a key for the given slot | ||
| 38 | proc slot_key {slot {suffix ""}} { | ||
| 39 | return "[slot_prefix $slot]$suffix" | ||
| 40 | } | ||
| 41 | |||
| 42 | # Populate a slot with keys | ||
| 43 | # TODO: Consider merging with populate() | ||
| 44 | proc populate_slot {num args} { | ||
| 45 | # Default values | ||
| 46 | set prefix "key:" | ||
| 47 | set size 3 | ||
| 48 | set idx 0 | ||
| 49 | set prints false | ||
| 50 | set expires 0 | ||
| 51 | set slot -1 | ||
| 52 | |||
| 53 | # Parse named arguments | ||
| 54 | foreach {key value} $args { | ||
| 55 | switch -- $key { | ||
| 56 | -prefix { set prefix $value } | ||
| 57 | -size { set size $value } | ||
| 58 | -idx { set idx $value } | ||
| 59 | -prints { set prints $value } | ||
| 60 | -expires { set expires $value } | ||
| 61 | -slot { set slot $value } | ||
| 62 | default { error "Unknown option: $key" } | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | # If slot is specified, use slot prefix from table | ||
| 67 | if {$slot >= 0} { | ||
| 68 | if {[dict exists $::slot_prefixes $slot]} { | ||
| 69 | set prefix [dict get $::slot_prefixes $slot] | ||
| 70 | } else { | ||
| 71 | error "Slot $slot not supported in slot_prefixes table, add it manually" | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | R $idx deferred 1 | ||
| 76 | if {$num > 16} {set pipeline 16} else {set pipeline $num} | ||
| 77 | set val [string repeat A $size] | ||
| 78 | for {set j 0} {$j < $pipeline} {incr j} { | ||
| 79 | if {$expires > 0} { | ||
| 80 | R $idx set $prefix$j $val ex $expires | ||
| 81 | } else { | ||
| 82 | R $idx set $prefix$j $val | ||
| 83 | } | ||
| 84 | if {$prints} {puts $j} | ||
| 85 | } | ||
| 86 | for {} {$j < $num} {incr j} { | ||
| 87 | if {$expires > 0} { | ||
| 88 | R $idx set $prefix$j $val ex $expires | ||
| 89 | } else { | ||
| 90 | R $idx set $prefix$j $val | ||
| 91 | } | ||
| 92 | R $idx read | ||
| 93 | if {$prints} {puts $j} | ||
| 94 | } | ||
| 95 | for {set j 0} {$j < $pipeline} {incr j} { | ||
| 96 | R $idx read | ||
| 97 | if {$prints} {puts $j} | ||
| 98 | } | ||
| 99 | R $idx deferred 0 | ||
| 100 | } | ||
| 101 | |||
| 102 | # Return 1 if all instances are idle | ||
| 103 | proc asm_all_instances_idle {total} { | ||
| 104 | for {set i 0} {$i < $total} {incr i} { | ||
| 105 | if {[CI $i cluster_slot_migration_active_tasks] != 0} { return 0 } | ||
| 106 | if {[CI $i cluster_slot_migration_active_trim_running] != 0} { return 0 } | ||
| 107 | } | ||
| 108 | return 1 | ||
| 109 | } | ||
| 110 | |||
| 111 | # Wait for all ASM tasks to complete in the cluster | ||
| 112 | proc wait_for_asm_done {} { | ||
| 113 | set total_instances [expr {$::cluster_master_nodes + $::cluster_replica_nodes}] | ||
| 114 | |||
| 115 | wait_for_condition 1000 10 { | ||
| 116 | [asm_all_instances_idle $total_instances] == 1 | ||
| 117 | } else { | ||
| 118 | # Print the number of active tasks on each instance | ||
| 119 | for {set i 0} {$i < $total_instances} {incr i} { | ||
| 120 | set migration_count [CI $i cluster_slot_migration_active_tasks] | ||
| 121 | set trim_count [CI $i cluster_slot_migration_active_trim_running] | ||
| 122 | puts "Instance $i: migration_tasks=$migration_count, trim_tasks=$trim_count" | ||
| 123 | } | ||
| 124 | fail "ASM tasks did not complete on all instances" | ||
| 125 | } | ||
| 126 | # wait all nodes to reach the same cluster config after ASM | ||
| 127 | wait_for_cluster_propagation | ||
| 128 | } | ||
| 129 | |||
| 130 | proc failover_and_wait_for_done {node_id {failover_arg ""}} { | ||
| 131 | set max_attempts 5 | ||
| 132 | for {set attempt 1} {$attempt <= $max_attempts} {incr attempt} { | ||
| 133 | if {$failover_arg eq ""} { | ||
| 134 | R $node_id cluster failover | ||
| 135 | } else { | ||
| 136 | R $node_id cluster failover $failover_arg | ||
| 137 | } | ||
| 138 | |||
| 139 | set completed 1 | ||
| 140 | wait_for_condition 1000 10 { | ||
| 141 | [string match "*master*" [R $node_id role]] | ||
| 142 | } else { | ||
| 143 | set completed 0 | ||
| 144 | } | ||
| 145 | |||
| 146 | if {$completed} { | ||
| 147 | wait_for_cluster_propagation | ||
| 148 | return | ||
| 149 | } | ||
| 150 | } | ||
| 151 | fail "Failover did not complete after $max_attempts attempts for node $node_id" | ||
| 152 | } | ||
| 153 | |||
| 154 | proc migration_status {node_id task_id field} { | ||
| 155 | set status [R $node_id CLUSTER MIGRATION STATUS ID $task_id] | ||
| 156 | |||
| 157 | # STATUS ID returns single task, so get first element | ||
| 158 | if {[llength $status] == 0} { | ||
| 159 | return "" | ||
| 160 | } | ||
| 161 | |||
| 162 | set task_status [lindex $status 0] | ||
| 163 | set field_value "" | ||
| 164 | |||
| 165 | # Parse the key-value pairs in the task | ||
| 166 | for {set i 0} {$i < [llength $task_status]} {incr i 2} { | ||
| 167 | set key [lindex $task_status $i] | ||
| 168 | set value [lindex $task_status [expr $i + 1]] | ||
| 169 | |||
| 170 | if {$key eq $field} { | ||
| 171 | set field_value $value | ||
| 172 | break | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | return $field_value | ||
| 177 | } | ||
| 178 | |||
| 179 | # Setup slot migration test with keys and delay, then start migration | ||
| 180 | # Returns the task_id for the migration | ||
| 181 | proc setup_slot_migration_with_delay {src_node dst_node start_slot end_slot {keys 2} {delay 1000000}} { | ||
| 182 | # Two keys on the start slot | ||
| 183 | populate_slot $keys -idx $src_node -slot $start_slot | ||
| 184 | |||
| 185 | # we set a delay to ensure migration takes time for testing, | ||
| 186 | # with default parameters, two keys cost 2s to save | ||
| 187 | R $src_node config set rdb-key-save-delay $delay | ||
| 188 | |||
| 189 | # migrate slot range from src_node to dst_node | ||
| 190 | set task_id [R $dst_node CLUSTER MIGRATION IMPORT $start_slot $end_slot] | ||
| 191 | wait_for_condition 2000 10 { | ||
| 192 | [string match {*send-bulk-and-stream*} [migration_status $src_node $task_id state]] | ||
| 193 | } else { | ||
| 194 | fail "ASM task did not start" | ||
| 195 | } | ||
| 196 | |||
| 197 | return $task_id | ||
| 198 | } | ||
| 199 | |||
| 200 | # Helper function to clear module internal event logs | ||
| 201 | proc clear_module_event_log {} { | ||
| 202 | for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} { | ||
| 203 | R $i asm.clear_event_log | ||
| 204 | } | ||
| 205 | } | ||
| 206 | |||
| 207 | proc reset_default_trim_method {} { | ||
| 208 | for {set i 0} {$i < $::cluster_master_nodes + $::cluster_replica_nodes} {incr i} { | ||
| 209 | R $i debug asm-trim-method default | ||
| 210 | } | ||
| 211 | } | ||
| 212 | |||
| 213 | start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no}} { | ||
| 214 | foreach trim_method {"active" "bg"} { | ||
| 215 | test "Simple slot migration (trim method: $trim_method)" { | ||
| 216 | R 0 debug asm-trim-method $trim_method | ||
| 217 | R 3 debug asm-trim-method $trim_method | ||
| 218 | |||
| 219 | set slot0_key [slot_key 0 mykey] | ||
| 220 | R 0 set $slot0_key "a" | ||
| 221 | set slot1_key [slot_key 1 mykey] | ||
| 222 | R 0 set $slot1_key "b" | ||
| 223 | set slot101_key [slot_key 101 mykey] | ||
| 224 | R 0 set $slot101_key "c" | ||
| 225 | # 3 keys cost 3s to save | ||
| 226 | R 0 config set rdb-key-save-delay 1000000 | ||
| 227 | |||
| 228 | # load a function | ||
| 229 | R 0 function load {#!lua name=test1 | ||
| 230 | redis.register_function('test1', function() return 'hello1' end) | ||
| 231 | } | ||
| 232 | |||
| 233 | # migrate slot 0-100 to R 1 | ||
| 234 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100] | ||
| 235 | # migration is start, and in accumulating buffer stage | ||
| 236 | wait_for_condition 1000 50 { | ||
| 237 | [string match {*send-bulk-and-stream*} [migration_status 0 $task_id state]] && | ||
| 238 | [string match {*accumulate-buffer*} [migration_status 1 $task_id state]] | ||
| 239 | } else { | ||
| 240 | fail "ASM task did not start" | ||
| 241 | } | ||
| 242 | |||
| 243 | # append 99 times during migration | ||
| 244 | for {set i 0} {$i < 99} {incr i} { | ||
| 245 | R 0 multi | ||
| 246 | R 0 append $slot0_key "a" | ||
| 247 | R 0 exec | ||
| 248 | R 0 append $slot1_key "b" | ||
| 249 | R 0 append $slot101_key "c" | ||
| 250 | } | ||
| 251 | |||
| 252 | # wait until migration of 0-100 successful | ||
| 253 | wait_for_asm_done | ||
| 254 | |||
| 255 | # verify task state became completed | ||
| 256 | assert_equal "completed" [migration_status 0 $task_id state] | ||
| 257 | assert_equal "completed" [migration_status 1 $task_id state] | ||
| 258 | |||
| 259 | # the appended 99 times should also be migrated | ||
| 260 | assert_equal [string repeat a 100] [R 1 get $slot0_key] | ||
| 261 | assert_equal [string repeat b 100] [R 1 get $slot1_key] | ||
| 262 | |||
| 263 | # function should be migrated | ||
| 264 | assert_equal [R 0 function dump] [R 1 function dump] | ||
| 265 | # the slave should also get the data | ||
| 266 | wait_for_ofs_sync [Rn 1] [Rn 4] | ||
| 267 | |||
| 268 | R 4 readonly | ||
| 269 | assert_equal [string repeat a 100] [R 4 get $slot0_key] | ||
| 270 | assert_equal [string repeat b 100] [R 4 get $slot1_key] | ||
| 271 | assert_equal [R 0 function dump] [R 4 function dump] | ||
| 272 | |||
| 273 | # verify key that was not in the slot range is not migrated | ||
| 274 | assert_equal [string repeat c 100] [R 0 get $slot101_key] | ||
| 275 | # verify changes in replica | ||
| 276 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 277 | R 3 readonly | ||
| 278 | assert_equal [string repeat c 100] [R 3 get $slot101_key] | ||
| 279 | |||
| 280 | # cleanup | ||
| 281 | R 0 config set rdb-key-save-delay 0 | ||
| 282 | R 0 flushall | ||
| 283 | R 0 function flush | ||
| 284 | R 1 flushall | ||
| 285 | R 1 function flush | ||
| 286 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 287 | wait_for_asm_done | ||
| 288 | } | ||
| 289 | } | ||
| 290 | } | ||
| 291 | |||
| 292 | # Skip most of the tests when running under valgrind since it is hard to | ||
| 293 | # stabilize tests under valgrind. | ||
| 294 | if {!$::valgrind} { | ||
| 295 | start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no}} { | ||
| 296 | test "Test CLUSTER MIGRATION IMPORT input validation" { | ||
| 297 | # invalid arguments | ||
| 298 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION} | ||
| 299 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION IMPORT} | ||
| 300 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION IMPORT 100} | ||
| 301 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION IMPORT 100 200 300} | ||
| 302 | assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION UNKNOWN 1 2} | ||
| 303 | |||
| 304 | # invalid slot range | ||
| 305 | assert_error {*greater than end slot number*} {R 0 CLUSTER MIGRATION IMPORT 200 100} | ||
| 306 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 17000 18000} | ||
| 307 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 14000 18000} | ||
| 308 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 0 16384} | ||
| 309 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 0 -1} | ||
| 310 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT -1 2} | ||
| 311 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT -2 -1} | ||
| 312 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT 10 a} | ||
| 313 | assert_error {*out of range slot*} {R 0 CLUSTER MIGRATION IMPORT sd sd} | ||
| 314 | assert_error {*already the owner of the slot*} {R 0 CLUSTER MIGRATION IMPORT 100 200} | ||
| 315 | } | ||
| 316 | |||
| 317 | test "Test CLUSTER MIGRATION CANCEL input validation" { | ||
| 318 | # invalid arguments | ||
| 319 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL} | ||
| 320 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL ID} | ||
| 321 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL ID 12345 EXTRAARG} | ||
| 322 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION CANCEL ALL EXTRAARG} | ||
| 323 | assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION CANCEL UNKNOWNARG} | ||
| 324 | assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION CANCEL abc def} | ||
| 325 | # empty string id should not cancel any task | ||
| 326 | assert_equal 0 [R 0 CLUSTER MIGRATION CANCEL ID ""] | ||
| 327 | } | ||
| 328 | |||
| 329 | test "Test CLUSTER MIGRATION STATUS input validation" { | ||
| 330 | # invalid arguments | ||
| 331 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS} | ||
| 332 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS ID} | ||
| 333 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS ID id EXTRAARG} | ||
| 334 | assert_error {*wrong number of arguments*} {R 0 CLUSTER MIGRATION STATUS ALL EXTRAARG} | ||
| 335 | assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION STATUS ABC DEF} | ||
| 336 | assert_error {*unknown argument*} {R 0 CLUSTER MIGRATION STATUS UNKNOWNARG} | ||
| 337 | # empty string id should not list any task | ||
| 338 | assert_equal {} [R 0 CLUSTER MIGRATION STATUS ID ""] | ||
| 339 | } | ||
| 340 | |||
| 341 | test "Test TRIMSLOTS input validation" { | ||
| 342 | # Wrong number of arguments | ||
| 343 | assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS} | ||
| 344 | assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES} | ||
| 345 | assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES 1} | ||
| 346 | assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES 2 100} | ||
| 347 | assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES 17000 1} | ||
| 348 | assert_error {*wrong number of arguments*} {R 0 TRIMSLOTS RANGES abc} | ||
| 349 | |||
| 350 | # Missing ranges argument | ||
| 351 | assert_error {*missing ranges argument*} {R 0 TRIMSLOTS UNKNOWN 1 100 200} | ||
| 352 | |||
| 353 | # Invalid number of ranges | ||
| 354 | assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES 0 1 1} | ||
| 355 | assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES -1 2 2} | ||
| 356 | assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES 17000 1 2} | ||
| 357 | assert_error {*invalid number of ranges*} {R 0 TRIMSLOTS RANGES 2 100 200 300} | ||
| 358 | |||
| 359 | # Invalid slot numbers | ||
| 360 | assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 -1 0} | ||
| 361 | assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 -2 -1} | ||
| 362 | assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 0 16384} | ||
| 363 | assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 abc def} | ||
| 364 | assert_error {*out of range slot*} {R 0 TRIMSLOTS RANGES 1 100 abc} | ||
| 365 | |||
| 366 | # Start slot greater than end slot | ||
| 367 | assert_error {*greater than end slot number*} {R 0 TRIMSLOTS RANGES 1 200 100} | ||
| 368 | } | ||
| 369 | |||
| 370 | test "Test IMPORT not allowed on replica" { | ||
| 371 | assert_error {* not allowed on replica*} {R 4 CLUSTER MIGRATION IMPORT 100 200} | ||
| 372 | } | ||
| 373 | |||
| 374 | test "Test IMPORT not allowed during manual migration" { | ||
| 375 | set dst_id [R 1 CLUSTER MYID] | ||
| 376 | |||
| 377 | # Set a slot to IMPORTING | ||
| 378 | R 0 CLUSTER SETSLOT 15000 IMPORTING $dst_id | ||
| 379 | assert_error {*must be STABLE to start*slot migration*} {R 0 CLUSTER MIGRATION IMPORT 100 200} | ||
| 380 | # Revert the change | ||
| 381 | R 0 CLUSTER SETSLOT 15000 STABLE | ||
| 382 | |||
| 383 | # Same test with setting a slot to MIGRATING | ||
| 384 | R 0 CLUSTER SETSLOT 5000 MIGRATING $dst_id | ||
| 385 | assert_error {*must be STABLE to start*slot migration*} {R 0 CLUSTER MIGRATION IMPORT 100 200} | ||
| 386 | # Revert the change | ||
| 387 | R 0 CLUSTER SETSLOT 5000 STABLE | ||
| 388 | } | ||
| 389 | |||
| 390 | test "Test IMPORT not allowed if the node is already the owner" { | ||
| 391 | assert_error {*already the owner of the slot*} {R 0 CLUSTER MIGRATION IMPORT 100 100} | ||
| 392 | } | ||
| 393 | |||
| 394 | test "Test IMPORT not allowed for a slot without an owner" { | ||
| 395 | # Slot will have no owner | ||
| 396 | R 0 CLUSTER DELSLOTS 5000 | ||
| 397 | |||
| 398 | assert_error {*slot has no owner: 5000*} {R 0 CLUSTER MIGRATION IMPORT 5000 5000} | ||
| 399 | |||
| 400 | # Revert the change | ||
| 401 | R 0 CLUSTER ADDSLOTS 5000 | ||
| 402 | } | ||
| 403 | |||
| 404 | test "Test IMPORT not allowed if slot ranges belong to different nodes" { | ||
| 405 | assert_error {*slots belong to different source nodes*} {R 0 CLUSTER MIGRATION IMPORT 7000 15000} | ||
| 406 | assert_error {*slots belong to different source nodes*} {R 0 CLUSTER MIGRATION IMPORT 7000 8000 14000 15000} | ||
| 407 | } | ||
| 408 | |||
| 409 | test "Test IMPORT not allowed if slot is given multiple times" { | ||
| 410 | assert_error {*Slot*specified multiple times*} {R 0 CLUSTER MIGRATION IMPORT 7000 8000 8000 9000} | ||
| 411 | assert_error {*Slot*specified multiple times*} {R 0 CLUSTER MIGRATION IMPORT 7000 8000 7900 9000} | ||
| 412 | } | ||
| 413 | |||
| 414 | test "Test CLUSTER MIGRATION STATUS ALL lists all tasks" { | ||
| 415 | # Create 3 completed tasks | ||
| 416 | R 0 CLUSTER MIGRATION IMPORT 7000 7001 | ||
| 417 | wait_for_asm_done | ||
| 418 | R 0 CLUSTER MIGRATION IMPORT 7002 7003 | ||
| 419 | wait_for_asm_done | ||
| 420 | R 0 CLUSTER MIGRATION IMPORT 7004 7005 | ||
| 421 | wait_for_asm_done | ||
| 422 | |||
| 423 | # Get node IDs for verification | ||
| 424 | set node0_id [R 0 cluster myid] | ||
| 425 | set node1_id [R 1 cluster myid] | ||
| 426 | |||
| 427 | # Verify CLUSTER MIGRATION STATUS ALL reply from both nodes | ||
| 428 | foreach node_idx {0 1} { | ||
| 429 | set tasks [R $node_idx CLUSTER MIGRATION STATUS ALL] | ||
| 430 | assert_equal 3 [llength $tasks] | ||
| 431 | |||
| 432 | for {set i 0} {$i < 3} {incr i} { | ||
| 433 | set task [lindex $tasks $i] | ||
| 434 | |||
| 435 | # Verify field order | ||
| 436 | set expected_fields {id slots source dest operation state | ||
| 437 | last_error retries create_time start_time | ||
| 438 | end_time write_pause_ms} | ||
| 439 | for {set j 0} {$j < [llength $expected_fields]} {incr j} { | ||
| 440 | set expected_field [lindex $expected_fields $j] | ||
| 441 | set actual_field [lindex $task [expr $j * 2]] | ||
| 442 | assert_equal $expected_field $actual_field | ||
| 443 | } | ||
| 444 | |||
| 445 | # Verify basic fields | ||
| 446 | assert_equal "completed" [dict get $task state] | ||
| 447 | assert_equal "" [dict get $task last_error] | ||
| 448 | assert_equal 0 [dict get $task retries] | ||
| 449 | assert {[dict get $task write_pause_ms] >= 0} | ||
| 450 | |||
| 451 | # Verify operation based on node | ||
| 452 | if {$node_idx == 0} { | ||
| 453 | assert_equal "import" [dict get $task operation] | ||
| 454 | } else { | ||
| 455 | assert_equal "migrate" [dict get $task operation] | ||
| 456 | } | ||
| 457 | |||
| 458 | # Verify node IDs (all tasks: node1 -> node0) | ||
| 459 | assert_equal $node1_id [dict get $task source] | ||
| 460 | assert_equal $node0_id [dict get $task dest] | ||
| 461 | |||
| 462 | # Verify timestamps exist and are reasonable | ||
| 463 | set create_time [dict get $task create_time] | ||
| 464 | set start_time [dict get $task start_time] | ||
| 465 | set end_time [dict get $task end_time] | ||
| 466 | assert {$create_time > 0} | ||
| 467 | assert {$start_time >= $create_time} | ||
| 468 | assert {$end_time >= $start_time} | ||
| 469 | |||
| 470 | # Verify specific slot ranges for each task | ||
| 471 | set slots [dict get $task slots] | ||
| 472 | if {$i == 0} { | ||
| 473 | assert_equal "7004-7005" $slots | ||
| 474 | } elseif {$i == 1} { | ||
| 475 | assert_equal "7002-7003" $slots | ||
| 476 | } elseif {$i == 2} { | ||
| 477 | assert_equal "7000-7001" $slots | ||
| 478 | } | ||
| 479 | } | ||
| 480 | } | ||
| 481 | |||
| 482 | # cleanup | ||
| 483 | R 1 CLUSTER MIGRATION IMPORT 7000 7005 | ||
| 484 | wait_for_asm_done | ||
| 485 | } | ||
| 486 | |||
| 487 | test "Test IMPORT not allowed if there is an overlapping import" { | ||
| 488 | # Let slot migration take long time, so that we can test overlapping import | ||
| 489 | R 1 config set rdb-key-save-delay 1000000 | ||
| 490 | R 1 set tag22273 tag22273 ;# slot hash is 7000 | ||
| 491 | R 1 set tag9283 tag9283 ;# slot hash is 8000 | ||
| 492 | |||
| 493 | set task_id [R 0 CLUSTER MIGRATION IMPORT 7000 8000] | ||
| 494 | assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 8000 9000} | ||
| 495 | assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 7500 8500} | ||
| 496 | assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 6000 7000} | ||
| 497 | assert_error {*overlapping import exists*} {R 0 CLUSTER MIGRATION IMPORT 6500 7500} | ||
| 498 | |||
| 499 | wait_for_condition 1000 50 { | ||
| 500 | [string match {*completed*} [migration_status 0 $task_id state]] && | ||
| 501 | [string match {*completed*} [migration_status 1 $task_id state]] | ||
| 502 | } else { | ||
| 503 | fail "ASM task did not start" | ||
| 504 | } | ||
| 505 | assert_equal "tag22273" [R 0 get tag22273] | ||
| 506 | assert_equal "tag9283" [R 0 get tag9283] | ||
| 507 | R 1 config set rdb-key-save-delay 0 | ||
| 508 | |||
| 509 | # revert the migration | ||
| 510 | R 1 CLUSTER MIGRATION IMPORT 7000 8000 | ||
| 511 | wait_for_asm_done | ||
| 512 | } | ||
| 513 | |||
| 514 | test "Test IMPORT with unsorted and adjacent ranges" { | ||
| 515 | # Redis should sort and merge adjacent ranges | ||
| 516 | # Adjacent means: prev.end + 1 == next.start | ||
| 517 | # e.g. 7000-7001 7002-7003 7004-7005 => 7000-7005 | ||
| 518 | |||
| 519 | # Test with adjacent ranges | ||
| 520 | set task_id [R 0 CLUSTER MIGRATION IMPORT 7000 7001 7002 7100] | ||
| 521 | wait_for_asm_done | ||
| 522 | # verify migration is successfully completed on both nodes | ||
| 523 | assert_equal "completed" [migration_status 0 $task_id state] | ||
| 524 | assert_equal "completed" [migration_status 1 $task_id state] | ||
| 525 | # verify slot ranges are merged correctly | ||
| 526 | assert_equal "7000-7100" [migration_status 0 $task_id slots] | ||
| 527 | assert_equal "7000-7100" [migration_status 1 $task_id slots] | ||
| 528 | |||
| 529 | # Test with unsorted and adjacent ranges | ||
| 530 | set task_id [R 1 CLUSTER MIGRATION IMPORT 7050 7051 7010 7049 7000 7005] | ||
| 531 | wait_for_asm_done | ||
| 532 | # verify migration is successfully completed on both nodes | ||
| 533 | assert_equal "completed" [migration_status 0 $task_id state] | ||
| 534 | assert_equal "completed" [migration_status 1 $task_id state] | ||
| 535 | # verify slot ranges are merged correctly | ||
| 536 | assert_equal "7000-7005 7010-7051" [migration_status 0 $task_id slots] | ||
| 537 | assert_equal "7000-7005 7010-7051" [migration_status 1 $task_id slots] | ||
| 538 | |||
| 539 | # Another test with unsorted and adjacent ranges | ||
| 540 | set task_id [R 1 CLUSTER MIGRATION IMPORT 7007 7007 7008 7009 7006 7006] | ||
| 541 | wait_for_asm_done | ||
| 542 | # verify migration is successfully completed on both nodes | ||
| 543 | assert_equal "completed" [migration_status 0 $task_id state] | ||
| 544 | assert_equal "completed" [migration_status 1 $task_id state] | ||
| 545 | # verify slot ranges are merged correctly | ||
| 546 | assert_equal "7006-7009" [migration_status 0 $task_id slots] | ||
| 547 | assert_equal "7006-7009" [migration_status 1 $task_id slots] | ||
| 548 | } | ||
| 549 | |||
| 550 | test "Simple slot migration with write load" { | ||
| 551 | # Perform slot migration while traffic is on and verify data consistency. | ||
| 552 | # Trimming is disabled on source nodes so, we can compare the dbs after | ||
| 553 | # migration via DEBUG DIGEST to ensure no data loss during migration. | ||
| 554 | # Steps: | ||
| 555 | # 1. Disable trimming on both nodes | ||
| 556 | # 2. Populate slot 0 on node-0 and slot 6000 on node-1 | ||
| 557 | # 2. Start write traffic on both nodes | ||
| 558 | # 3. Migrate slot 0 from node-0 to node-1 | ||
| 559 | # 4. Migrate slot 6000 from node-1 to node-0 | ||
| 560 | # 5. Stop write traffic, verify db's are identical. | ||
| 561 | |||
| 562 | # This test runs slowly under the thread sanitizer. | ||
| 563 | # 1. Increase the lag threshold from the default 1 MB to 10 MB to let the destination catch up easily. | ||
| 564 | # 2. Increase the write pause timeout from the default 10s to 60s so the source can wait longer. | ||
| 565 | set prev_config_lag [lindex [R 0 config get cluster-slot-migration-handoff-max-lag-bytes] 1] | ||
| 566 | R 0 config set cluster-slot-migration-handoff-max-lag-bytes 10mb | ||
| 567 | R 1 config set cluster-slot-migration-handoff-max-lag-bytes 10mb | ||
| 568 | set prev_config_timeout [lindex [R 0 config get cluster-slot-migration-write-pause-timeout] 1] | ||
| 569 | R 0 config set cluster-slot-migration-write-pause-timeout 60000 | ||
| 570 | R 1 config set cluster-slot-migration-write-pause-timeout 60000 | ||
| 571 | |||
| 572 | R 0 flushall | ||
| 573 | R 0 debug asm-trim-method none | ||
| 574 | populate_slot 10000 -idx 0 -slot 0 | ||
| 575 | |||
| 576 | R 1 flushall | ||
| 577 | R 1 debug asm-trim-method none | ||
| 578 | populate_slot 10000 -idx 1 -slot 6000 | ||
| 579 | |||
| 580 | # Start write traffic on node-0 | ||
| 581 | # Throws -MOVED error once asm is completed, catch block will ignore it. | ||
| 582 | catch { | ||
| 583 | # Start the slot 0 write load on the R 0 | ||
| 584 | set port [get_port 0] | ||
| 585 | set key [slot_key 0 mykey] | ||
| 586 | set load_handle0 [start_write_load "127.0.0.1" $port 100 $key 0 5] | ||
| 587 | } | ||
| 588 | |||
| 589 | # Start write traffic on node-1 | ||
| 590 | # Throws -MOVED error once asm is completed, catch block will ignore it. | ||
| 591 | catch { | ||
| 592 | # Start the slot 6000 write load on the R 1 | ||
| 593 | set port [get_port 1] | ||
| 594 | set key [slot_key 6000 mykey] | ||
| 595 | set load_handle1 [start_write_load "127.0.0.1" $port 100 $key 0 5] | ||
| 596 | } | ||
| 597 | |||
| 598 | # Migrate keys | ||
| 599 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 600 | wait_for_asm_done | ||
| 601 | R 0 CLUSTER MIGRATION IMPORT 6000 6100 | ||
| 602 | wait_for_asm_done | ||
| 603 | |||
| 604 | stop_write_load $load_handle0 | ||
| 605 | stop_write_load $load_handle1 | ||
| 606 | |||
| 607 | # verify data | ||
| 608 | assert_morethan [R 0 dbsize] 0 | ||
| 609 | assert_equal [R 0 debug digest] [R 1 debug digest] | ||
| 610 | |||
| 611 | # cleanup | ||
| 612 | R 0 config set cluster-slot-migration-handoff-max-lag-bytes $prev_config_lag | ||
| 613 | R 0 config set cluster-slot-migration-write-pause-timeout $prev_config_timeout | ||
| 614 | R 0 debug asm-trim-method default | ||
| 615 | R 0 flushall | ||
| 616 | R 1 config set cluster-slot-migration-handoff-max-lag-bytes $prev_config_lag | ||
| 617 | R 1 config set cluster-slot-migration-write-pause-timeout $prev_config_timeout | ||
| 618 | R 1 debug asm-trim-method default | ||
| 619 | R 1 flushall | ||
| 620 | |||
| 621 | R 1 CLUSTER MIGRATION IMPORT 6000 6100 | ||
| 622 | wait_for_asm_done | ||
| 623 | } | ||
| 624 | |||
| 625 | test "Verify expire time is migrated correctly" { | ||
| 626 | R 0 flushall | ||
| 627 | R 1 flushall | ||
| 628 | |||
| 629 | set string_key [slot_key 0 string_key] | ||
| 630 | set list_key [slot_key 0 list_key] | ||
| 631 | set hash_key [slot_key 0 hash_key] | ||
| 632 | set stream_key [slot_key 0 stream_key] | ||
| 633 | |||
| 634 | for {set i 0} {$i < 20} {incr i} { | ||
| 635 | R 1 hset $hash_key $i $i | ||
| 636 | R 1 xadd $stream_key * item $i | ||
| 637 | } | ||
| 638 | for {set i 0} {$i < 2000} {incr i} { | ||
| 639 | R 1 lpush $list_key $i | ||
| 640 | } | ||
| 641 | |||
| 642 | # set expire time of some keys | ||
| 643 | R 1 set $string_key "a" EX 1000 | ||
| 644 | R 1 EXPIRE $list_key 1000 | ||
| 645 | R 1 EXPIRE $hash_key 1000 | ||
| 646 | |||
| 647 | # migrate slot 0-100 to R 0 | ||
| 648 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 649 | wait_for_asm_done | ||
| 650 | |||
| 651 | # check expire times are migrated correctly | ||
| 652 | assert_range [R 0 ttl $string_key] 900 1000 | ||
| 653 | assert_range [R 0 ttl $list_key] 900 1000 | ||
| 654 | assert_range [R 0 ttl $hash_key] 900 1000 | ||
| 655 | assert_equal -1 [R 0 ttl $stream_key] | ||
| 656 | |||
| 657 | # cleanup | ||
| 658 | R 0 flushall | ||
| 659 | R 1 flushall | ||
| 660 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 661 | wait_for_asm_done | ||
| 662 | } | ||
| 663 | |||
| 664 | test "Slot migration with complex data types can work well" { | ||
| 665 | R 0 flushall | ||
| 666 | R 1 flushall | ||
| 667 | |||
| 668 | set list_key [slot_key 0 list_key] | ||
| 669 | set set_key [slot_key 0 set_key] | ||
| 670 | set zset_key [slot_key 0 zset_key] | ||
| 671 | set hash_key [slot_key 0 hash_key] | ||
| 672 | set stream_key [slot_key 0 stream_key] | ||
| 673 | |||
| 674 | # generate big keys for each data type | ||
| 675 | for {set i 0} {$i < 1000} {incr i} { | ||
| 676 | R 1 lpush $list_key $i | ||
| 677 | R 1 sadd $set_key $i | ||
| 678 | R 1 zadd $zset_key $i $i | ||
| 679 | R 1 hset $hash_key $i $i | ||
| 680 | R 1 xadd $stream_key * item $i | ||
| 681 | } | ||
| 682 | |||
| 683 | # migrate slot 0-100 to R 0 | ||
| 684 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 685 | wait_for_asm_done | ||
| 686 | # check the data on destination node is correct | ||
| 687 | assert_equal 1000 [R 0 llen $list_key] | ||
| 688 | assert_equal 1000 [R 0 scard $set_key] | ||
| 689 | assert_equal 1000 [R 0 zcard $zset_key] | ||
| 690 | assert_equal 1000 [R 0 hlen $hash_key] | ||
| 691 | assert_equal 1000 [R 0 xlen $stream_key] | ||
| 692 | # migrate slot 0-100 to R 1 | ||
| 693 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 694 | wait_for_asm_done | ||
| 695 | } | ||
| 696 | |||
| 697 | proc asm_basic_error_handling_test {operation channel all_states} { | ||
| 698 | foreach state $all_states { | ||
| 699 | if {$::verbose} { puts "Testing $operation $channel channel with state: $state"} | ||
| 700 | |||
| 701 | # For states that need incremental data streaming, set a longer delay | ||
| 702 | set streaming_states [list "streaming-buffer" "accumulate-buffer" "send-bulk-and-stream" "send-stream"] | ||
| 703 | if {$state in $streaming_states} { | ||
| 704 | R 1 config set rdb-key-save-delay 1000000 | ||
| 705 | } | ||
| 706 | |||
| 707 | # Let the destination node take time to stream buffer, so the source node will handle | ||
| 708 | # slot snapshot child process exit, and then enter "send-stream" state. | ||
| 709 | if {$state == "send-stream"} { | ||
| 710 | R 0 config set key-load-delay 100000 | ||
| 711 | } | ||
| 712 | |||
| 713 | # Start the slot 0 write load on the R 1 | ||
| 714 | set slot0_key [slot_key 0 mykey] | ||
| 715 | set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key 500] | ||
| 716 | |||
| 717 | # clear old fail points and set the new fail point | ||
| 718 | assert_equal {OK} [R 0 debug asm-failpoint "" ""] | ||
| 719 | assert_equal {OK} [R 1 debug asm-failpoint "" ""] | ||
| 720 | if {$operation eq "import"} { | ||
| 721 | assert_equal {OK} [R 0 debug asm-failpoint "import-$channel-channel" $state] | ||
| 722 | } elseif {$operation eq "migrate"} { | ||
| 723 | assert_equal {OK} [R 1 debug asm-failpoint "migrate-$channel-channel" $state] | ||
| 724 | } else { | ||
| 725 | fail "Unknown operation: $operation" | ||
| 726 | } | ||
| 727 | |||
| 728 | # Start the migration | ||
| 729 | set task_id [R 0 CLUSTER MIGRATION IMPORT 0 100] | ||
| 730 | |||
| 731 | # The task should be failed due to the fail point | ||
| 732 | wait_for_condition 2000 10 { | ||
| 733 | [string match -nocase "*$channel*${state}*" [migration_status 0 $task_id last_error]] || | ||
| 734 | [string match -nocase "*$channel*${state}*" [migration_status 1 $task_id last_error]] | ||
| 735 | } else { | ||
| 736 | fail "ASM task did not fail with expected error - | ||
| 737 | (dst: [migration_status 0 $task_id last_error] | ||
| 738 | src: [migration_status 1 $task_id last_error] | ||
| 739 | expected: $channel $state)" | ||
| 740 | } | ||
| 741 | stop_write_load $load_handle | ||
| 742 | |||
| 743 | # Cancel the task | ||
| 744 | R 0 CLUSTER MIGRATION CANCEL ID $task_id | ||
| 745 | R 1 CLUSTER MIGRATION CANCEL ID $task_id | ||
| 746 | |||
| 747 | R 1 config set rdb-key-save-delay 0 | ||
| 748 | R 0 config set key-load-delay 0 | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | test "Destination node main channel basic error-handling tests " { | ||
| 753 | set all_states [list \ | ||
| 754 | "connecting" \ | ||
| 755 | "auth-reply" \ | ||
| 756 | "handshake-reply" \ | ||
| 757 | "syncslots-reply" \ | ||
| 758 | "accumulate-buffer" \ | ||
| 759 | "streaming-buffer" \ | ||
| 760 | "wait-stream-eof" \ | ||
| 761 | ] | ||
| 762 | asm_basic_error_handling_test "import" "main" $all_states | ||
| 763 | } | ||
| 764 | |||
| 765 | test "Destination node rdb channel basic error-handling tests" { | ||
| 766 | set all_states [list \ | ||
| 767 | "connecting" \ | ||
| 768 | "auth-reply" \ | ||
| 769 | "rdbchannel-reply" \ | ||
| 770 | "rdbchannel-transfer" \ | ||
| 771 | ] | ||
| 772 | asm_basic_error_handling_test "import" "rdb" $all_states | ||
| 773 | } | ||
| 774 | |||
| 775 | test "Source node main channel basic error-handling tests " { | ||
| 776 | set all_states [list \ | ||
| 777 | "wait-rdbchannel" \ | ||
| 778 | "send-bulk-and-stream" \ | ||
| 779 | "send-stream" \ | ||
| 780 | "handoff" \ | ||
| 781 | ] | ||
| 782 | asm_basic_error_handling_test "migrate" "main" $all_states | ||
| 783 | } | ||
| 784 | |||
| 785 | test "Source node rdb channel basic error-handling tests" { | ||
| 786 | set all_states [list \ | ||
| 787 | "wait-bgsave-start" \ | ||
| 788 | "send-bulk-and-stream" \ | ||
| 789 | ] | ||
| 790 | asm_basic_error_handling_test "migrate" "rdb" $all_states | ||
| 791 | } | ||
| 792 | |||
| 793 | test "Migration will be successful after fail points are cleared" { | ||
| 794 | R 0 flushall | ||
| 795 | R 1 flushall | ||
| 796 | set slot0_key [slot_key 0 mykey] | ||
| 797 | set slot1_key [slot_key 1 mykey] | ||
| 798 | R 1 set $slot0_key "a" | ||
| 799 | R 1 set $slot1_key "b" | ||
| 800 | |||
| 801 | # we set a delay to write incremental data | ||
| 802 | R 1 config set rdb-key-save-delay 1000000 | ||
| 803 | |||
| 804 | # Start the slot 0 write load on the R 1 | ||
| 805 | set load_handle [start_write_load "127.0.0.1" [get_port 1] 100 $slot0_key] | ||
| 806 | |||
| 807 | # Clear all fail points | ||
| 808 | assert_equal {OK} [R 0 debug asm-failpoint "" ""] | ||
| 809 | assert_equal {OK} [R 1 debug asm-failpoint "" ""] | ||
| 810 | |||
| 811 | # Start the migration | ||
| 812 | set task_id [R 0 CLUSTER MIGRATION IMPORT 0 100] | ||
| 813 | |||
| 814 | # Wait for the migration to complete | ||
| 815 | wait_for_asm_done | ||
| 816 | |||
| 817 | stop_write_load $load_handle | ||
| 818 | |||
| 819 | # Verify the data is migrated, slot 0 and 1 should belong to R 1 | ||
| 820 | # slot 0 key should be changed by the write load | ||
| 821 | assert_not_equal "a" [R 0 get $slot0_key] | ||
| 822 | assert_equal "b" [R 0 get $slot1_key] | ||
| 823 | R 1 config set rdb-key-save-delay 0 | ||
| 824 | } | ||
| 825 | |||
| 826 | test "Client output buffer limit is reached on source side" { | ||
| 827 | R 0 flushall | ||
| 828 | R 1 flushall | ||
| 829 | set r1_pid [S 1 process_id] | ||
| 830 | R 1 debug repl-pause on-streaming-repl-buf | ||
| 831 | |||
| 832 | # Set a small output buffer limit to trigger the error | ||
| 833 | R 0 config set client-output-buffer-limit "replica 4mb 0 0" | ||
| 834 | |||
| 835 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 836 | |||
| 837 | # some write traffic is to have chance to enter streaming buffer state | ||
| 838 | set slot0_key [slot_key 0 mykey] | ||
| 839 | R 0 set $slot0_key "a" | ||
| 840 | |||
| 841 | # after 3 second, the slots snapshot (costs 2s to generate) should be transferred, | ||
| 842 | # then start streaming buffer | ||
| 843 | after 3000 | ||
| 844 | |||
| 845 | set loglines [count_log_lines 0] | ||
| 846 | |||
| 847 | # Start the slot 0 write load on the R 0 | ||
| 848 | set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key 1000] | ||
| 849 | |||
| 850 | # verify the metric is accessible, it is transient, will be reset on disconnect | ||
| 851 | assert {[S 0 mem_cluster_slot_migration_output_buffer] >= 0} | ||
| 852 | |||
| 853 | # After some time, the client output buffer limit should be reached | ||
| 854 | wait_for_log_messages 0 {"*Client * closed * for overcoming of output buffer limits.*"} $loglines 1000 10 | ||
| 855 | wait_for_condition 1000 10 { | ||
| 856 | [string match {*send*stream*} [migration_status 0 $task_id last_error]] | ||
| 857 | } else { | ||
| 858 | fail "ASM task did not fail as expected" | ||
| 859 | } | ||
| 860 | |||
| 861 | stop_write_load $load_handle | ||
| 862 | |||
| 863 | # Reset configurations | ||
| 864 | R 0 config set client-output-buffer-limit "replica 0 0 0" | ||
| 865 | R 0 config set rdb-key-save-delay 0 | ||
| 866 | |||
| 867 | # resume server and clear pause point | ||
| 868 | resume_process $r1_pid | ||
| 869 | R 1 debug repl-pause clear | ||
| 870 | |||
| 871 | # Wait for the migration to complete | ||
| 872 | wait_for_asm_done | ||
| 873 | } | ||
| 874 | |||
| 875 | test "Full sync buffer limit is reached on destination side" { | ||
| 876 | # Set a small replication buffer limit to trigger the error | ||
| 877 | R 0 config set replica-full-sync-buffer-limit 1mb | ||
| 878 | |||
| 879 | # start migration from 1 to 0, cost 4s to transfer slots snapshot | ||
| 880 | set task_id [setup_slot_migration_with_delay 1 0 0 100 2 2000000] | ||
| 881 | set loglines [count_log_lines 0] | ||
| 882 | |||
| 883 | # Create some traffic on slot 0 | ||
| 884 | populate_slot 100 -idx 1 -slot 0 -size 100000 | ||
| 885 | |||
| 886 | # After some time, slots sync buffer limit should be reached, but migration would not fail | ||
| 887 | # since the buffer will be accumulated on source side from now. | ||
| 888 | wait_for_log_messages 0 {"*Slots sync buffer limit has been reached*"} $loglines 1000 10 | ||
| 889 | |||
| 890 | # verify the peak value, should be greater than 1mb | ||
| 891 | assert {[S 0 mem_cluster_slot_migration_input_buffer_peak] > 1000000} | ||
| 892 | # verify the metric is accessible, it is transient, will be reset on disconnect | ||
| 893 | assert {[S 0 mem_cluster_slot_migration_input_buffer] >= 0} | ||
| 894 | |||
| 895 | wait_for_asm_done | ||
| 896 | |||
| 897 | # Reset configurations | ||
| 898 | R 0 config set replica-full-sync-buffer-limit 0 | ||
| 899 | R 1 config set rdb-key-save-delay 0 | ||
| 900 | R 1 cluster migration import 0 100 | ||
| 901 | wait_for_asm_done | ||
| 902 | } | ||
| 903 | |||
| 904 | test "Expired key is not deleted and SCAN/KEYS/RANDOMKEY/CLUSTER GETKEYSINSLOT filter keys in importing slots" { | ||
| 905 | set slot0_key [slot_key 0 mykey] | ||
| 906 | set slot1_key [slot_key 1 mykey] | ||
| 907 | set slot2_key [slot_key 2 mykey] | ||
| 908 | R 1 flushall | ||
| 909 | R 0 flushall | ||
| 910 | |||
| 911 | # we set a delay to write incremental data | ||
| 912 | R 1 config set rdb-key-save-delay 1000000 | ||
| 913 | |||
| 914 | # set expire time 2s. Generating slot snapshot will 3s, so these | ||
| 915 | # three keys will be expired after slot snapshot is transferred | ||
| 916 | R 1 setex $slot0_key 2 "a" | ||
| 917 | R 1 setex $slot1_key 2 "b" | ||
| 918 | R 1 hset $slot2_key "f1" "1" | ||
| 919 | R 1 expire $slot2_key 2 | ||
| 920 | R 1 hexpire $slot2_key 2 FIELDS 1 "f1" | ||
| 921 | |||
| 922 | set task_id [R 0 CLUSTER MIGRATION IMPORT 0 100] | ||
| 923 | wait_for_condition 2000 10 { | ||
| 924 | [string match {*send-bulk-and-stream*} [migration_status 1 $task_id state]] | ||
| 925 | } else { | ||
| 926 | fail "ASM task did not start" | ||
| 927 | } | ||
| 928 | |||
| 929 | # update expire time during mirgration | ||
| 930 | R 1 setex $slot0_key 100 "a" | ||
| 931 | R 1 expire $slot1_key 80 | ||
| 932 | R 1 expire $slot2_key 60 | ||
| 933 | R 1 hincrbyfloat $slot2_key "f1" 1 | ||
| 934 | R 1 hexpire $slot2_key 60 FIELDS 1 "f1" | ||
| 935 | |||
| 936 | # after 2s, at least a key should be transferred, and should not be deleted | ||
| 937 | # due to expired, neither active nor lazy expiration (SCAN) takes effect, | ||
| 938 | # Besides SCAN/KEYS/RANDOMKEY/CLUSTER GETKEYSINSLOT command can not find them | ||
| 939 | after 2000 | ||
| 940 | R 3 readonly | ||
| 941 | foreach id {0 3} { ;# 0 is the master, 3 is the replica | ||
| 942 | assert_equal {0 {}} [R $id scan 0 count 10] | ||
| 943 | assert_equal {} [R $id keys "*"] | ||
| 944 | assert_equal {} [R $id keys "{06S}*"] | ||
| 945 | assert_equal {} [R $id randomkey] | ||
| 946 | assert_equal {} [R $id cluster getkeysinslot 0 100] | ||
| 947 | assert_equal [R $id cluster countkeysinslot 0] 0 | ||
| 948 | assert_equal [R $id dbsize] 0 | ||
| 949 | |||
| 950 | # but we can see the number of keys is increased in INFO KEYSPACE | ||
| 951 | assert {[scan [regexp -inline {keys\=([\d]*)} [R $id info keyspace]] keys=%d] >= 1} | ||
| 952 | assert {[scan [regexp -inline {expires\=([\d]*)} [R $id info keyspace]] expires=%d] >= 1} | ||
| 953 | } | ||
| 954 | |||
| 955 | wait_for_asm_done | ||
| 956 | |||
| 957 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 958 | |||
| 959 | foreach id {0 3} { ;# 0 is the master, 3 is the replica | ||
| 960 | # verify the keys are valid | ||
| 961 | assert_range [R $id ttl $slot0_key] 90 100 | ||
| 962 | assert_range [R $id ttl $slot1_key] 70 80 | ||
| 963 | assert_range [R $id ttl $slot2_key] 50 60 | ||
| 964 | assert_range [R $id httl $slot2_key FIELDS 1 "f1"] 50 60 | ||
| 965 | |||
| 966 | # KEYS/SCAN/RANDOMKEY/CLUSTER GETKEYSINSLOT will find the keys after migration | ||
| 967 | assert_equal [list 0 [list $slot0_key $slot1_key $slot2_key]] [R $id scan 0 count 10] | ||
| 968 | assert_equal [list $slot0_key $slot1_key $slot2_key] [R $id keys "*"] | ||
| 969 | assert_equal [list $slot0_key] [R $id keys "{06S}*"] | ||
| 970 | assert_not_equal {} [R $id randomkey] | ||
| 971 | assert_equal [list $slot0_key] [R $id cluster getkeysinslot 0 100] | ||
| 972 | |||
| 973 | # INFO KEYSPACE/DBSIZE/CLUSTER COUNTKEYSINSLOT will also reflect the keys | ||
| 974 | assert_equal 3 [scan [regexp -inline {keys\=([\d]*)} [R $id info keyspace]] keys=%d] | ||
| 975 | assert_equal 3 [scan [regexp -inline {expires\=([\d]*)} [R $id info keyspace]] expires=%d] | ||
| 976 | assert_equal 1 [scan [regexp -inline {subexpiry\=([\d]*)} [R $id info keyspace]] subexpiry=%d] | ||
| 977 | assert_equal 3 [R $id dbsize] | ||
| 978 | assert_equal 1 [R $id cluster countkeysinslot 0] | ||
| 979 | } | ||
| 980 | |||
| 981 | # update expire time to 10ms, after some time, the keys should be deleted due to | ||
| 982 | # active expiration | ||
| 983 | R 0 pexpire $slot0_key 10 | ||
| 984 | R 0 pexpire $slot1_key 10 | ||
| 985 | R 0 hpexpire $slot2_key 10 FIELDS 1 "f1" ;# the last field is expired, the key will be deleted | ||
| 986 | wait_for_condition 100 50 { | ||
| 987 | [scan [regexp -inline {keys\=([\d]*)} [R 0 info keyspace]] keys=%d] == {} && | ||
| 988 | [scan [regexp -inline {keys\=([\d]*)} [R 3 info keyspace]] keys=%d] == {} | ||
| 989 | } else { | ||
| 990 | fail "keys did not expire" | ||
| 991 | } | ||
| 992 | |||
| 993 | R 1 config set rdb-key-save-delay 0 | ||
| 994 | } | ||
| 995 | |||
| 996 | test "Eviction does not evict keys in importing slots" { | ||
| 997 | set slot0_key [slot_key 0 mykey] | ||
| 998 | set slot1_key [slot_key 1 mykey] | ||
| 999 | set slot2_key [slot_key 2 mykey] | ||
| 1000 | set slot5462_key [slot_key 5462 mykey] | ||
| 1001 | set slot5463_key [slot_key 5463 mykey] | ||
| 1002 | R 1 flushall | ||
| 1003 | R 0 flushall | ||
| 1004 | |||
| 1005 | # we set a delay to write incremental data | ||
| 1006 | R 0 config set rdb-key-save-delay 1000000 | ||
| 1007 | |||
| 1008 | set 1k_str [string repeat "a" 1024] | ||
| 1009 | set 1m_str [string repeat "a" 1048576] | ||
| 1010 | |||
| 1011 | # set two keys to be evicted | ||
| 1012 | R 1 set $slot5462_key $1k_str | ||
| 1013 | R 1 set $slot5463_key $1k_str | ||
| 1014 | |||
| 1015 | # set maxmemory to 200kb more than current used memory, | ||
| 1016 | # redis should evict some keys if importing some big keys | ||
| 1017 | set r1_mem_used [S 1 used_memory] | ||
| 1018 | set r1_max_mem [expr {$r1_mem_used + 200*1024}] | ||
| 1019 | R 1 config set maxmemory $r1_max_mem | ||
| 1020 | R 1 config set maxmemory-policy allkeys-lru | ||
| 1021 | |||
| 1022 | # set 3 keys to be migrated | ||
| 1023 | R 0 set $slot0_key $1m_str | ||
| 1024 | R 0 set $slot1_key $1m_str | ||
| 1025 | R 0 set $slot2_key $1m_str | ||
| 1026 | |||
| 1027 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100] | ||
| 1028 | wait_for_condition 2000 10 { | ||
| 1029 | [string match {*send-bulk-and-stream*} [migration_status 0 $task_id state]] | ||
| 1030 | } else { | ||
| 1031 | fail "ASM task did not start" | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | # after 2.2s, at least two keys should be transferred, they should not be evicted | ||
| 1035 | # but other keys (slot5462_key and slot5463_key) should be evicted | ||
| 1036 | after 2200 | ||
| 1037 | for {set j 0} {$j < 100} {incr j} { R 1 ping } ;# trigger eviction | ||
| 1038 | assert_equal 0 [R 1 exists $slot5462_key] | ||
| 1039 | assert_equal 0 [R 1 exists $slot5463_key] | ||
| 1040 | assert {[scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 2} | ||
| 1041 | |||
| 1042 | # current used memory should be more than the maxmemory, since the big keys that | ||
| 1043 | # belong importing slots can not be evicted. | ||
| 1044 | set r1_mem_used [S 1 used_memory] | ||
| 1045 | assert {$r1_mem_used > $r1_max_mem + 1024*1024} | ||
| 1046 | |||
| 1047 | wait_for_asm_done | ||
| 1048 | |||
| 1049 | # after migration, these big keys should be evicted | ||
| 1050 | for {set j 0} {$j < 100} {incr j} { R 1 ping } ;# trigger eviction | ||
| 1051 | assert_equal {} [scan [regexp -inline {expires\=([\d]*)} [R 1 info keyspace]] expires=%d] | ||
| 1052 | } | ||
| 1053 | |||
| 1054 | test "Failover will cancel slot migration tasks" { | ||
| 1055 | # migrate slot 0-100 from 1 to 0 | ||
| 1056 | set task_id [setup_slot_migration_with_delay 1 0 0 100] | ||
| 1057 | |||
| 1058 | # FAILOVER happens on the destination node, instance #3 become master, #0 become slave | ||
| 1059 | failover_and_wait_for_done 3 | ||
| 1060 | |||
| 1061 | # the old master will cancel the importing task, and the migrating task on | ||
| 1062 | # the source node will be failed | ||
| 1063 | wait_for_condition 1000 50 { | ||
| 1064 | [string match {*canceled*} [migration_status 0 $task_id state]] && | ||
| 1065 | [string match {*failover*} [migration_status 0 $task_id last_error]] && | ||
| 1066 | [string match {*failed*} [migration_status 1 $task_id state]] | ||
| 1067 | } else { | ||
| 1068 | fail "ASM task did not cancel" | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | # We can restart ASM tasks on new master, migrate slot 0-100 from 1 to 3 | ||
| 1072 | R 1 config set rdb-key-save-delay 0 | ||
| 1073 | set task_id [R 3 CLUSTER MIGRATION IMPORT 0 100] | ||
| 1074 | wait_for_asm_done | ||
| 1075 | |||
| 1076 | # migrate slot 0-100 from 3 to 1 | ||
| 1077 | set task_id [setup_slot_migration_with_delay 3 1 0 100] | ||
| 1078 | |||
| 1079 | # FAILOVER happens on the source node, instance #3 become slave, #0 become master | ||
| 1080 | failover_and_wait_for_done 0 | ||
| 1081 | |||
| 1082 | # the old master will cancel the migrating task, but the destination node will | ||
| 1083 | # retry the importing task, and then succeed. | ||
| 1084 | wait_for_condition 1000 50 { | ||
| 1085 | [string match {*canceled*} [migration_status 3 $task_id state]] | ||
| 1086 | } else { | ||
| 1087 | fail "ASM task did not cancel" | ||
| 1088 | } | ||
| 1089 | wait_for_asm_done | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | test "Flush-like command can cancel slot migration task" { | ||
| 1093 | # flushall, flushdb | ||
| 1094 | foreach flushcmd {flushall flushdb} { | ||
| 1095 | # start slot migration from 1 to 0 | ||
| 1096 | set task_id [setup_slot_migration_with_delay 1 0 0 100] | ||
| 1097 | |||
| 1098 | if {$::verbose} { puts "Testing flush command: $flushcmd"} | ||
| 1099 | R 0 $flushcmd | ||
| 1100 | |||
| 1101 | # flush-like will cancel the task | ||
| 1102 | wait_for_condition 1000 50 { | ||
| 1103 | [string match {*canceled*} [migration_status 0 $task_id state]] | ||
| 1104 | } else { | ||
| 1105 | fail "ASM task did not cancel" | ||
| 1106 | } | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | R 1 config set rdb-key-save-delay 0 | ||
| 1110 | R 0 cluster migration import 0 100 | ||
| 1111 | wait_for_asm_done | ||
| 1112 | } | ||
| 1113 | |||
| 1114 | test "CLUSTER SETSLOT command when there is a slot migration task" { | ||
| 1115 | # Setup slot migration test from node 0 to node 1 | ||
| 1116 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1117 | |||
| 1118 | # Cluster SETSLOT command is not allowed when there is a slot migration task | ||
| 1119 | # on the slot. #0 and #1 are having migration task now. | ||
| 1120 | foreach instance {0 1} { | ||
| 1121 | set node_id [R $instance cluster myid] | ||
| 1122 | |||
| 1123 | catch {R $instance cluster setslot 0 migrating $node_id} err | ||
| 1124 | assert_match {*in an active atomic slot migration*} $err | ||
| 1125 | |||
| 1126 | catch {R $instance cluster setslot 0 importing $node_id} err | ||
| 1127 | assert_match {*in an active atomic slot migration*} $err | ||
| 1128 | |||
| 1129 | catch {R $instance cluster setslot 0 stable} err | ||
| 1130 | assert_match {*in an active atomic slot migration*} $err | ||
| 1131 | |||
| 1132 | catch {R $instance cluster setslot 0 node $node_id} err | ||
| 1133 | assert_match {*in an active atomic slot migration*} $err | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | # CLUSTER SETSLOT on other node will cancel the migration task, we update | ||
| 1137 | # the owner of slot 0 (that is migrating from #0 to #1) to #2 on #2, we | ||
| 1138 | # bump the config epoch to make sure the change can update #0 and #1 | ||
| 1139 | # slot configuration, so #0 and #1 will cancel the migration task. | ||
| 1140 | # BTW, if config epoch is not bumped, the slot config of #2 may be | ||
| 1141 | # updated by #0 and #1. | ||
| 1142 | R 2 cluster bumpepoch | ||
| 1143 | R 2 cluster setslot 0 node [R 2 cluster myid] | ||
| 1144 | wait_for_condition 1000 50 { | ||
| 1145 | [string match {*canceled*} [migration_status 0 $task_id state]] && | ||
| 1146 | [string match {*slots configuration updated*} [migration_status 0 $task_id last_error]] && | ||
| 1147 | [string match {*canceled*} [migration_status 1 $task_id state]] | ||
| 1148 | } else { | ||
| 1149 | fail "ASM task did not cancel" | ||
| 1150 | } | ||
| 1151 | |||
| 1152 | # set slot 0 back to #0 | ||
| 1153 | R 0 cluster bumpepoch | ||
| 1154 | R 0 cluster setslot 0 node [R 0 cluster myid] | ||
| 1155 | wait_for_cluster_propagation | ||
| 1156 | wait_for_cluster_state "ok" | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | test "CLUSTER DELSLOTSRANGE command cancels a slot migration task" { | ||
| 1160 | # start slot migration from 0 to 1 | ||
| 1161 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1162 | |||
| 1163 | R 0 cluster delslotsrange 0 100 | ||
| 1164 | wait_for_condition 1000 50 { | ||
| 1165 | [string match {*canceled*} [migration_status 0 $task_id state]] && | ||
| 1166 | [string match {*slots configuration updated*} [migration_status 0 $task_id last_error]] && | ||
| 1167 | [string match {*failed*} [migration_status 1 $task_id state]] | ||
| 1168 | } else { | ||
| 1169 | fail "ASM task did not cancel" | ||
| 1170 | } | ||
| 1171 | R 1 cluster migration cancel id $task_id | ||
| 1172 | |||
| 1173 | # add the slots back | ||
| 1174 | R 0 cluster addslotsrange 0 100 | ||
| 1175 | wait_for_cluster_propagation | ||
| 1176 | wait_for_cluster_state "ok" | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | # NOTE: this test needs more than 60s, maybe you can skip when testing | ||
| 1180 | test "CLUSTER FORGET command cancels a slot migration task" { | ||
| 1181 | R 0 config set rdb-key-save-delay 0 | ||
| 1182 | # Migrate all slot on #0 to #1, so we can forget #0 | ||
| 1183 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 5461] | ||
| 1184 | wait_for_asm_done | ||
| 1185 | |||
| 1186 | # start slot migration from 1 to 0 | ||
| 1187 | set task_id [setup_slot_migration_with_delay 1 0 0 5461] | ||
| 1188 | |||
| 1189 | # Forget #0 on #1, the migration task on #1 will be canceled due to node deleted, | ||
| 1190 | # and the importing task on #0 will be failed | ||
| 1191 | R 1 cluster forget [R 0 cluster myid] | ||
| 1192 | wait_for_condition 1000 50 { | ||
| 1193 | [string match {*canceled*} [migration_status 1 $task_id state]] && | ||
| 1194 | [string match {*node deleted*} [migration_status 1 $task_id last_error]] && | ||
| 1195 | [string match {*failed*} [migration_status 0 $task_id state]] | ||
| 1196 | } else { | ||
| 1197 | fail "ASM task did not cancel" | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | # Add #0 back into cluster | ||
| 1201 | # NOTE: this will cost 60s to let #0 join the cluster since | ||
| 1202 | # other nodes add #0 into black list for 60s after FORGET. | ||
| 1203 | R 1 config set rdb-key-save-delay 0 | ||
| 1204 | R 1 cluster meet "127.0.0.1" [lindex [R 0 config get port] 1] | ||
| 1205 | |||
| 1206 | # the importing task on #0 will be retried, and eventually succeed | ||
| 1207 | # since now #0 is back in the cluster | ||
| 1208 | wait_for_condition 3000 50 { | ||
| 1209 | [string match {*completed*} [migration_status 0 $task_id state]] && | ||
| 1210 | [string match {*completed*} [migration_status 1 $task_id state]] | ||
| 1211 | } else { | ||
| 1212 | fail "ASM task did not finish" | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | # make sure #0 is completely back to the cluster | ||
| 1216 | wait_for_cluster_propagation | ||
| 1217 | wait_for_cluster_state "ok" | ||
| 1218 | } | ||
| 1219 | |||
| 1220 | test "CLIENT PAUSE can cancel slot migration task" { | ||
| 1221 | # start slot migration from 0 to 1 | ||
| 1222 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1223 | |||
| 1224 | # CLIENT PAUSE happens on the destination node, #1 will cancel the importing task | ||
| 1225 | R 1 client pause 100000 write ;# pause 100s | ||
| 1226 | wait_for_condition 1000 50 { | ||
| 1227 | [string match {*canceled*} [migration_status 1 $task_id state]] && | ||
| 1228 | [string match {*client pause*} [migration_status 1 $task_id last_error]] | ||
| 1229 | } else { | ||
| 1230 | fail "ASM task did not cancel" | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | # start task again | ||
| 1234 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100] | ||
| 1235 | after 200 ;# give some time to have chance to schedule the task | ||
| 1236 | # the task should not start since server is paused | ||
| 1237 | assert {[string match {*none*} [migration_status 1 $task_id state]]} | ||
| 1238 | |||
| 1239 | # unpause the server, the task should start | ||
| 1240 | R 1 client unpause | ||
| 1241 | wait_for_asm_done | ||
| 1242 | |||
| 1243 | # migrate back to original node #0 | ||
| 1244 | R 0 config set rdb-key-save-delay 0 | ||
| 1245 | R 1 config set rdb-key-save-delay 0 | ||
| 1246 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 1247 | wait_for_asm_done | ||
| 1248 | } | ||
| 1249 | |||
| 1250 | test "Server shutdown can cancel slot migration task, exit with success" { | ||
| 1251 | # start slot migration from 0 to 1 | ||
| 1252 | setup_slot_migration_with_delay 0 1 0 100 | ||
| 1253 | |||
| 1254 | set loglines [count_log_lines -1] | ||
| 1255 | |||
| 1256 | # Shutdown the server, it should cancel the migration task | ||
| 1257 | restart_server -1 true false true nosave | ||
| 1258 | |||
| 1259 | wait_for_log_messages -1 {"*Cancelled due to server shutdown*"} $loglines 100 100 | ||
| 1260 | |||
| 1261 | wait_for_cluster_propagation | ||
| 1262 | wait_for_cluster_state "ok" | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | test "Cancel import task when streaming buffer into db" { | ||
| 1266 | # set a delay to have time to cancel import task that is streaming buf to db | ||
| 1267 | R 1 config set key-load-delay 50000 | ||
| 1268 | # start slot migration from 0 to 1 | ||
| 1269 | set task_id [setup_slot_migration_with_delay 0 1 0 100 5] | ||
| 1270 | |||
| 1271 | # start the slot 0 write load on the node 0 | ||
| 1272 | set slot0_key [slot_key 0 mykey] | ||
| 1273 | set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key 500] | ||
| 1274 | |||
| 1275 | # wait for entering streaming buffer state | ||
| 1276 | wait_for_condition 1000 10 { | ||
| 1277 | [string match {*streaming-buffer*} [migration_status 1 $task_id state]] | ||
| 1278 | } else { | ||
| 1279 | fail "ASM task did not enter streaming buffer state" | ||
| 1280 | } | ||
| 1281 | stop_write_load $load_handle | ||
| 1282 | |||
| 1283 | # cancel the import task on #1, the destination node works fine | ||
| 1284 | R 1 cluster migration cancel id $task_id | ||
| 1285 | assert_match {*canceled*} [migration_status 1 $task_id state] | ||
| 1286 | |||
| 1287 | # reset config | ||
| 1288 | R 0 config set key-load-delay 0 | ||
| 1289 | R 1 config set key-load-delay 0 | ||
| 1290 | } | ||
| 1291 | |||
| 1292 | test "Destination node main channel timeout when waiting stream EOF" { | ||
| 1293 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1294 | R 1 config set repl-timeout 5 | ||
| 1295 | |||
| 1296 | # pause the source node to make EOF wait timeout. Do not pause | ||
| 1297 | # the child process, so it can deliver slot snapshot to destination | ||
| 1298 | set r0_process_id [S 0 process_id] | ||
| 1299 | pause_process $r0_process_id | ||
| 1300 | |||
| 1301 | # the destination node will fail after 7s, 5s for EOF wait and 2s for slot snapshot | ||
| 1302 | wait_for_condition 1000 20 { | ||
| 1303 | [string match {*failed*} [migration_status 1 $task_id state]] && | ||
| 1304 | [string match {*Main channel*Connection timeout*wait-stream-eof*} \ | ||
| 1305 | [migration_status 1 $task_id last_error]] | ||
| 1306 | } else { | ||
| 1307 | fail "ASM task did not fail" | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | # resume the source node | ||
| 1311 | resume_process $r0_process_id | ||
| 1312 | |||
| 1313 | # After the source node is resumed, the task on source node may receive | ||
| 1314 | # ACKs from destination and consider the task is stream-done. In this case, | ||
| 1315 | # the task on source node will be failed after several seconds | ||
| 1316 | if {[string match {*stream-done*} [migration_status 0 $task_id state]]} { | ||
| 1317 | wait_for_condition 1000 20 { | ||
| 1318 | [string match {*failed*} [migration_status 0 $task_id state]] && | ||
| 1319 | [string match {*Server paused*} [migration_status 0 $task_id last_error]] | ||
| 1320 | } else { | ||
| 1321 | fail "ASM task did not fail" | ||
| 1322 | } | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | R 1 config set repl-timeout 60 | ||
| 1326 | R 0 cluster migration cancel id $task_id | ||
| 1327 | R 1 cluster migration cancel id $task_id | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | test "Destination node rdb channel timeout when transferring slots snapshot" { | ||
| 1331 | # cost 10s to transfer each key | ||
| 1332 | set task_id [setup_slot_migration_with_delay 0 1 0 100 2 10000000] | ||
| 1333 | R 1 config set repl-timeout 3 | ||
| 1334 | |||
| 1335 | # the destination node will fail after 3s | ||
| 1336 | wait_for_condition 1000 20 { | ||
| 1337 | [string match {*failed*} [migration_status 1 $task_id state]] && | ||
| 1338 | [string match {*RDB channel*Connection timeout*rdbchannel-transfer*} \ | ||
| 1339 | [migration_status 1 $task_id last_error]] | ||
| 1340 | } else { | ||
| 1341 | fail "ASM task did not fail" | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | R 1 config set repl-timeout 60 | ||
| 1345 | R 0 cluster migration cancel id $task_id | ||
| 1346 | R 1 cluster migration cancel id $task_id | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | test "Source node rdb channel timeout when transferring slots snapshot" { | ||
| 1350 | set r1_pid [S 1 process_id] | ||
| 1351 | R 0 flushall | ||
| 1352 | R 0 config set save "" | ||
| 1353 | # generate several large keys, make sure the memory usage is more than | ||
| 1354 | # socket buffer size, so the rdb channel will block and timeout if | ||
| 1355 | # no data is received by destination. | ||
| 1356 | set val [string repeat "a" 102400] ;# 100kb | ||
| 1357 | for {set i 0} {$i < 1000} {incr i} { | ||
| 1358 | set key [slot_key 0 "key$i"] | ||
| 1359 | R 0 set $key $val | ||
| 1360 | } | ||
| 1361 | R 0 config set repl-timeout 3 ;# 3s for rdb channel timeout | ||
| 1362 | R 0 config set rdb-key-save-delay 10000 ;# 1000 keys cost 10s to save | ||
| 1363 | |||
| 1364 | # start migration from #0 to #1 | ||
| 1365 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100] | ||
| 1366 | wait_for_condition 1000 20 { | ||
| 1367 | [string match {*send-bulk-and-stream*} [migration_status 0 $task_id state]] | ||
| 1368 | } else { | ||
| 1369 | fail "ASM task did not start" | ||
| 1370 | } | ||
| 1371 | |||
| 1372 | # pause the destination node to make rdb channel timeout | ||
| 1373 | pause_process $r1_pid | ||
| 1374 | |||
| 1375 | # the source node will fail, the rdb child process can not | ||
| 1376 | # write data to destination, so it will timeout | ||
| 1377 | wait_for_condition 1000 30 { | ||
| 1378 | [string match {*failed*} [migration_status 0 $task_id state]] && | ||
| 1379 | [string match {*RDB channel*Failed to send slots snapshot*} \ | ||
| 1380 | [migration_status 0 $task_id last_error]] | ||
| 1381 | } else { | ||
| 1382 | fail "ASM task did not fail" | ||
| 1383 | } | ||
| 1384 | resume_process $r1_pid | ||
| 1385 | |||
| 1386 | R 0 config set repl-timeout 60 | ||
| 1387 | R 0 cluster migration cancel id $task_id | ||
| 1388 | R 1 cluster migration cancel id $task_id | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | test "Source node main channel timeout when sending incremental stream" { | ||
| 1392 | R 0 flushall | ||
| 1393 | R 0 config set repl-timeout 2 ;# 2s for main channel timeout | ||
| 1394 | |||
| 1395 | set r1_pid [S 1 process_id] | ||
| 1396 | # in order to have time to pause the destination node | ||
| 1397 | R 1 config set key-load-delay 50000 ;# 50ms each 16k data | ||
| 1398 | |||
| 1399 | # start migration from #0 to #1 | ||
| 1400 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1401 | |||
| 1402 | # Create 200 keys of 16k size traffic on slot 0, streaming buffer need 10s (200*50ms) | ||
| 1403 | populate_slot 200 -idx 0 -slot 0 -size 16384 | ||
| 1404 | |||
| 1405 | # wait for streaming buffer state, then pause the destination node | ||
| 1406 | wait_for_condition 1000 20 { | ||
| 1407 | [string match {*streaming-buffer*} [migration_status 1 $task_id state]] | ||
| 1408 | } else { | ||
| 1409 | fail "ASM task did not stream buffer, state: [migration_status 1 $task_id state]" | ||
| 1410 | } | ||
| 1411 | pause_process $r1_pid | ||
| 1412 | |||
| 1413 | # Start the slot 0 write load on the R 0 | ||
| 1414 | set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 [slot_key 0 mykey] 500] | ||
| 1415 | |||
| 1416 | # the source node will fail after several seconds (including the time | ||
| 1417 | # to fill the socket buffer of source node), the main channel can not | ||
| 1418 | # write data to destination since the destination is paused | ||
| 1419 | wait_for_condition 1000 30 { | ||
| 1420 | [string match {*failed*} [migration_status 0 $task_id state]] && | ||
| 1421 | [string match {*Main channel*Connection timeout*} \ | ||
| 1422 | [migration_status 0 $task_id last_error]] | ||
| 1423 | } else { | ||
| 1424 | fail "ASM task did not fail" | ||
| 1425 | } | ||
| 1426 | stop_write_load $load_handle | ||
| 1427 | resume_process $r1_pid | ||
| 1428 | |||
| 1429 | R 0 config set repl-timeout 60 | ||
| 1430 | R 1 config set key-load-delay 0 | ||
| 1431 | R 0 cluster migration cancel id $task_id | ||
| 1432 | R 1 cluster migration cancel id $task_id | ||
| 1433 | R 0 flushall | ||
| 1434 | } | ||
| 1435 | |||
| 1436 | test "Source server paused timeout" { | ||
| 1437 | # set timeout to 0, so the task will fail immediately when checking timeout | ||
| 1438 | R 0 config set cluster-slot-migration-write-pause-timeout 0 | ||
| 1439 | |||
| 1440 | # start migration from node 0 to 1 | ||
| 1441 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1442 | |||
| 1443 | # start the slot 0 write load on the node 0 | ||
| 1444 | set slot0_key [slot_key 0 mykey] | ||
| 1445 | set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key] | ||
| 1446 | |||
| 1447 | # node 0 will fail since server paused timeout | ||
| 1448 | wait_for_condition 2000 10 { | ||
| 1449 | [string match {*failed*} [migration_status 0 $task_id state]] && | ||
| 1450 | [string match {*Server paused timeout*} \ | ||
| 1451 | [migration_status 0 $task_id last_error]] | ||
| 1452 | } else { | ||
| 1453 | fail "ASM task did not fail" | ||
| 1454 | } | ||
| 1455 | |||
| 1456 | stop_write_load $load_handle | ||
| 1457 | |||
| 1458 | # reset config | ||
| 1459 | R 0 config set cluster-slot-migration-write-pause-timeout 10000 | ||
| 1460 | R 0 cluster migration cancel id $task_id | ||
| 1461 | R 1 cluster migration cancel id $task_id | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | test "Sync buffer drain timeout" { | ||
| 1465 | # set a fail point to avoid the source node to enter handoff prep state | ||
| 1466 | # to test the sync buffer drain timeout | ||
| 1467 | R 0 debug asm-failpoint "migrate-main-channel" "handoff-prep" | ||
| 1468 | R 0 config set cluster-slot-migration-sync-buffer-drain-timeout 5000 | ||
| 1469 | |||
| 1470 | set r1_pid [S 1 process_id] | ||
| 1471 | |||
| 1472 | # start migration from node 0 to 1 | ||
| 1473 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 1474 | |||
| 1475 | # start the slot 0 write load on the node 0 | ||
| 1476 | set slot0_key [slot_key 0 mykey] | ||
| 1477 | set load_handle [start_write_load "127.0.0.1" [get_port 0] 100 $slot0_key] | ||
| 1478 | |||
| 1479 | # wait for entering streaming buffer state | ||
| 1480 | wait_for_condition 1000 10 { | ||
| 1481 | [string match {*wait-stream-eof*} [migration_status 1 $task_id state]] | ||
| 1482 | } else { | ||
| 1483 | fail "ASM task did not enter wait-stream-eof state" | ||
| 1484 | } | ||
| 1485 | |||
| 1486 | pause_process $r1_pid ;# avoid the destination to apply commands | ||
| 1487 | |||
| 1488 | # node 0 will fail since sync buffer drain timeout | ||
| 1489 | wait_for_condition 2000 10 { | ||
| 1490 | [string match {*failed*} [migration_status 0 $task_id state]] && | ||
| 1491 | [string match {*Sync buffer drain timeout*} \ | ||
| 1492 | [migration_status 0 $task_id last_error]] | ||
| 1493 | } else { | ||
| 1494 | fail "ASM task did not fail" | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | stop_write_load $load_handle | ||
| 1498 | resume_process $r1_pid | ||
| 1499 | |||
| 1500 | # reset config | ||
| 1501 | R 0 config set cluster-slot-migration-sync-buffer-drain-timeout 60000 | ||
| 1502 | R 0 debug asm-failpoint "" "" | ||
| 1503 | R 0 cluster migration cancel id $task_id | ||
| 1504 | R 1 cluster migration cancel id $task_id | ||
| 1505 | } | ||
| 1506 | |||
| 1507 | test "Cluster implementation cannot start migrate task temporarily" { | ||
| 1508 | # Inject a fail point to make the source node not ready | ||
| 1509 | R 0 debug asm-failpoint "migrate-main-channel" "none" | ||
| 1510 | |||
| 1511 | # start migration from node 0 to 1 | ||
| 1512 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100] | ||
| 1513 | |||
| 1514 | # verify source node replies SYNCSLOTS with -NOTREADY | ||
| 1515 | set loglines [count_log_lines -1] | ||
| 1516 | wait_for_log_messages -1 {"*Source node replied to SYNCSLOTS SYNC with -NOTREADY, will retry later*"} $loglines 100 100 | ||
| 1517 | |||
| 1518 | # clear the fail point and verify the task is completed | ||
| 1519 | R 0 debug asm-failpoint "" "" | ||
| 1520 | wait_for_asm_done | ||
| 1521 | assert_equal "completed" [migration_status 0 $task_id state] | ||
| 1522 | assert_equal "completed" [migration_status 1 $task_id state] | ||
| 1523 | |||
| 1524 | # cleanup | ||
| 1525 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 1526 | wait_for_asm_done | ||
| 1527 | } | ||
| 1528 | } | ||
| 1529 | |||
| 1530 | start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no}} { | ||
| 1531 | test "Test bgtrim after a successful migration" { | ||
| 1532 | R 0 debug asm-trim-method bg | ||
| 1533 | R 3 debug asm-trim-method bg | ||
| 1534 | R 0 CONFIG RESETSTAT | ||
| 1535 | R 3 CONFIG RESETSTAT | ||
| 1536 | |||
| 1537 | R 0 flushall | ||
| 1538 | # Fill slot 0 | ||
| 1539 | populate_slot 1000 -idx 0 -slot 0 | ||
| 1540 | # Fill slot 1 with keys that have TTL | ||
| 1541 | populate_slot 1000 -idx 0 -slot 1 -prefix "expirekey" -expires 100 | ||
| 1542 | # HFE key on slot 2 | ||
| 1543 | set slot2_hfekey [slot_key 2 hfekey] | ||
| 1544 | R 0 HSETEX $slot2_hfekey EX 10 FIELDS 1 f1 v1 | ||
| 1545 | |||
| 1546 | # Fill slot 101, these keys won't be migrated | ||
| 1547 | populate_slot 1000 -idx 0 -slot 101 | ||
| 1548 | # Fill slot 102 with keys that have TTL | ||
| 1549 | populate_slot 1000 -idx 0 -slot 102 -prefix "expirekey" -expires 100 | ||
| 1550 | # HFE key on slot 103 | ||
| 1551 | set slot103_hfekey [slot_key 103 hfekey] | ||
| 1552 | R 0 HSETEX $slot103_hfekey EX 10 FIELDS 1 f1 v1 | ||
| 1553 | |||
| 1554 | # migrate slot 0 to node-1 | ||
| 1555 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 1556 | wait_for_asm_done | ||
| 1557 | |||
| 1558 | # Verify the data is migrated | ||
| 1559 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 1560 | assert_equal 2001 [R 0 dbsize] | ||
| 1561 | assert_equal 2001 [R 3 dbsize] | ||
| 1562 | wait_for_ofs_sync [Rn 1] [Rn 4] | ||
| 1563 | assert_equal 2001 [R 1 dbsize] | ||
| 1564 | assert_equal 2001 [R 4 dbsize] | ||
| 1565 | |||
| 1566 | # Verify the keys are trimmed lazily | ||
| 1567 | wait_for_condition 1000 10 { | ||
| 1568 | [S 0 lazyfreed_objects] == 2001 && | ||
| 1569 | [S 3 lazyfreed_objects] == 2001 | ||
| 1570 | } else { | ||
| 1571 | puts "lazyfreed_objects: [S 0 lazyfreed_objects] [S 3 lazyfreed_objects]" | ||
| 1572 | fail "Background trim did not happen" | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | # Cleanup | ||
| 1576 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 1577 | wait_for_asm_done | ||
| 1578 | R 0 flushall | ||
| 1579 | R 0 debug asm-trim-method default | ||
| 1580 | R 3 debug asm-trim-method default | ||
| 1581 | } | ||
| 1582 | |||
| 1583 | test "Test bgtrim after a failed migration" { | ||
| 1584 | R 0 debug asm-trim-method bg | ||
| 1585 | R 3 debug asm-trim-method bg | ||
| 1586 | R 1 CONFIG RESETSTAT | ||
| 1587 | R 4 CONFIG RESETSTAT | ||
| 1588 | |||
| 1589 | # Fill slot 0 on node-0 and migrate it to node-1 (with some delay) | ||
| 1590 | R 0 flushall | ||
| 1591 | set task_id [setup_slot_migration_with_delay 0 1 0 100 10000 1000] | ||
| 1592 | after 1000 ;# wait some time so that some keys are moved | ||
| 1593 | |||
| 1594 | # Fail the migration | ||
| 1595 | R 1 CLUSTER MIGRATION CANCEL ID $task_id | ||
| 1596 | wait_for_asm_done | ||
| 1597 | |||
| 1598 | # Verify the data is not migrated | ||
| 1599 | assert_equal 10000 [R 0 dbsize] | ||
| 1600 | assert_equal 10000 [R 3 dbsize] | ||
| 1601 | |||
| 1602 | # Verify the keys are trimmed lazily after a failed import on dest side. | ||
| 1603 | wait_for_condition 1000 20 { | ||
| 1604 | [R 1 dbsize] == 0 && | ||
| 1605 | [R 4 dbsize] == 0 && | ||
| 1606 | [S 1 lazyfreed_objects] > 0 && | ||
| 1607 | [S 4 lazyfreed_objects] > 0 | ||
| 1608 | } else { | ||
| 1609 | fail "Background trim did not happen" | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | # Cleanup | ||
| 1613 | wait_for_asm_done | ||
| 1614 | R 0 flushall | ||
| 1615 | R 0 debug asm-trim-method default | ||
| 1616 | R 3 debug asm-trim-method default | ||
| 1617 | } | ||
| 1618 | |||
| 1619 | test "Test bgtrim unblocks stream client" { | ||
| 1620 | # Two clients waiting for data on two different streams which are in | ||
| 1621 | # different slots. We are going to migrate one slot, which will unblock | ||
| 1622 | # the client. The other client should still be blocked. | ||
| 1623 | R 0 debug asm-trim-method bg | ||
| 1624 | |||
| 1625 | set key0 [slot_key 0 mystream] | ||
| 1626 | set key1 [slot_key 1 mystream] | ||
| 1627 | |||
| 1628 | # First client waits on slot-0 key | ||
| 1629 | R 0 DEL $key0 | ||
| 1630 | R 0 XADD $key0 666 f v | ||
| 1631 | R 0 XGROUP CREATE $key0 mygroup $ | ||
| 1632 | set rd0 [redis_deferring_client] | ||
| 1633 | $rd0 XREADGROUP GROUP mygroup Alice BLOCK 0 STREAMS $key0 ">" | ||
| 1634 | wait_for_blocked_clients_count 1 | ||
| 1635 | |||
| 1636 | # Second client waits on slot-1 key | ||
| 1637 | R 0 DEL $key1 | ||
| 1638 | R 0 XADD $key1 666 f v | ||
| 1639 | R 0 XGROUP CREATE $key1 mygroup $ | ||
| 1640 | set rd1 [redis_deferring_client] | ||
| 1641 | $rd1 XREADGROUP GROUP mygroup Alice BLOCK 0 STREAMS $key1 ">" | ||
| 1642 | wait_for_blocked_clients_count 2 | ||
| 1643 | |||
| 1644 | # Migrate slot 0 | ||
| 1645 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 1646 | wait_for_asm_done | ||
| 1647 | |||
| 1648 | # First client should get MOVED error | ||
| 1649 | assert_error "*MOVED*" {$rd0 read} | ||
| 1650 | $rd0 close | ||
| 1651 | |||
| 1652 | # Second client should operate normally | ||
| 1653 | R 0 XADD $key1 667 f v | ||
| 1654 | set res [$rd1 read] | ||
| 1655 | assert_equal [lindex $res 0 1 0] {667-0 {f v}} | ||
| 1656 | $rd1 close | ||
| 1657 | |||
| 1658 | # cleanup | ||
| 1659 | wait_for_asm_done | ||
| 1660 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 1661 | wait_for_asm_done | ||
| 1662 | R 0 flushall | ||
| 1663 | R 0 debug asm-trim-method default | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | test "Test bgtrim touches watched keys" { | ||
| 1667 | R 0 debug asm-trim-method bg | ||
| 1668 | |||
| 1669 | # bgtrim should touch watched keys on migrated slots | ||
| 1670 | set key0 [slot_key 0 key] | ||
| 1671 | R 0 set $key0 30 | ||
| 1672 | R 0 watch $key0 | ||
| 1673 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 1674 | wait_for_asm_done | ||
| 1675 | R 0 multi | ||
| 1676 | R 0 ping | ||
| 1677 | assert_equal {} [R 0 exec] | ||
| 1678 | |||
| 1679 | # bgtrim should not touch watched keys on other slots | ||
| 1680 | set key2 [slot_key 2 key] | ||
| 1681 | R 0 set $key2 30 | ||
| 1682 | R 0 watch $key2 | ||
| 1683 | R 1 CLUSTER MIGRATION IMPORT 1 1 | ||
| 1684 | wait_for_asm_done | ||
| 1685 | R 0 multi | ||
| 1686 | R 0 ping | ||
| 1687 | assert_equal PONG [R 0 exec] | ||
| 1688 | |||
| 1689 | # cleanup | ||
| 1690 | wait_for_asm_done | ||
| 1691 | R 0 CLUSTER MIGRATION IMPORT 0 1 | ||
| 1692 | wait_for_asm_done | ||
| 1693 | R 0 flushall | ||
| 1694 | R 0 debug asm-trim-method default | ||
| 1695 | } | ||
| 1696 | |||
| 1697 | test "Test bgtrim after a FAILOVER on destination side" { | ||
| 1698 | R 1 debug asm-trim-method bg | ||
| 1699 | R 4 debug asm-trim-method bg | ||
| 1700 | |||
| 1701 | set loglines [count_log_lines -4] | ||
| 1702 | |||
| 1703 | # Fill slot 0 on node-0 and migrate it to node-1 (with some delay) | ||
| 1704 | R 0 flushall | ||
| 1705 | set task_id [setup_slot_migration_with_delay 0 1 0 100 10000 1000] | ||
| 1706 | after 1000 ;# wait some time so that some keys are moved | ||
| 1707 | |||
| 1708 | # Trigger a failover with force to simulate unreachable master and | ||
| 1709 | # verify unowned keys are trimmed once replica becomes master. | ||
| 1710 | failover_and_wait_for_done 4 force | ||
| 1711 | wait_for_log_messages -4 {"*Detected keys in slots that do not belong*Scheduling trim*"} $loglines 1000 10 | ||
| 1712 | wait_for_condition 1000 10 { | ||
| 1713 | [R 1 dbsize] == 0 && | ||
| 1714 | [R 4 dbsize] == 0 | ||
| 1715 | } else { | ||
| 1716 | fail "Background trim did not happen" | ||
| 1717 | } | ||
| 1718 | |||
| 1719 | # cleanup | ||
| 1720 | wait_for_cluster_propagation | ||
| 1721 | failover_and_wait_for_done 1 | ||
| 1722 | R 0 config set rdb-key-save-delay 0 | ||
| 1723 | R 1 debug asm-trim-method default | ||
| 1724 | R 4 debug asm-trim-method default | ||
| 1725 | wait_for_asm_done | ||
| 1726 | } | ||
| 1727 | |||
| 1728 | test "CLUSTER SETSLOT is not allowed if there is a pending trim job" { | ||
| 1729 | R 0 debug asm-trim-method bg | ||
| 1730 | R 3 debug asm-trim-method bg | ||
| 1731 | |||
| 1732 | # Fill slot 0 on node-0 and migrate it to node-1 (with some delay) | ||
| 1733 | R 0 flushall | ||
| 1734 | set task_id [setup_slot_migration_with_delay 0 1 0 100 10000 1000] | ||
| 1735 | |||
| 1736 | # Pause will cancel the task and there will be a pending trim job | ||
| 1737 | # until writes are allowed again. | ||
| 1738 | R 1 client pause 100000 write ;# pause 100s | ||
| 1739 | wait_for_asm_done | ||
| 1740 | |||
| 1741 | # CLUSTER SETSLOT is not allowed if there is a pending trim job. | ||
| 1742 | assert_error {*There is a pending trim job for slot 0*} {R 1 CLUSTER SETSLOT 0 STABLE} | ||
| 1743 | |||
| 1744 | # Unpause the server, trim will be triggered and SETSLOT will be allowed | ||
| 1745 | R 1 client unpause | ||
| 1746 | R 1 CLUSTER SETSLOT 0 STABLE | ||
| 1747 | } | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-node-timeout 60000 cluster-allow-replica-migration no save ""}} { | ||
| 1751 | test "Test active trim after a successful migration" { | ||
| 1752 | R 0 debug asm-trim-method active | ||
| 1753 | R 3 debug asm-trim-method active | ||
| 1754 | populate_slot 500 -slot 0 | ||
| 1755 | populate_slot 500 -slot 1 | ||
| 1756 | populate_slot 500 -slot 3 | ||
| 1757 | populate_slot 500 -slot 4 | ||
| 1758 | |||
| 1759 | # Migrate 1500 keys | ||
| 1760 | R 1 CLUSTER MIGRATION IMPORT 0 1 3 3 | ||
| 1761 | wait_for_asm_done | ||
| 1762 | |||
| 1763 | wait_for_condition 1000 10 { | ||
| 1764 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 1765 | [CI 0 cluster_slot_migration_active_trim_running] == 0 && | ||
| 1766 | [CI 0 cluster_slot_migration_active_trim_current_job_trimmed] == 1500 && | ||
| 1767 | [CI 3 cluster_slot_migration_active_trim_running] == 0 && | ||
| 1768 | [CI 3 cluster_slot_migration_active_trim_current_job_trimmed] == 1500 | ||
| 1769 | } else { | ||
| 1770 | fail "trim failed" | ||
| 1771 | } | ||
| 1772 | |||
| 1773 | assert_equal 1500 [CI 0 cluster_slot_migration_active_trim_current_job_keys] | ||
| 1774 | assert_equal 1500 [CI 3 cluster_slot_migration_active_trim_current_job_keys] | ||
| 1775 | |||
| 1776 | assert_equal 500 [R 0 dbsize] | ||
| 1777 | assert_equal 500 [R 3 dbsize] | ||
| 1778 | assert_equal 1500 [R 1 dbsize] | ||
| 1779 | assert_equal 1500 [R 4 dbsize] | ||
| 1780 | assert_equal 0 [R 0 cluster countkeysinslot 0] | ||
| 1781 | assert_equal 0 [R 0 cluster countkeysinslot 1] | ||
| 1782 | assert_equal 0 [R 0 cluster countkeysinslot 3] | ||
| 1783 | assert_equal 500 [R 0 cluster countkeysinslot 4] | ||
| 1784 | |||
| 1785 | # cleanup | ||
| 1786 | R 0 debug asm-trim-method default | ||
| 1787 | R 3 debug asm-trim-method default | ||
| 1788 | R 0 CLUSTER MIGRATION IMPORT 0 1 3 3 | ||
| 1789 | wait_for_asm_done | ||
| 1790 | R 0 flushall | ||
| 1791 | R 1 flushall | ||
| 1792 | } | ||
| 1793 | |||
| 1794 | test "Test multiple active trim jobs can be scheduled" { | ||
| 1795 | # Active trim will be scheduled but it won't run | ||
| 1796 | R 0 debug asm-trim-method active -1 | ||
| 1797 | R 3 debug asm-trim-method active -1 | ||
| 1798 | |||
| 1799 | populate_slot 500 -slot 0 | ||
| 1800 | populate_slot 500 -slot 1 | ||
| 1801 | populate_slot 500 -slot 3 | ||
| 1802 | populate_slot 500 -slot 4 | ||
| 1803 | |||
| 1804 | # Migrate 1500 keys | ||
| 1805 | R 1 CLUSTER MIGRATION IMPORT 0 1 | ||
| 1806 | wait_for_condition 1000 10 { | ||
| 1807 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 1808 | [CI 0 cluster_slot_migration_active_trim_running] == 1 && | ||
| 1809 | [CI 3 cluster_slot_migration_active_trim_running] == 1 | ||
| 1810 | } else { | ||
| 1811 | fail "migrate failed" | ||
| 1812 | } | ||
| 1813 | |||
| 1814 | # Migrate another slot and verify there are two trim tasks on the source | ||
| 1815 | R 1 CLUSTER MIGRATION IMPORT 3 3 | ||
| 1816 | wait_for_condition 1000 10 { | ||
| 1817 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 1818 | [CI 0 cluster_slot_migration_active_trim_running] == 2 && | ||
| 1819 | [CI 3 cluster_slot_migration_active_trim_running] == 2 | ||
| 1820 | } else { | ||
| 1821 | fail "migrate failed" | ||
| 1822 | } | ||
| 1823 | |||
| 1824 | # Enabled active trim and wait until it is completed. | ||
| 1825 | R 0 debug asm-trim-method active 0 | ||
| 1826 | R 3 debug asm-trim-method active 0 | ||
| 1827 | wait_for_asm_done | ||
| 1828 | |||
| 1829 | assert_equal 500 [R 0 dbsize] | ||
| 1830 | assert_equal 500 [R 3 dbsize] | ||
| 1831 | assert_equal 0 [R 0 cluster countkeysinslot 0] | ||
| 1832 | assert_equal 0 [R 0 cluster countkeysinslot 1] | ||
| 1833 | assert_equal 0 [R 0 cluster countkeysinslot 3] | ||
| 1834 | assert_equal 500 [R 0 cluster countkeysinslot 4] | ||
| 1835 | |||
| 1836 | # cleanup | ||
| 1837 | R 0 debug asm-trim-method default | ||
| 1838 | R 3 debug asm-trim-method default | ||
| 1839 | R 0 CLUSTER MIGRATION IMPORT 0 1 3 3 | ||
| 1840 | wait_for_asm_done | ||
| 1841 | R 0 flushall | ||
| 1842 | R 1 flushall | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | test "Test active-trim clears partially imported keys on cancel" { | ||
| 1846 | R 1 debug asm-trim-method active | ||
| 1847 | R 4 debug asm-trim-method active | ||
| 1848 | |||
| 1849 | # Rdb delivery will take 10 seconds | ||
| 1850 | R 0 config set rdb-key-save-delay 10000 | ||
| 1851 | populate_slot 250 -slot 0 | ||
| 1852 | populate_slot 250 -slot 1 | ||
| 1853 | populate_slot 250 -slot 3 | ||
| 1854 | populate_slot 250 -slot 4 | ||
| 1855 | |||
| 1856 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 1857 | after 2000 | ||
| 1858 | R 1 CLUSTER MIGRATION CANCEL ALL | ||
| 1859 | wait_for_asm_done | ||
| 1860 | |||
| 1861 | assert_morethan [CI 1 cluster_slot_migration_active_trim_current_job_keys] 0 | ||
| 1862 | assert_morethan [CI 4 cluster_slot_migration_active_trim_current_job_trimmed] 0 | ||
| 1863 | |||
| 1864 | assert_equal 1000 [R 0 dbsize] | ||
| 1865 | assert_equal 1000 [R 3 dbsize] | ||
| 1866 | assert_equal 0 [R 1 dbsize] | ||
| 1867 | assert_equal 0 [R 4 dbsize] | ||
| 1868 | |||
| 1869 | # Cleanup | ||
| 1870 | R 1 debug asm-trim-method default | ||
| 1871 | R 4 debug asm-trim-method default | ||
| 1872 | R 0 config set rdb-key-save-delay 0 | ||
| 1873 | } | ||
| 1874 | |||
| 1875 | test "Test active-trim clears partially imported keys on failover" { | ||
| 1876 | R 1 debug asm-trim-method active | ||
| 1877 | R 4 debug asm-trim-method active | ||
| 1878 | |||
| 1879 | # Rdb delivery will take 10 seconds | ||
| 1880 | R 0 config set rdb-key-save-delay 10000 | ||
| 1881 | |||
| 1882 | populate_slot 250 -slot 0 | ||
| 1883 | populate_slot 250 -slot 1 | ||
| 1884 | populate_slot 250 -slot 3 | ||
| 1885 | populate_slot 250 -slot 4 | ||
| 1886 | |||
| 1887 | set prev_trim_started_1 [CI 1 cluster_slot_migration_stats_active_trim_started] | ||
| 1888 | set prev_trim_started_4 [CI 4 cluster_slot_migration_stats_active_trim_started] | ||
| 1889 | |||
| 1890 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 1891 | after 2000 | ||
| 1892 | failover_and_wait_for_done 4 | ||
| 1893 | wait_for_asm_done | ||
| 1894 | |||
| 1895 | # Verify there is at least one trim job started | ||
| 1896 | assert_morethan [CI 1 cluster_slot_migration_stats_active_trim_started] $prev_trim_started_1 | ||
| 1897 | assert_morethan [CI 4 cluster_slot_migration_stats_active_trim_started] $prev_trim_started_4 | ||
| 1898 | |||
| 1899 | assert_equal 1000 [R 0 dbsize] | ||
| 1900 | assert_equal 1000 [R 3 dbsize] | ||
| 1901 | assert_equal 0 [R 1 dbsize] | ||
| 1902 | assert_equal 0 [R 4 dbsize] | ||
| 1903 | |||
| 1904 | # Cleanup | ||
| 1905 | failover_and_wait_for_done 1 | ||
| 1906 | R 1 debug asm-trim-method default | ||
| 1907 | R 4 debug asm-trim-method default | ||
| 1908 | R 0 config set rdb-key-save-delay 0 | ||
| 1909 | R 0 flushall | ||
| 1910 | R 1 flushall | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | test "Test import task does not start if active trim is in progress for the same slots" { | ||
| 1914 | # Active trim will be scheduled but it won't run | ||
| 1915 | R 0 flushall | ||
| 1916 | R 1 flushall | ||
| 1917 | R 0 debug asm-trim-method active -1 | ||
| 1918 | |||
| 1919 | populate_slot 500 -slot 0 | ||
| 1920 | populate_slot 500 -slot 1 | ||
| 1921 | |||
| 1922 | # Migrate 1000 keys | ||
| 1923 | R 1 CLUSTER MIGRATION IMPORT 0 1 | ||
| 1924 | wait_for_condition 1000 10 { | ||
| 1925 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 1926 | [CI 0 cluster_slot_migration_active_trim_running] == 1 | ||
| 1927 | } else { | ||
| 1928 | fail "migrate failed" | ||
| 1929 | } | ||
| 1930 | |||
| 1931 | # Try to migrate slots back | ||
| 1932 | R 0 CLUSTER MIGRATION IMPORT 0 1 | ||
| 1933 | wait_for_log_messages 0 {"*Can not start import task*trim in progress for some of the slots*"} 0 1000 10 | ||
| 1934 | |||
| 1935 | # Enabled active trim and verify slots are imported back | ||
| 1936 | R 0 debug asm-trim-method active 0 | ||
| 1937 | wait_for_asm_done | ||
| 1938 | |||
| 1939 | assert_equal 1000 [R 0 dbsize] | ||
| 1940 | assert_equal 500 [R 0 cluster countkeysinslot 0] | ||
| 1941 | assert_equal 500 [R 0 cluster countkeysinslot 1] | ||
| 1942 | |||
| 1943 | # cleanup | ||
| 1944 | R 0 debug asm-trim-method default | ||
| 1945 | R 0 flushall | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | test "Rdb save during active trim should skip keys in trimmed slots" { | ||
| 1949 | # Insert some delay to activate trim | ||
| 1950 | R 0 debug asm-trim-method active 1000 | ||
| 1951 | R 0 config set repl-diskless-sync-delay 0 | ||
| 1952 | R 0 flushall | ||
| 1953 | |||
| 1954 | populate_slot 5000 -idx 0 -slot 0 | ||
| 1955 | populate_slot 5000 -idx 0 -slot 1 | ||
| 1956 | populate_slot 5000 -idx 0 -slot 2 | ||
| 1957 | |||
| 1958 | # Start migration and wait until trim is in progress | ||
| 1959 | R 1 CLUSTER MIGRATION IMPORT 0 1 | ||
| 1960 | wait_for_condition 1000 10 { | ||
| 1961 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 1962 | [CI 0 cluster_slot_migration_active_trim_running] == 1 && | ||
| 1963 | [S 0 rdb_bgsave_in_progress] == 0 | ||
| 1964 | } else { | ||
| 1965 | puts "[CI 0 cluster_slot_migration_active_tasks]" | ||
| 1966 | puts "[CI 0 cluster_slot_migration_active_trim_running]" | ||
| 1967 | fail "trim failed" | ||
| 1968 | } | ||
| 1969 | |||
| 1970 | # Trigger save during active trim | ||
| 1971 | R 0 save | ||
| 1972 | # Wait until the log contains a "keys skipped" message with a non-zero value | ||
| 1973 | wait_for_log_messages 0 {"*BGSAVE done, 5000 keys saved, [1-9]* keys skipped*"} 0 1000 10 | ||
| 1974 | |||
| 1975 | restart_server 0 yes no yes nosave | ||
| 1976 | assert_equal 5000 [R 0 dbsize] | ||
| 1977 | assert_equal 0 [R 0 cluster countkeysinslot 0] | ||
| 1978 | assert_equal 0 [R 0 cluster countkeysinslot 1] | ||
| 1979 | assert_equal 5000 [R 0 cluster countkeysinslot 2] | ||
| 1980 | |||
| 1981 | # Cleanup | ||
| 1982 | wait_for_cluster_propagation | ||
| 1983 | wait_for_cluster_state "ok" | ||
| 1984 | R 0 flushall | ||
| 1985 | R 1 flushall | ||
| 1986 | R 0 save | ||
| 1987 | R 0 CLUSTER MIGRATION IMPORT 0 1 | ||
| 1988 | wait_for_asm_done | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | test "AOF rewrite during active trim should skip keys in trimmed slots" { | ||
| 1992 | R 0 debug asm-trim-method active 1000 | ||
| 1993 | R 0 config set repl-diskless-sync-delay 0 | ||
| 1994 | R 0 config set aof-use-rdb-preamble no | ||
| 1995 | R 0 config set appendonly yes | ||
| 1996 | R 0 config rewrite | ||
| 1997 | R 0 flushall | ||
| 1998 | populate_slot 5000 -idx 0 -slot 0 | ||
| 1999 | populate_slot 5000 -idx 0 -slot 1 | ||
| 2000 | populate_slot 5000 -idx 0 -slot 2 | ||
| 2001 | |||
| 2002 | R 1 CLUSTER MIGRATION IMPORT 0 1 | ||
| 2003 | wait_for_condition 1000 10 { | ||
| 2004 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 2005 | [CI 0 cluster_slot_migration_active_trim_running] == 1 | ||
| 2006 | } else { | ||
| 2007 | puts "[CI 0 cluster_slot_migration_active_tasks]" | ||
| 2008 | puts "[CI 0 cluster_slot_migration_active_trim_running]" | ||
| 2009 | fail "trim failed" | ||
| 2010 | } | ||
| 2011 | |||
| 2012 | wait_for_condition 50 100 { | ||
| 2013 | [S 0 rdb_bgsave_in_progress] == 0 | ||
| 2014 | } else { | ||
| 2015 | fail "bgsave is in progress" | ||
| 2016 | } | ||
| 2017 | |||
| 2018 | R 0 bgrewriteaof | ||
| 2019 | # Wait until the log contains a "keys skipped" message with a non-zero value | ||
| 2020 | wait_for_log_messages 0 {"*AOF rewrite done, [1-9]* keys saved, [1-9]* keys skipped*"} 0 1000 10 | ||
| 2021 | |||
| 2022 | restart_server 0 yes no yes nosave | ||
| 2023 | assert_equal 5000 [R 0 dbsize] | ||
| 2024 | assert_equal 0 [R 0 cluster countkeysinslot 0] | ||
| 2025 | assert_equal 0 [R 0 cluster countkeysinslot 1] | ||
| 2026 | assert_equal 5000 [R 0 cluster countkeysinslot 2] | ||
| 2027 | |||
| 2028 | # cleanup | ||
| 2029 | R 0 config set appendonly no | ||
| 2030 | R 0 config rewrite | ||
| 2031 | restart_server 0 yes no yes nosave | ||
| 2032 | wait_for_cluster_propagation | ||
| 2033 | wait_for_cluster_state "ok" | ||
| 2034 | R 0 flushall | ||
| 2035 | R 1 flushall | ||
| 2036 | R 0 save | ||
| 2037 | R 0 CLUSTER MIGRATION IMPORT 0 1 | ||
| 2038 | wait_for_asm_done | ||
| 2039 | } | ||
| 2040 | |||
| 2041 | test "Pause actions will stop active trimming" { | ||
| 2042 | R 0 debug asm-trim-method active 1000 | ||
| 2043 | R 0 config set repl-diskless-sync-delay 0 | ||
| 2044 | R 0 flushall | ||
| 2045 | populate_slot 10000 -idx 0 -slot 0 | ||
| 2046 | |||
| 2047 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2048 | wait_for_condition 1000 10 { | ||
| 2049 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 2050 | [CI 0 cluster_slot_migration_active_trim_running] == 1 | ||
| 2051 | } else { | ||
| 2052 | puts "[CI 0 cluster_slot_migration_active_tasks]" | ||
| 2053 | puts "[CI 0 cluster_slot_migration_active_trim_running]" | ||
| 2054 | fail "trim failed" | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | # Pause the server and verify no keys are trimmed | ||
| 2058 | R 0 client pause 100000 write ;# pause 100s | ||
| 2059 | set prev [CI 0 cluster_slot_migration_active_trim_current_job_trimmed] | ||
| 2060 | after 1000 ; # wait some time to see if any keys are trimmed | ||
| 2061 | set curr [CI 0 cluster_slot_migration_active_trim_current_job_trimmed] | ||
| 2062 | assert_equal $prev $curr | ||
| 2063 | |||
| 2064 | R 0 client unpause | ||
| 2065 | R 0 debug asm-trim-method default | ||
| 2066 | wait_for_asm_done | ||
| 2067 | assert_equal 0 [R 0 dbsize] | ||
| 2068 | |||
| 2069 | # revert | ||
| 2070 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2071 | wait_for_asm_done | ||
| 2072 | assert_equal 10000 [R 0 dbsize] | ||
| 2073 | } | ||
| 2074 | |||
| 2075 | foreach diskless_load {"disabled" "swapdb" "on-empty-db"} { | ||
| 2076 | test "Test fullsync cancels active trim (repl-diskless-load $diskless_load)" { | ||
| 2077 | R 3 debug asm-trim-method active -10 | ||
| 2078 | R 3 config set repl-diskless-load $diskless_load | ||
| 2079 | R 0 flushall | ||
| 2080 | |||
| 2081 | R 0 config set repl-diskless-sync-delay 0 | ||
| 2082 | populate_slot 10000 -idx 0 -slot 0 | ||
| 2083 | |||
| 2084 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2085 | wait_for_condition 1000 10 { | ||
| 2086 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 2087 | [CI 0 cluster_slot_migration_active_trim_running] == 0 && | ||
| 2088 | [CI 3 cluster_slot_migration_active_trim_running] == 1 | ||
| 2089 | } else { | ||
| 2090 | puts "[CI 0 cluster_slot_migration_active_tasks]" | ||
| 2091 | puts "[CI 0 cluster_slot_migration_active_trim_running]" | ||
| 2092 | puts "[CI 3 cluster_slot_migration_active_trim_running]" | ||
| 2093 | fail "trim failed" | ||
| 2094 | } | ||
| 2095 | |||
| 2096 | set prev_cancelled [CI 3 cluster_slot_migration_stats_active_trim_cancelled] | ||
| 2097 | R 0 config set client-output-buffer-limit "replica 1024 0 0" | ||
| 2098 | |||
| 2099 | # Trigger a fullsync | ||
| 2100 | populate_slot 1 -idx 0 -size 2000000 -slot 2 | ||
| 2101 | |||
| 2102 | wait_for_condition 1000 10 { | ||
| 2103 | [CI 3 cluster_slot_migration_active_trim_running] == 0 && | ||
| 2104 | [CI 3 cluster_slot_migration_stats_active_trim_cancelled] == $prev_cancelled + 1 | ||
| 2105 | } else { | ||
| 2106 | puts "[CI 3 cluster_slot_migration_active_trim_running]" | ||
| 2107 | puts "[CI 3 cluster_slot_migration_stats_active_trim_cancelled]" | ||
| 2108 | fail "trim failed" | ||
| 2109 | } | ||
| 2110 | |||
| 2111 | R 3 debug asm-trim-method active 0 | ||
| 2112 | R 3 config set repl-diskless-load disabled | ||
| 2113 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2114 | wait_for_asm_done | ||
| 2115 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 2116 | assert_equal 10001 [R 0 dbsize] | ||
| 2117 | assert_equal 10001 [R 3 dbsize] | ||
| 2118 | assert_equal 0 [R 1 dbsize] | ||
| 2119 | assert_equal 0 [R 4 dbsize] | ||
| 2120 | R 0 flushall | ||
| 2121 | } | ||
| 2122 | } | ||
| 2123 | |||
| 2124 | test "Test importing slots while active-trim is in progress for the same slots on replica" { | ||
| 2125 | R 3 debug asm-trim-method active 10000 | ||
| 2126 | R 0 flushall | ||
| 2127 | populate_slot 10000 -slot 0 | ||
| 2128 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 2129 | |||
| 2130 | # Wait until active trim is in progress on replica | ||
| 2131 | R 1 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2132 | wait_for_condition 1000 10 { | ||
| 2133 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 2134 | [CI 0 cluster_slot_migration_active_trim_running] == 0 && | ||
| 2135 | [CI 3 cluster_slot_migration_active_trim_running] == 1 | ||
| 2136 | } else { | ||
| 2137 | puts "[CI 0 cluster_slot_migration_active_tasks]" | ||
| 2138 | puts "[CI 0 cluster_slot_migration_active_trim_running]" | ||
| 2139 | puts "[CI 3 cluster_slot_migration_active_trim_running]" | ||
| 2140 | fail "trim failed" | ||
| 2141 | } | ||
| 2142 | |||
| 2143 | set loglines [count_log_lines -3] | ||
| 2144 | |||
| 2145 | # Get slots back | ||
| 2146 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2147 | wait_for_condition 1000 20 { | ||
| 2148 | [CI 0 cluster_slot_migration_active_tasks] == 1 && | ||
| 2149 | [CI 0 cluster_slot_migration_active_trim_running] == 0 && | ||
| 2150 | [CI 3 cluster_slot_migration_active_trim_running] == 1 | ||
| 2151 | } else { | ||
| 2152 | fail "trim failed" | ||
| 2153 | } | ||
| 2154 | |||
| 2155 | # Verify replica blocks master until trim is done | ||
| 2156 | wait_for_log_messages -3 {"*Blocking master client until trim job is done*"} $loglines 1000 30 | ||
| 2157 | R 3 debug asm-trim-method active 0 | ||
| 2158 | wait_for_log_messages -3 {"*Unblocking master client after active trim*"} $loglines 1000 30 | ||
| 2159 | |||
| 2160 | wait_for_asm_done | ||
| 2161 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 2162 | assert_equal 10000 [R 0 dbsize] | ||
| 2163 | assert_equal 10000 [R 3 dbsize] | ||
| 2164 | assert_equal 0 [R 1 dbsize] | ||
| 2165 | assert_equal 0 [R 4 dbsize] | ||
| 2166 | } | ||
| 2167 | |||
| 2168 | test "TRIMSLOTS should not trim slots that this node is serving" { | ||
| 2169 | assert_error {*the slot 0 is served by this node*} {R 0 trimslots ranges 1 0 0} | ||
| 2170 | assert_error {*READONLY*} {R 3 trimslots ranges 1 0 100} | ||
| 2171 | assert_equal {OK} [R 0 trimslots ranges 1 16383 16383] | ||
| 2172 | assert_error {*READONLY*} {R 3 trimslots ranges 1 16383 16383} | ||
| 2173 | } | ||
| 2174 | |||
| 2175 | test "Trigger multiple active trim jobs at the same time" { | ||
| 2176 | R 1 debug asm-trim-method active 0 | ||
| 2177 | R 1 flushall | ||
| 2178 | |||
| 2179 | set prev_trim_done [CI 1 cluster_slot_migration_stats_active_trim_completed] | ||
| 2180 | |||
| 2181 | R 1 debug populate 1000 [slot_prefix 0] 100 | ||
| 2182 | R 1 debug populate 1000 [slot_prefix 1] 100 | ||
| 2183 | R 1 debug populate 1000 [slot_prefix 2] 100 | ||
| 2184 | |||
| 2185 | R 1 multi | ||
| 2186 | R 1 trimslots ranges 1 0 0 | ||
| 2187 | R 1 trimslots ranges 1 1 1 | ||
| 2188 | R 1 trimslots ranges 1 2 2 | ||
| 2189 | R 1 exec | ||
| 2190 | |||
| 2191 | wait_for_condition 1000 10 { | ||
| 2192 | [CI 1 cluster_slot_migration_stats_active_trim_completed] == $prev_trim_done + 3 | ||
| 2193 | } else { | ||
| 2194 | fail "active trim failed" | ||
| 2195 | } | ||
| 2196 | |||
| 2197 | R 1 flushall | ||
| 2198 | R 1 debug asm-trim-method default | ||
| 2199 | } | ||
| 2200 | |||
| 2201 | test "Restart will clean up unowned slot keys" { | ||
| 2202 | R 1 flushall | ||
| 2203 | |||
| 2204 | # generate 1000 keys belonging to slot 0 | ||
| 2205 | R 1 debug populate 1000 [slot_prefix 0] 100 | ||
| 2206 | assert {[scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1000} | ||
| 2207 | |||
| 2208 | # restart node-1 | ||
| 2209 | restart_server -1 true false true save | ||
| 2210 | wait_for_cluster_propagation | ||
| 2211 | wait_for_cluster_state "ok" | ||
| 2212 | |||
| 2213 | # Node-1 has no keys since unowned slot 0 keys were cleaned up during restart | ||
| 2214 | assert {[scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] == {}} | ||
| 2215 | |||
| 2216 | R 1 flushall | ||
| 2217 | } | ||
| 2218 | |||
| 2219 | test "Test active trim is used when client tracking is used" { | ||
| 2220 | R 0 flushall | ||
| 2221 | R 1 flushall | ||
| 2222 | R 0 debug asm-trim-method default | ||
| 2223 | R 1 debug asm-trim-method default | ||
| 2224 | |||
| 2225 | set prev_active_trim [CI 0 cluster_slot_migration_stats_active_trim_completed] | ||
| 2226 | |||
| 2227 | # Setup a tracking client that is redirected to a pubsub client | ||
| 2228 | set rd_redirection [redis_deferring_client] | ||
| 2229 | $rd_redirection client id | ||
| 2230 | set redir_id [$rd_redirection read] | ||
| 2231 | $rd_redirection subscribe __redis__:invalidate | ||
| 2232 | $rd_redirection read ; # Consume the SUBSCRIBE reply. | ||
| 2233 | |||
| 2234 | # setup tracking | ||
| 2235 | set key0 [slot_key 0 key] | ||
| 2236 | R 0 CLIENT TRACKING on REDIRECT $redir_id | ||
| 2237 | R 0 SET $key0 1 | ||
| 2238 | R 0 GET $key0 | ||
| 2239 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2240 | wait_for_asm_done | ||
| 2241 | |||
| 2242 | wait_for_condition 1000 10 { | ||
| 2243 | [CI 0 cluster_slot_migration_stats_active_trim_completed] == [expr $prev_active_trim + 1] | ||
| 2244 | } else { | ||
| 2245 | fail "active trim did not happen" | ||
| 2246 | } | ||
| 2247 | |||
| 2248 | # Verify the tracking client received the invalidation message | ||
| 2249 | set msg [$rd_redirection read] | ||
| 2250 | set head [lindex $msg 0] | ||
| 2251 | |||
| 2252 | if {$head eq "message"} { | ||
| 2253 | # RESP 2 | ||
| 2254 | set got_key [lindex [lindex $msg 2] 0] | ||
| 2255 | } elseif {$head eq "invalidate"} { | ||
| 2256 | # RESP 3 | ||
| 2257 | set got_key [lindex $msg 1 0] | ||
| 2258 | } else { | ||
| 2259 | fail "unexpected invalidation message: $msg" | ||
| 2260 | } | ||
| 2261 | assert_equal $got_key $key0 | ||
| 2262 | |||
| 2263 | # cleanup | ||
| 2264 | $rd_redirection close | ||
| 2265 | wait_for_asm_done | ||
| 2266 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2267 | wait_for_asm_done | ||
| 2268 | R 0 flushall | ||
| 2269 | } | ||
| 2270 | } | ||
| 2271 | |||
| 2272 | set testmodule [file normalize tests/modules/atomicslotmigration.so] | ||
| 2273 | |||
| 2274 | start_cluster 3 6 [list tags {external:skip cluster modules} config_lines [list loadmodule $testmodule cluster-node-timeout 60000 cluster-allow-replica-migration no]] { | ||
| 2275 | test "Module api sanity" { | ||
| 2276 | R 0 asm.sanity ;# on master | ||
| 2277 | R 3 asm.sanity ;# on replica | ||
| 2278 | } | ||
| 2279 | |||
| 2280 | test "Module replicate cross slot command" { | ||
| 2281 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 2282 | set listkey [slot_key 0 "asmlist"] | ||
| 2283 | # replicate cross slot command during migrating | ||
| 2284 | R 0 asm.lpush_replicate_crossslot_command $listkey "item1" | ||
| 2285 | |||
| 2286 | # node 0 will fail due to cross slot | ||
| 2287 | wait_for_condition 2000 10 { | ||
| 2288 | [string match {*canceled*} [migration_status 0 $task_id state]] && | ||
| 2289 | [string match {*cross slot*} [migration_status 0 $task_id last_error]] | ||
| 2290 | } else { | ||
| 2291 | fail "ASM task did not fail" | ||
| 2292 | } | ||
| 2293 | R 1 CLUSTER MIGRATION CANCEL ID $task_id | ||
| 2294 | |||
| 2295 | # sanity check if lpush replicated correctly to the replica | ||
| 2296 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 2297 | assert_equal {item1} [R 0 lrange $listkey 0 -1] | ||
| 2298 | R 3 readonly | ||
| 2299 | assert_equal {item1} [R 3 lrange $listkey 0 -1] | ||
| 2300 | } | ||
| 2301 | |||
| 2302 | test "Test RM_ClusterCanAccessKeysInSlot" { | ||
| 2303 | # Test invalid slots | ||
| 2304 | assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot -1] | ||
| 2305 | assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot 20000] | ||
| 2306 | assert_equal 0 [R 2 asm.cluster_can_access_keys_in_slot 16384] | ||
| 2307 | assert_equal 0 [R 5 asm.cluster_can_access_keys_in_slot 16384] | ||
| 2308 | |||
| 2309 | # Test on a master-replica pair | ||
| 2310 | assert_equal 1 [R 0 asm.cluster_can_access_keys_in_slot 0] | ||
| 2311 | assert_equal 1 [R 0 asm.cluster_can_access_keys_in_slot 100] | ||
| 2312 | assert_equal 1 [R 3 asm.cluster_can_access_keys_in_slot 0] | ||
| 2313 | assert_equal 1 [R 3 asm.cluster_can_access_keys_in_slot 100] | ||
| 2314 | |||
| 2315 | # Test on a master-replica pair | ||
| 2316 | assert_equal 1 [R 2 asm.cluster_can_access_keys_in_slot 16383] | ||
| 2317 | assert_equal 1 [R 5 asm.cluster_can_access_keys_in_slot 16383] | ||
| 2318 | } | ||
| 2319 | |||
| 2320 | test "Test RM_ClusterCanAccessKeysInSlot returns false for unowned slots" { | ||
| 2321 | # Active trim will be scheduled but it won't run | ||
| 2322 | R 0 debug asm-trim-method active -1 | ||
| 2323 | R 3 debug asm-trim-method active -1 | ||
| 2324 | |||
| 2325 | setup_slot_migration_with_delay 0 1 0 100 3 1000000 | ||
| 2326 | |||
| 2327 | # Verify importing slots are not local | ||
| 2328 | assert_equal 0 [R 1 asm.cluster_can_access_keys_in_slot 0] | ||
| 2329 | assert_equal 0 [R 1 asm.cluster_can_access_keys_in_slot 100] | ||
| 2330 | assert_equal 0 [R 4 asm.cluster_can_access_keys_in_slot 0] | ||
| 2331 | assert_equal 0 [R 4 asm.cluster_can_access_keys_in_slot 100] | ||
| 2332 | |||
| 2333 | wait_for_condition 1000 10 { | ||
| 2334 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 2335 | [CI 0 cluster_slot_migration_active_trim_running] == 1 && | ||
| 2336 | [CI 3 cluster_slot_migration_active_trim_running] == 1 | ||
| 2337 | } else { | ||
| 2338 | fail "migrate failed" | ||
| 2339 | } | ||
| 2340 | |||
| 2341 | # Wait for config propagation before checking the slot ownership on replica | ||
| 2342 | wait_for_cluster_propagation | ||
| 2343 | |||
| 2344 | # Verify slots that are being trimmed are not local | ||
| 2345 | assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot 0] | ||
| 2346 | assert_equal 0 [R 0 asm.cluster_can_access_keys_in_slot 100] | ||
| 2347 | assert_equal 0 [R 3 asm.cluster_can_access_keys_in_slot 0] | ||
| 2348 | assert_equal 0 [R 3 asm.cluster_can_access_keys_in_slot 100] | ||
| 2349 | |||
| 2350 | # Enabled active trim and wait until it is completed. | ||
| 2351 | R 0 debug asm-trim-method active 0 | ||
| 2352 | R 3 debug asm-trim-method active 0 | ||
| 2353 | wait_for_asm_done | ||
| 2354 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 2355 | |||
| 2356 | # Verify slots are local after migration | ||
| 2357 | assert_equal 1 [R 1 asm.cluster_can_access_keys_in_slot 0] | ||
| 2358 | assert_equal 1 [R 1 asm.cluster_can_access_keys_in_slot 100] | ||
| 2359 | assert_equal 1 [R 4 asm.cluster_can_access_keys_in_slot 0] | ||
| 2360 | assert_equal 1 [R 4 asm.cluster_can_access_keys_in_slot 100] | ||
| 2361 | |||
| 2362 | # cleanup | ||
| 2363 | R 0 debug asm-trim-method default | ||
| 2364 | R 3 debug asm-trim-method default | ||
| 2365 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2366 | wait_for_asm_done | ||
| 2367 | R 0 flushall | ||
| 2368 | R 1 flushall | ||
| 2369 | } | ||
| 2370 | |||
| 2371 | foreach trim_method {"active" "bg"} { | ||
| 2372 | test "Test cluster module notifications on a successful migration ($trim_method-trim)" { | ||
| 2373 | clear_module_event_log | ||
| 2374 | R 0 debug asm-trim-method $trim_method | ||
| 2375 | R 3 debug asm-trim-method $trim_method | ||
| 2376 | R 6 debug asm-trim-method $trim_method | ||
| 2377 | |||
| 2378 | # Set a key in the slot range | ||
| 2379 | set key [slot_key 0 mykey] | ||
| 2380 | R 0 set $key "value" | ||
| 2381 | |||
| 2382 | # Migrate the slot ranges | ||
| 2383 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 100 200 300] | ||
| 2384 | wait_for_asm_done | ||
| 2385 | |||
| 2386 | set src_id [R 0 cluster myid] | ||
| 2387 | set dest_id [R 1 cluster myid] | ||
| 2388 | |||
| 2389 | # Verify the events on source, both master and replica | ||
| 2390 | set migrate_event_log [list \ | ||
| 2391 | "sub: cluster-slot-migration-migrate-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \ | ||
| 2392 | "sub: cluster-slot-migration-migrate-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \ | ||
| 2393 | ] | ||
| 2394 | assert_equal [R 0 asm.get_cluster_event_log] $migrate_event_log | ||
| 2395 | assert_equal [R 3 asm.get_cluster_event_log] {} | ||
| 2396 | assert_equal [R 6 asm.get_cluster_event_log] {} | ||
| 2397 | |||
| 2398 | # Verify the events on destination, both master and replica | ||
| 2399 | set import_event_log [list \ | ||
| 2400 | "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \ | ||
| 2401 | "sub: cluster-slot-migration-import-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100,200-300" \ | ||
| 2402 | ] | ||
| 2403 | wait_for_condition 500 20 { | ||
| 2404 | [R 1 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2405 | [R 4 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2406 | [R 7 asm.get_cluster_event_log] eq $import_event_log | ||
| 2407 | } else { | ||
| 2408 | puts "R1: [R 1 asm.get_cluster_event_log]" | ||
| 2409 | puts "R4: [R 4 asm.get_cluster_event_log]" | ||
| 2410 | puts "R7: [R 7 asm.get_cluster_event_log]" | ||
| 2411 | fail "ASM import event not received" | ||
| 2412 | } | ||
| 2413 | |||
| 2414 | # Verify the trim events | ||
| 2415 | if {$trim_method eq "active"} { | ||
| 2416 | set trim_event_log [list \ | ||
| 2417 | "sub: cluster-slot-migration-trim-started, slots:0-100,200-300" \ | ||
| 2418 | "keyspace: key_trimmed, key: $key" \ | ||
| 2419 | "sub: cluster-slot-migration-trim-completed, slots:0-100,200-300" \ | ||
| 2420 | ] | ||
| 2421 | } else { | ||
| 2422 | set trim_event_log [list \ | ||
| 2423 | "sub: cluster-slot-migration-trim-background, slots:0-100,200-300" \ | ||
| 2424 | ] | ||
| 2425 | } | ||
| 2426 | wait_for_condition 500 10 { | ||
| 2427 | [R 0 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2428 | [R 3 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2429 | [R 6 asm.get_cluster_trim_event_log] eq $trim_event_log | ||
| 2430 | } else { | ||
| 2431 | fail "ASM source trim event not received" | ||
| 2432 | } | ||
| 2433 | |||
| 2434 | # cleanup | ||
| 2435 | R 0 CLUSTER MIGRATION IMPORT 0 100 200 300 | ||
| 2436 | wait_for_asm_done | ||
| 2437 | clear_module_event_log | ||
| 2438 | reset_default_trim_method | ||
| 2439 | R 0 flushall | ||
| 2440 | R 1 flushall | ||
| 2441 | } | ||
| 2442 | |||
| 2443 | test "Test cluster module notifications on a failed migration ($trim_method-trim)" { | ||
| 2444 | clear_module_event_log | ||
| 2445 | R 1 debug asm-trim-method $trim_method | ||
| 2446 | R 4 debug asm-trim-method $trim_method | ||
| 2447 | R 7 debug asm-trim-method $trim_method | ||
| 2448 | |||
| 2449 | # Set a key in the slot range | ||
| 2450 | set key [slot_key 0 mykey] | ||
| 2451 | R 0 set $key "value" | ||
| 2452 | |||
| 2453 | # Start migration and cancel it | ||
| 2454 | set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000] | ||
| 2455 | # Wait until at least one key is moved to destination | ||
| 2456 | wait_for_condition 1000 10 { | ||
| 2457 | [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1 | ||
| 2458 | } else { | ||
| 2459 | fail "Key not moved to destination" | ||
| 2460 | } | ||
| 2461 | R 1 CLUSTER MIGRATION CANCEL ID $task_id | ||
| 2462 | wait_for_asm_done | ||
| 2463 | |||
| 2464 | set src_id [R 0 cluster myid] | ||
| 2465 | set dest_id [R 1 cluster myid] | ||
| 2466 | |||
| 2467 | # Verify the events on source, both master and replica | ||
| 2468 | set migrate_event_log [list \ | ||
| 2469 | "sub: cluster-slot-migration-migrate-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2470 | "sub: cluster-slot-migration-migrate-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2471 | ] | ||
| 2472 | assert_equal [R 0 asm.get_cluster_event_log] $migrate_event_log | ||
| 2473 | assert_equal [R 3 asm.get_cluster_event_log] {} | ||
| 2474 | assert_equal [R 6 asm.get_cluster_event_log] {} | ||
| 2475 | |||
| 2476 | # Verify the events on destination, both master and replica | ||
| 2477 | set import_event_log [list \ | ||
| 2478 | "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2479 | "sub: cluster-slot-migration-import-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2480 | ] | ||
| 2481 | wait_for_condition 500 10 { | ||
| 2482 | [R 1 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2483 | [R 4 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2484 | [R 7 asm.get_cluster_event_log] eq $import_event_log | ||
| 2485 | } else { | ||
| 2486 | fail "ASM import event not received" | ||
| 2487 | } | ||
| 2488 | |||
| 2489 | # Verify the trim events on destination (partially imported keys are trimmed) | ||
| 2490 | if {$trim_method eq "active"} { | ||
| 2491 | set trim_event_log [list \ | ||
| 2492 | "sub: cluster-slot-migration-trim-started, slots:0-100" \ | ||
| 2493 | "keyspace: key_trimmed, key: $key" \ | ||
| 2494 | "sub: cluster-slot-migration-trim-completed, slots:0-100" \ | ||
| 2495 | ] | ||
| 2496 | } else { | ||
| 2497 | set trim_event_log [list \ | ||
| 2498 | "sub: cluster-slot-migration-trim-background, slots:0-100" \ | ||
| 2499 | ] | ||
| 2500 | } | ||
| 2501 | wait_for_condition 500 10 { | ||
| 2502 | [R 1 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2503 | [R 4 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2504 | [R 7 asm.get_cluster_trim_event_log] eq $trim_event_log | ||
| 2505 | } else { | ||
| 2506 | fail "ASM destination trim event not received" | ||
| 2507 | } | ||
| 2508 | |||
| 2509 | # cleanup | ||
| 2510 | clear_module_event_log | ||
| 2511 | reset_default_trim_method | ||
| 2512 | wait_for_asm_done | ||
| 2513 | R 0 flushall | ||
| 2514 | R 1 flushall | ||
| 2515 | } | ||
| 2516 | |||
| 2517 | test "Test cluster module notifications on failover ($trim_method-trim)" { | ||
| 2518 | # NOTE: cluster legacy may have a bug, multiple manual failover will fail, | ||
| 2519 | # so only perform one round of failover test, fix it later | ||
| 2520 | if {$trim_method eq "bg"} { | ||
| 2521 | clear_module_event_log | ||
| 2522 | R 1 debug asm-trim-method $trim_method | ||
| 2523 | R 4 debug asm-trim-method $trim_method | ||
| 2524 | R 7 debug asm-trim-method $trim_method | ||
| 2525 | |||
| 2526 | # Set a key in the slot range | ||
| 2527 | set key [slot_key 0 mykey] | ||
| 2528 | R 0 set $key "value" | ||
| 2529 | |||
| 2530 | # Start migration | ||
| 2531 | set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000] | ||
| 2532 | # Wait until at least one key is moved to destination | ||
| 2533 | wait_for_condition 1000 10 { | ||
| 2534 | [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1 | ||
| 2535 | } else { | ||
| 2536 | fail "Key not moved to destination" | ||
| 2537 | } | ||
| 2538 | |||
| 2539 | failover_and_wait_for_done 4 | ||
| 2540 | wait_for_asm_done | ||
| 2541 | |||
| 2542 | set src_id [R 0 cluster myid] | ||
| 2543 | set dest_id [R 1 cluster myid] | ||
| 2544 | |||
| 2545 | # Verify the events on source, both master and replica | ||
| 2546 | set migrate_event_log [list \ | ||
| 2547 | "sub: cluster-slot-migration-migrate-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2548 | "sub: cluster-slot-migration-migrate-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2549 | ] | ||
| 2550 | assert_equal [R 0 asm.get_cluster_event_log] $migrate_event_log | ||
| 2551 | assert_equal [R 3 asm.get_cluster_event_log] {} | ||
| 2552 | assert_equal [R 6 asm.get_cluster_event_log] {} | ||
| 2553 | |||
| 2554 | # Verify the events on destination, both master and replica | ||
| 2555 | set import_event_log [list \ | ||
| 2556 | "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2557 | "sub: cluster-slot-migration-import-failed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2558 | ] | ||
| 2559 | wait_for_condition 500 20 { | ||
| 2560 | [R 1 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2561 | [R 4 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2562 | [R 7 asm.get_cluster_event_log] eq $import_event_log | ||
| 2563 | } else { | ||
| 2564 | puts "R1: [R 1 asm.get_cluster_event_log]" | ||
| 2565 | puts "R4: [R 4 asm.get_cluster_event_log]" | ||
| 2566 | puts "R7: [R 7 asm.get_cluster_event_log]" | ||
| 2567 | fail "ASM import event not received" | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | # Verify the trim events on destination (partially imported keys are trimmed) | ||
| 2571 | # NOTE: after failover, the new master will initiate the slot trimming, | ||
| 2572 | # and only slot 0 has data, so only slot 0 is trimmed | ||
| 2573 | if {$trim_method eq "active"} { | ||
| 2574 | set trim_event_log [list \ | ||
| 2575 | "sub: cluster-slot-migration-trim-started, slots:0-0" \ | ||
| 2576 | "keyspace: key_trimmed, key: $key" \ | ||
| 2577 | "sub: cluster-slot-migration-trim-completed, slots:0-0" \ | ||
| 2578 | ] | ||
| 2579 | } else { | ||
| 2580 | set trim_event_log [list \ | ||
| 2581 | "sub: cluster-slot-migration-trim-background, slots:0-0" \ | ||
| 2582 | ] | ||
| 2583 | } | ||
| 2584 | wait_for_condition 500 20 { | ||
| 2585 | [R 1 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2586 | [R 4 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2587 | [R 7 asm.get_cluster_trim_event_log] eq $trim_event_log | ||
| 2588 | } else { | ||
| 2589 | puts "R1: [R 1 asm.get_cluster_trim_event_log]" | ||
| 2590 | puts "R4: [R 4 asm.get_cluster_trim_event_log]" | ||
| 2591 | puts "R7: [R 7 asm.get_cluster_trim_event_log]" | ||
| 2592 | fail "ASM destination trim event not received" | ||
| 2593 | } | ||
| 2594 | |||
| 2595 | # cleanup | ||
| 2596 | failover_and_wait_for_done 1 | ||
| 2597 | clear_module_event_log | ||
| 2598 | reset_default_trim_method | ||
| 2599 | R 0 flushall | ||
| 2600 | R 1 flushall | ||
| 2601 | } | ||
| 2602 | } | ||
| 2603 | } | ||
| 2604 | |||
| 2605 | foreach with_rdb {"with" "without"} { | ||
| 2606 | test "Test cluster module notifications when replica restart $with_rdb RDB during importing" { | ||
| 2607 | clear_module_event_log | ||
| 2608 | R 1 debug asm-trim-method $trim_method | ||
| 2609 | R 4 debug asm-trim-method $trim_method | ||
| 2610 | R 7 debug asm-trim-method $trim_method | ||
| 2611 | R 4 config set save "" | ||
| 2612 | |||
| 2613 | set src_id [R 0 cluster myid] | ||
| 2614 | set dest_id [R 1 cluster myid] | ||
| 2615 | |||
| 2616 | # Set a key in the slot range | ||
| 2617 | set key [slot_key 0 mykey] | ||
| 2618 | R 0 set $key "value" | ||
| 2619 | |||
| 2620 | # Start migration, 2s delay | ||
| 2621 | set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000] | ||
| 2622 | # Wait until at least one key is moved to destination | ||
| 2623 | wait_for_condition 1000 10 { | ||
| 2624 | [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1 | ||
| 2625 | } else { | ||
| 2626 | fail "Key not moved to destination" | ||
| 2627 | } | ||
| 2628 | wait_for_ofs_sync [Rn 1] [Rn 4] | ||
| 2629 | |||
| 2630 | # restart node 4 | ||
| 2631 | if {$with_rdb eq "with"} { | ||
| 2632 | restart_server -4 true false true save ;# rdb save | ||
| 2633 | } else { | ||
| 2634 | restart_server -4 true false true nosave ;# no rdb saved | ||
| 2635 | } | ||
| 2636 | wait_for_cluster_propagation | ||
| 2637 | |||
| 2638 | wait_for_asm_done | ||
| 2639 | |||
| 2640 | # started and completed are paired, and not duplicated | ||
| 2641 | set import_event_log [list \ | ||
| 2642 | "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2643 | "sub: cluster-slot-migration-import-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2644 | ] | ||
| 2645 | wait_for_condition 500 10 { | ||
| 2646 | [R 1 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2647 | [R 4 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2648 | [R 7 asm.get_cluster_event_log] eq $import_event_log | ||
| 2649 | } else { | ||
| 2650 | fail "ASM import event not received" | ||
| 2651 | } | ||
| 2652 | |||
| 2653 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2654 | wait_for_asm_done | ||
| 2655 | R 4 save ;# save an empty rdb to override previous one | ||
| 2656 | clear_module_event_log | ||
| 2657 | reset_default_trim_method | ||
| 2658 | R 0 flushall | ||
| 2659 | R 1 flushall | ||
| 2660 | } | ||
| 2661 | } | ||
| 2662 | |||
| 2663 | test "Test cluster module notifications when replica is disconnected and full resync after importing" { | ||
| 2664 | clear_module_event_log | ||
| 2665 | R 1 debug asm-trim-method $trim_method | ||
| 2666 | R 4 debug asm-trim-method $trim_method | ||
| 2667 | R 7 debug asm-trim-method $trim_method | ||
| 2668 | |||
| 2669 | set src_id [R 0 cluster myid] | ||
| 2670 | set dest_id [R 1 cluster myid] | ||
| 2671 | |||
| 2672 | # Set a key in the slot range | ||
| 2673 | set key [slot_key 0 mykey] | ||
| 2674 | R 0 set $key "value" | ||
| 2675 | |||
| 2676 | # Start migration, 2s delay | ||
| 2677 | set task_id [setup_slot_migration_with_delay 0 1 0 100 0 2000000] | ||
| 2678 | # Wait until at least one key is moved to destination | ||
| 2679 | wait_for_condition 1000 10 { | ||
| 2680 | [scan [regexp -inline {keys\=([\d]*)} [R 1 info keyspace]] keys=%d] >= 1 | ||
| 2681 | } else { | ||
| 2682 | fail "Key not moved to destination" | ||
| 2683 | } | ||
| 2684 | wait_for_ofs_sync [Rn 1] [Rn 4] | ||
| 2685 | |||
| 2686 | # puase node-4 | ||
| 2687 | set r4_pid [S 4 process_id] | ||
| 2688 | pause_process $r4_pid | ||
| 2689 | |||
| 2690 | # set a small repl-backlog-size and write some commands to make node-4 | ||
| 2691 | # full resync when reconnecting after waking up | ||
| 2692 | set r1_full_sync [S 1 sync_full] | ||
| 2693 | R 1 config set repl-backlog-size 16kb | ||
| 2694 | R 1 client kill type replica | ||
| 2695 | set 1k_str [string repeat "a" 1024] | ||
| 2696 | for {set i 0} {$i < 2000} {incr i} { | ||
| 2697 | R 1 set [slot_key 6000] $1k_str | ||
| 2698 | } | ||
| 2699 | |||
| 2700 | # after ASM task is completed, wake up node-4 | ||
| 2701 | wait_for_condition 1000 10 { | ||
| 2702 | [CI 1 cluster_slot_migration_active_tasks] == 0 && | ||
| 2703 | [CI 1 cluster_slot_migration_active_trim_running] == 0 | ||
| 2704 | } else { | ||
| 2705 | fail "ASM tasks did not completed" | ||
| 2706 | } | ||
| 2707 | resume_process $r4_pid | ||
| 2708 | |||
| 2709 | # make sure full resync happens | ||
| 2710 | wait_for_sync [Rn 4] | ||
| 2711 | wait_for_ofs_sync [Rn 1] [Rn 4] | ||
| 2712 | assert_morethan [S 1 sync_full] $r1_full_sync | ||
| 2713 | |||
| 2714 | # started and completed are paired, and not duplicated | ||
| 2715 | set import_event_log [list \ | ||
| 2716 | "sub: cluster-slot-migration-import-started, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2717 | "sub: cluster-slot-migration-import-completed, source_node_id:$src_id, destination_node_id:$dest_id, task_id:$task_id, slots:0-100" \ | ||
| 2718 | ] | ||
| 2719 | wait_for_condition 500 10 { | ||
| 2720 | [R 1 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2721 | [R 4 asm.get_cluster_event_log] eq $import_event_log && | ||
| 2722 | [R 7 asm.get_cluster_event_log] eq $import_event_log | ||
| 2723 | } else { | ||
| 2724 | fail "ASM import event not received" | ||
| 2725 | } | ||
| 2726 | |||
| 2727 | # since ASM task is completed on node-1 before node-4 reconnects, | ||
| 2728 | # no trim event should be received on node-4 | ||
| 2729 | assert_equal {} [R 4 asm.get_cluster_trim_event_log] | ||
| 2730 | |||
| 2731 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2732 | wait_for_asm_done | ||
| 2733 | clear_module_event_log | ||
| 2734 | reset_default_trim_method | ||
| 2735 | R 0 flushall | ||
| 2736 | R 1 flushall | ||
| 2737 | } | ||
| 2738 | |||
| 2739 | test "Test new master can trim slots when migration is completed and failover occurs on source side" { | ||
| 2740 | R 0 asm.disable_trim ;# can not start slot trimming on source side | ||
| 2741 | set slot0_key [slot_key 0 mykey] | ||
| 2742 | R 0 set $slot0_key "value" | ||
| 2743 | |||
| 2744 | # migrate slot 0 from #0 to #1, and wait it completed, but not allow to trim slots | ||
| 2745 | # on source node | ||
| 2746 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 0] | ||
| 2747 | wait_for_condition 1000 10 { | ||
| 2748 | [string match {*completed*} [migration_status 0 $task_id state]] && | ||
| 2749 | [string match {*completed*} [migration_status 1 $task_id state]] | ||
| 2750 | } else { | ||
| 2751 | fail "ASM task did not complete" | ||
| 2752 | } | ||
| 2753 | # verify trim is not allowed on source node, and replica node doesn't have trim job either | ||
| 2754 | wait_for_ofs_sync [Rn 0] [Rn 3] | ||
| 2755 | assert_equal 1 [R 0 asm.trim_in_progress] | ||
| 2756 | assert_equal "value" [R 0 asm.read_pending_trim_key $slot0_key] | ||
| 2757 | assert_equal 0 [R 3 asm.trim_in_progress] | ||
| 2758 | assert_equal "value" [R 3 asm.read_pending_trim_key $slot0_key] | ||
| 2759 | |||
| 2760 | set loglines [count_log_lines 0] | ||
| 2761 | |||
| 2762 | # failover happens on source node, instance #3 become slave, #0 become master | ||
| 2763 | failover_and_wait_for_done 3 | ||
| 2764 | R 0 asm.enable_trim ;# enable trim on old master | ||
| 2765 | |||
| 2766 | # old master should cancel the pending trim job | ||
| 2767 | wait_for_log_messages 0 {"*Cancelling the pending trim job*"} $loglines 1000 10 | ||
| 2768 | |||
| 2769 | wait_for_ofs_sync [Rn 3] [Rn 0] | ||
| 2770 | # verify trim is allowed on new master, and the key is trimmed | ||
| 2771 | wait_for_condition 1000 10 { | ||
| 2772 | [R 3 asm.trim_in_progress] == 0 && | ||
| 2773 | [R 3 asm.read_pending_trim_key $slot0_key] eq "" && | ||
| 2774 | [R 0 asm.trim_in_progress] == 0 && | ||
| 2775 | [R 0 asm.read_pending_trim_key $slot0_key] eq "" | ||
| 2776 | } else { | ||
| 2777 | fail "Trim did not complete" | ||
| 2778 | } | ||
| 2779 | |||
| 2780 | # verify the trim events, use active trim since module is subscribed to trimmed event | ||
| 2781 | set trim_event_log [list \ | ||
| 2782 | "sub: cluster-slot-migration-trim-started, slots:0-0" \ | ||
| 2783 | "keyspace: key_trimmed, key: $slot0_key" \ | ||
| 2784 | "sub: cluster-slot-migration-trim-completed, slots:0-0" \ | ||
| 2785 | ] | ||
| 2786 | wait_for_condition 500 20 { | ||
| 2787 | [R 0 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2788 | [R 3 asm.get_cluster_trim_event_log] eq $trim_event_log && | ||
| 2789 | [R 6 asm.get_cluster_trim_event_log] eq $trim_event_log | ||
| 2790 | } else { | ||
| 2791 | fail "ASM destination trim event not received" | ||
| 2792 | } | ||
| 2793 | |||
| 2794 | # cleanup | ||
| 2795 | failover_and_wait_for_done 0 | ||
| 2796 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2797 | wait_for_asm_done | ||
| 2798 | clear_module_event_log | ||
| 2799 | reset_default_trim_method | ||
| 2800 | R 0 flushall | ||
| 2801 | R 1 flushall | ||
| 2802 | } | ||
| 2803 | |||
| 2804 | test "Test module replicates commands at the beginning of slot migration " { | ||
| 2805 | R 0 flushall | ||
| 2806 | R 1 flushall | ||
| 2807 | |||
| 2808 | # Sanity check | ||
| 2809 | assert_equal 0 [R 1 asm.read_keyless_cmd_val] | ||
| 2810 | assert_equal 0 [R 4 asm.read_keyless_cmd_val] | ||
| 2811 | |||
| 2812 | # Enable module command replication and set a key to be replicated | ||
| 2813 | # Module will replicate two commands: | ||
| 2814 | # 1- A keyless command: asm.keyless_cmd | ||
| 2815 | # 2- SET command for the given key and value | ||
| 2816 | set keyname [slot_key 0 modulekey] | ||
| 2817 | R 0 asm.replicate_module_command 1 $keyname "value" | ||
| 2818 | |||
| 2819 | setup_slot_migration_with_delay 0 1 0 100 | ||
| 2820 | wait_for_asm_done | ||
| 2821 | wait_for_ofs_sync [Rn 1] [Rn 4] | ||
| 2822 | |||
| 2823 | # Verify the commands are replicated | ||
| 2824 | assert_equal 1 [R 1 asm.read_keyless_cmd_val] | ||
| 2825 | assert_equal value [R 1 get $keyname] | ||
| 2826 | |||
| 2827 | # Verify the commands are replicated to replica | ||
| 2828 | R 4 readonly | ||
| 2829 | assert_equal 1 [R 4 asm.read_keyless_cmd_val] | ||
| 2830 | assert_equal value [R 4 get $keyname] | ||
| 2831 | |||
| 2832 | # cleanup | ||
| 2833 | R 0 asm.replicate_module_command 0 "" "" | ||
| 2834 | R 0 CLUSTER MIGRATION IMPORT 0 100 | ||
| 2835 | wait_for_asm_done | ||
| 2836 | R 0 flushall | ||
| 2837 | R 1 flushall | ||
| 2838 | } | ||
| 2839 | |||
| 2840 | test "Test subcommand propagation during slot migration" { | ||
| 2841 | R 0 flushall | ||
| 2842 | R 1 flushall | ||
| 2843 | set task_id [setup_slot_migration_with_delay 0 1 0 100] | ||
| 2844 | |||
| 2845 | set key [slot_key 0 mykey] | ||
| 2846 | R 0 asm.parent set $key "value" ;# execute a module subcommand | ||
| 2847 | wait_for_asm_done | ||
| 2848 | assert_equal "value" [R 1 GET $key] | ||
| 2849 | |||
| 2850 | # cleanup | ||
| 2851 | R 0 cluster migration import 0 100 | ||
| 2852 | wait_for_asm_done | ||
| 2853 | } | ||
| 2854 | |||
| 2855 | test "Test trim method selection based on module keyspace subscription" { | ||
| 2856 | R 0 debug asm-trim-method default | ||
| 2857 | R 1 debug asm-trim-method default | ||
| 2858 | |||
| 2859 | R 0 flushall | ||
| 2860 | R 1 flushall | ||
| 2861 | |||
| 2862 | populate_slot 10 -idx 0 -slot 0 | ||
| 2863 | |||
| 2864 | # Make sure module is subscribed to NOTIFY_KEY_TRIMMED event. In this | ||
| 2865 | # case, active trim must be used. | ||
| 2866 | R 0 asm.subscribe_trimmed_event 1 | ||
| 2867 | set loglines [count_log_lines 0] | ||
| 2868 | R 1 CLUSTER MIGRATION IMPORT 0 15 | ||
| 2869 | wait_for_asm_done | ||
| 2870 | wait_for_log_messages 0 {"*Active trim scheduled for slots: 0-15*"} $loglines 1000 10 | ||
| 2871 | |||
| 2872 | # Move slots back to node-0. Make sure module is not subscribed to | ||
| 2873 | # NOTIFY_KEY_TRIMMED event. In this case, background trim must be used. | ||
| 2874 | R 1 asm.subscribe_trimmed_event 0 | ||
| 2875 | set loglines [count_log_lines -1] | ||
| 2876 | R 0 CLUSTER MIGRATION IMPORT 0 15 | ||
| 2877 | wait_for_asm_done | ||
| 2878 | wait_for_log_messages -1 {"*Background trim started for slots: 0-15*"} $loglines 1000 10 | ||
| 2879 | |||
| 2880 | # cleanup | ||
| 2881 | wait_for_asm_done | ||
| 2882 | R 0 asm.subscribe_trimmed_event 1 | ||
| 2883 | R 1 asm.subscribe_trimmed_event 1 | ||
| 2884 | R 0 flushall | ||
| 2885 | R 1 flushall | ||
| 2886 | } | ||
| 2887 | |||
| 2888 | test "Verify trimmed key value can be read in the server event callback" { | ||
| 2889 | R 0 flushall | ||
| 2890 | set key [slot_key 0] | ||
| 2891 | set value "value123random" | ||
| 2892 | R 0 set $key $value | ||
| 2893 | |||
| 2894 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2895 | wait_for_asm_done | ||
| 2896 | wait_for_condition 1000 10 { | ||
| 2897 | [R 0 asm.get_last_deleted_key] eq "keyevent: key: $key, value: $value" | ||
| 2898 | } else { | ||
| 2899 | fail "Last deleted key event not received" | ||
| 2900 | } | ||
| 2901 | |||
| 2902 | # cleanup | ||
| 2903 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2904 | wait_for_asm_done | ||
| 2905 | } | ||
| 2906 | |||
| 2907 | test "Verify module cannot open a key in a slot that is being trimmed" { | ||
| 2908 | R 0 flushall | ||
| 2909 | R 0 debug asm-trim-method active -1 ;# disable active trim | ||
| 2910 | |||
| 2911 | set key [slot_key 0] | ||
| 2912 | R 0 set $key value | ||
| 2913 | |||
| 2914 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2915 | wait_for_condition 1000 10 { | ||
| 2916 | [CI 0 cluster_slot_migration_active_tasks] == 0 && | ||
| 2917 | [CI 1 cluster_slot_migration_active_tasks] == 0 && | ||
| 2918 | [CI 0 cluster_slot_migration_active_trim_running] == 1 | ||
| 2919 | } else { | ||
| 2920 | fail "migrate failed" | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | # We cannot open the key since it is in a slot being trimmed | ||
| 2924 | assert_equal {} [R 0 asm.get $key] | ||
| 2925 | |||
| 2926 | # cleanup | ||
| 2927 | R 0 debug asm-trim-method default | ||
| 2928 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2929 | wait_for_asm_done | ||
| 2930 | } | ||
| 2931 | |||
| 2932 | test "Test RM_ClusterGetLocalSlotRanges" { | ||
| 2933 | assert_equal [R 0 asm.cluster_get_local_slot_ranges] {{0 5461}} | ||
| 2934 | assert_equal [R 3 asm.cluster_get_local_slot_ranges] {{0 5461}} | ||
| 2935 | |||
| 2936 | R 0 cluster migration import 5463 6000 | ||
| 2937 | wait_for_asm_done | ||
| 2938 | wait_for_cluster_propagation | ||
| 2939 | assert_equal [R 0 asm.cluster_get_local_slot_ranges] {{0 5461} {5463 6000}} | ||
| 2940 | assert_equal [R 3 asm.cluster_get_local_slot_ranges] {{0 5461} {5463 6000}} | ||
| 2941 | |||
| 2942 | R 0 cluster migration import 5462 5462 6001 10922 | ||
| 2943 | wait_for_asm_done | ||
| 2944 | wait_for_cluster_propagation | ||
| 2945 | assert_equal [R 0 asm.cluster_get_local_slot_ranges] {{0 10922}} | ||
| 2946 | assert_equal [R 3 asm.cluster_get_local_slot_ranges] {{0 10922}} | ||
| 2947 | assert_equal [R 1 asm.cluster_get_local_slot_ranges] {} | ||
| 2948 | assert_equal [R 4 asm.cluster_get_local_slot_ranges] {} | ||
| 2949 | } | ||
| 2950 | } | ||
| 2951 | |||
| 2952 | set testmodule [file normalize tests/modules/atomicslotmigration.so] | ||
| 2953 | |||
| 2954 | start_cluster 2 0 [list tags {external:skip cluster modules} config_lines [list loadmodule $testmodule cluster-node-timeout 60000 cluster-allow-replica-migration no appendonly yes]] { | ||
| 2955 | test "TRIMSLOTS in AOF will work synchronously on restart" { | ||
| 2956 | # When TRIMSLOTS is replayed from AOF during restart, it must execute | ||
| 2957 | # synchronously rather than using active trim. This prevents race | ||
| 2958 | # conditions where subsequent AOF commands might operate on keys | ||
| 2959 | # that should have been trimmed. | ||
| 2960 | |||
| 2961 | # Subscribe to key trimmed event to force active trim | ||
| 2962 | R 0 asm.subscribe_trimmed_event 1 | ||
| 2963 | populate_slot 1000 -slot 0 | ||
| 2964 | populate_slot 1000 -slot 1 | ||
| 2965 | R 1 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2966 | wait_for_asm_done | ||
| 2967 | |||
| 2968 | # verify active trim is used | ||
| 2969 | assert_equal 1 [CI 0 cluster_slot_migration_stats_active_trim_completed] | ||
| 2970 | |||
| 2971 | # restart server and verify aof is loaded | ||
| 2972 | restart_server 0 yes no yes nosave | ||
| 2973 | assert {[scan [regexp -inline {aof_current_size:([\d]*)} [R 0 info persistence]] aof_current_size=%d] > 0} | ||
| 2974 | wait_for_cluster_state "ok" | ||
| 2975 | |||
| 2976 | # verify TRIMSLOTS in AOF is executed synchronously | ||
| 2977 | assert_equal 0 [CI 0 cluster_slot_migration_stats_active_trim_completed] | ||
| 2978 | assert_equal 1000 [R 0 dbsize] | ||
| 2979 | |||
| 2980 | # cleanup | ||
| 2981 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 2982 | wait_for_asm_done | ||
| 2983 | assert_equal 2000 [R 0 dbsize] | ||
| 2984 | R 0 flushall | ||
| 2985 | R 1 flushall | ||
| 2986 | clear_module_event_log | ||
| 2987 | |||
| 2988 | } | ||
| 2989 | |||
| 2990 | test "Test trim is disabled when module requests it" { | ||
| 2991 | R 0 asm.disable_trim | ||
| 2992 | |||
| 2993 | set slot0_key [slot_key 0 mykey] | ||
| 2994 | R 0 set $slot0_key "value" | ||
| 2995 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 0] | ||
| 2996 | wait_for_condition 1000 10 { | ||
| 2997 | [string match {*completed*} [migration_status 0 $task_id state]] | ||
| 2998 | } else { | ||
| 2999 | fail "ASM task did not complete" | ||
| 3000 | } | ||
| 3001 | # since we disable trim, the key should still exist on source, | ||
| 3002 | # we can read it with REDISMODULE_OPEN_KEY_ACCESS_TRIMMED flag | ||
| 3003 | assert_equal "value" [R 0 asm.read_pending_trim_key $slot0_key] | ||
| 3004 | assert_equal 1 [R 0 asm.trim_in_progress] | ||
| 3005 | |||
| 3006 | # enable trim and verify the key is trimmed | ||
| 3007 | R 0 asm.enable_trim | ||
| 3008 | wait_for_condition 1000 10 { | ||
| 3009 | [R 0 asm.read_pending_trim_key $slot0_key] eq "" && | ||
| 3010 | [R 0 asm.trim_in_progress] == 0 | ||
| 3011 | } else { | ||
| 3012 | fail "Trim did not complete" | ||
| 3013 | } | ||
| 3014 | wait_for_asm_done | ||
| 3015 | R 0 CLUSTER MIGRATION IMPORT 0 0 | ||
| 3016 | wait_for_asm_done | ||
| 3017 | clear_module_event_log | ||
| 3018 | } | ||
| 3019 | |||
| 3020 | test "Can not start new asm task when trim is not allowed" { | ||
| 3021 | # start a migration task, wait it completed but not allow to trim slots | ||
| 3022 | R 0 asm.disable_trim | ||
| 3023 | set task_id [R 1 CLUSTER MIGRATION IMPORT 0 0] | ||
| 3024 | wait_for_condition 1000 10 { | ||
| 3025 | [string match {*completed*} [migration_status 0 $task_id state]] | ||
| 3026 | } else { | ||
| 3027 | fail "ASM task did not complete" | ||
| 3028 | } | ||
| 3029 | # Can not start new migrating task since trim is disabled | ||
| 3030 | set task_id [R 1 CLUSTER MIGRATION IMPORT 1 1] | ||
| 3031 | wait_for_condition 1000 10 { | ||
| 3032 | [string match {*fail*} [migration_status 1 $task_id state]] && | ||
| 3033 | [string match {*Trim is disabled by module*} [migration_status 1 $task_id last_error]] | ||
| 3034 | } else { | ||
| 3035 | fail "ASM task did not fail" | ||
| 3036 | } | ||
| 3037 | R 0 asm.enable_trim | ||
| 3038 | wait_for_asm_done | ||
| 3039 | |||
| 3040 | # start a migration task, wait it completed but not allow to trim slots | ||
| 3041 | R 0 asm.disable_trim | ||
| 3042 | set task_id [R 1 CLUSTER MIGRATION IMPORT 2 2] | ||
| 3043 | wait_for_condition 1000 10 { | ||
| 3044 | [string match {*completed*} [migration_status 0 $task_id state]] | ||
| 3045 | } else { | ||
| 3046 | fail "ASM task did not complete" | ||
| 3047 | } | ||
| 3048 | set logline [count_log_lines 0] | ||
| 3049 | # Can not start new importing task since trim is disabled | ||
| 3050 | set task_id [R 0 CLUSTER MIGRATION IMPORT 0 1] | ||
| 3051 | wait_for_log_messages 0 {"*Can not start import task*trim is disabled by module*"} $logline 1000 10 | ||
| 3052 | R 0 asm.enable_trim | ||
| 3053 | wait_for_asm_done | ||
| 3054 | } | ||
| 3055 | } | ||
| 3056 | |||
| 3057 | start_server {tags "cluster external:skip"} { | ||
| 3058 | test "Test RM_ClusterGetLocalSlotRanges without cluster" { | ||
| 3059 | r module load $testmodule | ||
| 3060 | assert_equal [r asm.cluster_get_local_slot_ranges] {{0 16383}} | ||
| 3061 | } | ||
| 3062 | } | ||
| 3063 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/cli.tcl b/examples/redis-unstable/tests/unit/cluster/cli.tcl deleted file mode 100644 index ce4629e..0000000 --- a/examples/redis-unstable/tests/unit/cluster/cli.tcl +++ /dev/null | |||
| @@ -1,415 +0,0 @@ | |||
| 1 | # Primitive tests on cluster-enabled redis using redis-cli | ||
| 2 | |||
| 3 | source tests/support/cli.tcl | ||
| 4 | |||
| 5 | # make sure the test infra won't use SELECT | ||
| 6 | set old_singledb $::singledb | ||
| 7 | set ::singledb 1 | ||
| 8 | |||
| 9 | # cluster creation is complicated with TLS, and the current tests don't really need that coverage | ||
| 10 | tags {tls:skip external:skip cluster} { | ||
| 11 | |||
| 12 | # start three servers | ||
| 13 | set base_conf [list cluster-enabled yes cluster-node-timeout 1000] | ||
| 14 | start_multiple_servers 3 [list overrides $base_conf] { | ||
| 15 | |||
| 16 | set node1 [srv 0 client] | ||
| 17 | set node2 [srv -1 client] | ||
| 18 | set node3 [srv -2 client] | ||
| 19 | set node3_pid [srv -2 pid] | ||
| 20 | set node3_rd [redis_deferring_client -2] | ||
| 21 | |||
| 22 | test {Create 3 node cluster} { | ||
| 23 | exec src/redis-cli --cluster-yes --cluster create \ | ||
| 24 | 127.0.0.1:[srv 0 port] \ | ||
| 25 | 127.0.0.1:[srv -1 port] \ | ||
| 26 | 127.0.0.1:[srv -2 port] | ||
| 27 | |||
| 28 | wait_for_condition 1000 50 { | ||
| 29 | [CI 0 cluster_state] eq {ok} && | ||
| 30 | [CI 1 cluster_state] eq {ok} && | ||
| 31 | [CI 2 cluster_state] eq {ok} | ||
| 32 | } else { | ||
| 33 | fail "Cluster doesn't stabilize" | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | test "Run blocking command on cluster node3" { | ||
| 38 | # key9184688 is mapped to slot 10923 (first slot of node 3) | ||
| 39 | $node3_rd brpop key9184688 0 | ||
| 40 | $node3_rd flush | ||
| 41 | |||
| 42 | wait_for_condition 50 100 { | ||
| 43 | [s -2 blocked_clients] eq {1} | ||
| 44 | } else { | ||
| 45 | fail "Client not blocked" | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | test "Perform a Resharding" { | ||
| 50 | exec src/redis-cli --cluster-yes --cluster reshard 127.0.0.1:[srv -2 port] \ | ||
| 51 | --cluster-to [$node1 cluster myid] \ | ||
| 52 | --cluster-from [$node3 cluster myid] \ | ||
| 53 | --cluster-slots 1 | ||
| 54 | } | ||
| 55 | |||
| 56 | test "Verify command got unblocked after resharding" { | ||
| 57 | # this (read) will wait for the node3 to realize the new topology | ||
| 58 | assert_error {*MOVED*} {$node3_rd read} | ||
| 59 | |||
| 60 | # verify there are no blocked clients | ||
| 61 | assert_equal [s 0 blocked_clients] {0} | ||
| 62 | assert_equal [s -1 blocked_clients] {0} | ||
| 63 | assert_equal [s -2 blocked_clients] {0} | ||
| 64 | } | ||
| 65 | |||
| 66 | test "Wait for cluster to be stable" { | ||
| 67 | # Cluster check just verifies the config state is self-consistent, | ||
| 68 | # waiting for cluster_state to be okay is an independent check that all the | ||
| 69 | # nodes actually believe each other are healthy, prevent cluster down error. | ||
| 70 | wait_for_condition 1000 50 { | ||
| 71 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv 0 port]}] == 0 && | ||
| 72 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -1 port]}] == 0 && | ||
| 73 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -2 port]}] == 0 && | ||
| 74 | [CI 0 cluster_state] eq {ok} && | ||
| 75 | [CI 1 cluster_state] eq {ok} && | ||
| 76 | [CI 2 cluster_state] eq {ok} | ||
| 77 | } else { | ||
| 78 | fail "Cluster doesn't stabilize" | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 | set node1_rd [redis_deferring_client 0] | ||
| 83 | |||
| 84 | test "use previous hostip in \"cluster-preferred-endpoint-type unknown-endpoint\" mode" { | ||
| 85 | |||
| 86 | # backup and set cluster-preferred-endpoint-type unknown-endpoint | ||
| 87 | set endpoint_type_before_set [lindex [split [$node1 CONFIG GET cluster-preferred-endpoint-type] " "] 1] | ||
| 88 | $node1 CONFIG SET cluster-preferred-endpoint-type unknown-endpoint | ||
| 89 | |||
| 90 | # when redis-cli not in cluster mode, return MOVE with empty host | ||
| 91 | set slot_for_foo [$node1 CLUSTER KEYSLOT foo] | ||
| 92 | assert_error "*MOVED $slot_for_foo :*" {$node1 set foo bar} | ||
| 93 | |||
| 94 | # when in cluster mode, redirect using previous hostip | ||
| 95 | assert_equal "[exec src/redis-cli -h 127.0.0.1 -p [srv 0 port] -c set foo bar]" {OK} | ||
| 96 | assert_match "[exec src/redis-cli -h 127.0.0.1 -p [srv 0 port] -c get foo]" {bar} | ||
| 97 | |||
| 98 | assert_equal [$node1 CONFIG SET cluster-preferred-endpoint-type "$endpoint_type_before_set"] {OK} | ||
| 99 | } | ||
| 100 | |||
| 101 | test "Sanity test push cmd after resharding" { | ||
| 102 | assert_error {*MOVED*} {$node3 lpush key9184688 v1} | ||
| 103 | |||
| 104 | $node1_rd brpop key9184688 0 | ||
| 105 | $node1_rd flush | ||
| 106 | |||
| 107 | wait_for_condition 50 100 { | ||
| 108 | [s 0 blocked_clients] eq {1} | ||
| 109 | } else { | ||
| 110 | puts "Client not blocked" | ||
| 111 | puts "read from blocked client: [$node1_rd read]" | ||
| 112 | fail "Client not blocked" | ||
| 113 | } | ||
| 114 | |||
| 115 | $node1 lpush key9184688 v2 | ||
| 116 | assert_equal {key9184688 v2} [$node1_rd read] | ||
| 117 | } | ||
| 118 | |||
| 119 | $node3_rd close | ||
| 120 | |||
| 121 | test "Run blocking command again on cluster node1" { | ||
| 122 | $node1 del key9184688 | ||
| 123 | # key9184688 is mapped to slot 10923 which has been moved to node1 | ||
| 124 | $node1_rd brpop key9184688 0 | ||
| 125 | $node1_rd flush | ||
| 126 | |||
| 127 | wait_for_condition 50 100 { | ||
| 128 | [s 0 blocked_clients] eq {1} | ||
| 129 | } else { | ||
| 130 | fail "Client not blocked" | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | test "Kill a cluster node and wait for fail state" { | ||
| 135 | # kill node3 in cluster | ||
| 136 | pause_process $node3_pid | ||
| 137 | |||
| 138 | wait_for_condition 1000 50 { | ||
| 139 | [CI 0 cluster_state] eq {fail} && | ||
| 140 | [CI 1 cluster_state] eq {fail} | ||
| 141 | } else { | ||
| 142 | fail "Cluster doesn't fail" | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | test "Verify command got unblocked after cluster failure" { | ||
| 147 | assert_error {*CLUSTERDOWN*} {$node1_rd read} | ||
| 148 | |||
| 149 | # verify there are no blocked clients | ||
| 150 | assert_equal [s 0 blocked_clients] {0} | ||
| 151 | assert_equal [s -1 blocked_clients] {0} | ||
| 152 | } | ||
| 153 | |||
| 154 | resume_process $node3_pid | ||
| 155 | $node1_rd close | ||
| 156 | |||
| 157 | } ;# stop servers | ||
| 158 | |||
| 159 | # Test redis-cli -- cluster create, add-node, call. | ||
| 160 | # Test that functions are propagated on add-node | ||
| 161 | start_multiple_servers 5 [list overrides $base_conf] { | ||
| 162 | |||
| 163 | set node4_rd [redis_client -3] | ||
| 164 | set node5_rd [redis_client -4] | ||
| 165 | |||
| 166 | test {Functions are added to new node on redis-cli cluster add-node} { | ||
| 167 | exec src/redis-cli --cluster-yes --cluster create \ | ||
| 168 | 127.0.0.1:[srv 0 port] \ | ||
| 169 | 127.0.0.1:[srv -1 port] \ | ||
| 170 | 127.0.0.1:[srv -2 port] | ||
| 171 | |||
| 172 | |||
| 173 | wait_for_condition 1000 50 { | ||
| 174 | [CI 0 cluster_state] eq {ok} && | ||
| 175 | [CI 1 cluster_state] eq {ok} && | ||
| 176 | [CI 2 cluster_state] eq {ok} | ||
| 177 | } else { | ||
| 178 | fail "Cluster doesn't stabilize" | ||
| 179 | } | ||
| 180 | |||
| 181 | # upload a function to all the cluster | ||
| 182 | exec src/redis-cli --cluster-yes --cluster call 127.0.0.1:[srv 0 port] \ | ||
| 183 | FUNCTION LOAD {#!lua name=TEST | ||
| 184 | redis.register_function('test', function() return 'hello' end) | ||
| 185 | } | ||
| 186 | |||
| 187 | # adding node to the cluster | ||
| 188 | exec src/redis-cli --cluster-yes --cluster add-node \ | ||
| 189 | 127.0.0.1:[srv -3 port] \ | ||
| 190 | 127.0.0.1:[srv 0 port] | ||
| 191 | |||
| 192 | wait_for_cluster_size 4 | ||
| 193 | |||
| 194 | wait_for_condition 1000 50 { | ||
| 195 | [CI 0 cluster_state] eq {ok} && | ||
| 196 | [CI 1 cluster_state] eq {ok} && | ||
| 197 | [CI 2 cluster_state] eq {ok} && | ||
| 198 | [CI 3 cluster_state] eq {ok} | ||
| 199 | } else { | ||
| 200 | fail "Cluster doesn't stabilize" | ||
| 201 | } | ||
| 202 | |||
| 203 | # make sure 'test' function was added to the new node | ||
| 204 | assert_equal {{library_name TEST engine LUA functions {{name test description {} flags {}}}}} [$node4_rd FUNCTION LIST] | ||
| 205 | |||
| 206 | # add function to node 5 | ||
| 207 | assert_equal {TEST} [$node5_rd FUNCTION LOAD {#!lua name=TEST | ||
| 208 | redis.register_function('test', function() return 'hello' end) | ||
| 209 | }] | ||
| 210 | |||
| 211 | # make sure functions was added to node 5 | ||
| 212 | assert_equal {{library_name TEST engine LUA functions {{name test description {} flags {}}}}} [$node5_rd FUNCTION LIST] | ||
| 213 | |||
| 214 | # adding node 5 to the cluster should failed because it already contains the 'test' function | ||
| 215 | catch { | ||
| 216 | exec src/redis-cli --cluster-yes --cluster add-node \ | ||
| 217 | 127.0.0.1:[srv -4 port] \ | ||
| 218 | 127.0.0.1:[srv 0 port] | ||
| 219 | } e | ||
| 220 | assert_match {*node already contains functions*} $e | ||
| 221 | } | ||
| 222 | } ;# stop servers | ||
| 223 | |||
| 224 | # Test redis-cli --cluster create, add-node. | ||
| 225 | # Test that one slot can be migrated to and then away from the new node. | ||
| 226 | test {Migrate the last slot away from a node using redis-cli} { | ||
| 227 | start_multiple_servers 4 [list overrides $base_conf] { | ||
| 228 | |||
| 229 | # Create a cluster of 3 nodes | ||
| 230 | exec src/redis-cli --cluster-yes --cluster create \ | ||
| 231 | 127.0.0.1:[srv 0 port] \ | ||
| 232 | 127.0.0.1:[srv -1 port] \ | ||
| 233 | 127.0.0.1:[srv -2 port] | ||
| 234 | |||
| 235 | wait_for_condition 1000 50 { | ||
| 236 | [CI 0 cluster_state] eq {ok} && | ||
| 237 | [CI 1 cluster_state] eq {ok} && | ||
| 238 | [CI 2 cluster_state] eq {ok} | ||
| 239 | } else { | ||
| 240 | fail "Cluster doesn't stabilize" | ||
| 241 | } | ||
| 242 | |||
| 243 | # Insert some data | ||
| 244 | assert_equal OK [exec src/redis-cli -c -p [srv 0 port] SET foo bar] | ||
| 245 | set slot [exec src/redis-cli -c -p [srv 0 port] CLUSTER KEYSLOT foo] | ||
| 246 | |||
| 247 | # Add new node to the cluster | ||
| 248 | exec src/redis-cli --cluster-yes --cluster add-node \ | ||
| 249 | 127.0.0.1:[srv -3 port] \ | ||
| 250 | 127.0.0.1:[srv 0 port] | ||
| 251 | |||
| 252 | # First we wait for new node to be recognized by entire cluster | ||
| 253 | wait_for_cluster_size 4 | ||
| 254 | |||
| 255 | wait_for_condition 1000 50 { | ||
| 256 | [CI 0 cluster_state] eq {ok} && | ||
| 257 | [CI 1 cluster_state] eq {ok} && | ||
| 258 | [CI 2 cluster_state] eq {ok} && | ||
| 259 | [CI 3 cluster_state] eq {ok} | ||
| 260 | } else { | ||
| 261 | fail "Cluster doesn't stabilize" | ||
| 262 | } | ||
| 263 | |||
| 264 | set newnode_r [redis_client -3] | ||
| 265 | set newnode_id [$newnode_r CLUSTER MYID] | ||
| 266 | |||
| 267 | # Find out which node has the key "foo" by asking the new node for a | ||
| 268 | # redirect. | ||
| 269 | catch { $newnode_r get foo } e | ||
| 270 | assert_match "MOVED $slot *" $e | ||
| 271 | lassign [split [lindex $e 2] :] owner_host owner_port | ||
| 272 | set owner_r [redis $owner_host $owner_port 0 $::tls] | ||
| 273 | set owner_id [$owner_r CLUSTER MYID] | ||
| 274 | |||
| 275 | # Move slot to new node using plain Redis commands | ||
| 276 | assert_equal OK [$newnode_r CLUSTER SETSLOT $slot IMPORTING $owner_id] | ||
| 277 | assert_equal OK [$owner_r CLUSTER SETSLOT $slot MIGRATING $newnode_id] | ||
| 278 | assert_equal {foo} [$owner_r CLUSTER GETKEYSINSLOT $slot 10] | ||
| 279 | assert_equal OK [$owner_r MIGRATE 127.0.0.1 [srv -3 port] "" 0 5000 KEYS foo] | ||
| 280 | assert_equal OK [$newnode_r CLUSTER SETSLOT $slot NODE $newnode_id] | ||
| 281 | assert_equal OK [$owner_r CLUSTER SETSLOT $slot NODE $newnode_id] | ||
| 282 | |||
| 283 | # Using --cluster check make sure we won't get `Not all slots are covered by nodes`. | ||
| 284 | # Wait for the cluster to become stable make sure the cluster is up during MIGRATE. | ||
| 285 | wait_for_condition 1000 50 { | ||
| 286 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv 0 port]}] == 0 && | ||
| 287 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -1 port]}] == 0 && | ||
| 288 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -2 port]}] == 0 && | ||
| 289 | [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -3 port]}] == 0 && | ||
| 290 | [CI 0 cluster_state] eq {ok} && | ||
| 291 | [CI 1 cluster_state] eq {ok} && | ||
| 292 | [CI 2 cluster_state] eq {ok} && | ||
| 293 | [CI 3 cluster_state] eq {ok} | ||
| 294 | } else { | ||
| 295 | fail "Cluster doesn't stabilize" | ||
| 296 | } | ||
| 297 | |||
| 298 | # Move the only slot back to original node using redis-cli | ||
| 299 | exec src/redis-cli --cluster reshard 127.0.0.1:[srv -3 port] \ | ||
| 300 | --cluster-from $newnode_id \ | ||
| 301 | --cluster-to $owner_id \ | ||
| 302 | --cluster-slots 1 \ | ||
| 303 | --cluster-yes | ||
| 304 | |||
| 305 | # The empty node will become a replica of the new owner before the | ||
| 306 | # `MOVED` check, so let's wait for the cluster to become stable. | ||
| 307 | wait_for_condition 1000 50 { | ||
| 308 | [CI 0 cluster_state] eq {ok} && | ||
| 309 | [CI 1 cluster_state] eq {ok} && | ||
| 310 | [CI 2 cluster_state] eq {ok} && | ||
| 311 | [CI 3 cluster_state] eq {ok} | ||
| 312 | } else { | ||
| 313 | fail "Cluster doesn't stabilize" | ||
| 314 | } | ||
| 315 | |||
| 316 | # Check that the key foo has been migrated back to the original owner. | ||
| 317 | catch { $newnode_r get foo } e | ||
| 318 | assert_equal "MOVED $slot $owner_host:$owner_port" $e | ||
| 319 | |||
| 320 | # Check that the empty node has turned itself into a replica of the new | ||
| 321 | # owner and that the new owner knows that. | ||
| 322 | wait_for_condition 1000 50 { | ||
| 323 | [string match "*slave*" [$owner_r CLUSTER REPLICAS $owner_id]] | ||
| 324 | } else { | ||
| 325 | fail "Empty node didn't turn itself into a replica." | ||
| 326 | } | ||
| 327 | } | ||
| 328 | } | ||
| 329 | |||
| 330 | foreach ip_or_localhost {127.0.0.1 localhost} { | ||
| 331 | |||
| 332 | # Test redis-cli --cluster create, add-node with cluster-port. | ||
| 333 | # Create five nodes, three with custom cluster_port and two with default values. | ||
| 334 | start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1 cluster-port [find_available_port $::baseport $::portcount]]] { | ||
| 335 | start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1]] { | ||
| 336 | start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1 cluster-port [find_available_port $::baseport $::portcount]]] { | ||
| 337 | start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1]] { | ||
| 338 | start_server [list overrides [list cluster-enabled yes cluster-node-timeout 1 cluster-port [find_available_port $::baseport $::portcount]]] { | ||
| 339 | |||
| 340 | # The first three are used to test --cluster create. | ||
| 341 | # The last two are used to test --cluster add-node | ||
| 342 | |||
| 343 | test "redis-cli -4 --cluster create using $ip_or_localhost with cluster-port" { | ||
| 344 | exec src/redis-cli -4 --cluster-yes --cluster create \ | ||
| 345 | $ip_or_localhost:[srv 0 port] \ | ||
| 346 | $ip_or_localhost:[srv -1 port] \ | ||
| 347 | $ip_or_localhost:[srv -2 port] | ||
| 348 | |||
| 349 | wait_for_condition 1000 50 { | ||
| 350 | [CI 0 cluster_state] eq {ok} && | ||
| 351 | [CI 1 cluster_state] eq {ok} && | ||
| 352 | [CI 2 cluster_state] eq {ok} | ||
| 353 | } else { | ||
| 354 | fail "Cluster doesn't stabilize" | ||
| 355 | } | ||
| 356 | |||
| 357 | # Make sure each node can meet other nodes | ||
| 358 | assert_equal 3 [CI 0 cluster_known_nodes] | ||
| 359 | assert_equal 3 [CI 1 cluster_known_nodes] | ||
| 360 | assert_equal 3 [CI 2 cluster_known_nodes] | ||
| 361 | } | ||
| 362 | |||
| 363 | test "redis-cli -4 --cluster add-node using $ip_or_localhost with cluster-port" { | ||
| 364 | # Adding node to the cluster (without cluster-port) | ||
| 365 | exec src/redis-cli -4 --cluster-yes --cluster add-node \ | ||
| 366 | $ip_or_localhost:[srv -3 port] \ | ||
| 367 | $ip_or_localhost:[srv 0 port] | ||
| 368 | |||
| 369 | wait_for_cluster_size 4 | ||
| 370 | |||
| 371 | wait_for_condition 1000 50 { | ||
| 372 | [CI 0 cluster_state] eq {ok} && | ||
| 373 | [CI 1 cluster_state] eq {ok} && | ||
| 374 | [CI 2 cluster_state] eq {ok} && | ||
| 375 | [CI 3 cluster_state] eq {ok} | ||
| 376 | } else { | ||
| 377 | fail "Cluster doesn't stabilize" | ||
| 378 | } | ||
| 379 | |||
| 380 | # Adding node to the cluster (with cluster-port) | ||
| 381 | exec src/redis-cli -4 --cluster-yes --cluster add-node \ | ||
| 382 | $ip_or_localhost:[srv -4 port] \ | ||
| 383 | $ip_or_localhost:[srv 0 port] | ||
| 384 | |||
| 385 | wait_for_cluster_size 5 | ||
| 386 | |||
| 387 | wait_for_condition 1000 50 { | ||
| 388 | [CI 0 cluster_state] eq {ok} && | ||
| 389 | [CI 1 cluster_state] eq {ok} && | ||
| 390 | [CI 2 cluster_state] eq {ok} && | ||
| 391 | [CI 3 cluster_state] eq {ok} && | ||
| 392 | [CI 4 cluster_state] eq {ok} | ||
| 393 | } else { | ||
| 394 | fail "Cluster doesn't stabilize" | ||
| 395 | } | ||
| 396 | |||
| 397 | # Make sure each node can meet other nodes | ||
| 398 | assert_equal 5 [CI 0 cluster_known_nodes] | ||
| 399 | assert_equal 5 [CI 1 cluster_known_nodes] | ||
| 400 | assert_equal 5 [CI 2 cluster_known_nodes] | ||
| 401 | assert_equal 5 [CI 3 cluster_known_nodes] | ||
| 402 | assert_equal 5 [CI 4 cluster_known_nodes] | ||
| 403 | } | ||
| 404 | # stop 5 servers | ||
| 405 | } | ||
| 406 | } | ||
| 407 | } | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | } ;# foreach ip_or_localhost | ||
| 412 | |||
| 413 | } ;# tags | ||
| 414 | |||
| 415 | set ::singledb $old_singledb | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl b/examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl deleted file mode 100644 index a099fa7..0000000 --- a/examples/redis-unstable/tests/unit/cluster/cluster-response-tls.tcl +++ /dev/null | |||
| @@ -1,110 +0,0 @@ | |||
| 1 | source tests/support/cluster.tcl | ||
| 2 | |||
| 3 | proc get_port_from_moved_error {e} { | ||
| 4 | set ip_port [lindex [split $e " "] 2] | ||
| 5 | return [lindex [split $ip_port ":"] 1] | ||
| 6 | } | ||
| 7 | |||
| 8 | proc get_pport_by_port {port} { | ||
| 9 | foreach srv $::servers { | ||
| 10 | set srv_port [dict get $srv port] | ||
| 11 | if {$port == $srv_port} { | ||
| 12 | return [dict get $srv pport] | ||
| 13 | } | ||
| 14 | } | ||
| 15 | return 0 | ||
| 16 | } | ||
| 17 | |||
| 18 | proc get_port_from_node_info {line} { | ||
| 19 | set fields [split $line " "] | ||
| 20 | set addr [lindex $fields 1] | ||
| 21 | set ip_port [lindex [split $addr "@"] 0] | ||
| 22 | return [lindex [split $ip_port ":"] 1] | ||
| 23 | } | ||
| 24 | |||
| 25 | proc cluster_response_tls {tls_cluster} { | ||
| 26 | |||
| 27 | test "CLUSTER SLOTS with different connection type -- tls-cluster $tls_cluster" { | ||
| 28 | set slots1 [R 0 cluster slots] | ||
| 29 | set pport [srv 0 pport] | ||
| 30 | set cluster_client [redis_cluster 127.0.0.1:$pport 0] | ||
| 31 | set slots2 [$cluster_client cluster slots] | ||
| 32 | $cluster_client close | ||
| 33 | # Compare the ports in the first row | ||
| 34 | assert_no_match [lindex $slots1 0 2 1] [lindex $slots2 0 2 1] | ||
| 35 | } | ||
| 36 | |||
| 37 | test "CLUSTER NODES return port according to connection type -- tls-cluster $tls_cluster" { | ||
| 38 | set nodes [R 0 cluster nodes] | ||
| 39 | set port1 [get_port_from_node_info [lindex [split $nodes "\r\n"] 0]] | ||
| 40 | set pport [srv 0 pport] | ||
| 41 | set cluster_client [redis_cluster 127.0.0.1:$pport 0] | ||
| 42 | set nodes [$cluster_client cluster nodes] | ||
| 43 | set port2 [get_port_from_node_info [lindex [split $nodes "\r\n"] 0]] | ||
| 44 | $cluster_client close | ||
| 45 | assert_not_equal $port1 $port2 | ||
| 46 | } | ||
| 47 | |||
| 48 | set cluster [redis_cluster 127.0.0.1:[srv 0 port]] | ||
| 49 | set cluster_pport [redis_cluster 127.0.0.1:[srv 0 pport] 0] | ||
| 50 | $cluster refresh_nodes_map | ||
| 51 | |||
| 52 | test "Set many keys in the cluster -- tls-cluster $tls_cluster" { | ||
| 53 | for {set i 0} {$i < 5000} {incr i} { | ||
| 54 | $cluster set $i $i | ||
| 55 | assert { [$cluster get $i] eq $i } | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | test "Test cluster responses during migration of slot x -- tls-cluster $tls_cluster" { | ||
| 60 | set slot 10 | ||
| 61 | array set nodefrom [$cluster masternode_for_slot $slot] | ||
| 62 | array set nodeto [$cluster masternode_notfor_slot $slot] | ||
| 63 | $nodeto(link) cluster setslot $slot importing $nodefrom(id) | ||
| 64 | $nodefrom(link) cluster setslot $slot migrating $nodeto(id) | ||
| 65 | |||
| 66 | # Get a key from that slot | ||
| 67 | set key [$nodefrom(link) cluster GETKEYSINSLOT $slot "1"] | ||
| 68 | # MOVED REPLY | ||
| 69 | catch {$nodeto(link) set $key "newVal"} e_moved1 | ||
| 70 | assert_match "*MOVED*" $e_moved1 | ||
| 71 | # ASK REPLY | ||
| 72 | catch {$nodefrom(link) set "abc{$key}" "newVal"} e_ask1 | ||
| 73 | assert_match "*ASK*" $e_ask1 | ||
| 74 | |||
| 75 | # UNSTABLE REPLY | ||
| 76 | assert_error "*TRYAGAIN*" {$nodefrom(link) mset "a{$key}" "newVal" $key "newVal2"} | ||
| 77 | |||
| 78 | # Connecting using another protocol | ||
| 79 | array set nodefrom_pport [$cluster_pport masternode_for_slot $slot] | ||
| 80 | array set nodeto_pport [$cluster_pport masternode_notfor_slot $slot] | ||
| 81 | |||
| 82 | # MOVED REPLY | ||
| 83 | catch {$nodeto_pport(link) set $key "newVal"} e_moved2 | ||
| 84 | assert_match "*MOVED*" $e_moved2 | ||
| 85 | # ASK REPLY | ||
| 86 | catch {$nodefrom_pport(link) set "abc{$key}" "newVal"} e_ask2 | ||
| 87 | assert_match "*ASK*" $e_ask2 | ||
| 88 | # Compare MOVED error's port | ||
| 89 | set port1 [get_port_from_moved_error $e_moved1] | ||
| 90 | set port2 [get_port_from_moved_error $e_moved2] | ||
| 91 | assert_not_equal $port1 $port2 | ||
| 92 | assert_equal $port1 $nodefrom(port) | ||
| 93 | assert_equal $port2 [get_pport_by_port $nodefrom(port)] | ||
| 94 | # Compare ASK error's port | ||
| 95 | set port1 [get_port_from_moved_error $e_ask1] | ||
| 96 | set port2 [get_port_from_moved_error $e_ask2] | ||
| 97 | assert_not_equal $port1 $port2 | ||
| 98 | assert_equal $port1 $nodeto(port) | ||
| 99 | assert_equal $port2 [get_pport_by_port $nodeto(port)] | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | if {$::tls} { | ||
| 104 | start_cluster 3 3 {tags {external:skip cluster tls} overrides {tls-cluster yes tls-replication yes}} { | ||
| 105 | cluster_response_tls yes | ||
| 106 | } | ||
| 107 | start_cluster 3 3 {tags {external:skip cluster tls} overrides {tls-cluster no tls-replication no}} { | ||
| 108 | cluster_response_tls no | ||
| 109 | } | ||
| 110 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/failure-marking.tcl b/examples/redis-unstable/tests/unit/cluster/failure-marking.tcl deleted file mode 100644 index c4746c8..0000000 --- a/examples/redis-unstable/tests/unit/cluster/failure-marking.tcl +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | # Test a single primary can mark replica as `fail` | ||
| 2 | start_cluster 1 1 {tags {external:skip cluster}} { | ||
| 3 | |||
| 4 | test "Verify that single primary marks replica as failed" { | ||
| 5 | set primary [srv -0 client] | ||
| 6 | |||
| 7 | set replica1 [srv -1 client] | ||
| 8 | set replica1_pid [srv -1 pid] | ||
| 9 | set replica1_instance_id [dict get [cluster_get_myself 1] id] | ||
| 10 | |||
| 11 | assert {[lindex [$primary role] 0] eq {master}} | ||
| 12 | assert {[lindex [$replica1 role] 0] eq {slave}} | ||
| 13 | |||
| 14 | wait_for_sync $replica1 | ||
| 15 | |||
| 16 | pause_process $replica1_pid | ||
| 17 | |||
| 18 | wait_node_marked_fail 0 $replica1_instance_id | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | # Test multiple primaries wait for a quorum and then mark a replica as `fail` | ||
| 23 | start_cluster 2 1 {tags {external:skip cluster}} { | ||
| 24 | |||
| 25 | test "Verify that multiple primaries mark replica as failed" { | ||
| 26 | set primary1 [srv -0 client] | ||
| 27 | |||
| 28 | set primary2 [srv -1 client] | ||
| 29 | set primary2_pid [srv -1 pid] | ||
| 30 | |||
| 31 | set replica1 [srv -2 client] | ||
| 32 | set replica1_pid [srv -2 pid] | ||
| 33 | set replica1_instance_id [dict get [cluster_get_myself 2] id] | ||
| 34 | |||
| 35 | assert {[lindex [$primary1 role] 0] eq {master}} | ||
| 36 | assert {[lindex [$primary2 role] 0] eq {master}} | ||
| 37 | assert {[lindex [$replica1 role] 0] eq {slave}} | ||
| 38 | |||
| 39 | wait_for_sync $replica1 | ||
| 40 | |||
| 41 | pause_process $replica1_pid | ||
| 42 | |||
| 43 | # Pause other primary to allow time for pfail flag to appear | ||
| 44 | pause_process $primary2_pid | ||
| 45 | |||
| 46 | wait_node_marked_pfail 0 $replica1_instance_id | ||
| 47 | |||
| 48 | # Resume other primary and wait for to show replica as failed | ||
| 49 | resume_process $primary2_pid | ||
| 50 | |||
| 51 | wait_node_marked_fail 0 $replica1_instance_id | ||
| 52 | } | ||
| 53 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/hostnames.tcl b/examples/redis-unstable/tests/unit/cluster/hostnames.tcl deleted file mode 100644 index 2236228..0000000 --- a/examples/redis-unstable/tests/unit/cluster/hostnames.tcl +++ /dev/null | |||
| @@ -1,230 +0,0 @@ | |||
| 1 | # | ||
| 2 | # Copyright (c) 2009-Present, Redis Ltd. | ||
| 3 | # All rights reserved. | ||
| 4 | # | ||
| 5 | # Copyright (c) 2024-present, Valkey contributors. | ||
| 6 | # All rights reserved. | ||
| 7 | # | ||
| 8 | # Licensed under your choice of (a) the Redis Source Available License 2.0 | ||
| 9 | # (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the | ||
| 10 | # GNU Affero General Public License v3 (AGPLv3). | ||
| 11 | # | ||
| 12 | # Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. | ||
| 13 | # | ||
| 14 | |||
| 15 | proc get_slot_field {slot_output shard_id node_id attrib_id} { | ||
| 16 | return [lindex [lindex [lindex $slot_output $shard_id] $node_id] $attrib_id] | ||
| 17 | } | ||
| 18 | |||
| 19 | # Start a cluster with 3 masters and 4 replicas. | ||
| 20 | # These tests rely on specific node ordering, so make sure no node fails over. | ||
| 21 | start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-replica-no-failover yes}} { | ||
| 22 | test "Set cluster hostnames and verify they are propagated" { | ||
| 23 | for {set j 0} {$j < [llength $::servers]} {incr j} { | ||
| 24 | R $j config set cluster-announce-hostname "host-$j.com" | ||
| 25 | } | ||
| 26 | |||
| 27 | wait_for_condition 50 100 { | ||
| 28 | [are_hostnames_propagated "host-*.com"] eq 1 | ||
| 29 | } else { | ||
| 30 | fail "cluster hostnames were not propagated" | ||
| 31 | } | ||
| 32 | |||
| 33 | # Now that everything is propagated, assert everyone agrees | ||
| 34 | wait_for_cluster_propagation | ||
| 35 | } | ||
| 36 | |||
| 37 | test "Update hostnames and make sure they are all eventually propagated" { | ||
| 38 | for {set j 0} {$j < [llength $::servers]} {incr j} { | ||
| 39 | R $j config set cluster-announce-hostname "host-updated-$j.com" | ||
| 40 | } | ||
| 41 | |||
| 42 | wait_for_condition 50 100 { | ||
| 43 | [are_hostnames_propagated "host-updated-*.com"] eq 1 | ||
| 44 | } else { | ||
| 45 | fail "cluster hostnames were not propagated" | ||
| 46 | } | ||
| 47 | |||
| 48 | # Now that everything is propagated, assert everyone agrees | ||
| 49 | wait_for_cluster_propagation | ||
| 50 | } | ||
| 51 | |||
| 52 | test "Remove hostnames and make sure they are all eventually propagated" { | ||
| 53 | for {set j 0} {$j < [llength $::servers]} {incr j} { | ||
| 54 | R $j config set cluster-announce-hostname "" | ||
| 55 | } | ||
| 56 | |||
| 57 | wait_for_condition 50 100 { | ||
| 58 | [are_hostnames_propagated ""] eq 1 | ||
| 59 | } else { | ||
| 60 | fail "cluster hostnames were not propagated" | ||
| 61 | } | ||
| 62 | |||
| 63 | # Now that everything is propagated, assert everyone agrees | ||
| 64 | wait_for_cluster_propagation | ||
| 65 | } | ||
| 66 | |||
| 67 | test "Verify cluster-preferred-endpoint-type behavior for redirects and info" { | ||
| 68 | R 0 config set cluster-announce-hostname "me.com" | ||
| 69 | R 1 config set cluster-announce-hostname "" | ||
| 70 | R 2 config set cluster-announce-hostname "them.com" | ||
| 71 | |||
| 72 | wait_for_cluster_propagation | ||
| 73 | |||
| 74 | # Verify default behavior | ||
| 75 | set slot_result [R 0 cluster slots] | ||
| 76 | assert_equal "" [lindex [get_slot_field $slot_result 0 2 0] 1] | ||
| 77 | assert_equal "" [lindex [get_slot_field $slot_result 2 2 0] 1] | ||
| 78 | assert_equal "hostname" [lindex [get_slot_field $slot_result 0 2 3] 0] | ||
| 79 | assert_equal "me.com" [lindex [get_slot_field $slot_result 0 2 3] 1] | ||
| 80 | assert_equal "hostname" [lindex [get_slot_field $slot_result 2 2 3] 0] | ||
| 81 | assert_equal "them.com" [lindex [get_slot_field $slot_result 2 2 3] 1] | ||
| 82 | |||
| 83 | # Redirect will use the IP address | ||
| 84 | catch {R 0 set foo foo} redir_err | ||
| 85 | assert_match "MOVED * 127.0.0.1:*" $redir_err | ||
| 86 | |||
| 87 | # Verify prefer hostname behavior | ||
| 88 | R 0 config set cluster-preferred-endpoint-type hostname | ||
| 89 | |||
| 90 | set slot_result [R 0 cluster slots] | ||
| 91 | assert_equal "me.com" [get_slot_field $slot_result 0 2 0] | ||
| 92 | assert_equal "them.com" [get_slot_field $slot_result 2 2 0] | ||
| 93 | |||
| 94 | # Redirect should use hostname | ||
| 95 | catch {R 0 set foo foo} redir_err | ||
| 96 | assert_match "MOVED * them.com:*" $redir_err | ||
| 97 | |||
| 98 | # Redirect to an unknown hostname returns ? | ||
| 99 | catch {R 0 set barfoo bar} redir_err | ||
| 100 | assert_match "MOVED * ?:*" $redir_err | ||
| 101 | |||
| 102 | # Verify unknown hostname behavior | ||
| 103 | R 0 config set cluster-preferred-endpoint-type unknown-endpoint | ||
| 104 | |||
| 105 | # Verify default behavior | ||
| 106 | set slot_result [R 0 cluster slots] | ||
| 107 | assert_equal "ip" [lindex [get_slot_field $slot_result 0 2 3] 0] | ||
| 108 | assert_equal "127.0.0.1" [lindex [get_slot_field $slot_result 0 2 3] 1] | ||
| 109 | assert_equal "ip" [lindex [get_slot_field $slot_result 2 2 3] 0] | ||
| 110 | assert_equal "127.0.0.1" [lindex [get_slot_field $slot_result 2 2 3] 1] | ||
| 111 | assert_equal "ip" [lindex [get_slot_field $slot_result 1 2 3] 0] | ||
| 112 | assert_equal "127.0.0.1" [lindex [get_slot_field $slot_result 1 2 3] 1] | ||
| 113 | # Not required by the protocol, but IP comes before hostname | ||
| 114 | assert_equal "hostname" [lindex [get_slot_field $slot_result 0 2 3] 2] | ||
| 115 | assert_equal "me.com" [lindex [get_slot_field $slot_result 0 2 3] 3] | ||
| 116 | assert_equal "hostname" [lindex [get_slot_field $slot_result 2 2 3] 2] | ||
| 117 | assert_equal "them.com" [lindex [get_slot_field $slot_result 2 2 3] 3] | ||
| 118 | |||
| 119 | # This node doesn't have a hostname | ||
| 120 | assert_equal 2 [llength [get_slot_field $slot_result 1 2 3]] | ||
| 121 | |||
| 122 | # Redirect should use empty string | ||
| 123 | catch {R 0 set foo foo} redir_err | ||
| 124 | assert_match "MOVED * :*" $redir_err | ||
| 125 | |||
| 126 | R 0 config set cluster-preferred-endpoint-type ip | ||
| 127 | } | ||
| 128 | |||
| 129 | test "Verify the nodes configured with prefer hostname only show hostname for new nodes" { | ||
| 130 | # Have everyone forget node 6 and isolate it from the cluster. | ||
| 131 | isolate_node 6 | ||
| 132 | |||
| 133 | set primaries 3 | ||
| 134 | for {set j 0} {$j < $primaries} {incr j} { | ||
| 135 | # Set hostnames for the masters, now that the node is isolated | ||
| 136 | R $j config set cluster-announce-hostname "shard-$j.com" | ||
| 137 | } | ||
| 138 | |||
| 139 | # Prevent Node 0 and Node 6 from properly meeting, | ||
| 140 | # they'll hang in the handshake phase. This allows us to | ||
| 141 | # test the case where we "know" about it but haven't | ||
| 142 | # successfully retrieved information about it yet. | ||
| 143 | R 0 DEBUG DROP-CLUSTER-PACKET-FILTER 0 | ||
| 144 | R 6 DEBUG DROP-CLUSTER-PACKET-FILTER 0 | ||
| 145 | |||
| 146 | # Have a replica meet the isolated node | ||
| 147 | R 3 cluster meet 127.0.0.1 [srv -6 port] | ||
| 148 | |||
| 149 | # Wait for the isolated node to learn about the rest of the cluster, | ||
| 150 | # which correspond to a single entry in cluster nodes. Note this | ||
| 151 | # doesn't mean the isolated node has successfully contacted each | ||
| 152 | # node. | ||
| 153 | wait_for_condition 50 100 { | ||
| 154 | [llength [split [R 6 CLUSTER NODES] "\n"]] eq [expr [llength $::servers] + 1] | ||
| 155 | } else { | ||
| 156 | fail "Isolated node didn't learn about the rest of the cluster *" | ||
| 157 | } | ||
| 158 | |||
| 159 | # Now, we wait until the two nodes that aren't filtering packets | ||
| 160 | # to accept our isolated nodes connections. At this point they will | ||
| 161 | # start showing up in cluster slots. | ||
| 162 | wait_for_condition 50 100 { | ||
| 163 | [llength [R 6 CLUSTER SLOTS]] eq 2 | ||
| 164 | } else { | ||
| 165 | fail "Node did not learn about the 2 shards it can talk to" | ||
| 166 | } | ||
| 167 | wait_for_condition 50 100 { | ||
| 168 | [lindex [get_slot_field [R 6 CLUSTER SLOTS] 0 2 3] 1] eq "shard-1.com" | ||
| 169 | } else { | ||
| 170 | fail "hostname for shard-1 didn't reach node 6" | ||
| 171 | } | ||
| 172 | |||
| 173 | wait_for_condition 50 100 { | ||
| 174 | [lindex [get_slot_field [R 6 CLUSTER SLOTS] 1 2 3] 1] eq "shard-2.com" | ||
| 175 | } else { | ||
| 176 | fail "hostname for shard-2 didn't reach node 6" | ||
| 177 | } | ||
| 178 | |||
| 179 | # Also make sure we know about the isolated master, we | ||
| 180 | # just can't reach it. | ||
| 181 | set master_id [R 0 CLUSTER MYID] | ||
| 182 | assert_match "*$master_id*" [R 6 CLUSTER NODES] | ||
| 183 | |||
| 184 | # Stop dropping cluster packets, and make sure everything | ||
| 185 | # stabilizes | ||
| 186 | R 0 DEBUG DROP-CLUSTER-PACKET-FILTER -1 | ||
| 187 | R 6 DEBUG DROP-CLUSTER-PACKET-FILTER -1 | ||
| 188 | |||
| 189 | # This operation sometimes spikes to around 5 seconds to resolve the state, | ||
| 190 | # so it has a higher timeout. | ||
| 191 | wait_for_condition 50 500 { | ||
| 192 | [llength [R 6 CLUSTER SLOTS]] eq 3 | ||
| 193 | } else { | ||
| 194 | fail "Node did not learn about the 2 shards it can talk to" | ||
| 195 | } | ||
| 196 | |||
| 197 | for {set j 0} {$j < $primaries} {incr j} { | ||
| 198 | wait_for_condition 50 100 { | ||
| 199 | [lindex [get_slot_field [R 6 CLUSTER SLOTS] $j 2 3] 1] eq "shard-$j.com" | ||
| 200 | } else { | ||
| 201 | fail "hostname information for shard-$j didn't reach node 6" | ||
| 202 | } | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | test "Test restart will keep hostname information" { | ||
| 207 | # Set a new hostname, reboot and make sure it sticks | ||
| 208 | R 0 config set cluster-announce-hostname "restart-1.com" | ||
| 209 | |||
| 210 | # Store the hostname in the config | ||
| 211 | R 0 config rewrite | ||
| 212 | |||
| 213 | restart_server 0 true false | ||
| 214 | set slot_result [R 0 CLUSTER SLOTS] | ||
| 215 | assert_equal [lindex [get_slot_field $slot_result 0 2 3] 1] "restart-1.com" | ||
| 216 | |||
| 217 | # As a sanity check, make sure everyone eventually agrees | ||
| 218 | wait_for_cluster_propagation | ||
| 219 | } | ||
| 220 | |||
| 221 | test "Test hostname validation" { | ||
| 222 | catch {R 0 config set cluster-announce-hostname [string repeat x 256]} err | ||
| 223 | assert_match "*Hostnames must be less than 256 characters*" $err | ||
| 224 | catch {R 0 config set cluster-announce-hostname "?.com"} err | ||
| 225 | assert_match "*Hostnames may only contain alphanumeric characters, hyphens or dots*" $err | ||
| 226 | |||
| 227 | # Note this isn't a valid hostname, but it passes our internal validation | ||
| 228 | R 0 config set cluster-announce-hostname "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-." | ||
| 229 | } | ||
| 230 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl b/examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl deleted file mode 100644 index a595ca6..0000000 --- a/examples/redis-unstable/tests/unit/cluster/human-announced-nodename.tcl +++ /dev/null | |||
| @@ -1,29 +0,0 @@ | |||
| 1 | # Check if cluster's view of human announced nodename is reported in logs | ||
| 2 | start_cluster 3 0 {tags {external:skip cluster}} { | ||
| 3 | test "Set cluster human announced nodename and let it propagate" { | ||
| 4 | for {set j 0} {$j < [llength $::servers]} {incr j} { | ||
| 5 | R $j config set cluster-announce-hostname "host-$j.com" | ||
| 6 | R $j config set cluster-announce-human-nodename "nodename-$j" | ||
| 7 | } | ||
| 8 | |||
| 9 | # We wait for everyone to agree on the hostnames. Since they are gossiped | ||
| 10 | # the same way as nodenames, it implies everyone knows the nodenames too. | ||
| 11 | wait_for_condition 50 100 { | ||
| 12 | [are_hostnames_propagated "host-*.com"] eq 1 | ||
| 13 | } else { | ||
| 14 | fail "cluster hostnames were not propagated" | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | test "Human nodenames are visible in log messages" { | ||
| 19 | # Pause instance 0, so everyone thinks it is dead | ||
| 20 | pause_process [srv 0 pid] | ||
| 21 | |||
| 22 | # We're going to use a message we will know will be sent, node unreachable, | ||
| 23 | # since it includes the other node gossiping. | ||
| 24 | wait_for_log_messages -1 {"*Node * (nodename-2) reported node * (nodename-0) as not reachable*"} 0 20 500 | ||
| 25 | wait_for_log_messages -2 {"*Node * (nodename-1) reported node * (nodename-0) as not reachable*"} 0 20 500 | ||
| 26 | |||
| 27 | resume_process [srv 0 pid] | ||
| 28 | } | ||
| 29 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/internal-secret.tcl b/examples/redis-unstable/tests/unit/cluster/internal-secret.tcl deleted file mode 100644 index f310b74..0000000 --- a/examples/redis-unstable/tests/unit/cluster/internal-secret.tcl +++ /dev/null | |||
| @@ -1,71 +0,0 @@ | |||
| 1 | proc num_unique_secrets {num_nodes} { | ||
| 2 | set secrets [list] | ||
| 3 | for {set i 0} {$i < $num_nodes} {incr i} { | ||
| 4 | lappend secrets [R $i debug internal_secret] | ||
| 5 | } | ||
| 6 | set num_secrets [llength [lsort -unique $secrets]] | ||
| 7 | return $num_secrets | ||
| 8 | } | ||
| 9 | |||
| 10 | proc wait_for_secret_sync {maxtries delay num_nodes} { | ||
| 11 | wait_for_condition $maxtries $delay { | ||
| 12 | [num_unique_secrets $num_nodes] eq 1 | ||
| 13 | } else { | ||
| 14 | fail "Failed waiting for secrets to sync" | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | start_cluster 3 3 {tags {external:skip cluster}} { | ||
| 19 | test "Test internal secret sync" { | ||
| 20 | wait_for_secret_sync 50 100 6 | ||
| 21 | } | ||
| 22 | |||
| 23 | |||
| 24 | set first_shard_host [srv 0 host] | ||
| 25 | set first_shard_port [srv 0 port] | ||
| 26 | |||
| 27 | if {$::verbose} { | ||
| 28 | puts {cluster internal secret:} | ||
| 29 | puts [R 1 debug internal_secret] | ||
| 30 | } | ||
| 31 | |||
| 32 | test "Join a node to the cluster and make sure it gets the same secret" { | ||
| 33 | start_server {tags {"external:skip"} overrides {cluster-enabled {yes}}} { | ||
| 34 | r cluster meet $first_shard_host $first_shard_port | ||
| 35 | wait_for_condition 50 100 { | ||
| 36 | [r debug internal_secret] eq [R 1 debug internal_secret] | ||
| 37 | } else { | ||
| 38 | puts [r debug internal_secret] | ||
| 39 | puts [R 1 debug internal_secret] | ||
| 40 | fail "Secrets not match" | ||
| 41 | } | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | test "Join another cluster, make sure clusters sync on the internal secret" { | ||
| 46 | start_server {tags {"external:skip"} overrides {cluster-enabled {yes}}} { | ||
| 47 | set new_shard_host [srv 0 host] | ||
| 48 | set new_shard_port [srv 0 port] | ||
| 49 | start_server {tags {"external:skip"} overrides {cluster-enabled {yes}}} { | ||
| 50 | r cluster meet $new_shard_host $new_shard_port | ||
| 51 | wait_for_condition 50 100 { | ||
| 52 | [r debug internal_secret] eq [r -1 debug internal_secret] | ||
| 53 | } else { | ||
| 54 | puts [r debug internal_secret] | ||
| 55 | puts [r -1 debug internal_secret] | ||
| 56 | fail "Secrets not match" | ||
| 57 | } | ||
| 58 | if {$::verbose} { | ||
| 59 | puts {new cluster internal secret:} | ||
| 60 | puts [r -1 debug internal_secret] | ||
| 61 | } | ||
| 62 | r cluster meet $first_shard_host $first_shard_port | ||
| 63 | wait_for_secret_sync 50 100 8 | ||
| 64 | if {$::verbose} { | ||
| 65 | puts {internal secret after join to bigger cluster:} | ||
| 66 | puts [r -1 debug internal_secret] | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } | ||
| 70 | } | ||
| 71 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/links.tcl b/examples/redis-unstable/tests/unit/cluster/links.tcl deleted file mode 100644 index a202c37..0000000 --- a/examples/redis-unstable/tests/unit/cluster/links.tcl +++ /dev/null | |||
| @@ -1,292 +0,0 @@ | |||
| 1 | proc get_links_with_peer {this_instance_id peer_nodename} { | ||
| 2 | set links [R $this_instance_id cluster links] | ||
| 3 | set links_with_peer {} | ||
| 4 | foreach l $links { | ||
| 5 | if {[dict get $l node] eq $peer_nodename} { | ||
| 6 | lappend links_with_peer $l | ||
| 7 | } | ||
| 8 | } | ||
| 9 | return $links_with_peer | ||
| 10 | } | ||
| 11 | |||
| 12 | # Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that | ||
| 13 | # corresponds to the link established toward a peer identified by `peer_nodename` | ||
| 14 | proc get_link_to_peer {this_instance_id peer_nodename} { | ||
| 15 | set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename] | ||
| 16 | foreach l $links_with_peer { | ||
| 17 | if {[dict get $l direction] eq "to"} { | ||
| 18 | return $l | ||
| 19 | } | ||
| 20 | } | ||
| 21 | return {} | ||
| 22 | } | ||
| 23 | |||
| 24 | # Return the entry in CLUSTER LINKS output by instance identified by `this_instance_id` that | ||
| 25 | # corresponds to the link accepted from a peer identified by `peer_nodename` | ||
| 26 | proc get_link_from_peer {this_instance_id peer_nodename} { | ||
| 27 | set links_with_peer [get_links_with_peer $this_instance_id $peer_nodename] | ||
| 28 | foreach l $links_with_peer { | ||
| 29 | if {[dict get $l direction] eq "from"} { | ||
| 30 | return $l | ||
| 31 | } | ||
| 32 | } | ||
| 33 | return {} | ||
| 34 | } | ||
| 35 | |||
| 36 | # Reset cluster links to their original state | ||
| 37 | proc reset_links {id} { | ||
| 38 | set limit [lindex [R $id CONFIG get cluster-link-sendbuf-limit] 1] | ||
| 39 | |||
| 40 | # Set a 1 byte limit and wait for cluster cron to run | ||
| 41 | # (executes every 100ms) and terminate links | ||
| 42 | R $id CONFIG SET cluster-link-sendbuf-limit 1 | ||
| 43 | after 150 | ||
| 44 | |||
| 45 | # Reset limit | ||
| 46 | R $id CONFIG SET cluster-link-sendbuf-limit $limit | ||
| 47 | |||
| 48 | # Wait until the cluster links come back up for each node | ||
| 49 | wait_for_condition 50 100 { | ||
| 50 | [number_of_links $id] == [expr [number_of_peers $id] * 2] | ||
| 51 | } else { | ||
| 52 | fail "Cluster links did not come back up" | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | proc number_of_peers {id} { | ||
| 57 | expr [llength $::servers] - 1 | ||
| 58 | } | ||
| 59 | |||
| 60 | proc number_of_links {id} { | ||
| 61 | llength [R $id cluster links] | ||
| 62 | } | ||
| 63 | |||
| 64 | proc publish_messages {server num_msgs msg_size} { | ||
| 65 | for {set i 0} {$i < $num_msgs} {incr i} { | ||
| 66 | $server PUBLISH channel [string repeat "x" $msg_size] | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | start_cluster 1 2 {tags {external:skip cluster}} { | ||
| 71 | set primary_id 0 | ||
| 72 | set replica1_id 1 | ||
| 73 | |||
| 74 | set primary [Rn $primary_id] | ||
| 75 | set replica1 [Rn $replica1_id] | ||
| 76 | |||
| 77 | test "Broadcast message across a cluster shard while a cluster link is down" { | ||
| 78 | set replica1_node_id [$replica1 CLUSTER MYID] | ||
| 79 | |||
| 80 | set channelname ch3 | ||
| 81 | |||
| 82 | # subscribe on replica1 | ||
| 83 | set subscribeclient1 [redis_deferring_client -1] | ||
| 84 | $subscribeclient1 deferred 1 | ||
| 85 | $subscribeclient1 SSUBSCRIBE $channelname | ||
| 86 | $subscribeclient1 read | ||
| 87 | |||
| 88 | # subscribe on replica2 | ||
| 89 | set subscribeclient2 [redis_deferring_client -2] | ||
| 90 | $subscribeclient2 deferred 1 | ||
| 91 | $subscribeclient2 SSUBSCRIBE $channelname | ||
| 92 | $subscribeclient2 read | ||
| 93 | |||
| 94 | # Verify number of links with cluster stable state | ||
| 95 | assert_equal [expr [number_of_peers $primary_id]*2] [number_of_links $primary_id] | ||
| 96 | |||
| 97 | # Disconnect the cluster between primary and replica1 and publish a message. | ||
| 98 | $primary MULTI | ||
| 99 | $primary DEBUG CLUSTERLINK KILL TO $replica1_node_id | ||
| 100 | $primary SPUBLISH $channelname hello | ||
| 101 | set res [$primary EXEC] | ||
| 102 | |||
| 103 | # Verify no client exists on the primary to receive the published message. | ||
| 104 | assert_equal $res {OK 0} | ||
| 105 | |||
| 106 | # Wait for all the cluster links are healthy | ||
| 107 | wait_for_condition 50 100 { | ||
| 108 | [number_of_peers $primary_id]*2 == [number_of_links $primary_id] | ||
| 109 | } else { | ||
| 110 | fail "All peer links couldn't be established" | ||
| 111 | } | ||
| 112 | |||
| 113 | # Publish a message afterwards. | ||
| 114 | $primary SPUBLISH $channelname world | ||
| 115 | |||
| 116 | # Verify replica1 has received only (world) / hello is lost. | ||
| 117 | assert_equal "smessage ch3 world" [$subscribeclient1 read] | ||
| 118 | |||
| 119 | # Verify replica2 has received both messages (hello/world) | ||
| 120 | assert_equal "smessage ch3 hello" [$subscribeclient2 read] | ||
| 121 | assert_equal "smessage ch3 world" [$subscribeclient2 read] | ||
| 122 | } {} {needs:debug} | ||
| 123 | } | ||
| 124 | |||
| 125 | start_cluster 3 0 {tags {external:skip cluster}} { | ||
| 126 | test "Each node has two links with each peer" { | ||
| 127 | for {set id 0} {$id < [llength $::servers]} {incr id} { | ||
| 128 | # Assert that from point of view of each node, there are two links for | ||
| 129 | # each peer. It might take a while for cluster to stabilize so wait up | ||
| 130 | # to 5 seconds. | ||
| 131 | wait_for_condition 50 100 { | ||
| 132 | [number_of_peers $id]*2 == [number_of_links $id] | ||
| 133 | } else { | ||
| 134 | assert_equal [expr [number_of_peers $id]*2] [number_of_links $id] | ||
| 135 | } | ||
| 136 | |||
| 137 | set nodes [get_cluster_nodes $id] | ||
| 138 | set links [R $id cluster links] | ||
| 139 | |||
| 140 | # For each peer there should be exactly one | ||
| 141 | # link "to" it and one link "from" it. | ||
| 142 | foreach n $nodes { | ||
| 143 | if {[cluster_has_flag $n myself]} continue | ||
| 144 | set peer [dict get $n id] | ||
| 145 | set to 0 | ||
| 146 | set from 0 | ||
| 147 | foreach l $links { | ||
| 148 | if {[dict get $l node] eq $peer} { | ||
| 149 | if {[dict get $l direction] eq "to"} { | ||
| 150 | incr to | ||
| 151 | } elseif {[dict get $l direction] eq "from"} { | ||
| 152 | incr from | ||
| 153 | } | ||
| 154 | } | ||
| 155 | } | ||
| 156 | assert {$to eq 1} | ||
| 157 | assert {$from eq 1} | ||
| 158 | } | ||
| 159 | } | ||
| 160 | } | ||
| 161 | |||
| 162 | test {Validate cluster links format} { | ||
| 163 | set lines [R 0 cluster links] | ||
| 164 | foreach l $lines { | ||
| 165 | if {$l eq {}} continue | ||
| 166 | assert_equal [llength $l] 12 | ||
| 167 | assert_equal 1 [dict exists $l "direction"] | ||
| 168 | assert_equal 1 [dict exists $l "node"] | ||
| 169 | assert_equal 1 [dict exists $l "create-time"] | ||
| 170 | assert_equal 1 [dict exists $l "events"] | ||
| 171 | assert_equal 1 [dict exists $l "send-buffer-allocated"] | ||
| 172 | assert_equal 1 [dict exists $l "send-buffer-used"] | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | set primary1_id 0 | ||
| 177 | set primary2_id 1 | ||
| 178 | |||
| 179 | set primary1 [Rn $primary1_id] | ||
| 180 | set primary2 [Rn $primary2_id] | ||
| 181 | |||
| 182 | test "Disconnect link when send buffer limit reached" { | ||
| 183 | # On primary1, set timeout to 1 hour so links won't get disconnected due to timeouts | ||
| 184 | set oldtimeout [lindex [$primary1 CONFIG get cluster-node-timeout] 1] | ||
| 185 | $primary1 CONFIG set cluster-node-timeout [expr 60*60*1000] | ||
| 186 | |||
| 187 | # Get primary1's links with primary2 | ||
| 188 | set primary2_name [dict get [cluster_get_myself $primary2_id] id] | ||
| 189 | set orig_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name] | ||
| 190 | set orig_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name] | ||
| 191 | |||
| 192 | # On primary1, set cluster link send buffer limit to 256KB, which is large enough to not be | ||
| 193 | # overflowed by regular gossip messages but also small enough that it doesn't take too much | ||
| 194 | # memory to overflow it. If it is set too high, Redis may get OOM killed by kernel before this | ||
| 195 | # limit is overflowed in some RAM-limited test environments. | ||
| 196 | set oldlimit [lindex [$primary1 CONFIG get cluster-link-sendbuf-limit] 1] | ||
| 197 | $primary1 CONFIG set cluster-link-sendbuf-limit [expr 256*1024] | ||
| 198 | assert {[CI $primary1_id total_cluster_links_buffer_limit_exceeded] eq 0} | ||
| 199 | |||
| 200 | # To manufacture an ever-growing send buffer from primary1 to primary2, | ||
| 201 | # make primary2 unresponsive. | ||
| 202 | set primary2_pid [srv [expr -1*$primary2_id] pid] | ||
| 203 | pause_process $primary2_pid | ||
| 204 | |||
| 205 | # On primary1, send 128KB Pubsub messages in a loop until the send buffer of the link from | ||
| 206 | # primary1 to primary2 exceeds buffer limit therefore be dropped. | ||
| 207 | # For the send buffer to grow, we need to first exhaust TCP send buffer of primary1 and TCP | ||
| 208 | # receive buffer of primary2 first. The sizes of these two buffers vary by OS, but 100 128KB | ||
| 209 | # messages should be sufficient. | ||
| 210 | set i 0 | ||
| 211 | wait_for_condition 100 0 { | ||
| 212 | [catch {incr i} e] == 0 && | ||
| 213 | [catch {$primary1 publish channel [prepare_value [expr 128*1024]]} e] == 0 && | ||
| 214 | [catch {after 500} e] == 0 && | ||
| 215 | [CI $primary1_id total_cluster_links_buffer_limit_exceeded] >= 1 | ||
| 216 | } else { | ||
| 217 | fail "Cluster link not freed as expected" | ||
| 218 | } | ||
| 219 | |||
| 220 | # A new link to primary2 should have been recreated | ||
| 221 | set new_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name] | ||
| 222 | assert {[dict get $new_link_p1_to_p2 create-time] > [dict get $orig_link_p1_to_p2 create-time]} | ||
| 223 | |||
| 224 | # Link from primary2 should not be affected | ||
| 225 | set same_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name] | ||
| 226 | assert {[dict get $same_link_p1_from_p2 create-time] eq [dict get $orig_link_p1_from_p2 create-time]} | ||
| 227 | |||
| 228 | # Revive primary2 | ||
| 229 | resume_process $primary2_pid | ||
| 230 | |||
| 231 | # Reset configs on primary1 so config changes don't leak out to other tests | ||
| 232 | $primary1 CONFIG set cluster-node-timeout $oldtimeout | ||
| 233 | $primary1 CONFIG set cluster-link-sendbuf-limit $oldlimit | ||
| 234 | |||
| 235 | reset_links $primary1_id | ||
| 236 | } | ||
| 237 | |||
| 238 | test "Link memory increases with publishes" { | ||
| 239 | set server_id 0 | ||
| 240 | set server [Rn $server_id] | ||
| 241 | set msg_size 10000 | ||
| 242 | set num_msgs 10 | ||
| 243 | |||
| 244 | # Remove any sendbuf limit | ||
| 245 | $primary1 CONFIG set cluster-link-sendbuf-limit 0 | ||
| 246 | |||
| 247 | # Publish ~100KB to one of the servers | ||
| 248 | $server MULTI | ||
| 249 | $server INFO memory | ||
| 250 | publish_messages $server $num_msgs $msg_size | ||
| 251 | $server INFO memory | ||
| 252 | set res [$server EXEC] | ||
| 253 | |||
| 254 | set link_mem_before_pubs [getInfoProperty $res mem_cluster_links] | ||
| 255 | |||
| 256 | # Remove the first half of the response string which contains the | ||
| 257 | # first "INFO memory" results and search for the property again | ||
| 258 | set res [string range $res [expr [string length $res] / 2] end] | ||
| 259 | set link_mem_after_pubs [getInfoProperty $res mem_cluster_links] | ||
| 260 | |||
| 261 | # We expect the memory to have increased by more than | ||
| 262 | # the culmulative size of the publish messages | ||
| 263 | set mem_diff_floor [expr $msg_size * $num_msgs] | ||
| 264 | set mem_diff [expr $link_mem_after_pubs - $link_mem_before_pubs] | ||
| 265 | assert {$mem_diff > $mem_diff_floor} | ||
| 266 | |||
| 267 | # Reset links to ensure no leftover data for the next test | ||
| 268 | reset_links $server_id | ||
| 269 | } | ||
| 270 | |||
| 271 | test "Link memory resets after publish messages flush" { | ||
| 272 | set server [Rn 0] | ||
| 273 | set msg_size 100000 | ||
| 274 | set num_msgs 10 | ||
| 275 | |||
| 276 | set link_mem_before [status $server mem_cluster_links] | ||
| 277 | |||
| 278 | # Publish ~1MB to one of the servers | ||
| 279 | $server MULTI | ||
| 280 | publish_messages $server $num_msgs $msg_size | ||
| 281 | $server EXEC | ||
| 282 | |||
| 283 | # Wait until the cluster link memory has returned to below the pre-publish value. | ||
| 284 | # We can't guarantee it returns to the exact same value since gossip messages | ||
| 285 | # can cause the values to fluctuate. | ||
| 286 | wait_for_condition 1000 500 { | ||
| 287 | [status $server mem_cluster_links] <= $link_mem_before | ||
| 288 | } else { | ||
| 289 | fail "Cluster link memory did not settle back to expected range" | ||
| 290 | } | ||
| 291 | } | ||
| 292 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/misc.tcl b/examples/redis-unstable/tests/unit/cluster/misc.tcl deleted file mode 100644 index 62bdcf7..0000000 --- a/examples/redis-unstable/tests/unit/cluster/misc.tcl +++ /dev/null | |||
| @@ -1,36 +0,0 @@ | |||
| 1 | start_cluster 2 2 {tags {external:skip cluster}} { | ||
| 2 | test {Key lazy expires during key migration} { | ||
| 3 | R 0 DEBUG SET-ACTIVE-EXPIRE 0 | ||
| 4 | |||
| 5 | set key_slot [R 0 CLUSTER KEYSLOT FOO] | ||
| 6 | R 0 set FOO BAR PX 10 | ||
| 7 | set src_id [R 0 CLUSTER MYID] | ||
| 8 | set trg_id [R 1 CLUSTER MYID] | ||
| 9 | R 0 CLUSTER SETSLOT $key_slot MIGRATING $trg_id | ||
| 10 | R 1 CLUSTER SETSLOT $key_slot IMPORTING $src_id | ||
| 11 | after 11 | ||
| 12 | assert_error {ASK*} {R 0 GET FOO} | ||
| 13 | R 0 ping | ||
| 14 | } {PONG} | ||
| 15 | |||
| 16 | test "Coverage: Basic cluster commands" { | ||
| 17 | assert_equal {OK} [R 0 CLUSTER saveconfig] | ||
| 18 | |||
| 19 | set id [R 0 CLUSTER MYID] | ||
| 20 | assert_equal {0} [R 0 CLUSTER count-failure-reports $id] | ||
| 21 | |||
| 22 | R 0 flushall | ||
| 23 | assert_equal {OK} [R 0 CLUSTER flushslots] | ||
| 24 | } | ||
| 25 | |||
| 26 | test "CROSSSLOT error for keys in different slots" { | ||
| 27 | # Test MSET with keys in different slots | ||
| 28 | assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 MSET foo bar baz qux} | ||
| 29 | |||
| 30 | # Test DEL with keys in different slots | ||
| 31 | assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 DEL foo bar} | ||
| 32 | |||
| 33 | # Test MGET with keys in different slots | ||
| 34 | assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 MGET foo bar} | ||
| 35 | } | ||
| 36 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl b/examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl deleted file mode 100644 index 5d2d03e..0000000 --- a/examples/redis-unstable/tests/unit/cluster/multi-slot-operations.tcl +++ /dev/null | |||
| @@ -1,182 +0,0 @@ | |||
| 1 | # This test uses a custom slot allocation for testing | ||
| 2 | proc cluster_allocate_with_continuous_slots_local {n} { | ||
| 3 | R 0 cluster ADDSLOTSRANGE 0 3276 | ||
| 4 | R 1 cluster ADDSLOTSRANGE 3277 6552 | ||
| 5 | R 2 cluster ADDSLOTSRANGE 6553 9828 | ||
| 6 | R 3 cluster ADDSLOTSRANGE 9829 13104 | ||
| 7 | R 4 cluster ADDSLOTSRANGE 13105 16383 | ||
| 8 | } | ||
| 9 | |||
| 10 | start_cluster 5 0 {tags {external:skip cluster}} { | ||
| 11 | |||
| 12 | set master1 [srv 0 "client"] | ||
| 13 | set master2 [srv -1 "client"] | ||
| 14 | set master3 [srv -2 "client"] | ||
| 15 | set master4 [srv -3 "client"] | ||
| 16 | set master5 [srv -4 "client"] | ||
| 17 | |||
| 18 | test "Continuous slots distribution" { | ||
| 19 | assert_match "* 0-3276*" [$master1 CLUSTER NODES] | ||
| 20 | assert_match "* 3277-6552*" [$master2 CLUSTER NODES] | ||
| 21 | assert_match "* 6553-9828*" [$master3 CLUSTER NODES] | ||
| 22 | assert_match "* 9829-13104*" [$master4 CLUSTER NODES] | ||
| 23 | assert_match "* 13105-16383*" [$master5 CLUSTER NODES] | ||
| 24 | assert_match "*0 3276*" [$master1 CLUSTER SLOTS] | ||
| 25 | assert_match "*3277 6552*" [$master2 CLUSTER SLOTS] | ||
| 26 | assert_match "*6553 9828*" [$master3 CLUSTER SLOTS] | ||
| 27 | assert_match "*9829 13104*" [$master4 CLUSTER SLOTS] | ||
| 28 | assert_match "*13105 16383*" [$master5 CLUSTER SLOTS] | ||
| 29 | |||
| 30 | $master1 CLUSTER DELSLOTSRANGE 3001 3050 | ||
| 31 | assert_match "* 0-3000 3051-3276*" [$master1 CLUSTER NODES] | ||
| 32 | assert_match "*0 3000*3051 3276*" [$master1 CLUSTER SLOTS] | ||
| 33 | |||
| 34 | $master2 CLUSTER DELSLOTSRANGE 5001 5500 | ||
| 35 | assert_match "* 3277-5000 5501-6552*" [$master2 CLUSTER NODES] | ||
| 36 | assert_match "*3277 5000*5501 6552*" [$master2 CLUSTER SLOTS] | ||
| 37 | |||
| 38 | $master3 CLUSTER DELSLOTSRANGE 7001 7100 8001 8500 | ||
| 39 | assert_match "* 6553-7000 7101-8000 8501-9828*" [$master3 CLUSTER NODES] | ||
| 40 | assert_match "*6553 7000*7101 8000*8501 9828*" [$master3 CLUSTER SLOTS] | ||
| 41 | |||
| 42 | $master4 CLUSTER DELSLOTSRANGE 11001 12000 12101 12200 | ||
| 43 | assert_match "* 9829-11000 12001-12100 12201-13104*" [$master4 CLUSTER NODES] | ||
| 44 | assert_match "*9829 11000*12001 12100*12201 13104*" [$master4 CLUSTER SLOTS] | ||
| 45 | |||
| 46 | $master5 CLUSTER DELSLOTSRANGE 13501 14000 15001 16000 | ||
| 47 | assert_match "* 13105-13500 14001-15000 16001-16383*" [$master5 CLUSTER NODES] | ||
| 48 | assert_match "*13105 13500*14001 15000*16001 16383*" [$master5 CLUSTER SLOTS] | ||
| 49 | } | ||
| 50 | |||
| 51 | test "ADDSLOTS command with several boundary conditions test suite" { | ||
| 52 | assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTS 3001 aaa} | ||
| 53 | assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTS 3001 -1000} | ||
| 54 | assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTS 3001 30003} | ||
| 55 | |||
| 56 | assert_error "ERR Slot 3200 is already busy" {R 0 cluster ADDSLOTS 3200} | ||
| 57 | assert_error "ERR Slot 8501 is already busy" {R 0 cluster ADDSLOTS 8501} | ||
| 58 | |||
| 59 | assert_error "ERR Slot 3001 specified multiple times" {R 0 cluster ADDSLOTS 3001 3002 3001} | ||
| 60 | } | ||
| 61 | |||
| 62 | test "ADDSLOTSRANGE command with several boundary conditions test suite" { | ||
| 63 | # Add multiple slots with incorrect argument number | ||
| 64 | assert_error "ERR wrong number of arguments for 'cluster|addslotsrange' command" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030} | ||
| 65 | |||
| 66 | # Add multiple slots with invalid input slot | ||
| 67 | assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030 aaa} | ||
| 68 | assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030 70000} | ||
| 69 | assert_error "ERR Invalid or out of range slot" {R 0 cluster ADDSLOTSRANGE 3001 3020 -1000 3030} | ||
| 70 | |||
| 71 | # Add multiple slots when start slot number is greater than the end slot | ||
| 72 | assert_error "ERR start slot number 3030 is greater than end slot number 3025" {R 0 cluster ADDSLOTSRANGE 3001 3020 3030 3025} | ||
| 73 | |||
| 74 | # Add multiple slots with busy slot | ||
| 75 | assert_error "ERR Slot 3200 is already busy" {R 0 cluster ADDSLOTSRANGE 3001 3020 3200 3250} | ||
| 76 | |||
| 77 | # Add multiple slots with assigned multiple times | ||
| 78 | assert_error "ERR Slot 3001 specified multiple times" {R 0 cluster ADDSLOTSRANGE 3001 3020 3001 3020} | ||
| 79 | } | ||
| 80 | |||
| 81 | test "DELSLOTSRANGE command with several boundary conditions test suite" { | ||
| 82 | # Delete multiple slots with incorrect argument number | ||
| 83 | assert_error "ERR wrong number of arguments for 'cluster|delslotsrange' command" {R 0 cluster DELSLOTSRANGE 1000 2000 2100} | ||
| 84 | assert_match "* 0-3000 3051-3276*" [$master1 CLUSTER NODES] | ||
| 85 | assert_match "*0 3000*3051 3276*" [$master1 CLUSTER SLOTS] | ||
| 86 | |||
| 87 | # Delete multiple slots with invalid input slot | ||
| 88 | assert_error "ERR Invalid or out of range slot" {R 0 cluster DELSLOTSRANGE 1000 2000 2100 aaa} | ||
| 89 | assert_error "ERR Invalid or out of range slot" {R 0 cluster DELSLOTSRANGE 1000 2000 2100 70000} | ||
| 90 | assert_error "ERR Invalid or out of range slot" {R 0 cluster DELSLOTSRANGE 1000 2000 -2100 2200} | ||
| 91 | assert_match "* 0-3000 3051-3276*" [$master1 CLUSTER NODES] | ||
| 92 | assert_match "*0 3000*3051 3276*" [$master1 CLUSTER SLOTS] | ||
| 93 | |||
| 94 | # Delete multiple slots when start slot number is greater than the end slot | ||
| 95 | assert_error "ERR start slot number 5800 is greater than end slot number 5750" {R 1 cluster DELSLOTSRANGE 5600 5700 5800 5750} | ||
| 96 | assert_match "* 3277-5000 5501-6552*" [$master2 CLUSTER NODES] | ||
| 97 | assert_match "*3277 5000*5501 6552*" [$master2 CLUSTER SLOTS] | ||
| 98 | |||
| 99 | # Delete multiple slots with already unassigned | ||
| 100 | assert_error "ERR Slot 7001 is already unassigned" {R 2 cluster DELSLOTSRANGE 7001 7100 9000 9200} | ||
| 101 | assert_match "* 6553-7000 7101-8000 8501-9828*" [$master3 CLUSTER NODES] | ||
| 102 | assert_match "*6553 7000*7101 8000*8501 9828*" [$master3 CLUSTER SLOTS] | ||
| 103 | |||
| 104 | # Delete multiple slots with assigned multiple times | ||
| 105 | assert_error "ERR Slot 12500 specified multiple times" {R 3 cluster DELSLOTSRANGE 12500 12600 12500 12600} | ||
| 106 | assert_match "* 9829-11000 12001-12100 12201-13104*" [$master4 CLUSTER NODES] | ||
| 107 | assert_match "*9829 11000*12001 12100*12201 13104*" [$master4 CLUSTER SLOTS] | ||
| 108 | } | ||
| 109 | } cluster_allocate_with_continuous_slots_local | ||
| 110 | |||
| 111 | start_cluster 2 0 {tags {external:skip cluster experimental}} { | ||
| 112 | |||
| 113 | set master1 [srv 0 "client"] | ||
| 114 | set master2 [srv -1 "client"] | ||
| 115 | |||
| 116 | test "SFLUSH - Errors and output validation" { | ||
| 117 | assert_match "* 0-8191*" [$master1 CLUSTER NODES] | ||
| 118 | assert_match "* 8192-16383*" [$master2 CLUSTER NODES] | ||
| 119 | assert_match "*0 8191*" [$master1 CLUSTER SLOTS] | ||
| 120 | assert_match "*8192 16383*" [$master2 CLUSTER SLOTS] | ||
| 121 | |||
| 122 | # make master1 non-continuous slots | ||
| 123 | $master1 cluster DELSLOTSRANGE 1000 2000 | ||
| 124 | |||
| 125 | # Test SFLUSH errors validation | ||
| 126 | assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 4} | ||
| 127 | assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 4 SYNC} | ||
| 128 | assert_error {ERR Invalid or out of range slot} {$master1 SFLUSH x 4} | ||
| 129 | assert_error {ERR Invalid or out of range slot} {$master1 SFLUSH 0 12x} | ||
| 130 | assert_error {ERR Slot 3 specified multiple times} {$master1 SFLUSH 2 4 3 5} | ||
| 131 | assert_error {ERR start slot number 8 is greater than*} {$master1 SFLUSH 8 4} | ||
| 132 | assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 4 8 10} | ||
| 133 | assert_error {ERR wrong number of arguments*} {$master1 SFLUSH 0 999 2001 8191 ASYNCX} | ||
| 134 | |||
| 135 | # Test SFLUSH output validation | ||
| 136 | assert_match "" [$master1 SFLUSH 2 4] | ||
| 137 | assert_match "" [$master1 SFLUSH 0 4] | ||
| 138 | assert_match "" [$master2 SFLUSH 0 4] | ||
| 139 | assert_match "" [$master1 SFLUSH 1 8191] | ||
| 140 | assert_match "" [$master1 SFLUSH 0 8190] | ||
| 141 | assert_match "" [$master1 SFLUSH 0 998 2001 8191] | ||
| 142 | assert_match "" [$master1 SFLUSH 1 999 2001 8191] | ||
| 143 | assert_match "" [$master1 SFLUSH 0 999 2001 8190] | ||
| 144 | assert_match "" [$master1 SFLUSH 0 999 2002 8191] | ||
| 145 | assert_match "{0 999} {2001 8191}" [$master1 SFLUSH 0 999 2001 8191] | ||
| 146 | assert_match "{0 999} {2001 8191}" [$master1 SFLUSH 0 8191] | ||
| 147 | assert_match "{0 999} {2001 8191}" [$master1 SFLUSH 0 4000 4001 8191] | ||
| 148 | assert_match "" [$master2 SFLUSH 8193 16383] | ||
| 149 | assert_match "" [$master2 SFLUSH 8192 16382] | ||
| 150 | assert_match "{8192 16383}" [$master2 SFLUSH 8192 16383] | ||
| 151 | assert_match "{8192 16383}" [$master2 SFLUSH 8192 16383 SYNC] | ||
| 152 | assert_match "{8192 16383}" [$master2 SFLUSH 8192 16383 ASYNC] | ||
| 153 | assert_match "{8192 16383}" [$master2 SFLUSH 8192 9000 9001 16383] | ||
| 154 | assert_match "{8192 16383}" [$master2 SFLUSH 8192 9000 9001 16383 SYNC] | ||
| 155 | assert_match "{8192 16383}" [$master2 SFLUSH 8192 9000 9001 16383 ASYNC] | ||
| 156 | |||
| 157 | # restore master1 continuous slots | ||
| 158 | $master1 cluster ADDSLOTSRANGE 1000 2000 | ||
| 159 | } | ||
| 160 | |||
| 161 | test "SFLUSH - Deletes the keys with argument <NONE>/SYNC/ASYNC" { | ||
| 162 | foreach op {"" "SYNC" "ASYNC"} { | ||
| 163 | for {set i 0} {$i < 100} {incr i} { | ||
| 164 | catch {$master1 SET key$i val$i} | ||
| 165 | catch {$master2 SET key$i val$i} | ||
| 166 | } | ||
| 167 | |||
| 168 | assert {[$master1 DBSIZE] > 0} | ||
| 169 | assert {[$master2 DBSIZE] > 0} | ||
| 170 | if {$op eq ""} { | ||
| 171 | assert_match "{0 8191}" [ $master1 SFLUSH 0 8191] | ||
| 172 | } else { | ||
| 173 | assert_match "{0 8191}" [ $master1 SFLUSH 0 8191 $op] | ||
| 174 | } | ||
| 175 | assert {[$master1 DBSIZE] == 0} | ||
| 176 | assert {[$master2 DBSIZE] > 0} | ||
| 177 | assert_match "{8192 16383}" [ $master2 SFLUSH 8192 16383] | ||
| 178 | assert {[$master2 DBSIZE] == 0} | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/scripting.tcl b/examples/redis-unstable/tests/unit/cluster/scripting.tcl deleted file mode 100644 index 76aa882..0000000 --- a/examples/redis-unstable/tests/unit/cluster/scripting.tcl +++ /dev/null | |||
| @@ -1,91 +0,0 @@ | |||
| 1 | start_cluster 1 0 {tags {external:skip cluster}} { | ||
| 2 | |||
| 3 | test {Eval scripts with shebangs and functions default to no cross slots} { | ||
| 4 | # Test that scripts with shebang block cross slot operations | ||
| 5 | assert_error "ERR Script attempted to access keys that do not hash to the same slot*" { | ||
| 6 | r 0 eval {#!lua | ||
| 7 | redis.call('set', 'foo', 'bar') | ||
| 8 | redis.call('set', 'bar', 'foo') | ||
| 9 | return 'OK' | ||
| 10 | } 0} | ||
| 11 | |||
| 12 | # Test the functions by default block cross slot operations | ||
| 13 | r 0 function load REPLACE {#!lua name=crossslot | ||
| 14 | local function test_cross_slot(keys, args) | ||
| 15 | redis.call('set', 'foo', 'bar') | ||
| 16 | redis.call('set', 'bar', 'foo') | ||
| 17 | return 'OK' | ||
| 18 | end | ||
| 19 | |||
| 20 | redis.register_function('test_cross_slot', test_cross_slot)} | ||
| 21 | assert_error "ERR Script attempted to access keys that do not hash to the same slot*" {r FCALL test_cross_slot 0} | ||
| 22 | } | ||
| 23 | |||
| 24 | test {Cross slot commands are allowed by default for eval scripts and with allow-cross-slot-keys flag} { | ||
| 25 | # Old style lua scripts are allowed to access cross slot operations | ||
| 26 | r 0 eval "redis.call('set', 'foo', 'bar'); redis.call('set', 'bar', 'foo')" 0 | ||
| 27 | |||
| 28 | # scripts with allow-cross-slot-keys flag are allowed | ||
| 29 | r 0 eval {#!lua flags=allow-cross-slot-keys | ||
| 30 | redis.call('set', 'foo', 'bar'); redis.call('set', 'bar', 'foo') | ||
| 31 | } 0 | ||
| 32 | |||
| 33 | # Retrieve data from different slot to verify data has been stored in the correct dictionary in cluster-enabled setup | ||
| 34 | # during cross-slot operation from the above lua script. | ||
| 35 | assert_equal "bar" [r 0 get foo] | ||
| 36 | assert_equal "foo" [r 0 get bar] | ||
| 37 | r 0 del foo | ||
| 38 | r 0 del bar | ||
| 39 | |||
| 40 | # Functions with allow-cross-slot-keys flag are allowed | ||
| 41 | r 0 function load REPLACE {#!lua name=crossslot | ||
| 42 | local function test_cross_slot(keys, args) | ||
| 43 | redis.call('set', 'foo', 'bar') | ||
| 44 | redis.call('set', 'bar', 'foo') | ||
| 45 | return 'OK' | ||
| 46 | end | ||
| 47 | |||
| 48 | redis.register_function{function_name='test_cross_slot', callback=test_cross_slot, flags={ 'allow-cross-slot-keys' }}} | ||
| 49 | r FCALL test_cross_slot 0 | ||
| 50 | |||
| 51 | # Retrieve data from different slot to verify data has been stored in the correct dictionary in cluster-enabled setup | ||
| 52 | # during cross-slot operation from the above lua function. | ||
| 53 | assert_equal "bar" [r 0 get foo] | ||
| 54 | assert_equal "foo" [r 0 get bar] | ||
| 55 | } | ||
| 56 | |||
| 57 | test {Cross slot commands are also blocked if they disagree with pre-declared keys} { | ||
| 58 | assert_error "ERR Script attempted to access keys that do not hash to the same slot*" { | ||
| 59 | r 0 eval {#!lua | ||
| 60 | redis.call('set', 'foo', 'bar') | ||
| 61 | return 'OK' | ||
| 62 | } 1 bar} | ||
| 63 | } | ||
| 64 | |||
| 65 | test {Cross slot commands are allowed by default if they disagree with pre-declared keys} { | ||
| 66 | r 0 flushall | ||
| 67 | r 0 eval "redis.call('set', 'foo', 'bar')" 1 bar | ||
| 68 | |||
| 69 | # Make sure the script writes to the right slot | ||
| 70 | assert_equal 1 [r 0 cluster COUNTKEYSINSLOT 12182] ;# foo slot | ||
| 71 | assert_equal 0 [r 0 cluster COUNTKEYSINSLOT 5061] ;# bar slot | ||
| 72 | } | ||
| 73 | |||
| 74 | test "Function no-cluster flag" { | ||
| 75 | R 0 function load {#!lua name=test | ||
| 76 | redis.register_function{function_name='f1', callback=function() return 'hello' end, flags={'no-cluster'}} | ||
| 77 | } | ||
| 78 | catch {R 0 fcall f1 0} e | ||
| 79 | assert_match {*Can not run script on cluster, 'no-cluster' flag is set*} $e | ||
| 80 | } | ||
| 81 | |||
| 82 | test "Script no-cluster flag" { | ||
| 83 | catch { | ||
| 84 | R 0 eval {#!lua flags=no-cluster | ||
| 85 | return 1 | ||
| 86 | } 0 | ||
| 87 | } e | ||
| 88 | |||
| 89 | assert_match {*Can not run script on cluster, 'no-cluster' flag is set*} $e | ||
| 90 | } | ||
| 91 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl b/examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl deleted file mode 100644 index 57b550a..0000000 --- a/examples/redis-unstable/tests/unit/cluster/sharded-pubsub.tcl +++ /dev/null | |||
| @@ -1,67 +0,0 @@ | |||
| 1 | # | ||
| 2 | # Copyright (c) 2009-Present, Redis Ltd. | ||
| 3 | # All rights reserved. | ||
| 4 | # | ||
| 5 | # Licensed under your choice of (a) the Redis Source Available License 2.0 | ||
| 6 | # (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the | ||
| 7 | # GNU Affero General Public License v3 (AGPLv3). | ||
| 8 | # | ||
| 9 | # Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. | ||
| 10 | # | ||
| 11 | |||
| 12 | start_cluster 1 1 {tags {external:skip cluster}} { | ||
| 13 | set primary_id 0 | ||
| 14 | set replica1_id 1 | ||
| 15 | |||
| 16 | set primary [Rn $primary_id] | ||
| 17 | set replica [Rn $replica1_id] | ||
| 18 | |||
| 19 | test "Sharded pubsub publish behavior within multi/exec" { | ||
| 20 | foreach {node} {primary replica} { | ||
| 21 | set node [set $node] | ||
| 22 | $node MULTI | ||
| 23 | $node SPUBLISH ch1 "hello" | ||
| 24 | $node EXEC | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | test "Sharded pubsub within multi/exec with cross slot operation" { | ||
| 29 | $primary MULTI | ||
| 30 | $primary SPUBLISH ch1 "hello" | ||
| 31 | $primary GET foo | ||
| 32 | catch {$primary EXEC} err | ||
| 33 | assert_match {CROSSSLOT*} $err | ||
| 34 | } | ||
| 35 | |||
| 36 | test "Sharded pubsub publish behavior within multi/exec with read operation on primary" { | ||
| 37 | $primary MULTI | ||
| 38 | $primary SPUBLISH foo "hello" | ||
| 39 | $primary GET foo | ||
| 40 | $primary EXEC | ||
| 41 | } {0 {}} | ||
| 42 | |||
| 43 | test "Sharded pubsub publish behavior within multi/exec with read operation on replica" { | ||
| 44 | $replica MULTI | ||
| 45 | $replica SPUBLISH foo "hello" | ||
| 46 | catch {[$replica GET foo]} err | ||
| 47 | assert_match {MOVED*} $err | ||
| 48 | catch {[$replica EXEC]} err | ||
| 49 | assert_match {EXECABORT*} $err | ||
| 50 | } | ||
| 51 | |||
| 52 | test "Sharded pubsub publish behavior within multi/exec with write operation on primary" { | ||
| 53 | $primary MULTI | ||
| 54 | $primary SPUBLISH foo "hello" | ||
| 55 | $primary SET foo bar | ||
| 56 | $primary EXEC | ||
| 57 | } {0 OK} | ||
| 58 | |||
| 59 | test "Sharded pubsub publish behavior within multi/exec with write operation on replica" { | ||
| 60 | $replica MULTI | ||
| 61 | $replica SPUBLISH foo "hello" | ||
| 62 | catch {[$replica SET foo bar]} err | ||
| 63 | assert_match {MOVED*} $err | ||
| 64 | catch {[$replica EXEC]} err | ||
| 65 | assert_match {EXECABORT*} $err | ||
| 66 | } | ||
| 67 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl b/examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl deleted file mode 100644 index 0f3e3cc..0000000 --- a/examples/redis-unstable/tests/unit/cluster/slot-ownership.tcl +++ /dev/null | |||
| @@ -1,61 +0,0 @@ | |||
| 1 | start_cluster 2 2 {tags {external:skip cluster}} { | ||
| 2 | |||
| 3 | test "Verify that slot ownership transfer through gossip propagates deletes to replicas" { | ||
| 4 | assert {[s -2 role] eq {slave}} | ||
| 5 | wait_for_condition 1000 50 { | ||
| 6 | [s -2 master_link_status] eq {up} | ||
| 7 | } else { | ||
| 8 | fail "Instance #2 master link status is not up" | ||
| 9 | } | ||
| 10 | |||
| 11 | assert {[s -3 role] eq {slave}} | ||
| 12 | wait_for_condition 1000 50 { | ||
| 13 | [s -3 master_link_status] eq {up} | ||
| 14 | } else { | ||
| 15 | fail "Instance #3 master link status is not up" | ||
| 16 | } | ||
| 17 | |||
| 18 | # Set a single key that will be used to test deletion | ||
| 19 | set key "FOO" | ||
| 20 | R 0 SET $key TEST | ||
| 21 | set key_slot [R 0 cluster keyslot $key] | ||
| 22 | set slot_keys_num [R 0 cluster countkeysinslot $key_slot] | ||
| 23 | assert {$slot_keys_num > 0} | ||
| 24 | |||
| 25 | # Wait for replica to have the key | ||
| 26 | R 2 readonly | ||
| 27 | wait_for_condition 1000 50 { | ||
| 28 | [R 2 exists $key] eq "1" | ||
| 29 | } else { | ||
| 30 | fail "Test key was not replicated" | ||
| 31 | } | ||
| 32 | |||
| 33 | assert_equal [R 2 cluster countkeysinslot $key_slot] $slot_keys_num | ||
| 34 | |||
| 35 | # Assert other shards in cluster doesn't have the key | ||
| 36 | assert_equal [R 1 cluster countkeysinslot $key_slot] "0" | ||
| 37 | assert_equal [R 3 cluster countkeysinslot $key_slot] "0" | ||
| 38 | |||
| 39 | set nodeid [R 1 cluster myid] | ||
| 40 | |||
| 41 | R 1 cluster bumpepoch | ||
| 42 | # Move $key_slot to node 1 | ||
| 43 | assert_equal [R 1 cluster setslot $key_slot node $nodeid] "OK" | ||
| 44 | |||
| 45 | wait_for_cluster_propagation | ||
| 46 | |||
| 47 | # src master will delete keys in the slot | ||
| 48 | wait_for_condition 50 100 { | ||
| 49 | [R 0 cluster countkeysinslot $key_slot] eq 0 | ||
| 50 | } else { | ||
| 51 | fail "master 'countkeysinslot $key_slot' did not eq 0" | ||
| 52 | } | ||
| 53 | |||
| 54 | # src replica will delete keys in the slot | ||
| 55 | wait_for_condition 50 100 { | ||
| 56 | [R 2 cluster countkeysinslot $key_slot] eq 0 | ||
| 57 | } else { | ||
| 58 | fail "replica 'countkeysinslot $key_slot' did not eq 0" | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } | ||
diff --git a/examples/redis-unstable/tests/unit/cluster/slot-stats.tcl b/examples/redis-unstable/tests/unit/cluster/slot-stats.tcl deleted file mode 100644 index 1123731..0000000 --- a/examples/redis-unstable/tests/unit/cluster/slot-stats.tcl +++ /dev/null | |||
| @@ -1,1169 +0,0 @@ | |||
| 1 | # | ||
| 2 | # Copyright (c) 2009-Present, Redis Ltd. | ||
| 3 | # All rights reserved. | ||
| 4 | # | ||
| 5 | # Copyright (c) 2024-present, Valkey contributors. | ||
| 6 | # All rights reserved. | ||
| 7 | # | ||
| 8 | # Licensed under your choice of (a) the Redis Source Available License 2.0 | ||
| 9 | # (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the | ||
| 10 | # GNU Affero General Public License v3 (AGPLv3). | ||
| 11 | # | ||
| 12 | # Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. | ||
| 13 | # | ||
| 14 | |||
| 15 | # Integration tests for CLUSTER SLOT-STATS command. | ||
| 16 | |||
| 17 | # ----------------------------------------------------------------------------- | ||
| 18 | # Helper functions for CLUSTER SLOT-STATS test cases. | ||
| 19 | # ----------------------------------------------------------------------------- | ||
| 20 | |||
| 21 | # Converts array RESP response into a dict. | ||
| 22 | # This is useful for many test cases, where unnecessary nesting is removed. | ||
| 23 | proc convert_array_into_dict {slot_stats} { | ||
| 24 | set res [dict create] | ||
| 25 | foreach slot_stat $slot_stats { | ||
| 26 | # slot_stat is an array of size 2, where 0th index represents (int) slot, | ||
| 27 | # and 1st index represents (map) usage statistics. | ||
| 28 | dict set res [lindex $slot_stat 0] [lindex $slot_stat 1] | ||
| 29 | } | ||
| 30 | return $res | ||
| 31 | } | ||
| 32 | |||
| 33 | proc get_cmdstat_usec {cmd r} { | ||
| 34 | set cmdstatline [cmdrstat $cmd r] | ||
| 35 | regexp "usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec _ | ||
| 36 | return $usec | ||
| 37 | } | ||
| 38 | |||
| 39 | proc initialize_expected_slots_dict {} { | ||
| 40 | set expected_slots [dict create] | ||
| 41 | for {set i 0} {$i < 16384} {incr i 1} { | ||
| 42 | dict set expected_slots $i 0 | ||
| 43 | } | ||
| 44 | return $expected_slots | ||
| 45 | } | ||
| 46 | |||
| 47 | proc initialize_expected_slots_dict_with_range {start_slot end_slot} { | ||
| 48 | assert {$start_slot <= $end_slot} | ||
| 49 | set expected_slots [dict create] | ||
| 50 | for {set i $start_slot} {$i <= $end_slot} {incr i 1} { | ||
| 51 | dict set expected_slots $i 0 | ||
| 52 | } | ||
| 53 | return $expected_slots | ||
| 54 | } | ||
| 55 | |||
| 56 | proc assert_empty_slot_stats {slot_stats metrics_to_assert} { | ||
| 57 | set slot_stats [convert_array_into_dict $slot_stats] | ||
| 58 | dict for {slot stats} $slot_stats { | ||
| 59 | foreach metric_name $metrics_to_assert { | ||
| 60 | set metric_value [dict get $stats $metric_name] | ||
| 61 | assert {$metric_value == 0} | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | proc assert_empty_slot_stats_with_exception {slot_stats exception_slots metrics_to_assert} { | ||
| 67 | set slot_stats [convert_array_into_dict $slot_stats] | ||
| 68 | dict for {slot stats} $exception_slots { | ||
| 69 | assert {[dict exists $slot_stats $slot]} ;# slot_stats must contain the expected slots. | ||
| 70 | } | ||
| 71 | dict for {slot stats} $slot_stats { | ||
| 72 | if {[dict exists $exception_slots $slot]} { | ||
| 73 | foreach metric_name $metrics_to_assert { | ||
| 74 | set metric_value [dict get $exception_slots $slot $metric_name] | ||
| 75 | assert {[dict get $stats $metric_name] == $metric_value} | ||
| 76 | } | ||
| 77 | } else { | ||
| 78 | dict for {metric value} $stats { | ||
| 79 | assert {$value == 0} | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | proc assert_equal_slot_stats {slot_stats_1 slot_stats_2 deterministic_metrics non_deterministic_metrics} { | ||
| 86 | set slot_stats_1 [convert_array_into_dict $slot_stats_1] | ||
| 87 | set slot_stats_2 [convert_array_into_dict $slot_stats_2] | ||
| 88 | assert {[dict size $slot_stats_1] == [dict size $slot_stats_2]} | ||
| 89 | |||
| 90 | dict for {slot stats_1} $slot_stats_1 { | ||
| 91 | assert {[dict exists $slot_stats_2 $slot]} | ||
| 92 | set stats_2 [dict get $slot_stats_2 $slot] | ||
| 93 | |||
| 94 | # For deterministic metrics, we assert their equality. | ||
| 95 | foreach metric $deterministic_metrics { | ||
| 96 | assert {[dict get $stats_1 $metric] == [dict get $stats_2 $metric]} | ||
| 97 | } | ||
| 98 | # For non-deterministic metrics, we assert their non-zeroness as a best-effort. | ||
| 99 | foreach metric $non_deterministic_metrics { | ||
| 100 | assert {([dict get $stats_1 $metric] == 0 && [dict get $stats_2 $metric] == 0) || \ | ||
| 101 | ([dict get $stats_1 $metric] != 0 && [dict get $stats_2 $metric] != 0)} | ||
| 102 | } | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | proc assert_all_slots_have_been_seen {expected_slots} { | ||
| 107 | dict for {k v} $expected_slots { | ||
| 108 | assert {$v == 1} | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | proc assert_slot_visibility {slot_stats expected_slots} { | ||
| 113 | set slot_stats [convert_array_into_dict $slot_stats] | ||
| 114 | dict for {slot _} $slot_stats { | ||
| 115 | assert {[dict exists $expected_slots $slot]} | ||
| 116 | dict set expected_slots $slot 1 | ||
| 117 | } | ||
| 118 | |||
| 119 | assert_all_slots_have_been_seen $expected_slots | ||
| 120 | } | ||
| 121 | |||
| 122 | proc assert_slot_stats_monotonic_order {slot_stats orderby is_desc} { | ||
| 123 | # For Tcl dict, the order of iteration is the order in which the keys were inserted into the dictionary | ||
| 124 | # Thus, the response ordering is preserved upon calling 'convert_array_into_dict()'. | ||
| 125 | # Source: https://www.tcl.tk/man/tcl8.6.11/TclCmd/dict.htm | ||
| 126 | set slot_stats [convert_array_into_dict $slot_stats] | ||
| 127 | set prev_metric -1 | ||
| 128 | dict for {_ stats} $slot_stats { | ||
| 129 | set curr_metric [dict get $stats $orderby] | ||
| 130 | if {$prev_metric != -1} { | ||
| 131 | if {$is_desc == 1} { | ||
| 132 | assert {$prev_metric >= $curr_metric} | ||
| 133 | } else { | ||
| 134 | assert {$prev_metric <= $curr_metric} | ||
| 135 | } | ||
| 136 | } | ||
| 137 | set prev_metric $curr_metric | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | proc assert_slot_stats_monotonic_descent {slot_stats orderby} { | ||
| 142 | assert_slot_stats_monotonic_order $slot_stats $orderby 1 | ||
| 143 | } | ||
| 144 | |||
| 145 | proc assert_slot_stats_monotonic_ascent {slot_stats orderby} { | ||
| 146 | assert_slot_stats_monotonic_order $slot_stats $orderby 0 | ||
| 147 | } | ||
| 148 | |||
| 149 | proc wait_for_replica_key_exists {key key_count} { | ||
| 150 | wait_for_condition 1000 50 { | ||
| 151 | [R 1 exists $key] eq "$key_count" | ||
| 152 | } else { | ||
| 153 | fail "Test key was not replicated" | ||
| 154 | } | ||
| 155 | } | ||
| 156 | |||
| 157 | # ----------------------------------------------------------------------------- | ||
| 158 | # Test cases for CLUSTER SLOT-STATS cpu-usec metric correctness. | ||
| 159 | # ----------------------------------------------------------------------------- | ||
| 160 | |||
| 161 | start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { | ||
| 162 | |||
| 163 | # Define shared variables. | ||
| 164 | set key "FOO" | ||
| 165 | set key_slot [R 0 cluster keyslot $key] | ||
| 166 | set key_secondary "FOO2" | ||
| 167 | set key_secondary_slot [R 0 cluster keyslot $key_secondary] | ||
| 168 | set metrics_to_assert [list cpu-usec] | ||
| 169 | |||
| 170 | test "CLUSTER SLOT-STATS cpu-usec reset upon CONFIG RESETSTAT." { | ||
| 171 | R 0 SET $key VALUE | ||
| 172 | R 0 DEL $key | ||
| 173 | R 0 CONFIG RESETSTAT | ||
| 174 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 175 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 176 | } | ||
| 177 | R 0 CONFIG RESETSTAT | ||
| 178 | R 0 FLUSHALL | ||
| 179 | |||
| 180 | test "CLUSTER SLOT-STATS cpu-usec reset upon slot migration." { | ||
| 181 | R 0 SET $key VALUE | ||
| 182 | |||
| 183 | R 0 CLUSTER DELSLOTS $key_slot | ||
| 184 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 185 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 186 | |||
| 187 | R 0 CLUSTER ADDSLOTS $key_slot | ||
| 188 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 189 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 190 | } | ||
| 191 | R 0 CONFIG RESETSTAT | ||
| 192 | R 0 FLUSHALL | ||
| 193 | |||
| 194 | test "CLUSTER SLOT-STATS cpu-usec for non-slot specific commands." { | ||
| 195 | R 0 INFO | ||
| 196 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 197 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 198 | } | ||
| 199 | R 0 CONFIG RESETSTAT | ||
| 200 | R 0 FLUSHALL | ||
| 201 | |||
| 202 | test "CLUSTER SLOT-STATS cpu-usec for slot specific commands." { | ||
| 203 | R 0 SET $key VALUE | ||
| 204 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 205 | set usec [get_cmdstat_usec set r] | ||
| 206 | set expected_slot_stats [ | ||
| 207 | dict create $key_slot [ | ||
| 208 | dict create cpu-usec $usec | ||
| 209 | ] | ||
| 210 | ] | ||
| 211 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 212 | } | ||
| 213 | R 0 CONFIG RESETSTAT | ||
| 214 | R 0 FLUSHALL | ||
| 215 | |||
| 216 | test "CLUSTER SLOT-STATS cpu-usec for blocking commands, unblocked on keyspace update." { | ||
| 217 | # Blocking command with no timeout. Only keyspace update can unblock this client. | ||
| 218 | set rd [redis_deferring_client] | ||
| 219 | $rd BLPOP $key 0 | ||
| 220 | wait_for_blocked_clients_count 1 | ||
| 221 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 222 | # When the client is blocked, no accumulation is made. This behaviour is identical to INFO COMMANDSTATS. | ||
| 223 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 224 | |||
| 225 | # Unblocking command. | ||
| 226 | R 0 LPUSH $key value | ||
| 227 | wait_for_blocked_clients_count 0 | ||
| 228 | |||
| 229 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 230 | set lpush_usec [get_cmdstat_usec lpush r] | ||
| 231 | set blpop_usec [get_cmdstat_usec blpop r] | ||
| 232 | |||
| 233 | # Assert that both blocking and non-blocking command times have been accumulated. | ||
| 234 | set expected_slot_stats [ | ||
| 235 | dict create $key_slot [ | ||
| 236 | dict create cpu-usec [expr $lpush_usec + $blpop_usec] | ||
| 237 | ] | ||
| 238 | ] | ||
| 239 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 240 | } | ||
| 241 | R 0 CONFIG RESETSTAT | ||
| 242 | R 0 FLUSHALL | ||
| 243 | |||
| 244 | test "CLUSTER SLOT-STATS cpu-usec for blocking commands, unblocked on timeout." { | ||
| 245 | # Blocking command with 0.5 seconds timeout. | ||
| 246 | set rd [redis_deferring_client] | ||
| 247 | $rd BLPOP $key 0.5 | ||
| 248 | |||
| 249 | # Confirm that the client is blocked, then unblocked within 1 second. | ||
| 250 | wait_for_blocked_clients_count 1 | ||
| 251 | wait_for_blocked_clients_count 0 | ||
| 252 | |||
| 253 | # Assert that the blocking command time has been accumulated. | ||
| 254 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 255 | set blpop_usec [get_cmdstat_usec blpop r] | ||
| 256 | set expected_slot_stats [ | ||
| 257 | dict create $key_slot [ | ||
| 258 | dict create cpu-usec $blpop_usec | ||
| 259 | ] | ||
| 260 | ] | ||
| 261 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 262 | } | ||
| 263 | R 0 CONFIG RESETSTAT | ||
| 264 | R 0 FLUSHALL | ||
| 265 | |||
| 266 | test "CLUSTER SLOT-STATS cpu-usec for transactions." { | ||
| 267 | set r1 [redis_client] | ||
| 268 | $r1 MULTI | ||
| 269 | $r1 SET $key value | ||
| 270 | $r1 GET $key | ||
| 271 | |||
| 272 | # CPU metric is not accumulated until EXEC is reached. This behaviour is identical to INFO COMMANDSTATS. | ||
| 273 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 274 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 275 | |||
| 276 | # Execute transaction, and assert that all nested command times have been accumulated. | ||
| 277 | $r1 EXEC | ||
| 278 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 279 | set exec_usec [get_cmdstat_usec exec r] | ||
| 280 | set expected_slot_stats [ | ||
| 281 | dict create $key_slot [ | ||
| 282 | dict create cpu-usec $exec_usec | ||
| 283 | ] | ||
| 284 | ] | ||
| 285 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 286 | } | ||
| 287 | R 0 CONFIG RESETSTAT | ||
| 288 | R 0 FLUSHALL | ||
| 289 | |||
| 290 | test "CLUSTER SLOT-STATS cpu-usec for lua-scripts, without cross-slot keys." { | ||
| 291 | R 0 eval {#!lua | ||
| 292 | redis.call('set', KEYS[1], 'bar') redis.call('get', KEYS[2]) | ||
| 293 | } 2 $key $key | ||
| 294 | |||
| 295 | set eval_usec [get_cmdstat_usec eval r] | ||
| 296 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 297 | |||
| 298 | set expected_slot_stats [ | ||
| 299 | dict create $key_slot [ | ||
| 300 | dict create cpu-usec $eval_usec | ||
| 301 | ] | ||
| 302 | ] | ||
| 303 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 304 | } | ||
| 305 | R 0 CONFIG RESETSTAT | ||
| 306 | R 0 FLUSHALL | ||
| 307 | |||
| 308 | test "CLUSTER SLOT-STATS cpu-usec for lua-scripts, with cross-slot keys." { | ||
| 309 | R 0 eval {#!lua flags=allow-cross-slot-keys | ||
| 310 | redis.call('set', KEYS[1], 'bar') redis.call('get', ARGV[1]) | ||
| 311 | } 1 $key $key_secondary | ||
| 312 | |||
| 313 | # For cross-slot, we do not accumulate at all. | ||
| 314 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 315 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 316 | } | ||
| 317 | R 0 CONFIG RESETSTAT | ||
| 318 | R 0 FLUSHALL | ||
| 319 | |||
| 320 | test "CLUSTER SLOT-STATS cpu-usec for functions, without cross-slot keys." { | ||
| 321 | R 0 function load replace {#!lua name=f1 | ||
| 322 | redis.register_function{ | ||
| 323 | function_name='f1', | ||
| 324 | callback=function(keys, args) redis.call('set', keys[1], '1') redis.call('get', keys[2]) end | ||
| 325 | } | ||
| 326 | } | ||
| 327 | R 0 fcall f1 2 $key $key | ||
| 328 | |||
| 329 | set fcall_usec [get_cmdstat_usec fcall r] | ||
| 330 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 331 | |||
| 332 | set expected_slot_stats [ | ||
| 333 | dict create $key_slot [ | ||
| 334 | dict create cpu-usec $fcall_usec | ||
| 335 | ] | ||
| 336 | ] | ||
| 337 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 338 | } | ||
| 339 | R 0 CONFIG RESETSTAT | ||
| 340 | R 0 FLUSHALL | ||
| 341 | |||
| 342 | test "CLUSTER SLOT-STATS cpu-usec for functions, with cross-slot keys." { | ||
| 343 | R 0 function load replace {#!lua name=f1 | ||
| 344 | redis.register_function{ | ||
| 345 | function_name='f1', | ||
| 346 | callback=function(keys, args) redis.call('set', keys[1], '1') redis.call('get', args[1]) end, | ||
| 347 | flags={'allow-cross-slot-keys'} | ||
| 348 | } | ||
| 349 | } | ||
| 350 | R 0 fcall f1 1 $key $key_secondary | ||
| 351 | |||
| 352 | # For cross-slot, we do not accumulate at all. | ||
| 353 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 354 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 355 | } | ||
| 356 | R 0 CONFIG RESETSTAT | ||
| 357 | R 0 FLUSHALL | ||
| 358 | } | ||
| 359 | |||
| 360 | # ----------------------------------------------------------------------------- | ||
| 361 | # Test cases for CLUSTER SLOT-STATS network-bytes-in. | ||
| 362 | # ----------------------------------------------------------------------------- | ||
| 363 | |||
| 364 | start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { | ||
| 365 | |||
| 366 | # Define shared variables. | ||
| 367 | set key "key" | ||
| 368 | set key_slot [R 0 cluster keyslot $key] | ||
| 369 | set metrics_to_assert [list network-bytes-in] | ||
| 370 | |||
| 371 | test "CLUSTER SLOT-STATS network-bytes-in, multi bulk buffer processing." { | ||
| 372 | # *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes. | ||
| 373 | R 0 SET $key value | ||
| 374 | |||
| 375 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 376 | set expected_slot_stats [ | ||
| 377 | dict create $key_slot [ | ||
| 378 | dict create network-bytes-in 33 | ||
| 379 | ] | ||
| 380 | ] | ||
| 381 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 382 | } | ||
| 383 | R 0 CONFIG RESETSTAT | ||
| 384 | R 0 FLUSHALL | ||
| 385 | |||
| 386 | test "CLUSTER SLOT-STATS network-bytes-in, in-line buffer processing." { | ||
| 387 | set rd [redis_deferring_client] | ||
| 388 | # SET key value\r\n --> 15 bytes. | ||
| 389 | $rd write "SET $key value\r\n" | ||
| 390 | $rd flush | ||
| 391 | |||
| 392 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 393 | set expected_slot_stats [ | ||
| 394 | dict create $key_slot [ | ||
| 395 | dict create network-bytes-in 15 | ||
| 396 | ] | ||
| 397 | ] | ||
| 398 | |||
| 399 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 400 | } | ||
| 401 | R 0 CONFIG RESETSTAT | ||
| 402 | R 0 FLUSHALL | ||
| 403 | |||
| 404 | test "CLUSTER SLOT-STATS network-bytes-in, blocking command." { | ||
| 405 | set rd [redis_deferring_client] | ||
| 406 | # *3\r\n$5\r\nblpop\r\n$3\r\nkey\r\n$1\r\n0\r\n --> 31 bytes. | ||
| 407 | $rd BLPOP $key 0 | ||
| 408 | wait_for_blocked_clients_count 1 | ||
| 409 | |||
| 410 | # Slot-stats must be empty here, as the client is yet to be unblocked. | ||
| 411 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 412 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 413 | |||
| 414 | # *3\r\n$5\r\nlpush\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 35 bytes. | ||
| 415 | R 0 LPUSH $key value | ||
| 416 | wait_for_blocked_clients_count 0 | ||
| 417 | |||
| 418 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 419 | set expected_slot_stats [ | ||
| 420 | dict create $key_slot [ | ||
| 421 | dict create network-bytes-in 66 ;# 31 + 35 bytes. | ||
| 422 | ] | ||
| 423 | ] | ||
| 424 | |||
| 425 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 426 | } | ||
| 427 | R 0 CONFIG RESETSTAT | ||
| 428 | R 0 FLUSHALL | ||
| 429 | |||
| 430 | test "CLUSTER SLOT-STATS network-bytes-in, multi-exec transaction." { | ||
| 431 | set r [redis_client] | ||
| 432 | # *1\r\n$5\r\nmulti\r\n --> 15 bytes. | ||
| 433 | $r MULTI | ||
| 434 | # *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes. | ||
| 435 | assert {[$r SET $key value] eq {QUEUED}} | ||
| 436 | # *1\r\n$4\r\nexec\r\n --> 14 bytes. | ||
| 437 | assert {[$r EXEC] eq {OK}} | ||
| 438 | |||
| 439 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 440 | set expected_slot_stats [ | ||
| 441 | dict create $key_slot [ | ||
| 442 | dict create network-bytes-in 62 ;# 15 + 33 + 14 bytes. | ||
| 443 | ] | ||
| 444 | ] | ||
| 445 | |||
| 446 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 447 | } | ||
| 448 | R 0 CONFIG RESETSTAT | ||
| 449 | R 0 FLUSHALL | ||
| 450 | |||
| 451 | test "CLUSTER SLOT-STATS network-bytes-in, non slot specific command." { | ||
| 452 | R 0 INFO | ||
| 453 | |||
| 454 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 455 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 456 | } | ||
| 457 | R 0 CONFIG RESETSTAT | ||
| 458 | R 0 FLUSHALL | ||
| 459 | |||
| 460 | test "CLUSTER SLOT-STATS network-bytes-in, pub/sub." { | ||
| 461 | # PUB/SUB does not get accumulated at per-slot basis, | ||
| 462 | # as it is cluster-wide and is not slot specific. | ||
| 463 | set rd [redis_deferring_client] | ||
| 464 | $rd subscribe channel | ||
| 465 | R 0 publish channel message | ||
| 466 | |||
| 467 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 468 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 469 | } | ||
| 470 | R 0 CONFIG RESETSTAT | ||
| 471 | R 0 FLUSHALL | ||
| 472 | } | ||
| 473 | |||
| 474 | start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { | ||
| 475 | set channel "channel" | ||
| 476 | set key_slot [R 0 cluster keyslot $channel] | ||
| 477 | set metrics_to_assert [list network-bytes-in] | ||
| 478 | |||
| 479 | # Setup replication. | ||
| 480 | assert {[s -1 role] eq {slave}} | ||
| 481 | wait_for_condition 1000 50 { | ||
| 482 | [s -1 master_link_status] eq {up} | ||
| 483 | } else { | ||
| 484 | fail "Instance #1 master link status is not up" | ||
| 485 | } | ||
| 486 | R 1 readonly | ||
| 487 | |||
| 488 | test "CLUSTER SLOT-STATS network-bytes-in, sharded pub/sub." { | ||
| 489 | set slot [R 0 cluster keyslot $channel] | ||
| 490 | set primary [Rn 0] | ||
| 491 | set replica [Rn 1] | ||
| 492 | set replica_subcriber [redis_deferring_client -1] | ||
| 493 | $replica_subcriber SSUBSCRIBE $channel | ||
| 494 | # *2\r\n$10\r\nssubscribe\r\n$7\r\nchannel\r\n --> 34 bytes. | ||
| 495 | $primary SPUBLISH $channel hello | ||
| 496 | # *3\r\n$8\r\nspublish\r\n$7\r\nchannel\r\n$5\r\nhello\r\n --> 42 bytes. | ||
| 497 | |||
| 498 | set slot_stats [$primary CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 499 | set expected_slot_stats [ | ||
| 500 | dict create $key_slot [ | ||
| 501 | dict create network-bytes-in 42 | ||
| 502 | ] | ||
| 503 | ] | ||
| 504 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 505 | |||
| 506 | set slot_stats [$replica CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 507 | set expected_slot_stats [ | ||
| 508 | dict create $key_slot [ | ||
| 509 | dict create network-bytes-in 34 | ||
| 510 | ] | ||
| 511 | ] | ||
| 512 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 513 | } | ||
| 514 | R 0 CONFIG RESETSTAT | ||
| 515 | R 0 FLUSHALL | ||
| 516 | } | ||
| 517 | |||
| 518 | # ----------------------------------------------------------------------------- | ||
| 519 | # Test cases for CLUSTER SLOT-STATS network-bytes-out correctness. | ||
| 520 | # ----------------------------------------------------------------------------- | ||
| 521 | |||
| 522 | start_cluster 1 0 {tags {external:skip cluster}} { | ||
| 523 | # Define shared variables. | ||
| 524 | set key "FOO" | ||
| 525 | set key_slot [R 0 cluster keyslot $key] | ||
| 526 | set expected_slots_to_key_count [dict create $key_slot 1] | ||
| 527 | set metrics_to_assert [list network-bytes-out] | ||
| 528 | R 0 CONFIG SET cluster-slot-stats-enabled yes | ||
| 529 | |||
| 530 | test "CLUSTER SLOT-STATS network-bytes-out, for non-slot specific commands." { | ||
| 531 | R 0 INFO | ||
| 532 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 533 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 534 | } | ||
| 535 | R 0 CONFIG RESETSTAT | ||
| 536 | R 0 FLUSHALL | ||
| 537 | |||
| 538 | test "CLUSTER SLOT-STATS network-bytes-out, for slot specific commands." { | ||
| 539 | R 0 SET $key value | ||
| 540 | # +OK\r\n --> 5 bytes | ||
| 541 | |||
| 542 | set expected_slot_stats [ | ||
| 543 | dict create $key_slot [ | ||
| 544 | dict create network-bytes-out 5 | ||
| 545 | ] | ||
| 546 | ] | ||
| 547 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 548 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 549 | } | ||
| 550 | R 0 CONFIG RESETSTAT | ||
| 551 | R 0 FLUSHALL | ||
| 552 | |||
| 553 | test "CLUSTER SLOT-STATS network-bytes-out, blocking commands." { | ||
| 554 | set rd [redis_deferring_client] | ||
| 555 | $rd BLPOP $key 0 | ||
| 556 | wait_for_blocked_clients_count 1 | ||
| 557 | |||
| 558 | # Assert empty slot stats here, since COB is yet to be flushed due to the block. | ||
| 559 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 560 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 561 | |||
| 562 | # Unblock the command. | ||
| 563 | # LPUSH client) :1\r\n --> 4 bytes. | ||
| 564 | # BLPOP client) *2\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 24 bytes, upon unblocking. | ||
| 565 | R 0 LPUSH $key value | ||
| 566 | wait_for_blocked_clients_count 0 | ||
| 567 | |||
| 568 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 569 | set expected_slot_stats [ | ||
| 570 | dict create $key_slot [ | ||
| 571 | dict create network-bytes-out 28 ;# 4 + 24 bytes. | ||
| 572 | ] | ||
| 573 | ] | ||
| 574 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 575 | } | ||
| 576 | R 0 CONFIG RESETSTAT | ||
| 577 | R 0 FLUSHALL | ||
| 578 | } | ||
| 579 | |||
| 580 | start_cluster 1 1 {tags {external:skip cluster}} { | ||
| 581 | |||
| 582 | # Define shared variables. | ||
| 583 | set key "FOO" | ||
| 584 | set key_slot [R 0 CLUSTER KEYSLOT $key] | ||
| 585 | set metrics_to_assert [list network-bytes-out] | ||
| 586 | R 0 CONFIG SET cluster-slot-stats-enabled yes | ||
| 587 | |||
| 588 | # Setup replication. | ||
| 589 | assert {[s -1 role] eq {slave}} | ||
| 590 | wait_for_condition 1000 50 { | ||
| 591 | [s -1 master_link_status] eq {up} | ||
| 592 | } else { | ||
| 593 | fail "Instance #1 master link status is not up" | ||
| 594 | } | ||
| 595 | R 1 readonly | ||
| 596 | |||
| 597 | test "CLUSTER SLOT-STATS network-bytes-out, replication stream egress." { | ||
| 598 | assert_equal [R 0 SET $key VALUE] {OK} | ||
| 599 | # Local client) +OK\r\n --> 5 bytes. | ||
| 600 | # Replication stream) *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes. | ||
| 601 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 602 | set expected_slot_stats [ | ||
| 603 | dict create $key_slot [ | ||
| 604 | dict create network-bytes-out 38 ;# 5 + 33 bytes. | ||
| 605 | ] | ||
| 606 | ] | ||
| 607 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 608 | } | ||
| 609 | } | ||
| 610 | |||
| 611 | start_cluster 1 1 {tags {external:skip cluster}} { | ||
| 612 | |||
| 613 | # Define shared variables. | ||
| 614 | set channel "channel" | ||
| 615 | set key_slot [R 0 cluster keyslot $channel] | ||
| 616 | set channel_secondary "channel2" | ||
| 617 | set key_slot_secondary [R 0 cluster keyslot $channel_secondary] | ||
| 618 | set metrics_to_assert [list network-bytes-out] | ||
| 619 | R 0 CONFIG SET cluster-slot-stats-enabled yes | ||
| 620 | |||
| 621 | test "CLUSTER SLOT-STATS network-bytes-out, sharded pub/sub, single channel." { | ||
| 622 | set slot [R 0 cluster keyslot $channel] | ||
| 623 | set publisher [Rn 0] | ||
| 624 | set subscriber [redis_client] | ||
| 625 | set replica [redis_deferring_client -1] | ||
| 626 | |||
| 627 | # Subscriber client) *3\r\n$10\r\nssubscribe\r\n$7\r\nchannel\r\n:1\r\n --> 38 bytes | ||
| 628 | $subscriber SSUBSCRIBE $channel | ||
| 629 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 630 | set expected_slot_stats [ | ||
| 631 | dict create $key_slot [ | ||
| 632 | dict create network-bytes-out 38 | ||
| 633 | ] | ||
| 634 | ] | ||
| 635 | R 0 CONFIG RESETSTAT | ||
| 636 | |||
| 637 | # Publisher client) :1\r\n --> 4 bytes. | ||
| 638 | # Subscriber client) *3\r\n$8\r\nsmessage\r\n$7\r\nchannel\r\n$5\r\nhello\r\n --> 42 bytes. | ||
| 639 | assert_equal 1 [$publisher SPUBLISH $channel hello] | ||
| 640 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 641 | set expected_slot_stats [ | ||
| 642 | dict create $key_slot [ | ||
| 643 | dict create network-bytes-out 46 ;# 4 + 42 bytes. | ||
| 644 | ] | ||
| 645 | ] | ||
| 646 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 647 | } | ||
| 648 | $subscriber QUIT | ||
| 649 | R 0 FLUSHALL | ||
| 650 | R 0 CONFIG RESETSTAT | ||
| 651 | |||
| 652 | test "CLUSTER SLOT-STATS network-bytes-out, sharded pub/sub, cross-slot channels." { | ||
| 653 | set slot [R 0 cluster keyslot $channel] | ||
| 654 | set publisher [Rn 0] | ||
| 655 | set subscriber [redis_client] | ||
| 656 | set replica [redis_deferring_client -1] | ||
| 657 | |||
| 658 | # Stack multi-slot subscriptions against a single client. | ||
| 659 | # For primary channel; | ||
| 660 | # Subscriber client) *3\r\n$10\r\nssubscribe\r\n$7\r\nchannel\r\n:1\r\n --> 38 bytes | ||
| 661 | # For secondary channel; | ||
| 662 | # Subscriber client) *3\r\n$10\r\nssubscribe\r\n$8\r\nchannel2\r\n:1\r\n --> 39 bytes | ||
| 663 | $subscriber SSUBSCRIBE $channel | ||
| 664 | $subscriber SSUBSCRIBE $channel_secondary | ||
| 665 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 666 | set expected_slot_stats [ | ||
| 667 | dict create \ | ||
| 668 | $key_slot [ \ | ||
| 669 | dict create network-bytes-out 38 | ||
| 670 | ] \ | ||
| 671 | $key_slot_secondary [ \ | ||
| 672 | dict create network-bytes-out 39 | ||
| 673 | ] | ||
| 674 | ] | ||
| 675 | R 0 CONFIG RESETSTAT | ||
| 676 | |||
| 677 | # For primary channel; | ||
| 678 | # Publisher client) :1\r\n --> 4 bytes. | ||
| 679 | # Subscriber client) *3\r\n$8\r\nsmessage\r\n$7\r\nchannel\r\n$5\r\nhello\r\n --> 42 bytes. | ||
| 680 | # For secondary channel; | ||
| 681 | # Publisher client) :1\r\n --> 4 bytes. | ||
| 682 | # Subscriber client) *3\r\n$8\r\nsmessage\r\n$8\r\nchannel2\r\n$5\r\nhello\r\n --> 43 bytes. | ||
| 683 | assert_equal 1 [$publisher SPUBLISH $channel hello] | ||
| 684 | assert_equal 1 [$publisher SPUBLISH $channel_secondary hello] | ||
| 685 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 686 | set expected_slot_stats [ | ||
| 687 | dict create \ | ||
| 688 | $key_slot [ \ | ||
| 689 | dict create network-bytes-out 46 ;# 4 + 42 bytes. | ||
| 690 | ] \ | ||
| 691 | $key_slot_secondary [ \ | ||
| 692 | dict create network-bytes-out 47 ;# 4 + 43 bytes. | ||
| 693 | ] | ||
| 694 | ] | ||
| 695 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 696 | } | ||
| 697 | } | ||
| 698 | |||
| 699 | # ----------------------------------------------------------------------------- | ||
| 700 | # Test cases for CLUSTER SLOT-STATS key-count metric correctness. | ||
| 701 | # ----------------------------------------------------------------------------- | ||
| 702 | |||
| 703 | start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} { | ||
| 704 | |||
| 705 | # Define shared variables. | ||
| 706 | set key "FOO" | ||
| 707 | set key_slot [R 0 cluster keyslot $key] | ||
| 708 | set metrics_to_assert [list key-count] | ||
| 709 | set expected_slot_stats [ | ||
| 710 | dict create $key_slot [ | ||
| 711 | dict create key-count 1 | ||
| 712 | ] | ||
| 713 | ] | ||
| 714 | |||
| 715 | test "CLUSTER SLOT-STATS contains default value upon redis-server startup" { | ||
| 716 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 717 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 718 | } | ||
| 719 | |||
| 720 | test "CLUSTER SLOT-STATS contains correct metrics upon key introduction" { | ||
| 721 | R 0 SET $key TEST | ||
| 722 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 723 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 724 | } | ||
| 725 | |||
| 726 | test "CLUSTER SLOT-STATS contains correct metrics upon key mutation" { | ||
| 727 | R 0 SET $key NEW_VALUE | ||
| 728 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 729 | assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert | ||
| 730 | } | ||
| 731 | |||
| 732 | test "CLUSTER SLOT-STATS contains correct metrics upon key deletion" { | ||
| 733 | R 0 DEL $key | ||
| 734 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 735 | assert_empty_slot_stats $slot_stats $metrics_to_assert | ||
| 736 | } | ||
| 737 | |||
| 738 | test "CLUSTER SLOT-STATS slot visibility based on slot ownership changes" { | ||
| 739 | R 0 CONFIG SET cluster-require-full-coverage no | ||
| 740 | |||
| 741 | R 0 CLUSTER DELSLOTS $key_slot | ||
| 742 | set expected_slots [initialize_expected_slots_dict] | ||
| 743 | dict unset expected_slots $key_slot | ||
| 744 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 745 | assert {[dict size $expected_slots] == 16383} | ||
| 746 | assert_slot_visibility $slot_stats $expected_slots | ||
| 747 | |||
| 748 | R 0 CLUSTER ADDSLOTS $key_slot | ||
| 749 | set expected_slots [initialize_expected_slots_dict] | ||
| 750 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383] | ||
| 751 | assert {[dict size $expected_slots] == 16384} | ||
| 752 | assert_slot_visibility $slot_stats $expected_slots | ||
| 753 | } | ||
| 754 | } | ||
| 755 | |||
| 756 | # ----------------------------------------------------------------------------- | ||
| 757 | # Test cases for CLUSTER SLOT-STATS SLOTSRANGE sub-argument. | ||
| 758 | # ----------------------------------------------------------------------------- | ||
| 759 | |||
| 760 | start_cluster 1 0 {tags {external:skip cluster}} { | ||
| 761 | |||
| 762 | test "CLUSTER SLOT-STATS SLOTSRANGE all slots present" { | ||
| 763 | set start_slot 100 | ||
| 764 | set end_slot 102 | ||
| 765 | set expected_slots [initialize_expected_slots_dict_with_range $start_slot $end_slot] | ||
| 766 | |||
| 767 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $start_slot $end_slot] | ||
| 768 | assert_slot_visibility $slot_stats $expected_slots | ||
| 769 | } | ||
| 770 | |||
| 771 | test "CLUSTER SLOT-STATS SLOTSRANGE some slots missing" { | ||
| 772 | set start_slot 100 | ||
| 773 | set end_slot 102 | ||
| 774 | set expected_slots [initialize_expected_slots_dict_with_range $start_slot $end_slot] | ||
| 775 | |||
| 776 | R 0 CLUSTER DELSLOTS $start_slot | ||
| 777 | dict unset expected_slots $start_slot | ||
| 778 | |||
| 779 | set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE $start_slot $end_slot] | ||
| 780 | assert_slot_visibility $slot_stats $expected_slots | ||
| 781 | } | ||
| 782 | } | ||
| 783 | |||
| 784 | # ----------------------------------------------------------------------------- | ||
| 785 | # Test cases for CLUSTER SLOT-STATS ORDERBY sub-argument. | ||
| 786 | # ----------------------------------------------------------------------------- | ||
| 787 | |||
# Exercises CLUSTER SLOT-STATS ORDERBY on a single-primary cluster with slot
# stats enabled: correct DESC/ASC ordering for every metric, LIMIT pagination
# (both below and above the number of assigned slots), and argument validation.
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

    # Metrics accepted by ORDERBY for sorting.
    set metrics [list "key-count" "memory-bytes" "cpu-usec" "network-bytes-in" "network-bytes-out"]

    # SET keys for target hashslots, to encourage ordering.
    # Hash tag {0} gets 1 key, {1} gets 2 keys, ... {4} gets 5 keys, so the
    # per-slot key-count strictly increases across the five target slots.
    set hash_tags [list 0 1 2 3 4]
    set num_keys 1
    foreach hash_tag $hash_tags {
        for {set i 0} {$i < $num_keys} {incr i 1} {
            R 0 SET "$i{$hash_tag}" VALUE
        }
        incr num_keys 1
    }

    # SET keys for random hashslots, for random noise.
    set num_keys 0
    while {$num_keys < 1000} {
        set random_key [randomInt 16384]
        R 0 SET $random_key VALUE
        incr num_keys 1
    }

    test "CLUSTER SLOT-STATS ORDERBY DESC correct ordering" {
        foreach orderby $metrics {
            set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby DESC]
            assert_slot_stats_monotonic_descent $slot_stats $orderby
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY ASC correct ordering" {
        foreach orderby $metrics {
            set slot_stats [R 0 CLUSTER SLOT-STATS ORDERBY $orderby ASC]
            assert_slot_stats_monotonic_ascent $slot_stats $orderby
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is less than number of assigned slots" {
        R 0 FLUSHALL SYNC
        R 0 CONFIG RESETSTAT

        foreach orderby $metrics {
            set limit 5
            set slot_stats_desc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit DESC]
            set slot_stats_asc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit ASC]
            set slot_stats_desc_length [llength $slot_stats_desc]
            set slot_stats_asc_length [llength $slot_stats_asc]
            assert {$limit == $slot_stats_desc_length && $limit == $slot_stats_asc_length}

            # All slot statistics have been reset to 0, so we will order by slot in ascending order.
            set expected_slots [dict create 0 0 1 0 2 0 3 0 4 0]
            assert_slot_visibility $slot_stats_desc $expected_slots
            assert_slot_visibility $slot_stats_asc $expected_slots
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY LIMIT correct response pagination, where limit is greater than number of assigned slots" {
        R 0 CONFIG SET cluster-require-full-coverage no
        R 0 FLUSHALL SYNC
        R 0 CLUSTER FLUSHSLOTS
        R 0 CLUSTER ADDSLOTS 100 101

        foreach orderby $metrics {
            set num_assigned_slots 2
            set limit 5
            set slot_stats_desc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit DESC]
            set slot_stats_asc [R 0 CLUSTER SLOT-STATS ORDERBY $orderby LIMIT $limit ASC]
            set slot_stats_desc_length [llength $slot_stats_desc]
            set slot_stats_asc_length [llength $slot_stats_asc]
            # Braced expr: avoids double substitution and lets Tcl byte-compile
            # the expression (the original used an unbraced [expr min(...)]).
            set expected_response_length [expr {min($num_assigned_slots, $limit)}]
            assert {$expected_response_length == $slot_stats_desc_length && $expected_response_length == $slot_stats_asc_length}

            set expected_slots [dict create 100 0 101 0]
            assert_slot_visibility $slot_stats_desc $expected_slots
            assert_slot_visibility $slot_stats_asc $expected_slots
        }
    }

    test "CLUSTER SLOT-STATS ORDERBY arg sanity check." {
        # Non-existent argument.
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY key-count non-existent-arg}
        # Negative LIMIT.
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY key-count DESC LIMIT -1}
        # Non-existent ORDERBY metric.
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY non-existent-metric}
        # When cluster-slot-stats-enabled config is disabled, you cannot sort using advanced metrics.
        R 0 CONFIG SET cluster-slot-stats-enabled no
        set orderby "cpu-usec"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}
        set orderby "network-bytes-in"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}
        set orderby "network-bytes-out"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}
        set orderby "memory-bytes"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY $orderby}

        # When only cpu net is enabled, memory-bytes ORDERBY should fail
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu net"
        assert_error "ERR*" {R 0 CLUSTER SLOT-STATS ORDERBY memory-bytes}
    }

}
| 889 | |||
| 890 | # ----------------------------------------------------------------------------- | ||
| 891 | # Test cases for CLUSTER SLOT-STATS replication. | ||
| 892 | # ----------------------------------------------------------------------------- | ||
| 893 | |||
# Primary/replica pair: verifies that slot-stats metrics propagate over
# replication with the expected determinism per metric class, for new,
# updated and deleted keys.
start_cluster 1 1 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {

    # Define shared variables.
    set key "key"
    set key_slot [R 0 CLUSTER KEYSLOT $key]
    set primary [Rn 0]
    set replica [Rn 1]

    # For replication, assertions are split between deterministic and non-deterministic metrics.
    # * For deterministic metrics, strict equality assertions are made.
    # * For non-deterministic metrics, non-zeroness assertions are made.
    #   Non-zeroness as in, both primary and replica should either have some value, or no value at all.
    #
    # * key-count is deterministic between primary and its replica.
    # * cpu-usec is non-deterministic between primary and its replica.
    # * network-bytes-in is deterministic between primary and its replica.
    # * network-bytes-out will remain empty in the replica, since primary client do not receive replies, unless for replicationSendAck().
    set deterministic_metrics [list key-count network-bytes-in]
    set non_deterministic_metrics [list cpu-usec]
    set empty_metrics [list network-bytes-out]

    # Setup replication: wait until the replica reports a live master link
    # before asserting on replicated stats.
    assert {[s -1 role] eq {slave}}
    wait_for_condition 1000 50 {
        [s -1 master_link_status] eq {up}
    } else {
        fail "Instance #1 master link status is not up"
    }
    R 1 readonly

    test "CLUSTER SLOT-STATS metrics replication for new keys" {
        # *3\r\n$3\r\nset\r\n$3\r\nkey\r\n$5\r\nvalue\r\n --> 33 bytes.
        R 0 SET $key VALUE

        set expected_slot_stats [
            dict create $key_slot [
                dict create key-count 1 network-bytes-in 33
            ]
        ]
        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats_master $expected_slot_stats $deterministic_metrics

        # Wait for the replica to have applied the propagated SET before
        # reading its slot stats.
        wait_for_condition 500 10 {
            [string match {*calls=1,*} [cmdrstat set $replica]]
        } else {
            fail "Replica did not receive the command."
        }
        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_equal_slot_stats $slot_stats_master $slot_stats_replica $deterministic_metrics $non_deterministic_metrics
        assert_empty_slot_stats $slot_stats_replica $empty_metrics
    }
    # Reset command/slot stats on both nodes so each test observes only its
    # own command's contribution.
    R 0 CONFIG RESETSTAT
    R 1 CONFIG RESETSTAT

    test "CLUSTER SLOT-STATS metrics replication for existing keys" {
        # *3\r\n$3\r\nset\r\n$3\r\nkey\r\n$13\r\nvalue_updated\r\n --> 42 bytes.
        R 0 SET $key VALUE_UPDATED

        set expected_slot_stats [
            dict create $key_slot [
                dict create key-count 1 network-bytes-in 42
            ]
        ]
        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats_master $expected_slot_stats $deterministic_metrics

        wait_for_condition 500 10 {
            [string match {*calls=1,*} [cmdrstat set $replica]]
        } else {
            fail "Replica did not receive the command."
        }
        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_equal_slot_stats $slot_stats_master $slot_stats_replica $deterministic_metrics $non_deterministic_metrics
        assert_empty_slot_stats $slot_stats_replica $empty_metrics
    }
    R 0 CONFIG RESETSTAT
    R 1 CONFIG RESETSTAT

    test "CLUSTER SLOT-STATS metrics replication for deleting keys" {
        # *2\r\n$3\r\ndel\r\n$3\r\nkey\r\n --> 22 bytes.
        R 0 DEL $key

        # key-count drops back to 0 once the key is deleted.
        set expected_slot_stats [
            dict create $key_slot [
                dict create key-count 0 network-bytes-in 22
            ]
        ]
        set slot_stats_master [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_empty_slot_stats_with_exception $slot_stats_master $expected_slot_stats $deterministic_metrics

        wait_for_condition 500 10 {
            [string match {*calls=1,*} [cmdrstat del $replica]]
        } else {
            fail "Replica did not receive the command."
        }
        set slot_stats_replica [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        assert_equal_slot_stats $slot_stats_master $slot_stats_replica $deterministic_metrics $non_deterministic_metrics
        assert_empty_slot_stats $slot_stats_replica $empty_metrics
    }
    R 0 CONFIG RESETSTAT
    R 1 CONFIG RESETSTAT
}
| 996 | |||
# Verifies that accumulated slot-stats metrics are reset when a slot moves
# between nodes via atomic slot migration (ASM), while key-count travels
# with the slot's data.
start_cluster 2 2 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
    test "CLUSTER SLOT-STATS reset upon atomic slot migration" {
        # key on slot-0
        set key0 "{06S}mykey0"
        set key0_slot [R 0 CLUSTER KEYSLOT $key0]
        R 0 SET $key0 VALUE

        # Migrate slot-0 to node-1
        R 1 CLUSTER MIGRATION IMPORT 0 0
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 1 cluster_slot_migration_active_tasks] == 0
        } else {
            fail "ASM tasks did not complete"
        }

        # Expected post-migration stats for the migrated slot: the key itself
        # moves with the slot (key-count 1) while every accumulated metric is
        # reset to 0 on the importing node.
        #
        # NOTE: the original repeated the words "dict create" inside a single
        # [dict create ...] call, which injected a spurious "dict" -> "create"
        # pair into the expected dict; build one flat, well-formed dict instead.
        set expected_slot_stats [dict create $key0_slot [dict create \
            key-count 1 \
            cpu-usec 0 \
            network-bytes-in 0 \
            network-bytes-out 0]]
        set metrics_to_assert [list key-count cpu-usec network-bytes-in network-bytes-out]

        # Verify metrics are reset except key-count
        set slot_stats [R 1 CLUSTER SLOT-STATS SLOTSRANGE 0 0]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert

        # Migrate slot-0 back to node-0
        R 0 CLUSTER MIGRATION IMPORT 0 0
        wait_for_condition 1000 10 {
            [CI 0 cluster_slot_migration_active_tasks] == 0 &&
            [CI 1 cluster_slot_migration_active_tasks] == 0
        } else {
            fail "ASM tasks did not complete"
        }

        # Verify metrics are reset except key-count
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 0]
        assert_empty_slot_stats_with_exception $slot_stats $expected_slot_stats $metrics_to_assert
    }
}
| 1042 | |||
| 1043 | # ----------------------------------------------------------------------------- | ||
| 1044 | # Test cases for CLUSTER SLOT-STATS memory-bytes field presence. | ||
| 1045 | # ----------------------------------------------------------------------------- | ||
| 1046 | |||
# Memory-bytes field presence when slot stats (including memory tracking)
# were enabled at startup. Tests in this block run in order and later tests
# depend on config changes made by earlier ones (memory tracking, once
# disabled, cannot be re-enabled at runtime).
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled yes}} {
    # Define shared variables.
    set key "FOO"
    set key_slot [R 0 cluster keyslot $key]

    test "CLUSTER SLOT-STATS memory-bytes field present when cluster-slot-stats-enabled set on startup" {
        R 0 SET $key VALUE
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Verify memory-bytes field is present
        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {[dict exists $stats memory-bytes]}
        assert {[dict get $stats memory-bytes] > 0}
    }

    test "CLUSTER SLOT-STATS net mem combination shows only net and mem stats" {
        R 0 CONFIG SET cluster-slot-stats-enabled "net mem"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Only memory and network fields should be reported; cpu is off.
        set stats [dict get $slot_stats $key_slot]
        assert {[dict exists $stats memory-bytes]}
        assert {[dict exists $stats network-bytes-in]}
        assert {[dict exists $stats network-bytes-out]}
        assert {![dict exists $stats cpu-usec]}
    }

    test "CLUSTER SLOT-STATS cpu mem combination shows only cpu and mem stats" {
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu mem"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Only memory and cpu fields should be reported; network is off.
        set stats [dict get $slot_stats $key_slot]
        assert {[dict exists $stats memory-bytes]}
        assert {[dict exists $stats cpu-usec]}
        assert {![dict exists $stats network-bytes-in]}
        assert {![dict exists $stats network-bytes-out]}

        # Restore to yes for subsequent tests
        R 0 CONFIG SET cluster-slot-stats-enabled yes
    }

    test "CLUSTER SLOT-STATS memory-bytes field not present after disabling cluster-slot-stats-enabled" {
        R 0 CONFIG SET cluster-slot-stats-enabled no
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        # Verify memory-bytes field is not present after disabling config
        # (memory tracking is disabled when MEM flag is removed)
        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {![dict exists $stats memory-bytes]}

        # Verify other stats fields are not present
        assert {![dict exists $stats cpu-usec]}
        assert {![dict exists $stats network-bytes-in]}
        assert {![dict exists $stats network-bytes-out]}
    }

    test "CLUSTER SLOT-STATS memory tracking cannot be re-enabled after being disabled" {
        # Once memory tracking is disabled, it cannot be re-enabled at runtime
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled yes}
        assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled mem}

        # But cpu and net can still be enabled
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu net"
        set slot_stats [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]
        set slot_stats [convert_array_into_dict $slot_stats]

        assert {[dict exists $slot_stats $key_slot]}
        set stats [dict get $slot_stats $key_slot]
        assert {![dict exists $stats memory-bytes]}
        assert {[dict exists $stats cpu-usec]}
        assert {[dict exists $stats network-bytes-in]}
        assert {[dict exists $stats network-bytes-out]}
    }
}
| 1126 | |||
# Startup with slot stats disabled: memory tracking stays unavailable for the
# lifetime of the process, while cpu/net collection may still be switched on
# at runtime.
start_cluster 1 0 {tags {external:skip cluster} overrides {cluster-slot-stats-enabled no}} {
    # Shared fixtures for every test in this block.
    set key "FOO"
    set key_slot [R 0 cluster keyslot $key]

    test "CLUSTER SLOT-STATS memory-bytes field not present when cluster-slot-stats-enabled not set on startup" {
        R 0 SET $key VALUE
        set stats_by_slot [convert_array_into_dict [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]]

        # The key's slot is reported, but without a memory-bytes entry.
        assert {[dict exists $stats_by_slot $key_slot]}
        set slot_entry [dict get $stats_by_slot $key_slot]
        assert {![dict exists $slot_entry memory-bytes]}

        # key-count is the lone metric present, reflecting the single key.
        assert {[dict exists $slot_entry key-count]}
        assert {[dict get $slot_entry key-count] == 1}
    }

    test "CLUSTER SLOT-STATS enabling mem at runtime fails when not enabled at startup" {
        # Every config value that includes memory tracking must be rejected.
        foreach config_value {mem yes "cpu net mem"} {
            assert_error "ERR*memory tracking cannot be enabled at runtime*" {R 0 CONFIG SET cluster-slot-stats-enabled $config_value}
        }
    }

    test "CLUSTER SLOT-STATS enabling cpu and net at runtime works" {
        R 0 CONFIG SET cluster-slot-stats-enabled "cpu net"
        set stats_by_slot [convert_array_into_dict [R 0 CLUSTER SLOT-STATS SLOTSRANGE 0 16383]]

        # memory-bytes remains absent...
        assert {[dict exists $stats_by_slot $key_slot]}
        set slot_entry [dict get $stats_by_slot $key_slot]
        assert {![dict exists $slot_entry memory-bytes]}

        # ...while cpu and network metrics are now collected.
        assert {[dict exists $slot_entry cpu-usec]}
        assert {[dict exists $slot_entry network-bytes-in]}
        assert {[dict exists $slot_entry network-bytes-out]}
    }
}
