diff options
Diffstat (limited to 'examples/redis-unstable/tests/integration/replication-buffer.tcl')
| -rw-r--r-- | examples/redis-unstable/tests/integration/replication-buffer.tcl | 365 |
1 files changed, 365 insertions, 0 deletions
diff --git a/examples/redis-unstable/tests/integration/replication-buffer.tcl b/examples/redis-unstable/tests/integration/replication-buffer.tcl new file mode 100644 index 0000000..11e604c --- /dev/null +++ b/examples/redis-unstable/tests/integration/replication-buffer.tcl | |||
| @@ -0,0 +1,365 @@ | |||
| 1 | # | ||
| 2 | # Copyright (c) 2009-Present, Redis Ltd. | ||
| 3 | # All rights reserved. | ||
| 4 | # | ||
| 5 | # Copyright (c) 2024-present, Valkey contributors. | ||
| 6 | # All rights reserved. | ||
| 7 | # | ||
| 8 | # Licensed under your choice of (a) the Redis Source Available License 2.0 | ||
| 9 | # (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the | ||
| 10 | # GNU Affero General Public License v3 (AGPLv3). | ||
| 11 | # | ||
| 12 | # Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. | ||
| 13 | # | ||
| 14 | |||
| 15 | # This test group aims to test that all replicas share one global replication buffer, | ||
| 16 | # two replicas don't make replication buffer size double, and when there is no replica, | ||
| 17 | # the replication buffer will shrink. | ||
| 18 | foreach rdbchannel {"yes" "no"} { | ||
| 19 | start_server {tags {"repl external:skip"}} { | ||
| 20 | start_server {} { | ||
| 21 | start_server {} { | ||
| 22 | start_server {} { | ||
| 23 | set replica1 [srv -3 client] | ||
| 24 | set replica2 [srv -2 client] | ||
| 25 | set replica3 [srv -1 client] | ||
| 26 | |||
| 27 | $replica1 config set repl-rdb-channel $rdbchannel | ||
| 28 | $replica2 config set repl-rdb-channel $rdbchannel | ||
| 29 | $replica3 config set repl-rdb-channel $rdbchannel | ||
| 30 | |||
| 31 | set master [srv 0 client] | ||
| 32 | set master_host [srv 0 host] | ||
| 33 | set master_port [srv 0 port] | ||
| 34 | |||
| 35 | $master config set save "" | ||
| 36 | $master config set repl-backlog-size 16384 | ||
| 37 | $master config set repl-diskless-sync-delay 5 | ||
| 38 | $master config set repl-diskless-sync-max-replicas 1 | ||
| 39 | $master config set client-output-buffer-limit "replica 0 0 0" | ||
| 40 | $master config set repl-rdb-channel $rdbchannel | ||
| 41 | |||
| 42 | # Make sure replica3 is synchronized with master | ||
| 43 | $replica3 replicaof $master_host $master_port | ||
| 44 | wait_for_sync $replica3 | ||
| 45 | |||
| 46 | # Generating RDB will take some 100 seconds | ||
| 47 | $master config set rdb-key-save-delay 1000000 | ||
| 48 | populate 100 "" 16 | ||
| 49 | |||
| 50 | # Make sure replica1 and replica2 are waiting bgsave | ||
| 51 | $master config set repl-diskless-sync-max-replicas 2 | ||
| 52 | $replica1 replicaof $master_host $master_port | ||
| 53 | $replica2 replicaof $master_host $master_port | ||
| 54 | wait_for_condition 50 100 { | ||
| 55 | ([s rdb_bgsave_in_progress] == 1) && | ||
| 56 | [lindex [$replica1 role] 3] eq {sync} && | ||
| 57 | [lindex [$replica2 role] 3] eq {sync} | ||
| 58 | } else { | ||
| 59 | fail "fail to sync with replicas" | ||
| 60 | } | ||
| 61 | |||
| 62 | test "All replicas share one global replication buffer rdbchannel=$rdbchannel" { | ||
| 63 | set before_used [s used_memory] | ||
| 64 | populate 1024 "" 1024 ; # Write extra 1M data | ||
| 65 | |||
| 66 | # In case we are running with IO-threads we need to give a few cycles | ||
| 67 | # for IO-threads to start sending the cmd stream. If we don't do that | ||
| 68 | # the checks related to the repl_buf_mem will be incorrect as the buffer | ||
| 69 | # will still be full with the above 1Mb data. | ||
| 70 | set iothreads [s io_threads_active] | ||
| 71 | if {$iothreads && $rdbchannel == "yes"} { | ||
| 72 | after 1000 | ||
| 73 | } | ||
| 74 | |||
| 75 | # New data uses 1M memory, but all replicas use only one | ||
| 76 | # replication buffer, so all replicas output memory is not | ||
| 77 | # more than double of replication buffer. | ||
| 78 | set repl_buf_mem [s mem_total_replication_buffers] | ||
| 79 | set extra_mem [expr {[s used_memory]-$before_used-1024*1024}] | ||
| 80 | if {$rdbchannel == "yes"} { | ||
| 81 | # master's replication buffers should not grow | ||
| 82 | assert {$extra_mem < 1024*1024} | ||
| 83 | assert {$repl_buf_mem < 1024*1024} | ||
| 84 | } else { | ||
| 85 | assert {$extra_mem < 2*$repl_buf_mem} | ||
| 86 | } | ||
| 87 | |||
| 88 | # Kill replica1, replication_buffer will not become smaller | ||
| 89 | catch {$replica1 shutdown nosave} | ||
| 90 | wait_for_condition 50 100 { | ||
| 91 | [s connected_slaves] eq {2} | ||
| 92 | } else { | ||
| 93 | fail "replica doesn't disconnect with master" | ||
| 94 | } | ||
| 95 | assert_equal $repl_buf_mem [s mem_total_replication_buffers] | ||
| 96 | } | ||
| 97 | |||
| 98 | test "Replication buffer will become smaller when no replica uses rdbchannel=$rdbchannel" { | ||
| 99 | # Make sure replica3 catches up with the master | ||
| 100 | wait_for_ofs_sync $master $replica3 | ||
| 101 | |||
| 102 | set repl_buf_mem [s mem_total_replication_buffers] | ||
| 103 | # Kill replica2, replication_buffer will become smaller | ||
| 104 | catch {$replica2 shutdown nosave} | ||
| 105 | wait_for_condition 50 100 { | ||
| 106 | [s connected_slaves] eq {1} | ||
| 107 | } else { | ||
| 108 | fail "replica2 doesn't disconnect with master" | ||
| 109 | } | ||
| 110 | if {$rdbchannel == "yes"} { | ||
| 111 | # master's replication buffers should not grow | ||
| 112 | assert {1024*512 > [s mem_total_replication_buffers]} | ||
| 113 | } else { | ||
| 114 | assert {[expr $repl_buf_mem - 1024*1024] > [s mem_total_replication_buffers]} | ||
| 115 | } | ||
| 116 | } | ||
| 117 | } | ||
| 118 | } | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | # This test group aims to test replication backlog size can outgrow the backlog | ||
| 124 | # limit config if there is a slow replica which keeps massive replication buffers, | ||
| 125 | # and replicas could use this replication buffer (beyond backlog config) for | ||
| 126 | # partial re-synchronization. Of course, replication backlog memory also can | ||
| 127 | # become smaller when master disconnects with slow replicas since output buffer | ||
| 128 | # limit is reached. | ||
| 129 | foreach rdbchannel {"yes" "no"} { | ||
| 130 | start_server {tags {"repl external:skip debug_defrag:skip"}} { | ||
| 131 | start_server {} { | ||
| 132 | start_server {} { | ||
| 133 | set replica1 [srv -2 client] | ||
| 134 | set replica1_pid [s -2 process_id] | ||
| 135 | set replica2 [srv -1 client] | ||
| 136 | set replica2_pid [s -1 process_id] | ||
| 137 | |||
| 138 | set master [srv 0 client] | ||
| 139 | set master_host [srv 0 host] | ||
| 140 | set master_port [srv 0 port] | ||
| 141 | |||
| 142 | $master config set save "" | ||
| 143 | $master config set repl-backlog-size 16384 | ||
| 144 | $master config set repl-rdb-channel $rdbchannel | ||
| 145 | $master config set client-output-buffer-limit "replica 0 0 0" | ||
| 146 | |||
| 147 | # Executing 'debug digest' on a master which has many keys takes a long time | ||
| 148 | # (especially under valgrind), which could cause replica1 and replica2 to | ||
| 149 | # disconnect from the master, so use a large repl-timeout everywhere. | ||
| 150 | $master config set repl-timeout 1000 | ||
| 151 | $replica1 config set repl-timeout 1000 | ||
| 152 | $replica1 config set repl-rdb-channel $rdbchannel | ||
| 153 | $replica1 config set client-output-buffer-limit "replica 1024 0 0" | ||
| 154 | $replica2 config set repl-timeout 1000 | ||
| 155 | $replica2 config set client-output-buffer-limit "replica 1024 0 0" | ||
| 156 | $replica2 config set repl-rdb-channel $rdbchannel | ||
| 157 | |||
| 158 | $replica1 replicaof $master_host $master_port | ||
| 159 | wait_for_sync $replica1 | ||
| 160 | |||
| 161 | test "Replication backlog size can outgrow the backlog limit config rdbchannel=$rdbchannel" { | ||
| 162 | # Generating RDB will take 1000 seconds | ||
| 163 | $master config set rdb-key-save-delay 1000000 | ||
| 164 | populate 1000 master 10000 | ||
| 165 | $replica2 replicaof $master_host $master_port | ||
| 166 | # Make sure replica2 is waiting bgsave | ||
| 167 | wait_for_condition 5000 100 { | ||
| 168 | ([s rdb_bgsave_in_progress] == 1) && | ||
| 169 | [lindex [$replica2 role] 3] eq {sync} | ||
| 170 | } else { | ||
| 171 | fail "fail to sync with replicas" | ||
| 172 | } | ||
| 173 | # Replication actual backlog grow more than backlog setting since | ||
| 174 | # the slow replica2 kept replication buffer. | ||
| 175 | populate 20000 master 10000 | ||
| 176 | assert {[s repl_backlog_histlen] > [expr 10000*10000]} | ||
| 177 | } | ||
| 178 | |||
| 179 | # Wait for replica1 to catch up with the master | ||
| 180 | wait_for_condition 1000 100 { | ||
| 181 | [s -2 master_repl_offset] eq [s master_repl_offset] | ||
| 182 | } else { | ||
| 183 | fail "Replica offset didn't catch up with the master after too long time" | ||
| 184 | } | ||
| 185 | |||
| 186 | test "Replica could use replication buffer (beyond backlog config) for partial resynchronization rdbchannel=$rdbchannel" { | ||
| 187 | # replica1 disconnects with master | ||
| 188 | $replica1 replicaof [srv -1 host] [srv -1 port] | ||
| 189 | # Write a mass of data that exceeds repl-backlog-size | ||
| 190 | populate 10000 master 10000 | ||
| 191 | # replica1 reconnects with master | ||
| 192 | $replica1 replicaof $master_host $master_port | ||
| 193 | wait_for_condition 1000 100 { | ||
| 194 | [s -2 master_repl_offset] eq [s master_repl_offset] | ||
| 195 | } else { | ||
| 196 | fail "Replica offset didn't catch up with the master after too long time" | ||
| 197 | } | ||
| 198 | |||
| 199 | # replica2 still waits for bgsave ending | ||
| 200 | assert {[s rdb_bgsave_in_progress] eq {1} && [lindex [$replica2 role] 3] eq {sync}} | ||
| 201 | # master accepted replica1 partial resync | ||
| 202 | assert_equal [s sync_partial_ok] {1} | ||
| 203 | assert_equal [$master debug digest] [$replica1 debug digest] | ||
| 204 | } | ||
| 205 | |||
| 206 | test "Replication backlog memory will become smaller if disconnecting with replica rdbchannel=$rdbchannel" { | ||
| 207 | assert {[s repl_backlog_histlen] > [expr 2*10000*10000]} | ||
| 208 | assert_equal [s connected_slaves] {2} | ||
| 209 | |||
| 210 | pause_process $replica2_pid | ||
| 211 | r config set client-output-buffer-limit "replica 128k 0 0" | ||
| 212 | # trigger output buffer limit check | ||
| 213 | r set key [string repeat A [expr 64*1024]] | ||
| 214 | # master will close replica2's connection since replica2's output | ||
| 215 | # buffer limit is reached, so there only is replica1. | ||
| 216 | # In case of rdbchannel=yes, main channel will be disconnected only. | ||
| 217 | wait_for_condition 100 100 { | ||
| 218 | [s connected_slaves] eq {1} || | ||
| 219 | ([s connected_slaves] eq {2} && | ||
| 220 | [string match {*slave*state=wait_bgsave*} [$master info]]) | ||
| 221 | } else { | ||
| 222 | fail "master didn't disconnect with replica2" | ||
| 223 | } | ||
| 224 | |||
| 225 | # Since we trim replication backlog incrementally, replication backlog | ||
| 226 | # memory may take time to be reclaimed. | ||
| 227 | wait_for_condition 1000 100 { | ||
| 228 | [s repl_backlog_histlen] < [expr 10000*10000] | ||
| 229 | } else { | ||
| 230 | fail "Replication backlog memory is not smaller" | ||
| 231 | } | ||
| 232 | resume_process $replica2_pid | ||
| 233 | } | ||
| 234 | # speed up termination | ||
| 235 | $master config set shutdown-timeout 0 | ||
| 236 | } | ||
| 237 | } | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | foreach rdbchannel {"yes" "no"} { | ||
| 242 | test "Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size rdbchannel=$rdbchannel" { | ||
| 243 | start_server {tags {"repl external:skip"}} { | ||
| 244 | start_server {} { | ||
| 245 | r config set save "" | ||
| 246 | r config set repl-backlog-size 100mb | ||
| 247 | r config set client-output-buffer-limit "replica 512k 0 0" | ||
| 248 | r config set repl-rdb-channel $rdbchannel | ||
| 249 | |||
| 250 | set replica [srv -1 client] | ||
| 251 | $replica config set repl-rdb-channel $rdbchannel | ||
| 252 | $replica replicaof [srv 0 host] [srv 0 port] | ||
| 253 | wait_for_sync $replica | ||
| 254 | |||
| 255 | set big_str [string repeat A [expr 10*1024*1024]] ;# 10mb big string | ||
| 256 | r multi | ||
| 257 | r client kill type replica | ||
| 258 | r set key $big_str | ||
| 259 | r set key $big_str | ||
| 260 | r debug sleep 2 ;# wait for replica reconnecting | ||
| 261 | r exec | ||
| 262 | # When replica reconnects with master, master accepts partial resync, | ||
| 263 | # and doesn't close the replica client even if the client output buffer | ||
| 264 | # limit is reached. | ||
| 265 | r set key $big_str ;# trigger output buffer limit check | ||
| 266 | wait_for_ofs_sync r $replica | ||
| 267 | # master accepted replica partial resync | ||
| 268 | assert_equal [s sync_full] {1} | ||
| 269 | assert_equal [s sync_partial_ok] {1} | ||
| 270 | |||
| 271 | r multi | ||
| 272 | r set key $big_str | ||
| 273 | r set key $big_str | ||
| 274 | r exec | ||
| 275 | # replica's reply buffer size is more than client-output-buffer-limit but | ||
| 276 | # doesn't exceed repl-backlog-size, we don't close replica client. | ||
| 277 | wait_for_condition 1000 100 { | ||
| 278 | [s -1 master_repl_offset] eq [s master_repl_offset] | ||
| 279 | } else { | ||
| 280 | fail "Replica offset didn't catch up with the master after too long time" | ||
| 281 | } | ||
| 282 | assert_equal [s sync_full] {1} | ||
| 283 | assert_equal [s sync_partial_ok] {1} | ||
| 284 | } | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | # This test was added to make sure big keys added to the backlog do not trigger psync loop. | ||
| 289 | test "Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending rdbchannel=$rdbchannel" { | ||
| 290 | proc client_field {r type f} { | ||
| 291 | set client [$r client list type $type] | ||
| 292 | if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} { | ||
| 293 | error "field $f not found for in $client" | ||
| 294 | } | ||
| 295 | return $res | ||
| 296 | } | ||
| 297 | |||
| 298 | start_server {tags {"repl external:skip"}} { | ||
| 299 | start_server {} { | ||
| 300 | set replica [srv -1 client] | ||
| 301 | set replica_host [srv -1 host] | ||
| 302 | set replica_port [srv -1 port] | ||
| 303 | set master [srv 0 client] | ||
| 304 | set master_host [srv 0 host] | ||
| 305 | set master_port [srv 0 port] | ||
| 306 | $master config set maxmemory-policy allkeys-lru | ||
| 307 | |||
| 308 | $master config set repl-backlog-size 16384 | ||
| 309 | $master config set client-output-buffer-limit "replica 32768 32768 60" | ||
| 310 | $master config set repl-rdb-channel $rdbchannel | ||
| 311 | $replica config set repl-rdb-channel $rdbchannel | ||
| 312 | # Key has to be larger than replica client-output-buffer limit. | ||
| 313 | set keysize [expr 256*1024] | ||
| 314 | |||
| 315 | $replica replicaof $master_host $master_port | ||
| 316 | wait_for_condition 50 100 { | ||
| 317 | [lindex [$replica role] 0] eq {slave} && | ||
| 318 | [string match {*master_link_status:up*} [$replica info replication]] | ||
| 319 | } else { | ||
| 320 | fail "Can't turn the instance into a replica" | ||
| 321 | } | ||
| 322 | |||
| 323 | # Write a big key that is gonna breach the obuf limit and cause the replica to disconnect, | ||
| 324 | # then in the same event loop, add at least 16 more keys, and enable eviction, so that the | ||
| 325 | # eviction code has a chance to call flushSlavesOutputBuffers, and then run PING to trigger the eviction code | ||
| 326 | set _v [prepare_value $keysize] | ||
| 327 | $master write "[format_command mset key $_v k1 1 k2 2 k3 3 k4 4 k5 5 k6 6 k7 7 k8 8 k9 9 ka a kb b kc c kd d ke e kf f kg g kh h]config set maxmemory 1\r\nping\r\n" | ||
| 328 | $master flush | ||
| 329 | $master read | ||
| 330 | $master read | ||
| 331 | $master read | ||
| 332 | wait_for_ofs_sync $master $replica | ||
| 333 | |||
| 334 | # Write another key to force the test to wait for another event loop iteration so that we | ||
| 335 | # give the serverCron a chance to disconnect replicas with COB size exceeding the limits | ||
| 336 | $master config set maxmemory 0 | ||
| 337 | $master set key1 1 | ||
| 338 | wait_for_ofs_sync $master $replica | ||
| 339 | |||
| 340 | assert {[status $master connected_slaves] == 1} | ||
| 341 | |||
| 342 | wait_for_condition 50 100 { | ||
| 343 | [client_field $master replica tot-mem] < $keysize | ||
| 344 | } else { | ||
| 345 | fail "replica client-output-buffer usage is higher than expected." | ||
| 346 | } | ||
| 347 | |||
| 348 | # now we expect the replica to re-connect but fail partial sync (it doesn't have large | ||
| 349 | # enough COB limit and must result in a full-sync) | ||
| 350 | assert {[status $master sync_partial_ok] == 0} | ||
| 351 | |||
| 352 | # Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos' | ||
| 353 | test {The update of replBufBlock's repl_offset is ok - Regression test for #11666} { | ||
| 354 | set rd [redis_deferring_client] | ||
| 355 | set replid [status $master master_replid] | ||
| 356 | set offset [status $master repl_backlog_first_byte_offset] | ||
| 357 | $rd psync $replid $offset | ||
| 358 | assert_equal {PONG} [$master ping] ;# Make sure the master doesn't crash. | ||
| 359 | $rd close | ||
| 360 | } | ||
| 361 | } | ||
| 362 | } | ||
| 363 | } | ||
| 364 | } | ||
| 365 | |||
