aboutsummaryrefslogtreecommitdiff
path: root/examples/redis-unstable/tests/integration/replication-buffer.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/tests/integration/replication-buffer.tcl')
-rw-r--r--examples/redis-unstable/tests/integration/replication-buffer.tcl365
1 files changed, 365 insertions, 0 deletions
diff --git a/examples/redis-unstable/tests/integration/replication-buffer.tcl b/examples/redis-unstable/tests/integration/replication-buffer.tcl
new file mode 100644
index 0000000..11e604c
--- /dev/null
+++ b/examples/redis-unstable/tests/integration/replication-buffer.tcl
@@ -0,0 +1,365 @@
1#
2# Copyright (c) 2009-Present, Redis Ltd.
3# All rights reserved.
4#
5# Copyright (c) 2024-present, Valkey contributors.
6# All rights reserved.
7#
8# Licensed under your choice of (a) the Redis Source Available License 2.0
9# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
10# GNU Affero General Public License v3 (AGPLv3).
11#
12# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
13#
14
15# This test group aims to test that all replicas share one global replication buffer,
16# that two replicas don't double the replication buffer size, and that when there is
17# no replica, the replication buffer will shrink.
18foreach rdbchannel {"yes" "no"} {
19start_server {tags {"repl external:skip"}} {
20start_server {} {
21start_server {} {
22start_server {} {
23 set replica1 [srv -3 client]
24 set replica2 [srv -2 client]
25 set replica3 [srv -1 client]
26
27 $replica1 config set repl-rdb-channel $rdbchannel
28 $replica2 config set repl-rdb-channel $rdbchannel
29 $replica3 config set repl-rdb-channel $rdbchannel
30
31 set master [srv 0 client]
32 set master_host [srv 0 host]
33 set master_port [srv 0 port]
34
35 $master config set save ""
36 $master config set repl-backlog-size 16384
37 $master config set repl-diskless-sync-delay 5
38 $master config set repl-diskless-sync-max-replicas 1
39 $master config set client-output-buffer-limit "replica 0 0 0"
40 $master config set repl-rdb-channel $rdbchannel
41
42 # Make sure replica3 is synchronized with master
43 $replica3 replicaof $master_host $master_port
44 wait_for_sync $replica3
45
46 # Generating RDB will take some 100 seconds
47 $master config set rdb-key-save-delay 1000000
48 populate 100 "" 16
49
50 # Make sure replica1 and replica2 are waiting bgsave
51 $master config set repl-diskless-sync-max-replicas 2
52 $replica1 replicaof $master_host $master_port
53 $replica2 replicaof $master_host $master_port
54 wait_for_condition 50 100 {
55 ([s rdb_bgsave_in_progress] == 1) &&
56 [lindex [$replica1 role] 3] eq {sync} &&
57 [lindex [$replica2 role] 3] eq {sync}
58 } else {
59 fail "fail to sync with replicas"
60 }
61
62 test "All replicas share one global replication buffer rdbchannel=$rdbchannel" {
63 set before_used [s used_memory]
64 populate 1024 "" 1024 ; # Write extra 1M data
65
66 # In case we are running with IO-threads we need to give a few cycles
67 # for IO-threads to start sending the cmd stream. If we don't do that
68 # the checks related to the repl_buf_mem will be incorrect as the buffer
69 # will still be full with the above 1Mb data.
70 set iothreads [s io_threads_active]
71 if {$iothreads && $rdbchannel == "yes"} {
72 after 1000
73 }
74
75 # New data uses 1M memory, but all replicas use only one
76 # replication buffer, so all replicas output memory is not
77 # more than double of replication buffer.
78 set repl_buf_mem [s mem_total_replication_buffers]
79 set extra_mem [expr {[s used_memory]-$before_used-1024*1024}]
80 if {$rdbchannel == "yes"} {
81 # master's replication buffers should not grow
82 assert {$extra_mem < 1024*1024}
83 assert {$repl_buf_mem < 1024*1024}
84 } else {
85 assert {$extra_mem < 2*$repl_buf_mem}
86 }
87
88 # Kill replica1, replication_buffer will not become smaller
89 catch {$replica1 shutdown nosave}
90 wait_for_condition 50 100 {
91 [s connected_slaves] eq {2}
92 } else {
93 fail "replica doesn't disconnect with master"
94 }
95 assert_equal $repl_buf_mem [s mem_total_replication_buffers]
96 }
97
98 test "Replication buffer will become smaller when no replica uses rdbchannel=$rdbchannel" {
99 # Make sure replica3 catches up with the master
100 wait_for_ofs_sync $master $replica3
101
102 set repl_buf_mem [s mem_total_replication_buffers]
103 # Kill replica2, replication_buffer will become smaller
104 catch {$replica2 shutdown nosave}
105 wait_for_condition 50 100 {
106 [s connected_slaves] eq {1}
107 } else {
108 fail "replica2 doesn't disconnect with master"
109 }
110 if {$rdbchannel == "yes"} {
111 # master's replication buffers should not grow
112 assert {1024*512 > [s mem_total_replication_buffers]}
113 } else {
114 assert {[expr $repl_buf_mem - 1024*1024] > [s mem_total_replication_buffers]}
115 }
116 }
117}
118}
119}
120}
121}
122
123# This test group aims to test replication backlog size can outgrow the backlog
124# limit config if there is a slow replica that keeps massive replication buffers,
125# and replicas could use this replication buffer (beyond backlog config) for
126# partial re-synchronization. Of course, replication backlog memory also can
127# become smaller when master disconnects with slow replicas since output buffer
128# limit is reached.
129foreach rdbchannel {"yes" "no"} {
130start_server {tags {"repl external:skip debug_defrag:skip"}} {
131start_server {} {
132start_server {} {
133 set replica1 [srv -2 client]
134 set replica1_pid [s -2 process_id]
135 set replica2 [srv -1 client]
136 set replica2_pid [s -1 process_id]
137
138 set master [srv 0 client]
139 set master_host [srv 0 host]
140 set master_port [srv 0 port]
141
142 $master config set save ""
143 $master config set repl-backlog-size 16384
144 $master config set repl-rdb-channel $rdbchannel
145 $master config set client-output-buffer-limit "replica 0 0 0"
146
147 # Executing 'debug digest' on a master that has many keys takes a long time
148 # (especially under valgrind), which can cause replica1 and replica2 to
149 # disconnect from the master, so raise repl-timeout on all of them.
150 $master config set repl-timeout 1000
151 $replica1 config set repl-timeout 1000
152 $replica1 config set repl-rdb-channel $rdbchannel
153 $replica1 config set client-output-buffer-limit "replica 1024 0 0"
154 $replica2 config set repl-timeout 1000
155 $replica2 config set client-output-buffer-limit "replica 1024 0 0"
156 $replica2 config set repl-rdb-channel $rdbchannel
157
158 $replica1 replicaof $master_host $master_port
159 wait_for_sync $replica1
160
161 test "Replication backlog size can outgrow the backlog limit config rdbchannel=$rdbchannel" {
162 # Generating RDB will take 1000 seconds
163 $master config set rdb-key-save-delay 1000000
164 populate 1000 master 10000
165 $replica2 replicaof $master_host $master_port
166 # Make sure replica2 is waiting bgsave
167 wait_for_condition 5000 100 {
168 ([s rdb_bgsave_in_progress] == 1) &&
169 [lindex [$replica2 role] 3] eq {sync}
170 } else {
171 fail "fail to sync with replicas"
172 }
173 # Replication actual backlog grow more than backlog setting since
174 # the slow replica2 kept replication buffer.
175 populate 20000 master 10000
176 assert {[s repl_backlog_histlen] > [expr 10000*10000]}
177 }
178
179 # Wait for replica1 to catch up with the master
180 wait_for_condition 1000 100 {
181 [s -2 master_repl_offset] eq [s master_repl_offset]
182 } else {
183 fail "Replica offset didn't catch up with the master after too long time"
184 }
185
186 test "Replica could use replication buffer (beyond backlog config) for partial resynchronization rdbchannel=$rdbchannel" {
187 # replica1 disconnects with master
188 $replica1 replicaof [srv -1 host] [srv -1 port]
189 # Write a mass of data that exceeds repl-backlog-size
190 populate 10000 master 10000
191 # replica1 reconnects with master
192 $replica1 replicaof $master_host $master_port
193 wait_for_condition 1000 100 {
194 [s -2 master_repl_offset] eq [s master_repl_offset]
195 } else {
196 fail "Replica offset didn't catch up with the master after too long time"
197 }
198
199 # replica2 still waits for bgsave ending
200 assert {[s rdb_bgsave_in_progress] eq {1} && [lindex [$replica2 role] 3] eq {sync}}
201 # master accepted replica1 partial resync
202 assert_equal [s sync_partial_ok] {1}
203 assert_equal [$master debug digest] [$replica1 debug digest]
204 }
205
206 test "Replication backlog memory will become smaller if disconnecting with replica rdbchannel=$rdbchannel" {
207 assert {[s repl_backlog_histlen] > [expr 2*10000*10000]}
208 assert_equal [s connected_slaves] {2}
209
210 pause_process $replica2_pid
211 r config set client-output-buffer-limit "replica 128k 0 0"
212 # trigger output buffer limit check
213 r set key [string repeat A [expr 64*1024]]
214 # master will close replica2's connection since replica2's output
215 # buffer limit is reached, so there only is replica1.
216 # In case of rdbchannel=yes, main channel will be disconnected only.
217 wait_for_condition 100 100 {
218 [s connected_slaves] eq {1} ||
219 ([s connected_slaves] eq {2} &&
220 [string match {*slave*state=wait_bgsave*} [$master info]])
221 } else {
222 fail "master didn't disconnect with replica2"
223 }
224
225 # Since we trim replication backlog incrementally, replication backlog
226 # memory may take time to be reclaimed.
227 wait_for_condition 1000 100 {
228 [s repl_backlog_histlen] < [expr 10000*10000]
229 } else {
230 fail "Replication backlog memory is not smaller"
231 }
232 resume_process $replica2_pid
233 }
234 # speed up termination
235 $master config set shutdown-timeout 0
236}
237}
238}
239}
240
241foreach rdbchannel {"yes" "no"} {
242test "Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size rdbchannel=$rdbchannel" {
243 start_server {tags {"repl external:skip"}} {
244 start_server {} {
245 r config set save ""
246 r config set repl-backlog-size 100mb
247 r config set client-output-buffer-limit "replica 512k 0 0"
248 r config set repl-rdb-channel $rdbchannel
249
250 set replica [srv -1 client]
251 $replica config set repl-rdb-channel $rdbchannel
252 $replica replicaof [srv 0 host] [srv 0 port]
253 wait_for_sync $replica
254
255 set big_str [string repeat A [expr 10*1024*1024]] ;# 10mb big string
256 r multi
257 r client kill type replica
258 r set key $big_str
259 r set key $big_str
260 r debug sleep 2 ;# wait for replica reconnecting
261 r exec
262 # When replica reconnects with master, master accepts partial resync,
263 # and doesn't close the replica client even if the client output buffer
264 # limit is reached.
265 r set key $big_str ;# trigger output buffer limit check
266 wait_for_ofs_sync r $replica
267 # master accepted replica partial resync
268 assert_equal [s sync_full] {1}
269 assert_equal [s sync_partial_ok] {1}
270
271 r multi
272 r set key $big_str
273 r set key $big_str
274 r exec
275 # replica's reply buffer size is more than client-output-buffer-limit but
276 # doesn't exceed repl-backlog-size, we don't close replica client.
277 wait_for_condition 1000 100 {
278 [s -1 master_repl_offset] eq [s master_repl_offset]
279 } else {
280 fail "Replica offset didn't catch up with the master after too long time"
281 }
282 assert_equal [s sync_full] {1}
283 assert_equal [s sync_partial_ok] {1}
284 }
285 }
286}
287
288# This test was added to make sure big keys added to the backlog do not trigger psync loop.
289test "Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending rdbchannel=$rdbchannel" {
290 proc client_field {r type f} {
291 set client [$r client list type $type]
292 if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} {
293 error "field $f not found for in $client"
294 }
295 return $res
296 }
297
298 start_server {tags {"repl external:skip"}} {
299 start_server {} {
300 set replica [srv -1 client]
301 set replica_host [srv -1 host]
302 set replica_port [srv -1 port]
303 set master [srv 0 client]
304 set master_host [srv 0 host]
305 set master_port [srv 0 port]
306 $master config set maxmemory-policy allkeys-lru
307
308 $master config set repl-backlog-size 16384
309 $master config set client-output-buffer-limit "replica 32768 32768 60"
310 $master config set repl-rdb-channel $rdbchannel
311 $replica config set repl-rdb-channel $rdbchannel
312 # Key has to be larger than replica client-output-buffer limit.
313 set keysize [expr 256*1024]
314
315 $replica replicaof $master_host $master_port
316 wait_for_condition 50 100 {
317 [lindex [$replica role] 0] eq {slave} &&
318 [string match {*master_link_status:up*} [$replica info replication]]
319 } else {
320 fail "Can't turn the instance into a replica"
321 }
322
323 # Write a big key that is gonna breach the obuf limit and cause the replica to disconnect,
324 # then in the same event loop, add at least 16 more keys, and enable eviction, so that the
325 # eviction code has a chance to call flushSlavesOutputBuffers, and then run PING to trigger the eviction code
326 set _v [prepare_value $keysize]
327 $master write "[format_command mset key $_v k1 1 k2 2 k3 3 k4 4 k5 5 k6 6 k7 7 k8 8 k9 9 ka a kb b kc c kd d ke e kf f kg g kh h]config set maxmemory 1\r\nping\r\n"
328 $master flush
329 $master read
330 $master read
331 $master read
332 wait_for_ofs_sync $master $replica
333
334 # Write another key to force the test to wait for another event loop iteration so that we
335 # give the serverCron a chance to disconnect replicas with COB size exceeding the limits
336 $master config set maxmemory 0
337 $master set key1 1
338 wait_for_ofs_sync $master $replica
339
340 assert {[status $master connected_slaves] == 1}
341
342 wait_for_condition 50 100 {
343 [client_field $master replica tot-mem] < $keysize
344 } else {
345 fail "replica client-output-buffer usage is higher than expected."
346 }
347
348 # now we expect the replica to re-connect but fail partial sync (it doesn't have large
349 # enough COB limit and must result in a full-sync)
350 assert {[status $master sync_partial_ok] == 0}
351
352 # Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos'
353 test {The update of replBufBlock's repl_offset is ok - Regression test for #11666} {
354 set rd [redis_deferring_client]
355 set replid [status $master master_replid]
356 set offset [status $master repl_backlog_first_byte_offset]
357 $rd psync $replid $offset
358 assert_equal {PONG} [$master ping] ;# Make sure the master doesn't crash.
359 $rd close
360 }
361 }
362 }
363}
364}
365