aboutsummaryrefslogtreecommitdiff
path: root/examples/redis-unstable/tests/integration/corrupt-dump-fuzzer.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/tests/integration/corrupt-dump-fuzzer.tcl')
-rw-r--r--examples/redis-unstable/tests/integration/corrupt-dump-fuzzer.tcl261
1 files changed, 261 insertions, 0 deletions
diff --git a/examples/redis-unstable/tests/integration/corrupt-dump-fuzzer.tcl b/examples/redis-unstable/tests/integration/corrupt-dump-fuzzer.tcl
new file mode 100644
index 0000000..5c7c992
--- /dev/null
+++ b/examples/redis-unstable/tests/integration/corrupt-dump-fuzzer.tcl
@@ -0,0 +1,261 @@
1# tests of corrupt listpack payload with valid CRC
2
3# The fuzzer can corrupt the server state in many places, which could
4# mess up the reply, so we decided to skip logreqres.
5tags {"dump" "corruption" "external:skip" "logreqres:skip"} {
6
# Trap SIGTERM when the optional Tclx package can be loaded, so that if one of
# the random commands hangs the test (usually because redis never put a
# response in the output buffers) we'll know which command it was.
# When Tclx is unavailable the trap is silently skipped.
if {[catch {package require Tclx} err] == 0} {
    signal error SIGTERM
}
15
# Fill one key of each collection type with `elements` members.
#
# suffix   - string appended to every key name (hash, hashmd, list, zset,
#            set, stream and, when the server has VADD, vset)
# elements - number of members to add to each key
#
# All commands are pipelined through a deferring client and their replies are
# read and discarded at the end.
proc generate_collections {suffix elements} {
    set client [redis_deferring_client]
    set with_vsets [server_has_command vadd]

    # Number of commands (and therefore replies) issued per element.
    set cmds_per_element 7
    if {$with_vsets} {
        incr cmds_per_element
    }

    for {set i 0} {$i < $elements} {incr i} {
        # add both string values and integers
        if {$i % 2 == 0} {
            set member $i
        } else {
            set member "_$i"
        }
        $client hset hash$suffix $i $member
        $client hset hashmd$suffix $i $member
        # give each field of the "hashmd" hash its own random expire value
        $client hexpire hashmd$suffix [expr {int(rand() * 10000)}] FIELDS 1 $i
        $client lpush list$suffix $member
        $client zadd zset$suffix $i $member
        $client sadd set$suffix $member
        $client xadd stream$suffix * item 1 value $member
        if {$with_vsets} {
            $client vadd vset$suffix VALUES 3 1 1 1 $i
        }
    }

    # Discard one reply for every command sent above.
    set pending [expr {$elements * $cmds_per_element}]
    while {$pending > 0} {
        $client read
        incr pending -1
    }
    $client close
}
41
# Generate keys with various types and encodings.
#
# Lowers the per-type size thresholds to 5, then creates a small (3 item) and
# a big (10 item) instance of every collection type, plus a plain integer and
# a plain string key. The small stream also gets a consumer group with one
# deleted and one acknowledged entry.
proc generate_types {} {
    foreach {option limit} {
        list-max-ziplist-size    5
        hash-max-ziplist-entries 5
        set-max-listpack-entries 5
        zset-max-ziplist-entries 5
        stream-node-max-entries  5
    } {
        r config set $option $limit
    }

    # create small (ziplist / listpack encoded) objects with 3 items
    generate_collections "" 3

    # add some metadata to the stream: read two entries through a consumer
    # group, then delete the second one and acknowledge the first one
    r xgroup create stream mygroup 0
    set records [r xreadgroup GROUP mygroup Alice COUNT 2 STREAMS stream >]
    set second_entry_id [lindex $records 0 1 1 0]
    r xdel stream $second_entry_id
    set first_entry_id [lindex $records 0 1 0 0]
    r xack stream mygroup $first_entry_id

    # create other non-collection types
    r incr int
    r set string str

    # create bigger objects with 10 items (more than a single ziplist / listpack)
    generate_collections big 10

    # make sure our big stream also has a listpack record that has different
    # field names than the master recorded
    r xadd streambig * item 1 value 1
    r xadd streambig * item 1 unique value
}
71
# Return a copy of `payload` in which one randomly chosen byte (or, about 10%
# of the time, two) has been overwritten with a random byte value.
#
# payload - binary string to corrupt
#
# The length of the payload is never changed. An empty payload is returned
# as is. The replacement byte is drawn independently of the original one, so
# a given replacement may occasionally leave the byte unchanged.
proc corrupt_payload {payload} {
    set len [string length $payload]
    if {$len == 0} {
        # nothing to corrupt
        return $payload
    }
    set count 1 ;# usually corrupt only one byte
    if {rand() > 0.9} { set count 2 }
    while { $count > 0 } {
        set idx [expr {int(rand() * $len)}]
        # rand() is strictly below 1, so scaling by 256 (not 255) is needed
        # for the value 255 (byte 0xFF) to be reachable.
        set ch [binary format c [expr {int(rand() * 256)}]]
        set payload [string replace $payload $idx $idx $ch]
        incr count -1
    }
    return $payload
}
84
# Print every command in `cmds` on its own line, passing each argument through
# string2printable so that binary arguments are readable.
proc print_fuzzer_commands {cmds} {
    foreach cmd $cmds {
        foreach arg $cmd {
            puts -nonewline "[string2printable $arg] "
        }
        puts ""
    }
}

# fuzzy tester for corrupt RESTORE payloads
# valgrind will make sure there were no leaks in the rdb loader error handling code
#
# For each sanitize-dump-payload setting the test starts a server, fills it
# with keys of every type, and then loops: DUMP a random key, corrupt the
# payload, RESTORE it into "_<key>" and, if the RESTORE was accepted, run
# random traffic against the restored key. Whenever the server terminates or
# valgrind / asan report an error, the offending payload is reported and the
# server is restarted.
foreach sanitize_dump {no yes} {
    if {$::accurate} {
        set min_duration [expr {60 * 10}] ;# run at least 10 minutes
        set min_cycles 1000 ;# run at least 1k cycles (max 16 minutes)
    } else {
        set min_duration 10 ;# run at least 10 seconds
        set min_cycles 10 ;# run at least 10 cycles
    }

    # Don't execute this on FreeBSD due to a yet-undiscovered memory issue
    # which causes tclsh to bloat.
    if {[exec uname] eq "FreeBSD"} {
        set min_cycles 1
        set min_duration 1
    }

    test "Fuzzer corrupt restore payloads - sanitize_dump: $sanitize_dump" {
        if {$min_duration * 2 > $::timeout} {
            fail "insufficient timeout"
        }
        # start a server, fill with data and save an RDB file once (avoid re-save)
        start_server [list overrides [list "save" "" use-exit-on-panic yes crash-memcheck-enabled no loglevel verbose] ] {
            set stdout [srv 0 stdout]
            r config set sanitize-dump-payload $sanitize_dump
            r debug set-skip-checksum-validation 1
            set start_time [clock seconds]
            generate_types
            set dbsize [r dbsize]
            r save
            set cycle 0
            set stat_terminated_in_restore 0
            set stat_terminated_in_traffic 0
            set stat_terminated_by_signal 0
            set stat_successful_restore 0
            set stat_rejected_restore 0
            set stat_traffic_commands_sent 0
            # repeatedly DUMP a random key, corrupt it and try RESTORE into a new key
            while true {
                set k [r randomkey]
                set dump [r dump $k]
                set dump [corrupt_payload $dump]
                set printable_dump [string2printable $dump]
                set restore_failed false
                set report_and_restart false
                set sent {}
                # snapshot of the stat, compared below if the restored key vanishes
                set expired_subkeys [s expired_subkeys]
                # RESTORE can fail, but hopefully not terminate
                if { [catch { r restore "_$k" 0 $dump REPLACE } err] } {
                    set restore_failed true
                    # skip if return failed with an error response.
                    if {[string match "ERR*" $err]} {
                        incr stat_rejected_restore
                    } else {
                        # any other failure is treated as a server termination
                        set report_and_restart true
                        incr stat_terminated_in_restore
                        write_log_line 0 "corrupt payload: $printable_dump"
                        if {$sanitize_dump eq "yes"} {
                            puts "Server crashed in RESTORE with payload: $printable_dump"
                        }
                    }
                } else {
                    r ping ;# an attempt to check if the server didn't terminate (this will throw an error that will terminate the tests)
                }

                set print_commands false
                if {!$restore_failed} {
                    # if RESTORE didn't fail or terminate, run some random traffic on the new key
                    incr stat_successful_restore
                    if { [ catch {
                        set type [r type "_$k"]
                        if {$type eq {none}} {
                            # The key has been removed due to expiration.
                            # Ensure the server didn't terminate during expiration and verify
                            # expire stats to confirm the key was removed due to expiration.
                            r ping
                            assert_morethan [s expired_subkeys] $expired_subkeys
                        } else {
                            set sent [generate_fuzzy_traffic_on_key "_$k" $type 1] ;# traffic for 1 second
                        }

                        incr stat_traffic_commands_sent [llength $sent]
                        r del "_$k" ;# in case the server terminated, here's where we'll detect it.
                        if {$dbsize != [r dbsize]} {
                            puts "unexpected keys"
                            puts "keys: [r keys *]"
                            puts "commands leading to it:"
                            print_fuzzer_commands $sent
                            exit 1
                        }
                    } err ] } {
                        set err [format "%s" $err] ;# convert to string for pattern matching
                        if {[string match "*SIGTERM*" $err]} {
                            puts "payload that caused test to hang: $printable_dump"
                            if {$::dump_logs} {
                                set srv [get_srv 0]
                                dump_server_log $srv
                            }
                            exit 1
                        }
                        # if the server terminated update stats and restart it
                        set report_and_restart true
                        incr stat_terminated_in_traffic
                        set by_signal [count_log_message 0 "crashed by signal"]
                        incr stat_terminated_by_signal $by_signal

                        if {$by_signal != 0 || $sanitize_dump eq "yes"} {
                            if {$::dump_logs} {
                                set srv [get_srv 0]
                                dump_server_log $srv
                            }

                            puts "Server crashed (by signal: $by_signal, err: $err), with payload: $printable_dump"
                            set print_commands true
                        }
                    }
                }

                # check valgrind report for invalid reads after each RESTORE
                # payload so that we have a report that is easier to reproduce
                set valgrind_errors [find_valgrind_errors [srv 0 stderr] false]
                set asan_errors [sanitizer_errors_from_file [srv 0 stderr]]
                if {$valgrind_errors != "" || $asan_errors != ""} {
                    puts "valgrind or asan found an issue for payload: $printable_dump"
                    set report_and_restart true
                    set print_commands true
                }

                if {$report_and_restart} {
                    if {$print_commands} {
                        puts "violating commands:"
                        print_fuzzer_commands $sent
                    }

                    # restart the server and re-apply debug configuration
                    write_log_line 0 "corrupt payload: $printable_dump"
                    restart_server 0 true true
                    r config set sanitize-dump-payload $sanitize_dump
                    r debug set-skip-checksum-validation 1
                }

                # stop only once both the minimum duration and the minimum
                # number of cycles have been reached
                incr cycle
                if { ([clock seconds]-$start_time) >= $min_duration && $cycle >= $min_cycles} {
                    break
                }
            }
            if {$::verbose} {
                puts "Done $cycle cycles in [expr {[clock seconds]-$start_time}] seconds."
                puts "RESTORE: successful: $stat_successful_restore, rejected: $stat_rejected_restore, terminated: $stat_terminated_in_restore"
                puts "Total commands sent in traffic: $stat_traffic_commands_sent, crashes during traffic: $stat_terminated_in_traffic ($stat_terminated_by_signal by signal)."
            }
        }
        # if we run sanitization we never expect the server to crash at runtime
        if {$sanitize_dump eq "yes"} {
            assert_equal $stat_terminated_in_restore 0
            assert_equal $stat_terminated_in_traffic 0
        }
        # make sure all terminations were due to assertion and not a SIGSEGV
        assert_equal $stat_terminated_by_signal 0
    }
}
257
258
259
260} ;# tags
261