summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/tests/unit/hyperloglog.tcl
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
commit5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch)
tree1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/tests/unit/hyperloglog.tcl
parentc7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff)
downloadcrep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/tests/unit/hyperloglog.tcl')
-rw-r--r--examples/redis-unstable/tests/unit/hyperloglog.tcl374
1 files changed, 374 insertions, 0 deletions
diff --git a/examples/redis-unstable/tests/unit/hyperloglog.tcl b/examples/redis-unstable/tests/unit/hyperloglog.tcl
new file mode 100644
index 0000000..2202c30
--- /dev/null
+++ b/examples/redis-unstable/tests/unit/hyperloglog.tcl
@@ -0,0 +1,374 @@
+start_server {tags {"hll"}} {
+ test {HyperLogLog self test passes} {
+ catch {r pfselftest} e
+ set e
+ } {OK} {needs:pfdebug}
+
+ test {PFADD without arguments creates an HLL value} {
+ r pfadd hll
+ r exists hll
+ } {1}
+
+ test {Approximated cardinality after creation is zero} {
+ r pfcount hll
+ } {0}
+
+ test {PFADD returns 1 when at least 1 reg was modified} {
+ r pfadd hll a b c
+ } {1}
+
+ test {PFADD returns 0 when no reg was modified} {
+ r pfadd hll a b c
+ } {0}
+
+ test {PFADD works with empty string (regression)} {
+ r pfadd hll ""
+ }
+
+ # Note that the self test stresses much better the
+ # cardinality estimation error. We are testing just the
+ # command implementation itself here.
+ test {PFCOUNT returns approximated cardinality of set} {
+ r del hll
+ set res {}
+ r pfadd hll 1 2 3 4 5
+ lappend res [r pfcount hll]
+ # Call it again to test cached value invalidation.
+ r pfadd hll 6 7 8 8 9 10
+ lappend res [r pfcount hll]
+ set res
+ } {5 10}
+
+ test {HyperLogLogs are promote from sparse to dense} {
+ r del hll
+ r config set hll-sparse-max-bytes 3000
+ set n 0
+ while {$n < 100000} {
+ set elements {}
+ for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]}
+ incr n 100
+ r pfadd hll {*}$elements
+ set card [r pfcount hll]
+ set err [expr {abs($card-$n)}]
+ assert {$err < (double($card)/100)*5}
+ if {$n < 1000} {
+ assert {[r pfdebug encoding hll] eq {sparse}}
+ } elseif {$n > 10000} {
+ assert {[r pfdebug encoding hll] eq {dense}}
+ }
+ }
+ } {} {needs:pfdebug}
+
+ test {Change hll-sparse-max-bytes} {
+ r config set hll-sparse-max-bytes 3000
+ r del hll
+ r pfadd hll a b c d e d g h i j k
+ assert {[r pfdebug encoding hll] eq {sparse}}
+ r config set hll-sparse-max-bytes 30
+ r pfadd hll new_element
+ assert {[r pfdebug encoding hll] eq {dense}}
+ } {} {needs:pfdebug}
+
+ test {Hyperloglog promote to dense well in different hll-sparse-max-bytes} {
+ set max(0) 100
+ set max(1) 500
+ set max(2) 3000
+ for {set i 0} {$i < [array size max]} {incr i} {
+ r config set hll-sparse-max-bytes $max($i)
+ r del hll
+ r pfadd hll
+ set len [r strlen hll]
+ while {$len <= $max($i)} {
+ assert {[r pfdebug encoding hll] eq {sparse}}
+ set elements {}
+ for {set j 0} {$j < 10} {incr j} { lappend elements [expr rand()]}
+ r pfadd hll {*}$elements
+ set len [r strlen hll]
+ }
+ assert {[r pfdebug encoding hll] eq {dense}}
+ }
+ } {} {needs:pfdebug}
+
+ test {HyperLogLog sparse encoding stress test} {
+ for {set x 0} {$x < 1000} {incr x} {
+ r del hll1
+ r del hll2
+ set numele [randomInt 100]
+ set elements {}
+ for {set j 0} {$j < $numele} {incr j} {
+ lappend elements [expr rand()]
+ }
+ # Force dense representation of hll2
+ r pfadd hll2
+ r pfdebug todense hll2
+ r pfadd hll1 {*}$elements
+ r pfadd hll2 {*}$elements
+ assert {[r pfdebug encoding hll1] eq {sparse}}
+ assert {[r pfdebug encoding hll2] eq {dense}}
+ # Cardinality estimated should match exactly.
+ assert {[r pfcount hll1] eq [r pfcount hll2]}
+ }
+ } {} {needs:pfdebug}
+
+ test {Corrupted sparse HyperLogLogs are detected: Additional at tail} {
+ r del hll
+ r pfadd hll a b c
+ r append hll "hello"
+ set e {}
+ catch {r pfcount hll} e
+ set e
+ } {*INVALIDOBJ*}
+
+ test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
+ r del hll
+ r pfadd hll a b c
+ r setrange hll 0 "0123"
+ set e {}
+ catch {r pfcount hll} e
+ set e
+ } {*WRONGTYPE*}
+
+ test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
+ r del hll
+ r pfadd hll a b c
+ r setrange hll 4 "x"
+ set e {}
+ catch {r pfcount hll} e
+ set e
+ } {*WRONGTYPE*}
+
+ test {Corrupted sparse HyperLogLogs doesn't cause overflow and out-of-bounds with XZERO opcode} {
+ r del hll
+
+ # Create a sparse-encoded HyperLogLog header
+ set header "HYLL"
+ set payload [binary format c12 {1 0 0 0 0 0 0 0 0 0 0 0}]
+ set pl [binary format a4a12 $header $payload]
+
+ # Create an XZERO opcode with the maximum run length of 16384(2^14)
+ set runlen [expr 16384 - 1]
+ set chunk [binary format cc [expr {0b01000000 | ($runlen >> 8)}] [expr {$runlen & 0xff}]]
+ # Fill the HLL with more than 131072(2^17) XZERO opcodes to make the total
+ # run length exceed 4GB, will cause an integer overflow.
+ set repeat [expr 131072 + 1000]
+ for {set i 0} {$i < $repeat} {incr i} {
+ append pl $chunk
+ }
+
+ # Create a VAL opcode with a value that will cause out-of-bounds.
+ append pl [binary format c 0b11111111]
+ r set hll $pl
+
+ # This should not overflow and out-of-bounds.
+ assert_error {*INVALIDOBJ*} {r pfcount hll hll}
+ assert_error {*INVALIDOBJ*} {r pfdebug getreg hll}
+ r ping
+ }
+
+ test {Corrupted sparse HyperLogLogs doesn't cause overflow and out-of-bounds with ZERO opcode} {
+ r del hll
+
+ # Create a sparse-encoded HyperLogLog header
+ set header "HYLL"
+ set payload [binary format c12 {1 0 0 0 0 0 0 0 0 0 0 0}]
+ set pl [binary format a4a12 $header $payload]
+
+ # # Create an ZERO opcode with the maximum run length of 64(2^6)
+ set chunk [binary format c [expr {0b00000000 | 0x3f}]]
+ # Fill the HLL with more than 33554432(2^17) ZERO opcodes to make the total
+ # run length exceed 4GB, will cause an integer overflow.
+ set repeat [expr 33554432 + 1000]
+ for {set i 0} {$i < $repeat} {incr i} {
+ append pl $chunk
+ }
+
+ # Create a VAL opcode with a value that will cause out-of-bounds.
+ append pl [binary format c 0b11111111]
+ r set hll $pl
+
+ # This should not overflow and out-of-bounds.
+ assert_error {*INVALIDOBJ*} {r pfcount hll hll}
+ assert_error {*INVALIDOBJ*} {r pfdebug getreg hll}
+ r ping
+ }
+
+ test {Corrupted dense HyperLogLogs are detected: Wrong length} {
+ r del hll
+ r pfadd hll a b c
+ r setrange hll 4 "\x00"
+ set e {}
+ catch {r pfcount hll} e
+ set e
+ } {*WRONGTYPE*}
+
+ test {Fuzzing dense/sparse encoding: Redis should always detect errors} {
+ for {set j 0} {$j < 1000} {incr j} {
+ r del hll
+ set items {}
+ set numitems [randomInt 3000]
+ for {set i 0} {$i < $numitems} {incr i} {
+ lappend items [expr {rand()}]
+ }
+ r pfadd hll {*}$items
+
+ # Corrupt it in some random way.
+ for {set i 0} {$i < 5} {incr i} {
+ set len [r strlen hll]
+ set pos [randomInt $len]
+ set byte [randstring 1 1 binary]
+ r setrange hll $pos $byte
+ # Don't modify more bytes 50% of times
+ if {rand() < 0.5} break
+ }
+
+ # Use the hyperloglog to check if it crashes
+ # Redis in some way.
+ catch {
+ r pfcount hll
+ }
+ }
+ }
+
+ test {PFADD, PFCOUNT, PFMERGE type checking works} {
+ r set foo{t} bar
+ catch {r pfadd foo{t} 1} e
+ assert_match {*WRONGTYPE*} $e
+ catch {r pfcount foo{t}} e
+ assert_match {*WRONGTYPE*} $e
+ catch {r pfmerge bar{t} foo{t}} e
+ assert_match {*WRONGTYPE*} $e
+ catch {r pfmerge foo{t} bar{t}} e
+ assert_match {*WRONGTYPE*} $e
+ }
+
+ test {PFMERGE results on the cardinality of union of sets} {
+ r del hll{t} hll1{t} hll2{t} hll3{t}
+ r pfadd hll1{t} a b c
+ r pfadd hll2{t} b c d
+ r pfadd hll3{t} c d e
+ r pfmerge hll{t} hll1{t} hll2{t} hll3{t}
+ r pfcount hll{t}
+ } {5}
+
+ test {PFMERGE on missing source keys will create an empty destkey} {
+ r del sourcekey{t} sourcekey2{t} destkey{t} destkey2{t}
+
+ assert_equal {OK} [r pfmerge destkey{t} sourcekey{t}]
+ assert_equal 1 [r exists destkey{t}]
+ assert_equal 0 [r pfcount destkey{t}]
+
+ assert_equal {OK} [r pfmerge destkey2{t} sourcekey{t} sourcekey2{t}]
+ assert_equal 1 [r exists destkey2{t}]
+ assert_equal 0 [r pfcount destkey{t}]
+ }
+
+ test {PFMERGE with one empty input key, create an empty destkey} {
+ r del destkey
+ assert_equal {OK} [r pfmerge destkey]
+ assert_equal 1 [r exists destkey]
+ assert_equal 0 [r pfcount destkey]
+ }
+
+ test {PFMERGE with one non-empty input key, dest key is actually one of the source keys} {
+ r del destkey
+ assert_equal 1 [r pfadd destkey a b c]
+ assert_equal {OK} [r pfmerge destkey]
+ assert_equal 1 [r exists destkey]
+ assert_equal 3 [r pfcount destkey]
+ }
+
+ test {PFMERGE results with simd} {
+ r del hllscalar{t} hllsimd{t} hll1{t} hll2{t} hll3{t}
+ for {set x 1} {$x < 2000} {incr x} {
+ r pfadd hll1{t} [expr rand()]
+ }
+ for {set x 1} {$x < 4000} {incr x} {
+ r pfadd hll2{t} [expr rand()]
+ }
+ for {set x 1} {$x < 8000} {incr x} {
+ r pfadd hll3{t} [expr rand()]
+ }
+ assert {[r pfcount hll1{t}] > 0}
+ assert {[r pfcount hll2{t}] > 0}
+ assert {[r pfcount hll3{t}] > 0}
+
+ r pfdebug simd off
+ set scalar [r pfcount hll1{t} hll2{t} hll3{t}]
+ r pfdebug simd on
+ set simd [r pfcount hll1{t} hll2{t} hll3{t}]
+ assert {$scalar > 0}
+ assert {$simd > 0}
+ assert_equal $scalar $simd
+
+ r pfdebug simd off
+ r pfmerge hllscalar{t} hll1{t} hll2{t} hll3{t}
+ r pfdebug simd on
+ r pfmerge hllsimd{t} hll1{t} hll2{t} hll3{t}
+
+ set scalar [r pfcount hllscalar{t}]
+ set simd [r pfcount hllsimd{t}]
+ assert {$scalar > 0}
+ assert {$simd > 0}
+ assert_equal $scalar $simd
+
+ set scalar [r get hllscalar{t}]
+ set simd [r get hllsimd{t}]
+ assert_equal $scalar $simd
+
+ } {} {needs:pfdebug}
+
+ test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
+ r del hll1{t} hll2{t} hll3{t}
+ for {set x 1} {$x < 10000} {incr x} {
+ r pfadd hll1{t} "foo-$x"
+ r pfadd hll2{t} "bar-$x"
+ r pfadd hll3{t} "zap-$x"
+
+ set card [r pfcount hll1{t} hll2{t} hll3{t}]
+ set realcard [expr {$x*3}]
+ set err [expr {abs($card-$realcard)}]
+ assert {$err < (double($card)/100)*5}
+ }
+ }
+
+ test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
+ r del hll1{t} hll2{t} hll3{t}
+ set elements {}
+ for {set x 1} {$x < 10000} {incr x} {
+ for {set j 1} {$j <= 3} {incr j} {
+ set rint [randomInt 20000]
+ r pfadd hll$j{t} $rint
+ lappend elements $rint
+ }
+ }
+ set realcard [llength [lsort -unique $elements]]
+ set card [r pfcount hll1{t} hll2{t} hll3{t}]
+ set err [expr {abs($card-$realcard)}]
+ assert {$err < (double($card)/100)*5}
+ }
+
+ test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
+ r del hll
+ r pfadd hll 1 2 3
+ llength [r pfdebug getreg hll]
+ } {16384} {needs:pfdebug}
+
+ test {PFADD / PFCOUNT cache invalidation works} {
+ r del hll
+ r pfadd hll a b c
+ r pfcount hll
+ assert {[r getrange hll 15 15] eq "\x00"}
+ r pfadd hll a b c
+ assert {[r getrange hll 15 15] eq "\x00"}
+ r pfadd hll 1 2 3
+ assert {[r getrange hll 15 15] eq "\x80"}
+ }
+
+ test {PFADD with 2GB entry should not crash server due to overflow in MurmurHash64A} {
+ r config set proto-max-bulk-len 3221225472
+ r config set client-query-buffer-limit 3221225472
+ r write "*3\r\n\$5\r\nPFADD\r\n\$3\r\nhll\r\n"
+ write_big_bulk 2147483648;
+ r ping
+ } {PONG} {large-memory}
+}