From dcacc00e3750300617ba6e16eb346713f91a783a Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Wed, 21 Jan 2026 22:52:54 +0100 Subject: Remove testing data --- examples/redis-unstable/src/.gitignore | 5 - examples/redis-unstable/src/Makefile | 563 - examples/redis-unstable/src/acl.c | 3313 ---- examples/redis-unstable/src/adlist.c | 395 - examples/redis-unstable/src/adlist.h | 80 - examples/redis-unstable/src/ae.c | 511 - examples/redis-unstable/src/ae.h | 118 - examples/redis-unstable/src/ae_epoll.c | 119 - examples/redis-unstable/src/ae_evport.c | 323 - examples/redis-unstable/src/ae_kqueue.c | 183 - examples/redis-unstable/src/ae_select.c | 90 - examples/redis-unstable/src/anet.c | 812 - examples/redis-unstable/src/anet.h | 58 - examples/redis-unstable/src/aof.c | 2921 ---- examples/redis-unstable/src/asciilogo.h | 27 - examples/redis-unstable/src/atomicvar.h | 186 - examples/redis-unstable/src/bio.c | 445 - examples/redis-unstable/src/bio.h | 47 - examples/redis-unstable/src/bitops.c | 2037 --- examples/redis-unstable/src/blocked.c | 787 - examples/redis-unstable/src/call_reply.c | 540 - examples/redis-unstable/src/call_reply.h | 40 - examples/redis-unstable/src/childinfo.c | 163 - examples/redis-unstable/src/chk.c | 822 - examples/redis-unstable/src/chk.h | 89 - examples/redis-unstable/src/cli_commands.c | 13 - examples/redis-unstable/src/cli_commands.h | 46 - examples/redis-unstable/src/cli_common.c | 424 - examples/redis-unstable/src/cli_common.h | 59 - examples/redis-unstable/src/cluster.c | 2263 --- examples/redis-unstable/src/cluster.h | 354 - examples/redis-unstable/src/cluster_asm.c | 3602 ----- examples/redis-unstable/src/cluster_asm.h | 57 - examples/redis-unstable/src/cluster_legacy.c | 6581 -------- examples/redis-unstable/src/cluster_legacy.h | 385 - examples/redis-unstable/src/cluster_slot_stats.c | 373 - examples/redis-unstable/src/cluster_slot_stats.h | 33 - examples/redis-unstable/src/commands.c | 13 - examples/redis-unstable/src/commands.def | 11962 -------------- examples/redis-unstable/src/commands.h | 40 - examples/redis-unstable/src/commands/README.md | 15 - examples/redis-unstable/src/commands/acl-cat.json | 42 - .../redis-unstable/src/commands/acl-deluser.json | 33 - .../redis-unstable/src/commands/acl-dryrun.json | 47 - .../redis-unstable/src/commands/acl-genpass.json | 28 - .../redis-unstable/src/commands/acl-getuser.json | 91 - examples/redis-unstable/src/commands/acl-help.json | 23 - examples/redis-unstable/src/commands/acl-list.json | 25 - examples/redis-unstable/src/commands/acl-load.json | 21 - examples/redis-unstable/src/commands/acl-log.json | 90 - examples/redis-unstable/src/commands/acl-save.json | 25 - .../redis-unstable/src/commands/acl-setuser.json | 47 - .../redis-unstable/src/commands/acl-users.json | 25 - .../redis-unstable/src/commands/acl-whoami.json | 21 - examples/redis-unstable/src/commands/acl.json | 12 - examples/redis-unstable/src/commands/append.json | 53 - examples/redis-unstable/src/commands/asking.json | 19 - examples/redis-unstable/src/commands/auth.json | 43 - .../redis-unstable/src/commands/bgrewriteaof.json | 19 - examples/redis-unstable/src/commands/bgsave.json | 40 - examples/redis-unstable/src/commands/bitcount.json | 87 - examples/redis-unstable/src/commands/bitfield.json | 159 - .../redis-unstable/src/commands/bitfield_ro.json | 69 - examples/redis-unstable/src/commands/bitop.json | 119 - examples/redis-unstable/src/commands/bitpos.json | 106 - examples/redis-unstable/src/commands/blmove.json | 117 - examples/redis-unstable/src/commands/blmpop.json | 105 - examples/redis-unstable/src/commands/blpop.json | 80 - examples/redis-unstable/src/commands/brpop.json | 79 - .../redis-unstable/src/commands/brpoplpush.json | 96 - examples/redis-unstable/src/commands/bzmpop.json | 117 - examples/redis-unstable/src/commands/bzpopmax.json | 85 - examples/redis-unstable/src/commands/bzpopmin.json | 85 - .../src/commands/client-caching.json | 41 - .../src/commands/client-getname.json | 32 - .../src/commands/client-getredir.json | 37 - .../redis-unstable/src/commands/client-help.json | 26 - .../redis-unstable/src/commands/client-id.json | 24 - .../redis-unstable/src/commands/client-info.json | 27 - .../redis-unstable/src/commands/client-kill.json | 170 - .../redis-unstable/src/commands/client-list.json | 105 - .../src/commands/client-no-evict.json | 42 - .../src/commands/client-no-touch.json | 40 - .../redis-unstable/src/commands/client-pause.json | 54 - .../redis-unstable/src/commands/client-reply.json | 47 - .../src/commands/client-setinfo.json | 45 - .../src/commands/client-setname.json | 33 - .../src/commands/client-tracking.json | 80 - .../src/commands/client-trackinginfo.json | 80 - .../src/commands/client-unblock.json | 56 - .../src/commands/client-unpause.json | 24 - examples/redis-unstable/src/commands/client.json | 12 - .../src/commands/cluster-addslots.json | 26 - .../src/commands/cluster-addslotsrange.json | 36 - .../src/commands/cluster-bumpepoch.json | 33 - .../commands/cluster-count-failure-reports.json | 29 - .../src/commands/cluster-countkeysinslot.json | 25 - .../src/commands/cluster-delslots.json | 26 - .../src/commands/cluster-delslotsrange.json | 36 - .../src/commands/cluster-failover.json | 38 - .../src/commands/cluster-flushslots.json | 19 - .../src/commands/cluster-forget.json | 25 - .../src/commands/cluster-getkeysinslot.json | 35 - .../redis-unstable/src/commands/cluster-help.json | 22 - .../redis-unstable/src/commands/cluster-info.json | 21 - .../src/commands/cluster-keyslot.json | 25 - .../redis-unstable/src/commands/cluster-links.json | 60 - .../redis-unstable/src/commands/cluster-meet.json | 41 - .../src/commands/cluster-migration.json | 141 - .../redis-unstable/src/commands/cluster-myid.json | 18 - .../src/commands/cluster-myshardid.json | 22 - .../redis-unstable/src/commands/cluster-nodes.json | 21 - .../src/commands/cluster-replicas.json | 32 - .../src/commands/cluster-replicate.json | 25 - .../redis-unstable/src/commands/cluster-reset.json | 38 - .../src/commands/cluster-saveconfig.json | 19 - .../src/commands/cluster-set-config-epoch.json | 25 - .../src/commands/cluster-setslot.json | 54 - .../src/commands/cluster-shards.json | 90 - .../src/commands/cluster-slaves.json | 37 - .../src/commands/cluster-slot-stats.json | 114 - .../redis-unstable/src/commands/cluster-slots.json | 136 - .../src/commands/cluster-syncslots.json | 117 - examples/redis-unstable/src/commands/cluster.json | 9 - .../redis-unstable/src/commands/command-count.json | 23 - .../redis-unstable/src/commands/command-docs.json | 211 - .../src/commands/command-getkeys.json | 39 - .../src/commands/command-getkeysandflags.json | 55 - .../redis-unstable/src/commands/command-help.json | 26 - .../redis-unstable/src/commands/command-info.json | 213 - .../redis-unstable/src/commands/command-list.json | 55 - examples/redis-unstable/src/commands/command.json | 21 - .../redis-unstable/src/commands/config-get.json | 36 - .../redis-unstable/src/commands/config-help.json | 22 - .../src/commands/config-resetstat.json | 24 - .../src/commands/config-rewrite.json | 24 - .../redis-unstable/src/commands/config-set.json | 47 - examples/redis-unstable/src/commands/config.json | 9 - examples/redis-unstable/src/commands/copy.json | 91 - examples/redis-unstable/src/commands/dbsize.json | 25 - examples/redis-unstable/src/commands/debug.json | 20 - examples/redis-unstable/src/commands/decr.json | 50 - examples/redis-unstable/src/commands/decrby.json | 54 - examples/redis-unstable/src/commands/del.json | 53 - examples/redis-unstable/src/commands/delex.json | 89 - examples/redis-unstable/src/commands/digest.json | 56 - examples/redis-unstable/src/commands/discard.json | 23 - examples/redis-unstable/src/commands/dump.json | 58 - examples/redis-unstable/src/commands/echo.json | 28 - examples/redis-unstable/src/commands/eval.json | 69 - examples/redis-unstable/src/commands/eval_ro.json | 68 - examples/redis-unstable/src/commands/evalsha.json | 68 - .../redis-unstable/src/commands/evalsha_ro.json | 67 - examples/redis-unstable/src/commands/exec.json | 31 - examples/redis-unstable/src/commands/exists.json | 58 - examples/redis-unstable/src/commands/expire.json | 94 - examples/redis-unstable/src/commands/expireat.json | 94 - .../redis-unstable/src/commands/expiretime.json | 61 - examples/redis-unstable/src/commands/failover.json | 54 - examples/redis-unstable/src/commands/fcall.json | 69 - examples/redis-unstable/src/commands/fcall_ro.json | 68 - examples/redis-unstable/src/commands/flushall.json | 55 - examples/redis-unstable/src/commands/flushdb.json | 55 - .../src/commands/function-delete.json | 31 - .../redis-unstable/src/commands/function-dump.json | 21 - .../src/commands/function-flush.json | 44 - .../redis-unstable/src/commands/function-help.json | 25 - .../redis-unstable/src/commands/function-kill.json | 25 - .../redis-unstable/src/commands/function-list.json | 87 - .../redis-unstable/src/commands/function-load.json | 39 - .../src/commands/function-restore.json | 54 - .../src/commands/function-stats.json | 81 - examples/redis-unstable/src/commands/function.json | 9 - examples/redis-unstable/src/commands/geoadd.json | 98 - examples/redis-unstable/src/commands/geodist.json | 91 - examples/redis-unstable/src/commands/geohash.json | 56 - examples/redis-unstable/src/commands/geopos.json | 76 - .../redis-unstable/src/commands/georadius.json | 270 - .../redis-unstable/src/commands/georadius_ro.json | 205 - .../src/commands/georadiusbymember.json | 265 - .../src/commands/georadiusbymember_ro.json | 200 - .../redis-unstable/src/commands/geosearch.json | 267 - .../src/commands/geosearchstore.json | 228 - examples/redis-unstable/src/commands/get.json | 56 - examples/redis-unstable/src/commands/getbit.json | 59 - examples/redis-unstable/src/commands/getdel.json | 57 - examples/redis-unstable/src/commands/getex.json | 90 - examples/redis-unstable/src/commands/getrange.json | 55 - examples/redis-unstable/src/commands/getset.json | 67 - examples/redis-unstable/src/commands/hdel.json | 59 - examples/redis-unstable/src/commands/hello.json | 111 - examples/redis-unstable/src/commands/hexists.json | 59 - examples/redis-unstable/src/commands/hexpire.json | 118 - .../redis-unstable/src/commands/hexpireat.json | 118 - .../redis-unstable/src/commands/hexpiretime.json | 84 - examples/redis-unstable/src/commands/hget.json | 60 - examples/redis-unstable/src/commands/hgetall.json | 53 - examples/redis-unstable/src/commands/hgetdel.json | 78 - examples/redis-unstable/src/commands/hgetex.json | 111 - examples/redis-unstable/src/commands/hincrby.json | 58 - .../redis-unstable/src/commands/hincrbyfloat.json | 58 - examples/redis-unstable/src/commands/hkeys.json | 54 - examples/redis-unstable/src/commands/hlen.json | 47 - examples/redis-unstable/src/commands/hmget.json | 64 - examples/redis-unstable/src/commands/hmset.json | 68 - .../redis-unstable/src/commands/hotkeys-get.json | 51 - .../redis-unstable/src/commands/hotkeys-reset.json | 18 - .../redis-unstable/src/commands/hotkeys-start.json | 80 - .../redis-unstable/src/commands/hotkeys-stop.json | 19 - examples/redis-unstable/src/commands/hotkeys.json | 10 - examples/redis-unstable/src/commands/hpersist.json | 83 - examples/redis-unstable/src/commands/hpexpire.json | 118 - .../redis-unstable/src/commands/hpexpireat.json | 118 - .../redis-unstable/src/commands/hpexpiretime.json | 84 - examples/redis-unstable/src/commands/hpttl.json | 87 - .../redis-unstable/src/commands/hrandfield.json | 101 - examples/redis-unstable/src/commands/hscan.json | 87 - examples/redis-unstable/src/commands/hset.json | 70 - examples/redis-unstable/src/commands/hsetex.json | 132 - examples/redis-unstable/src/commands/hsetnx.json | 65 - examples/redis-unstable/src/commands/hstrlen.json | 52 - examples/redis-unstable/src/commands/httl.json | 87 - examples/redis-unstable/src/commands/hvals.json | 53 - examples/redis-unstable/src/commands/incr.json | 50 - examples/redis-unstable/src/commands/incrby.json | 54 - .../redis-unstable/src/commands/incrbyfloat.json | 54 - examples/redis-unstable/src/commands/info.json | 41 - examples/redis-unstable/src/commands/keys.json | 34 - examples/redis-unstable/src/commands/lastsave.json | 26 - .../src/commands/latency-doctor.json | 26 - .../redis-unstable/src/commands/latency-graph.json | 32 - .../redis-unstable/src/commands/latency-help.json | 22 - .../src/commands/latency-histogram.json | 54 - .../src/commands/latency-history.json | 49 - .../src/commands/latency-latest.json | 49 - .../redis-unstable/src/commands/latency-reset.json | 33 - examples/redis-unstable/src/commands/latency.json | 9 - examples/redis-unstable/src/commands/lcs.json | 127 - examples/redis-unstable/src/commands/lindex.json | 59 - examples/redis-unstable/src/commands/linsert.json | 85 - examples/redis-unstable/src/commands/llen.json | 48 - examples/redis-unstable/src/commands/lmove.json | 104 - examples/redis-unstable/src/commands/lmpop.json | 100 - examples/redis-unstable/src/commands/lolwut.json | 25 - examples/redis-unstable/src/commands/lpop.json | 77 - examples/redis-unstable/src/commands/lpos.json | 85 - examples/redis-unstable/src/commands/lpush.json | 60 - examples/redis-unstable/src/commands/lpushx.json | 61 - examples/redis-unstable/src/commands/lrange.json | 58 - examples/redis-unstable/src/commands/lrem.json | 56 - examples/redis-unstable/src/commands/lset.json | 55 - examples/redis-unstable/src/commands/ltrim.json | 54 - .../redis-unstable/src/commands/memory-doctor.json | 20 - .../redis-unstable/src/commands/memory-help.json | 22 - .../src/commands/memory-malloc-stats.json | 20 - .../redis-unstable/src/commands/memory-purge.json | 18 - .../redis-unstable/src/commands/memory-stats.json | 136 - .../redis-unstable/src/commands/memory-usage.json | 58 - examples/redis-unstable/src/commands/memory.json | 9 - examples/redis-unstable/src/commands/mget.json | 63 - examples/redis-unstable/src/commands/migrate.json | 181 - .../redis-unstable/src/commands/module-help.json | 22 - .../redis-unstable/src/commands/module-list.json | 47 - .../redis-unstable/src/commands/module-load.json | 32 - .../redis-unstable/src/commands/module-loadex.json | 51 - .../redis-unstable/src/commands/module-unload.json | 26 - examples/redis-unstable/src/commands/module.json | 9 - examples/redis-unstable/src/commands/monitor.json | 16 - examples/redis-unstable/src/commands/move.json | 61 - examples/redis-unstable/src/commands/mset.json | 62 - examples/redis-unstable/src/commands/msetex.json | 124 - examples/redis-unstable/src/commands/msetnx.json | 67 - examples/redis-unstable/src/commands/multi.json | 23 - .../src/commands/object-encoding.json | 58 - .../redis-unstable/src/commands/object-freq.json | 50 - .../redis-unstable/src/commands/object-help.json | 25 - .../src/commands/object-idletime.json | 50 - .../src/commands/object-refcount.json | 50 - examples/redis-unstable/src/commands/object.json | 9 - examples/redis-unstable/src/commands/persist.json | 56 - examples/redis-unstable/src/commands/pexpire.json | 94 - .../redis-unstable/src/commands/pexpireat.json | 94 - .../redis-unstable/src/commands/pexpiretime.json | 61 - examples/redis-unstable/src/commands/pfadd.json | 63 - examples/redis-unstable/src/commands/pfcount.json | 50 - examples/redis-unstable/src/commands/pfdebug.json | 52 - examples/redis-unstable/src/commands/pfmerge.json | 73 - .../redis-unstable/src/commands/pfselftest.json | 22 - examples/redis-unstable/src/commands/ping.json | 40 - examples/redis-unstable/src/commands/psetex.json | 60 - .../redis-unstable/src/commands/psubscribe.json | 24 - examples/redis-unstable/src/commands/psync.json | 25 - examples/redis-unstable/src/commands/pttl.json | 70 - examples/redis-unstable/src/commands/publish.json | 33 - .../src/commands/pubsub-channels.json | 31 - .../redis-unstable/src/commands/pubsub-help.json | 22 - .../redis-unstable/src/commands/pubsub-numpat.json | 21 - .../redis-unstable/src/commands/pubsub-numsub.json | 28 - .../src/commands/pubsub-shardchannels.json | 31 - .../src/commands/pubsub-shardnumsub.json | 28 - examples/redis-unstable/src/commands/pubsub.json | 9 - .../redis-unstable/src/commands/punsubscribe.json | 25 - examples/redis-unstable/src/commands/quit.json | 29 - .../redis-unstable/src/commands/randomkey.json | 34 - examples/redis-unstable/src/commands/readonly.json | 21 - .../redis-unstable/src/commands/readwrite.json | 21 - examples/redis-unstable/src/commands/rename.json | 72 - examples/redis-unstable/src/commands/renamenx.json | 86 - examples/redis-unstable/src/commands/replconf.json | 23 - .../redis-unstable/src/commands/replicaof.json | 59 - examples/redis-unstable/src/commands/reset.json | 24 - .../src/commands/restore-asking.json | 102 - examples/redis-unstable/src/commands/restore.json | 98 - examples/redis-unstable/src/commands/role.json | 134 - examples/redis-unstable/src/commands/rpop.json | 76 - .../redis-unstable/src/commands/rpoplpush.json | 85 - examples/redis-unstable/src/commands/rpush.json | 61 - examples/redis-unstable/src/commands/rpushx.json | 61 - examples/redis-unstable/src/commands/sadd.json | 60 - examples/redis-unstable/src/commands/save.json | 19 - examples/redis-unstable/src/commands/scan.json | 72 - examples/redis-unstable/src/commands/scard.json | 48 - .../redis-unstable/src/commands/script-debug.json | 43 - .../redis-unstable/src/commands/script-exists.json | 44 - .../redis-unstable/src/commands/script-flush.json | 50 - .../redis-unstable/src/commands/script-help.json | 25 - .../redis-unstable/src/commands/script-kill.json | 25 - .../redis-unstable/src/commands/script-load.json | 32 - examples/redis-unstable/src/commands/script.json | 9 - examples/redis-unstable/src/commands/sdiff.json | 55 - .../redis-unstable/src/commands/sdiffstore.json | 73 - examples/redis-unstable/src/commands/select.json | 27 - .../src/commands/sentinel-ckquorum.json | 26 - .../src/commands/sentinel-config.json | 121 - .../src/commands/sentinel-debug.json | 49 - .../src/commands/sentinel-failover.json | 25 - .../src/commands/sentinel-flushconfig.json | 20 - .../commands/sentinel-get-master-addr-by-name.json | 38 - .../redis-unstable/src/commands/sentinel-help.json | 24 - .../src/commands/sentinel-info-cache.json | 64 - .../commands/sentinel-is-master-down-by-addr.json | 61 - .../src/commands/sentinel-master.json | 29 - .../src/commands/sentinel-masters.json | 26 - .../src/commands/sentinel-monitor.json | 37 - .../redis-unstable/src/commands/sentinel-myid.json | 20 - .../src/commands/sentinel-pending-scripts.json | 52 - .../src/commands/sentinel-remove.json | 25 - .../src/commands/sentinel-replicas.json | 32 - .../src/commands/sentinel-reset.json | 26 - .../src/commands/sentinel-sentinels.json | 32 - .../redis-unstable/src/commands/sentinel-set.json | 40 - .../src/commands/sentinel-simulate-failure.json | 52 - .../src/commands/sentinel-slaves.json | 37 - examples/redis-unstable/src/commands/sentinel.json | 14 - examples/redis-unstable/src/commands/set.json | 180 - examples/redis-unstable/src/commands/setbit.json | 64 - examples/redis-unstable/src/commands/setex.json | 60 - examples/redis-unstable/src/commands/setnx.json | 66 - examples/redis-unstable/src/commands/setrange.json | 57 - examples/redis-unstable/src/commands/sflush.json | 75 - examples/redis-unstable/src/commands/shutdown.json | 69 - examples/redis-unstable/src/commands/sinter.json | 55 - .../redis-unstable/src/commands/sintercard.json | 60 - .../redis-unstable/src/commands/sinterstore.json | 73 - .../redis-unstable/src/commands/sismember.json | 59 - examples/redis-unstable/src/commands/slaveof.json | 64 - .../redis-unstable/src/commands/slowlog-get.json | 74 - .../redis-unstable/src/commands/slowlog-help.json | 22 - .../redis-unstable/src/commands/slowlog-len.json | 26 - .../redis-unstable/src/commands/slowlog-reset.json | 23 - examples/redis-unstable/src/commands/slowlog.json | 9 - examples/redis-unstable/src/commands/smembers.json | 54 - .../redis-unstable/src/commands/smismember.json | 66 - examples/redis-unstable/src/commands/smove.json | 84 - examples/redis-unstable/src/commands/sort.json | 162 - examples/redis-unstable/src/commands/sort_ro.json | 132 - examples/redis-unstable/src/commands/spop.json | 80 - examples/redis-unstable/src/commands/spublish.json | 51 - .../redis-unstable/src/commands/srandmember.json | 83 - examples/redis-unstable/src/commands/srem.json | 60 - examples/redis-unstable/src/commands/sscan.json | 81 - .../redis-unstable/src/commands/ssubscribe.json | 42 - examples/redis-unstable/src/commands/strlen.json | 48 - .../redis-unstable/src/commands/subscribe.json | 25 - examples/redis-unstable/src/commands/substr.json | 60 - examples/redis-unstable/src/commands/sunion.json | 55 - .../redis-unstable/src/commands/sunionstore.json | 73 - .../redis-unstable/src/commands/sunsubscribe.json | 43 - examples/redis-unstable/src/commands/swapdb.json | 31 - examples/redis-unstable/src/commands/sync.json | 15 - examples/redis-unstable/src/commands/time.json | 28 - examples/redis-unstable/src/commands/touch.json | 53 - .../redis-unstable/src/commands/trimslots.json | 48 - examples/redis-unstable/src/commands/ttl.json | 70 - examples/redis-unstable/src/commands/type.json | 55 - examples/redis-unstable/src/commands/unlink.json | 54 - .../redis-unstable/src/commands/unsubscribe.json | 25 - examples/redis-unstable/src/commands/unwatch.json | 23 - examples/redis-unstable/src/commands/wait.json | 35 - examples/redis-unstable/src/commands/waitaof.json | 52 - examples/redis-unstable/src/commands/watch.json | 50 - examples/redis-unstable/src/commands/xack.json | 58 - examples/redis-unstable/src/commands/xackdel.json | 109 - examples/redis-unstable/src/commands/xadd.json | 231 - .../redis-unstable/src/commands/xautoclaim.json | 158 - examples/redis-unstable/src/commands/xcfgset.json | 79 - examples/redis-unstable/src/commands/xclaim.json | 138 - examples/redis-unstable/src/commands/xdel.json | 54 - examples/redis-unstable/src/commands/xdelex.json | 104 - .../redis-unstable/src/commands/xgroup-create.json | 86 - .../src/commands/xgroup-createconsumer.json | 64 - .../src/commands/xgroup-delconsumer.json | 57 - .../src/commands/xgroup-destroy.json | 59 - .../redis-unstable/src/commands/xgroup-help.json | 25 - .../redis-unstable/src/commands/xgroup-setid.json | 79 - examples/redis-unstable/src/commands/xgroup.json | 9 - .../src/commands/xinfo-consumers.json | 80 - .../redis-unstable/src/commands/xinfo-groups.json | 92 - .../redis-unstable/src/commands/xinfo-help.json | 25 - .../redis-unstable/src/commands/xinfo-stream.json | 414 - examples/redis-unstable/src/commands/xinfo.json | 9 - examples/redis-unstable/src/commands/xlen.json | 48 - examples/redis-unstable/src/commands/xpending.json | 190 - examples/redis-unstable/src/commands/xrange.json | 87 - examples/redis-unstable/src/commands/xread.json | 107 - .../redis-unstable/src/commands/xreadgroup.json | 180 - .../redis-unstable/src/commands/xrevrange.json | 86 - examples/redis-unstable/src/commands/xsetid.json | 72 - examples/redis-unstable/src/commands/xtrim.json | 134 - examples/redis-unstable/src/commands/zadd.json | 144 - examples/redis-unstable/src/commands/zcard.json | 47 - examples/redis-unstable/src/commands/zcount.json | 56 - examples/redis-unstable/src/commands/zdiff.json | 85 - .../redis-unstable/src/commands/zdiffstore.json | 77 - examples/redis-unstable/src/commands/zincrby.json | 58 - examples/redis-unstable/src/commands/zinter.json | 115 - .../redis-unstable/src/commands/zintercard.json | 60 - .../redis-unstable/src/commands/zinterstore.json | 108 - .../redis-unstable/src/commands/zlexcount.json | 57 - examples/redis-unstable/src/commands/zmpop.json | 111 - examples/redis-unstable/src/commands/zmscore.json | 65 - examples/redis-unstable/src/commands/zpopmax.json | 89 - examples/redis-unstable/src/commands/zpopmin.json | 89 - .../redis-unstable/src/commands/zrandmember.json | 101 - examples/redis-unstable/src/commands/zrange.json | 137 - .../redis-unstable/src/commands/zrangebylex.json | 80 - .../redis-unstable/src/commands/zrangebyscore.json | 119 - .../redis-unstable/src/commands/zrangestore.json | 118 - examples/redis-unstable/src/commands/zrank.json | 86 - examples/redis-unstable/src/commands/zrem.json | 60 - .../src/commands/zremrangebylex.json | 55 - .../src/commands/zremrangebyrank.json | 55 - .../src/commands/zremrangebyscore.json | 55 - .../redis-unstable/src/commands/zrevrange.json | 94 - .../src/commands/zrevrangebylex.json | 80 - .../src/commands/zrevrangebyscore.json | 118 - examples/redis-unstable/src/commands/zrevrank.json | 86 - examples/redis-unstable/src/commands/zscan.json | 81 - examples/redis-unstable/src/commands/zscore.json | 60 - examples/redis-unstable/src/commands/zunion.json | 115 - .../redis-unstable/src/commands/zunionstore.json | 107 - examples/redis-unstable/src/config.c | 3775 ----- examples/redis-unstable/src/config.h | 375 - examples/redis-unstable/src/connection.c | 209 - examples/redis-unstable/src/connection.h | 471 - examples/redis-unstable/src/connhelpers.h | 68 - examples/redis-unstable/src/crc16.c | 88 - examples/redis-unstable/src/crc16_slottable.h | 836 - examples/redis-unstable/src/crc64.c | 371 - examples/redis-unstable/src/crc64.h | 13 - examples/redis-unstable/src/crccombine.c | 252 - examples/redis-unstable/src/crccombine.h | 10 - examples/redis-unstable/src/crcspeed.c | 410 - examples/redis-unstable/src/crcspeed.h | 62 - examples/redis-unstable/src/db.c | 3793 ----- examples/redis-unstable/src/debug.c | 2849 ---- examples/redis-unstable/src/debugmacro.h | 26 - examples/redis-unstable/src/defrag.c | 1985 --- examples/redis-unstable/src/dict.c | 2340 --- examples/redis-unstable/src/dict.h | 319 - examples/redis-unstable/src/ebuckets.c | 2725 ---- examples/redis-unstable/src/ebuckets.h | 336 - examples/redis-unstable/src/endianconv.c | 109 - examples/redis-unstable/src/endianconv.h | 69 - examples/redis-unstable/src/entry.c | 408 - examples/redis-unstable/src/entry.h | 141 - examples/redis-unstable/src/estore.c | 496 - examples/redis-unstable/src/estore.h | 91 - examples/redis-unstable/src/eval.c | 1760 --- examples/redis-unstable/src/eventnotifier.c | 98 - examples/redis-unstable/src/eventnotifier.h | 34 - examples/redis-unstable/src/evict.c | 764 - examples/redis-unstable/src/expire.c | 932 -- examples/redis-unstable/src/fmacros.h | 56 - examples/redis-unstable/src/fmtargs.h | 213 - examples/redis-unstable/src/function_lua.c | 513 - examples/redis-unstable/src/functions.c | 1138 -- examples/redis-unstable/src/functions.h | 127 - examples/redis-unstable/src/fwtree.c | 237 - examples/redis-unstable/src/fwtree.h | 71 - examples/redis-unstable/src/geo.c | 1006 -- examples/redis-unstable/src/geo.h | 22 - examples/redis-unstable/src/geohash.c | 299 - examples/redis-unstable/src/geohash.h | 135 - examples/redis-unstable/src/geohash_helper.c | 280 - examples/redis-unstable/src/geohash_helper.h | 65 - examples/redis-unstable/src/hotkeys.c | 614 - examples/redis-unstable/src/hyperloglog.c | 2099 --- examples/redis-unstable/src/intset.c | 566 - examples/redis-unstable/src/intset.h | 58 - examples/redis-unstable/src/iothread.c | 955 -- examples/redis-unstable/src/keymeta.c | 935 -- examples/redis-unstable/src/keymeta.h | 182 - examples/redis-unstable/src/kvstore.c | 1171 -- examples/redis-unstable/src/kvstore.h | 159 - examples/redis-unstable/src/latency.c | 721 - examples/redis-unstable/src/latency.h | 88 - examples/redis-unstable/src/lazyfree.c | 362 - examples/redis-unstable/src/listpack.c | 3334 ---- examples/redis-unstable/src/listpack.h | 97 - examples/redis-unstable/src/listpack_malloc.h | 29 - examples/redis-unstable/src/localtime.c | 103 - examples/redis-unstable/src/logreqres.c | 347 - examples/redis-unstable/src/lolwut.c | 172 - examples/redis-unstable/src/lolwut.h | 35 - examples/redis-unstable/src/lolwut5.c | 157 - examples/redis-unstable/src/lolwut6.c | 181 - examples/redis-unstable/src/lolwut8.c | 179 - examples/redis-unstable/src/lzf.h | 100 - examples/redis-unstable/src/lzfP.h | 190 - examples/redis-unstable/src/lzf_c.c | 309 - examples/redis-unstable/src/lzf_d.c | 191 - examples/redis-unstable/src/memory_prefetch.c | 413 - examples/redis-unstable/src/memory_prefetch.h | 26 - examples/redis-unstable/src/memtest.c | 347 - examples/redis-unstable/src/mkreleasehdr.sh | 16 - examples/redis-unstable/src/module.c | 15545 ------------------- examples/redis-unstable/src/modules/.gitignore | 2 - examples/redis-unstable/src/modules/Makefile | 69 - examples/redis-unstable/src/modules/helloacl.c | 190 - examples/redis-unstable/src/modules/helloblock.c | 198 - examples/redis-unstable/src/modules/hellocluster.c | 98 - examples/redis-unstable/src/modules/hellodict.c | 111 - examples/redis-unstable/src/modules/hellohook.c | 72 - examples/redis-unstable/src/modules/hellotimer.c | 55 - examples/redis-unstable/src/modules/hellotype.c | 342 - examples/redis-unstable/src/modules/helloworld.c | 601 - examples/redis-unstable/src/monotonic.c | 247 - examples/redis-unstable/src/monotonic.h | 61 - examples/redis-unstable/src/mstr.c | 528 - examples/redis-unstable/src/mstr.h | 227 - examples/redis-unstable/src/mt19937-64.c | 187 - examples/redis-unstable/src/mt19937-64.h | 87 - examples/redis-unstable/src/multi.c | 509 - examples/redis-unstable/src/networking.c | 5743 ------- examples/redis-unstable/src/notify.c | 129 - examples/redis-unstable/src/object.c | 1821 --- examples/redis-unstable/src/object.h | 194 - examples/redis-unstable/src/pqsort.c | 185 - examples/redis-unstable/src/pqsort.h | 20 - examples/redis-unstable/src/pubsub.c | 768 - examples/redis-unstable/src/quicklist.c | 3658 ----- examples/redis-unstable/src/quicklist.h | 218 - examples/redis-unstable/src/rand.c | 93 - examples/redis-unstable/src/rand.h | 18 - examples/redis-unstable/src/rax.c | 2098 --- examples/redis-unstable/src/rax.h | 204 - examples/redis-unstable/src/rax_malloc.h | 28 - examples/redis-unstable/src/rdb.c | 4483 ------ examples/redis-unstable/src/rdb.h | 170 - examples/redis-unstable/src/redis-benchmark.c | 2028 --- examples/redis-unstable/src/redis-check-aof.c | 589 - examples/redis-unstable/src/redis-check-rdb.c | 451 - examples/redis-unstable/src/redis-cli.c | 11143 ------------- examples/redis-unstable/src/redis-trib.rb | 129 - examples/redis-unstable/src/redisassert.c | 61 - examples/redis-unstable/src/redisassert.h | 35 - examples/redis-unstable/src/redismodule.h | 1885 --- examples/redis-unstable/src/release.c | 49 - examples/redis-unstable/src/replication.c | 5387 ------- examples/redis-unstable/src/resp_parser.c | 209 - examples/redis-unstable/src/resp_parser.h | 74 - examples/redis-unstable/src/rio.c | 640 - examples/redis-unstable/src/rio.h | 188 - examples/redis-unstable/src/script.c | 699 - examples/redis-unstable/src/script.h | 95 - examples/redis-unstable/src/script_lua.c | 1767 --- examples/redis-unstable/src/script_lua.h | 68 - examples/redis-unstable/src/sds.c | 1565 -- examples/redis-unstable/src/sds.h | 340 - examples/redis-unstable/src/sdsalloc.h | 33 - examples/redis-unstable/src/sentinel.c | 5474 ------- examples/redis-unstable/src/server.c | 7941 ---------- examples/redis-unstable/src/server.h | 4478 ------ examples/redis-unstable/src/setcpuaffinity.c | 155 - examples/redis-unstable/src/setproctitle.c | 331 - examples/redis-unstable/src/sha1.c | 239 - examples/redis-unstable/src/sha1.h | 27 - examples/redis-unstable/src/sha256.c | 163 - examples/redis-unstable/src/sha256.h | 35 - examples/redis-unstable/src/siphash.c | 373 - examples/redis-unstable/src/slowlog.c | 186 - examples/redis-unstable/src/slowlog.h | 31 - examples/redis-unstable/src/socket.c | 468 - examples/redis-unstable/src/solarisfixes.h | 34 - examples/redis-unstable/src/sort.c | 665 - examples/redis-unstable/src/sparkline.c | 162 - examples/redis-unstable/src/sparkline.h | 39 - examples/redis-unstable/src/stream.h | 208 - examples/redis-unstable/src/strl.c | 86 - examples/redis-unstable/src/syncio.c | 125 - examples/redis-unstable/src/syscheck.c | 354 - examples/redis-unstable/src/syscheck.h | 26 - examples/redis-unstable/src/t_hash.c | 4068 ----- examples/redis-unstable/src/t_list.c | 1465 -- examples/redis-unstable/src/t_set.c | 1863 --- examples/redis-unstable/src/t_stream.c | 5755 ------- examples/redis-unstable/src/t_string.c | 1217 -- examples/redis-unstable/src/t_zset.c | 5024 ------ examples/redis-unstable/src/testhelp.h | 44 - examples/redis-unstable/src/threads_mngr.c | 171 - examples/redis-unstable/src/threads_mngr.h | 49 - examples/redis-unstable/src/timeout.c | 183 - examples/redis-unstable/src/tls.c | 1295 -- examples/redis-unstable/src/tracking.c | 666 - examples/redis-unstable/src/tsan.sup | 21 - examples/redis-unstable/src/unix.c | 219 - examples/redis-unstable/src/util.c | 1774 --- examples/redis-unstable/src/util.h | 102 - examples/redis-unstable/src/valgrind.sup | 26 - examples/redis-unstable/src/version.h | 2 - examples/redis-unstable/src/ziplist.c | 2665 ---- examples/redis-unstable/src/ziplist.h | 74 - examples/redis-unstable/src/zipmap.c | 524 - examples/redis-unstable/src/zipmap.h | 34 - examples/redis-unstable/src/zmalloc.c | 1217 -- examples/redis-unstable/src/zmalloc.h | 168 - 637 files changed, 228441 deletions(-) delete mode 100644 examples/redis-unstable/src/.gitignore delete mode 100644 examples/redis-unstable/src/Makefile delete mode 100644 examples/redis-unstable/src/acl.c delete mode 100644 examples/redis-unstable/src/adlist.c delete mode 100644 examples/redis-unstable/src/adlist.h delete mode 100644 examples/redis-unstable/src/ae.c delete mode 100644 examples/redis-unstable/src/ae.h delete mode 100644 examples/redis-unstable/src/ae_epoll.c delete mode 100644 examples/redis-unstable/src/ae_evport.c delete mode 100644 examples/redis-unstable/src/ae_kqueue.c delete mode 100644 examples/redis-unstable/src/ae_select.c delete mode 100644 examples/redis-unstable/src/anet.c delete mode 100644 examples/redis-unstable/src/anet.h delete mode 100644 examples/redis-unstable/src/aof.c delete mode 100644 examples/redis-unstable/src/asciilogo.h delete mode 100644 examples/redis-unstable/src/atomicvar.h delete mode 100644 examples/redis-unstable/src/bio.c delete mode 100644 examples/redis-unstable/src/bio.h delete mode 100644 examples/redis-unstable/src/bitops.c delete mode 100644 examples/redis-unstable/src/blocked.c delete mode 100644 examples/redis-unstable/src/call_reply.c delete mode 100644 examples/redis-unstable/src/call_reply.h delete mode 100644 examples/redis-unstable/src/childinfo.c delete mode 100644 examples/redis-unstable/src/chk.c delete mode 100644 examples/redis-unstable/src/chk.h delete mode 100644 examples/redis-unstable/src/cli_commands.c delete mode 100644 examples/redis-unstable/src/cli_commands.h delete mode 100644 examples/redis-unstable/src/cli_common.c delete mode 100644 examples/redis-unstable/src/cli_common.h delete mode 100644 examples/redis-unstable/src/cluster.c delete mode 100644 examples/redis-unstable/src/cluster.h delete mode 100644 examples/redis-unstable/src/cluster_asm.c delete mode 100644 examples/redis-unstable/src/cluster_asm.h delete mode 100644 examples/redis-unstable/src/cluster_legacy.c delete mode 100644 examples/redis-unstable/src/cluster_legacy.h delete mode 100644 examples/redis-unstable/src/cluster_slot_stats.c delete mode 100644 examples/redis-unstable/src/cluster_slot_stats.h delete mode 100644 examples/redis-unstable/src/commands.c delete mode 100644 examples/redis-unstable/src/commands.def delete mode 100644 examples/redis-unstable/src/commands.h delete mode 100644 examples/redis-unstable/src/commands/README.md delete mode 100644 examples/redis-unstable/src/commands/acl-cat.json delete mode 100644 examples/redis-unstable/src/commands/acl-deluser.json delete mode 100644 examples/redis-unstable/src/commands/acl-dryrun.json delete mode 100644 examples/redis-unstable/src/commands/acl-genpass.json delete mode 100644 examples/redis-unstable/src/commands/acl-getuser.json delete mode 100644 examples/redis-unstable/src/commands/acl-help.json delete mode 100644 examples/redis-unstable/src/commands/acl-list.json delete mode 100644 examples/redis-unstable/src/commands/acl-load.json delete mode 100644 examples/redis-unstable/src/commands/acl-log.json delete mode 100644 examples/redis-unstable/src/commands/acl-save.json delete mode 100644 examples/redis-unstable/src/commands/acl-setuser.json delete mode 100644 examples/redis-unstable/src/commands/acl-users.json delete mode 100644 examples/redis-unstable/src/commands/acl-whoami.json delete mode 100644 examples/redis-unstable/src/commands/acl.json delete mode 100644 examples/redis-unstable/src/commands/append.json delete mode 100644 examples/redis-unstable/src/commands/asking.json delete mode 100644 examples/redis-unstable/src/commands/auth.json delete mode 100644 examples/redis-unstable/src/commands/bgrewriteaof.json delete mode 100644 examples/redis-unstable/src/commands/bgsave.json delete mode 100644 examples/redis-unstable/src/commands/bitcount.json delete mode 100644 examples/redis-unstable/src/commands/bitfield.json delete mode 100644 examples/redis-unstable/src/commands/bitfield_ro.json delete mode 100644 examples/redis-unstable/src/commands/bitop.json delete mode 100644 examples/redis-unstable/src/commands/bitpos.json delete mode 100644 examples/redis-unstable/src/commands/blmove.json delete mode 100644 examples/redis-unstable/src/commands/blmpop.json delete mode 100644 examples/redis-unstable/src/commands/blpop.json delete mode 100644 examples/redis-unstable/src/commands/brpop.json delete mode 100644 examples/redis-unstable/src/commands/brpoplpush.json delete mode 100644 examples/redis-unstable/src/commands/bzmpop.json delete mode 100644 examples/redis-unstable/src/commands/bzpopmax.json delete mode 100644 examples/redis-unstable/src/commands/bzpopmin.json delete mode 100644 examples/redis-unstable/src/commands/client-caching.json delete mode 100644 examples/redis-unstable/src/commands/client-getname.json delete mode 100644 examples/redis-unstable/src/commands/client-getredir.json delete mode 100644 examples/redis-unstable/src/commands/client-help.json delete mode 100644 examples/redis-unstable/src/commands/client-id.json delete mode 100644 examples/redis-unstable/src/commands/client-info.json delete mode 100644 examples/redis-unstable/src/commands/client-kill.json delete mode 100644 examples/redis-unstable/src/commands/client-list.json delete mode 100644 examples/redis-unstable/src/commands/client-no-evict.json delete mode 100644 examples/redis-unstable/src/commands/client-no-touch.json delete mode 100644 examples/redis-unstable/src/commands/client-pause.json delete mode 100644 examples/redis-unstable/src/commands/client-reply.json delete mode 100644 examples/redis-unstable/src/commands/client-setinfo.json delete mode 100644 examples/redis-unstable/src/commands/client-setname.json delete mode 100644 examples/redis-unstable/src/commands/client-tracking.json delete mode 100644 examples/redis-unstable/src/commands/client-trackinginfo.json delete mode 100644 examples/redis-unstable/src/commands/client-unblock.json delete mode 100644 examples/redis-unstable/src/commands/client-unpause.json delete mode 100644 examples/redis-unstable/src/commands/client.json delete mode 100644 examples/redis-unstable/src/commands/cluster-addslots.json delete mode 100644 examples/redis-unstable/src/commands/cluster-addslotsrange.json delete mode 100644 examples/redis-unstable/src/commands/cluster-bumpepoch.json delete mode 100644 examples/redis-unstable/src/commands/cluster-count-failure-reports.json delete mode 100644 examples/redis-unstable/src/commands/cluster-countkeysinslot.json delete mode 100644 examples/redis-unstable/src/commands/cluster-delslots.json delete mode 100644 examples/redis-unstable/src/commands/cluster-delslotsrange.json delete mode 100644 examples/redis-unstable/src/commands/cluster-failover.json delete mode 100644 examples/redis-unstable/src/commands/cluster-flushslots.json delete mode 100644 examples/redis-unstable/src/commands/cluster-forget.json delete mode 100644 examples/redis-unstable/src/commands/cluster-getkeysinslot.json delete mode 100644 examples/redis-unstable/src/commands/cluster-help.json delete mode 100644 examples/redis-unstable/src/commands/cluster-info.json delete mode 100644 examples/redis-unstable/src/commands/cluster-keyslot.json delete mode 100644 examples/redis-unstable/src/commands/cluster-links.json delete mode 100644 examples/redis-unstable/src/commands/cluster-meet.json delete mode 100644 examples/redis-unstable/src/commands/cluster-migration.json delete mode 100644 examples/redis-unstable/src/commands/cluster-myid.json delete mode 100644 examples/redis-unstable/src/commands/cluster-myshardid.json delete mode 100644 examples/redis-unstable/src/commands/cluster-nodes.json delete mode 100644 examples/redis-unstable/src/commands/cluster-replicas.json delete mode 100644 examples/redis-unstable/src/commands/cluster-replicate.json delete mode 100644 examples/redis-unstable/src/commands/cluster-reset.json delete mode 100644 examples/redis-unstable/src/commands/cluster-saveconfig.json delete mode 100644 examples/redis-unstable/src/commands/cluster-set-config-epoch.json delete mode 100644 examples/redis-unstable/src/commands/cluster-setslot.json delete mode 100644 examples/redis-unstable/src/commands/cluster-shards.json delete mode 100644 examples/redis-unstable/src/commands/cluster-slaves.json delete mode 100644 examples/redis-unstable/src/commands/cluster-slot-stats.json delete mode 100644 examples/redis-unstable/src/commands/cluster-slots.json delete mode 100644 examples/redis-unstable/src/commands/cluster-syncslots.json delete mode 100644 examples/redis-unstable/src/commands/cluster.json delete mode 100644 examples/redis-unstable/src/commands/command-count.json delete mode 100644 examples/redis-unstable/src/commands/command-docs.json delete mode 100644 examples/redis-unstable/src/commands/command-getkeys.json delete mode 100644 examples/redis-unstable/src/commands/command-getkeysandflags.json delete mode 100644 examples/redis-unstable/src/commands/command-help.json delete mode 100644 examples/redis-unstable/src/commands/command-info.json delete mode 100644 examples/redis-unstable/src/commands/command-list.json delete mode 100644 examples/redis-unstable/src/commands/command.json delete mode 100644 examples/redis-unstable/src/commands/config-get.json delete mode 100644 examples/redis-unstable/src/commands/config-help.json delete mode 100644 examples/redis-unstable/src/commands/config-resetstat.json delete mode 100644 examples/redis-unstable/src/commands/config-rewrite.json delete mode 100644 examples/redis-unstable/src/commands/config-set.json delete mode 100644 examples/redis-unstable/src/commands/config.json delete mode 100644 examples/redis-unstable/src/commands/copy.json delete mode 100644 examples/redis-unstable/src/commands/dbsize.json delete mode 100644 examples/redis-unstable/src/commands/debug.json delete mode 100644 examples/redis-unstable/src/commands/decr.json delete mode 100644 examples/redis-unstable/src/commands/decrby.json delete mode 100644 examples/redis-unstable/src/commands/del.json delete mode 100644 examples/redis-unstable/src/commands/delex.json delete mode 100644 examples/redis-unstable/src/commands/digest.json delete mode 100644 examples/redis-unstable/src/commands/discard.json delete mode 100644 examples/redis-unstable/src/commands/dump.json delete mode 100644 examples/redis-unstable/src/commands/echo.json delete mode 100644 examples/redis-unstable/src/commands/eval.json delete mode 100644 examples/redis-unstable/src/commands/eval_ro.json delete mode 100644 examples/redis-unstable/src/commands/evalsha.json delete mode 100644 examples/redis-unstable/src/commands/evalsha_ro.json delete mode 100644 examples/redis-unstable/src/commands/exec.json delete mode 100644 examples/redis-unstable/src/commands/exists.json delete mode 100644 examples/redis-unstable/src/commands/expire.json delete mode 100644 examples/redis-unstable/src/commands/expireat.json delete mode 100644 examples/redis-unstable/src/commands/expiretime.json delete mode 100644 examples/redis-unstable/src/commands/failover.json delete mode 100644 examples/redis-unstable/src/commands/fcall.json delete mode 100644 examples/redis-unstable/src/commands/fcall_ro.json delete mode 100644 examples/redis-unstable/src/commands/flushall.json delete mode 100644 examples/redis-unstable/src/commands/flushdb.json delete mode 100644 examples/redis-unstable/src/commands/function-delete.json delete mode 100644 examples/redis-unstable/src/commands/function-dump.json delete mode 100644 examples/redis-unstable/src/commands/function-flush.json delete mode 100644 examples/redis-unstable/src/commands/function-help.json delete mode 100644 examples/redis-unstable/src/commands/function-kill.json delete mode 100644 examples/redis-unstable/src/commands/function-list.json delete mode 100644 examples/redis-unstable/src/commands/function-load.json delete mode 100644 examples/redis-unstable/src/commands/function-restore.json delete mode 100644 examples/redis-unstable/src/commands/function-stats.json delete mode 100644 examples/redis-unstable/src/commands/function.json delete mode 100644 examples/redis-unstable/src/commands/geoadd.json delete mode 100644 examples/redis-unstable/src/commands/geodist.json delete mode 100644 examples/redis-unstable/src/commands/geohash.json delete mode 100644 examples/redis-unstable/src/commands/geopos.json delete mode 100644 examples/redis-unstable/src/commands/georadius.json delete mode 100644 examples/redis-unstable/src/commands/georadius_ro.json delete mode 100644 examples/redis-unstable/src/commands/georadiusbymember.json delete mode 100644 examples/redis-unstable/src/commands/georadiusbymember_ro.json delete mode 100644 examples/redis-unstable/src/commands/geosearch.json delete mode 100644 examples/redis-unstable/src/commands/geosearchstore.json delete mode 100644 examples/redis-unstable/src/commands/get.json delete mode 100644 examples/redis-unstable/src/commands/getbit.json delete mode 100644 examples/redis-unstable/src/commands/getdel.json delete mode 100644 examples/redis-unstable/src/commands/getex.json delete mode 100644 examples/redis-unstable/src/commands/getrange.json delete mode 100644 examples/redis-unstable/src/commands/getset.json delete mode 100644 examples/redis-unstable/src/commands/hdel.json delete mode 100644 examples/redis-unstable/src/commands/hello.json delete mode 100644 examples/redis-unstable/src/commands/hexists.json delete mode 100644 examples/redis-unstable/src/commands/hexpire.json delete mode 100644 examples/redis-unstable/src/commands/hexpireat.json delete mode 100644 examples/redis-unstable/src/commands/hexpiretime.json delete mode 100644 examples/redis-unstable/src/commands/hget.json delete mode 100644 examples/redis-unstable/src/commands/hgetall.json delete mode 100644 examples/redis-unstable/src/commands/hgetdel.json delete mode 100644 examples/redis-unstable/src/commands/hgetex.json delete mode 100644 examples/redis-unstable/src/commands/hincrby.json delete mode 100644 examples/redis-unstable/src/commands/hincrbyfloat.json delete mode 100644 examples/redis-unstable/src/commands/hkeys.json delete mode 100644 examples/redis-unstable/src/commands/hlen.json delete mode 100644 examples/redis-unstable/src/commands/hmget.json delete mode 100644 examples/redis-unstable/src/commands/hmset.json delete mode 100644 examples/redis-unstable/src/commands/hotkeys-get.json delete mode 100644 examples/redis-unstable/src/commands/hotkeys-reset.json delete mode 100644 examples/redis-unstable/src/commands/hotkeys-start.json delete mode 100644 examples/redis-unstable/src/commands/hotkeys-stop.json delete mode 100644 examples/redis-unstable/src/commands/hotkeys.json delete mode 100644 examples/redis-unstable/src/commands/hpersist.json delete mode 100644 examples/redis-unstable/src/commands/hpexpire.json delete mode 100644 examples/redis-unstable/src/commands/hpexpireat.json delete mode 100644 examples/redis-unstable/src/commands/hpexpiretime.json delete mode 100644 examples/redis-unstable/src/commands/hpttl.json delete mode 100644 examples/redis-unstable/src/commands/hrandfield.json delete mode 100644 examples/redis-unstable/src/commands/hscan.json delete mode 100644 examples/redis-unstable/src/commands/hset.json delete mode 100644 examples/redis-unstable/src/commands/hsetex.json delete mode 100644 examples/redis-unstable/src/commands/hsetnx.json delete mode 100644 examples/redis-unstable/src/commands/hstrlen.json delete mode 100644 examples/redis-unstable/src/commands/httl.json delete mode 100644 examples/redis-unstable/src/commands/hvals.json delete mode 100644 examples/redis-unstable/src/commands/incr.json delete mode 100644 examples/redis-unstable/src/commands/incrby.json delete mode 100644 examples/redis-unstable/src/commands/incrbyfloat.json delete mode 100644 examples/redis-unstable/src/commands/info.json delete mode 100644 examples/redis-unstable/src/commands/keys.json delete mode 100644 examples/redis-unstable/src/commands/lastsave.json delete mode 100644 examples/redis-unstable/src/commands/latency-doctor.json delete mode 100644 examples/redis-unstable/src/commands/latency-graph.json delete mode 100644 examples/redis-unstable/src/commands/latency-help.json delete mode 100644 examples/redis-unstable/src/commands/latency-histogram.json delete mode 100644 examples/redis-unstable/src/commands/latency-history.json delete mode 100644 examples/redis-unstable/src/commands/latency-latest.json delete mode 100644 examples/redis-unstable/src/commands/latency-reset.json delete mode 100644 examples/redis-unstable/src/commands/latency.json delete mode 100644 examples/redis-unstable/src/commands/lcs.json delete mode 100644 examples/redis-unstable/src/commands/lindex.json delete mode 100644 examples/redis-unstable/src/commands/linsert.json delete mode 100644 examples/redis-unstable/src/commands/llen.json delete mode 100644 examples/redis-unstable/src/commands/lmove.json delete mode 100644 examples/redis-unstable/src/commands/lmpop.json delete mode 100644 examples/redis-unstable/src/commands/lolwut.json delete mode 100644 examples/redis-unstable/src/commands/lpop.json delete mode 100644 examples/redis-unstable/src/commands/lpos.json delete mode 100644 examples/redis-unstable/src/commands/lpush.json delete mode 100644 examples/redis-unstable/src/commands/lpushx.json delete mode 100644 examples/redis-unstable/src/commands/lrange.json delete mode 100644 examples/redis-unstable/src/commands/lrem.json delete mode 100644 examples/redis-unstable/src/commands/lset.json delete mode 100644 examples/redis-unstable/src/commands/ltrim.json delete mode 100644 examples/redis-unstable/src/commands/memory-doctor.json delete mode 100644 examples/redis-unstable/src/commands/memory-help.json delete mode 100644 examples/redis-unstable/src/commands/memory-malloc-stats.json delete mode 100644 examples/redis-unstable/src/commands/memory-purge.json delete mode 100644 examples/redis-unstable/src/commands/memory-stats.json delete mode 100644 examples/redis-unstable/src/commands/memory-usage.json delete mode 100644 examples/redis-unstable/src/commands/memory.json delete mode 100644 examples/redis-unstable/src/commands/mget.json delete mode 100644 examples/redis-unstable/src/commands/migrate.json delete mode 100644 examples/redis-unstable/src/commands/module-help.json delete mode 100644 examples/redis-unstable/src/commands/module-list.json delete mode 100644 examples/redis-unstable/src/commands/module-load.json delete mode 100644 examples/redis-unstable/src/commands/module-loadex.json delete mode 100644 examples/redis-unstable/src/commands/module-unload.json delete mode 100644 examples/redis-unstable/src/commands/module.json delete mode 100644 examples/redis-unstable/src/commands/monitor.json delete mode 100644 examples/redis-unstable/src/commands/move.json delete mode 100644 examples/redis-unstable/src/commands/mset.json delete mode 100644 examples/redis-unstable/src/commands/msetex.json delete mode 100644 examples/redis-unstable/src/commands/msetnx.json delete mode 100644 examples/redis-unstable/src/commands/multi.json delete mode 100644 examples/redis-unstable/src/commands/object-encoding.json delete mode 100644 examples/redis-unstable/src/commands/object-freq.json delete mode 100644 examples/redis-unstable/src/commands/object-help.json delete mode 100644 examples/redis-unstable/src/commands/object-idletime.json delete mode 100644 examples/redis-unstable/src/commands/object-refcount.json delete mode 100644 examples/redis-unstable/src/commands/object.json delete mode 100644 examples/redis-unstable/src/commands/persist.json delete mode 100644 examples/redis-unstable/src/commands/pexpire.json delete mode 100644 examples/redis-unstable/src/commands/pexpireat.json delete mode 100644 examples/redis-unstable/src/commands/pexpiretime.json delete mode 100644 examples/redis-unstable/src/commands/pfadd.json delete mode 100644 examples/redis-unstable/src/commands/pfcount.json delete mode 100644 examples/redis-unstable/src/commands/pfdebug.json delete mode 100644 examples/redis-unstable/src/commands/pfmerge.json delete mode 100644 examples/redis-unstable/src/commands/pfselftest.json delete mode 100644 examples/redis-unstable/src/commands/ping.json delete mode 100644 examples/redis-unstable/src/commands/psetex.json delete mode 100644 examples/redis-unstable/src/commands/psubscribe.json delete mode 100644 examples/redis-unstable/src/commands/psync.json delete mode 100644 examples/redis-unstable/src/commands/pttl.json delete mode 100644 examples/redis-unstable/src/commands/publish.json delete mode 100644 examples/redis-unstable/src/commands/pubsub-channels.json delete mode 100644 examples/redis-unstable/src/commands/pubsub-help.json delete mode 100644 examples/redis-unstable/src/commands/pubsub-numpat.json delete mode 100644 examples/redis-unstable/src/commands/pubsub-numsub.json delete mode 100644 examples/redis-unstable/src/commands/pubsub-shardchannels.json delete mode 100644 examples/redis-unstable/src/commands/pubsub-shardnumsub.json delete mode 100644 examples/redis-unstable/src/commands/pubsub.json delete mode 100644 examples/redis-unstable/src/commands/punsubscribe.json delete mode 100644 examples/redis-unstable/src/commands/quit.json delete mode 100644 examples/redis-unstable/src/commands/randomkey.json delete mode 100644 examples/redis-unstable/src/commands/readonly.json delete mode 100644 examples/redis-unstable/src/commands/readwrite.json delete mode 100644 examples/redis-unstable/src/commands/rename.json delete mode 100644 examples/redis-unstable/src/commands/renamenx.json delete mode 100644 examples/redis-unstable/src/commands/replconf.json delete mode 100644 examples/redis-unstable/src/commands/replicaof.json delete mode 100644 examples/redis-unstable/src/commands/reset.json delete mode 100644 examples/redis-unstable/src/commands/restore-asking.json delete mode 100644 examples/redis-unstable/src/commands/restore.json delete mode 100644 examples/redis-unstable/src/commands/role.json delete mode 100644 examples/redis-unstable/src/commands/rpop.json delete mode 100644 examples/redis-unstable/src/commands/rpoplpush.json delete mode 100644 examples/redis-unstable/src/commands/rpush.json delete mode 100644 examples/redis-unstable/src/commands/rpushx.json delete mode 100644 examples/redis-unstable/src/commands/sadd.json delete mode 100644 examples/redis-unstable/src/commands/save.json delete mode 100644 examples/redis-unstable/src/commands/scan.json delete mode 100644 examples/redis-unstable/src/commands/scard.json delete mode 100644 examples/redis-unstable/src/commands/script-debug.json delete mode 100644 examples/redis-unstable/src/commands/script-exists.json delete mode 100644 examples/redis-unstable/src/commands/script-flush.json delete mode 100644 examples/redis-unstable/src/commands/script-help.json delete mode 100644 examples/redis-unstable/src/commands/script-kill.json delete mode 100644 examples/redis-unstable/src/commands/script-load.json delete mode 100644 examples/redis-unstable/src/commands/script.json delete mode 100644 examples/redis-unstable/src/commands/sdiff.json delete mode 100644 examples/redis-unstable/src/commands/sdiffstore.json delete mode 100644 examples/redis-unstable/src/commands/select.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-ckquorum.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-config.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-debug.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-failover.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-flushconfig.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-get-master-addr-by-name.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-help.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-info-cache.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-is-master-down-by-addr.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-master.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-masters.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-monitor.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-myid.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-pending-scripts.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-remove.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-replicas.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-reset.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-sentinels.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-set.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-simulate-failure.json delete mode 100644 examples/redis-unstable/src/commands/sentinel-slaves.json delete mode 100644 examples/redis-unstable/src/commands/sentinel.json delete mode 100644 examples/redis-unstable/src/commands/set.json delete mode 100644 examples/redis-unstable/src/commands/setbit.json delete mode 100644 examples/redis-unstable/src/commands/setex.json delete mode 100644 examples/redis-unstable/src/commands/setnx.json delete mode 100644 examples/redis-unstable/src/commands/setrange.json delete mode 100644 examples/redis-unstable/src/commands/sflush.json delete mode 100644 examples/redis-unstable/src/commands/shutdown.json delete mode 100644 examples/redis-unstable/src/commands/sinter.json delete mode 100644 examples/redis-unstable/src/commands/sintercard.json delete mode 100644 examples/redis-unstable/src/commands/sinterstore.json delete mode 100644 examples/redis-unstable/src/commands/sismember.json delete mode 100644 examples/redis-unstable/src/commands/slaveof.json delete mode 100644 examples/redis-unstable/src/commands/slowlog-get.json delete mode 100644 examples/redis-unstable/src/commands/slowlog-help.json delete mode 100644 examples/redis-unstable/src/commands/slowlog-len.json delete mode 100644 examples/redis-unstable/src/commands/slowlog-reset.json delete mode 100644 examples/redis-unstable/src/commands/slowlog.json delete mode 100644 examples/redis-unstable/src/commands/smembers.json delete mode 100644 examples/redis-unstable/src/commands/smismember.json delete mode 100644 examples/redis-unstable/src/commands/smove.json delete mode 100644 examples/redis-unstable/src/commands/sort.json delete mode 100644 examples/redis-unstable/src/commands/sort_ro.json delete mode 100644 examples/redis-unstable/src/commands/spop.json delete mode 100644 examples/redis-unstable/src/commands/spublish.json delete mode 100644 examples/redis-unstable/src/commands/srandmember.json delete mode 100644 examples/redis-unstable/src/commands/srem.json delete mode 100644 examples/redis-unstable/src/commands/sscan.json delete mode 100644 examples/redis-unstable/src/commands/ssubscribe.json delete mode 100644 examples/redis-unstable/src/commands/strlen.json delete mode 100644 examples/redis-unstable/src/commands/subscribe.json delete mode 100644 examples/redis-unstable/src/commands/substr.json delete mode 100644 examples/redis-unstable/src/commands/sunion.json delete mode 100644 examples/redis-unstable/src/commands/sunionstore.json delete mode 100644 examples/redis-unstable/src/commands/sunsubscribe.json delete mode 100644 examples/redis-unstable/src/commands/swapdb.json delete mode 100644 examples/redis-unstable/src/commands/sync.json delete mode 100644 examples/redis-unstable/src/commands/time.json delete mode 100644 examples/redis-unstable/src/commands/touch.json delete mode 100644 examples/redis-unstable/src/commands/trimslots.json delete mode 100644 examples/redis-unstable/src/commands/ttl.json delete mode 100644 examples/redis-unstable/src/commands/type.json delete mode 100644 examples/redis-unstable/src/commands/unlink.json delete mode 100644 examples/redis-unstable/src/commands/unsubscribe.json delete mode 100644 examples/redis-unstable/src/commands/unwatch.json delete mode 100644 examples/redis-unstable/src/commands/wait.json delete mode 100644 examples/redis-unstable/src/commands/waitaof.json delete mode 100644 examples/redis-unstable/src/commands/watch.json delete mode 100644 examples/redis-unstable/src/commands/xack.json delete mode 100644 examples/redis-unstable/src/commands/xackdel.json delete mode 100644 examples/redis-unstable/src/commands/xadd.json delete mode 100644 examples/redis-unstable/src/commands/xautoclaim.json delete mode 100644 examples/redis-unstable/src/commands/xcfgset.json delete mode 100644 examples/redis-unstable/src/commands/xclaim.json delete mode 100644 examples/redis-unstable/src/commands/xdel.json delete mode 100644 examples/redis-unstable/src/commands/xdelex.json delete mode 100644 examples/redis-unstable/src/commands/xgroup-create.json delete mode 100644 examples/redis-unstable/src/commands/xgroup-createconsumer.json delete mode 100644 examples/redis-unstable/src/commands/xgroup-delconsumer.json delete mode 100644 examples/redis-unstable/src/commands/xgroup-destroy.json delete mode 100644 examples/redis-unstable/src/commands/xgroup-help.json delete mode 100644 examples/redis-unstable/src/commands/xgroup-setid.json delete mode 100644 examples/redis-unstable/src/commands/xgroup.json delete mode 100644 examples/redis-unstable/src/commands/xinfo-consumers.json delete mode 100644 examples/redis-unstable/src/commands/xinfo-groups.json delete mode 100644 examples/redis-unstable/src/commands/xinfo-help.json delete mode 100644 examples/redis-unstable/src/commands/xinfo-stream.json delete mode 100644 examples/redis-unstable/src/commands/xinfo.json delete mode 100644 examples/redis-unstable/src/commands/xlen.json delete mode 100644 examples/redis-unstable/src/commands/xpending.json delete mode 100644 examples/redis-unstable/src/commands/xrange.json delete mode 100644 examples/redis-unstable/src/commands/xread.json delete mode 100644 examples/redis-unstable/src/commands/xreadgroup.json delete mode 100644 examples/redis-unstable/src/commands/xrevrange.json delete mode 100644 examples/redis-unstable/src/commands/xsetid.json delete mode 100644 examples/redis-unstable/src/commands/xtrim.json delete mode 100644 examples/redis-unstable/src/commands/zadd.json delete mode 100644 examples/redis-unstable/src/commands/zcard.json delete mode 100644 examples/redis-unstable/src/commands/zcount.json delete mode 100644 examples/redis-unstable/src/commands/zdiff.json delete mode 100644 examples/redis-unstable/src/commands/zdiffstore.json delete mode 100644 examples/redis-unstable/src/commands/zincrby.json delete mode 100644 examples/redis-unstable/src/commands/zinter.json delete mode 100644 examples/redis-unstable/src/commands/zintercard.json delete mode 100644 examples/redis-unstable/src/commands/zinterstore.json delete mode 100644 examples/redis-unstable/src/commands/zlexcount.json delete mode 100644 examples/redis-unstable/src/commands/zmpop.json delete mode 100644 examples/redis-unstable/src/commands/zmscore.json delete mode 100644 examples/redis-unstable/src/commands/zpopmax.json delete mode 100644 examples/redis-unstable/src/commands/zpopmin.json delete mode 100644 examples/redis-unstable/src/commands/zrandmember.json delete mode 100644 examples/redis-unstable/src/commands/zrange.json delete mode 100644 examples/redis-unstable/src/commands/zrangebylex.json delete mode 100644 examples/redis-unstable/src/commands/zrangebyscore.json delete mode 100644 examples/redis-unstable/src/commands/zrangestore.json delete mode 100644 examples/redis-unstable/src/commands/zrank.json delete mode 100644 examples/redis-unstable/src/commands/zrem.json delete mode 100644 examples/redis-unstable/src/commands/zremrangebylex.json delete mode 100644 examples/redis-unstable/src/commands/zremrangebyrank.json delete mode 100644 examples/redis-unstable/src/commands/zremrangebyscore.json delete mode 100644 examples/redis-unstable/src/commands/zrevrange.json delete mode 100644 examples/redis-unstable/src/commands/zrevrangebylex.json delete mode 100644 examples/redis-unstable/src/commands/zrevrangebyscore.json delete mode 100644 examples/redis-unstable/src/commands/zrevrank.json delete mode 100644 examples/redis-unstable/src/commands/zscan.json delete mode 100644 examples/redis-unstable/src/commands/zscore.json delete mode 100644 examples/redis-unstable/src/commands/zunion.json delete mode 100644 examples/redis-unstable/src/commands/zunionstore.json delete mode 100644 examples/redis-unstable/src/config.c delete mode 100644 examples/redis-unstable/src/config.h delete mode 100644 examples/redis-unstable/src/connection.c delete mode 100644 examples/redis-unstable/src/connection.h delete mode 100644 examples/redis-unstable/src/connhelpers.h delete mode 100644 examples/redis-unstable/src/crc16.c delete mode 100644 examples/redis-unstable/src/crc16_slottable.h delete mode 100644 examples/redis-unstable/src/crc64.c delete mode 100644 examples/redis-unstable/src/crc64.h delete mode 100644 examples/redis-unstable/src/crccombine.c delete mode 100644 examples/redis-unstable/src/crccombine.h delete mode 100644 examples/redis-unstable/src/crcspeed.c delete mode 100644 examples/redis-unstable/src/crcspeed.h delete mode 100644 examples/redis-unstable/src/db.c delete mode 100644 examples/redis-unstable/src/debug.c delete mode 100644 examples/redis-unstable/src/debugmacro.h delete mode 100644 examples/redis-unstable/src/defrag.c delete mode 100644 examples/redis-unstable/src/dict.c delete mode 100644 examples/redis-unstable/src/dict.h delete mode 100644 examples/redis-unstable/src/ebuckets.c delete mode 100644 examples/redis-unstable/src/ebuckets.h delete mode 100644 examples/redis-unstable/src/endianconv.c delete mode 100644 examples/redis-unstable/src/endianconv.h delete mode 100644 examples/redis-unstable/src/entry.c delete mode 100644 examples/redis-unstable/src/entry.h delete mode 100644 examples/redis-unstable/src/estore.c delete mode 100644 examples/redis-unstable/src/estore.h delete mode 100644 examples/redis-unstable/src/eval.c delete mode 100644 examples/redis-unstable/src/eventnotifier.c delete mode 100644 examples/redis-unstable/src/eventnotifier.h delete mode 100644 examples/redis-unstable/src/evict.c delete mode 100644 examples/redis-unstable/src/expire.c delete mode 100644 examples/redis-unstable/src/fmacros.h delete mode 100644 examples/redis-unstable/src/fmtargs.h delete mode 100644 examples/redis-unstable/src/function_lua.c delete mode 100644 examples/redis-unstable/src/functions.c delete mode 100644 examples/redis-unstable/src/functions.h delete mode 100644 examples/redis-unstable/src/fwtree.c delete mode 100644 examples/redis-unstable/src/fwtree.h delete mode 100644 examples/redis-unstable/src/geo.c delete mode 100644 examples/redis-unstable/src/geo.h delete mode 100644 examples/redis-unstable/src/geohash.c delete mode 100644 examples/redis-unstable/src/geohash.h delete mode 100644 examples/redis-unstable/src/geohash_helper.c delete mode 100644 examples/redis-unstable/src/geohash_helper.h delete mode 100644 examples/redis-unstable/src/hotkeys.c delete mode 100644 examples/redis-unstable/src/hyperloglog.c delete mode 100644 examples/redis-unstable/src/intset.c delete mode 100644 examples/redis-unstable/src/intset.h delete mode 100644 examples/redis-unstable/src/iothread.c delete mode 100644 examples/redis-unstable/src/keymeta.c delete mode 100644 examples/redis-unstable/src/keymeta.h delete mode 100644 examples/redis-unstable/src/kvstore.c delete mode 100644 examples/redis-unstable/src/kvstore.h delete mode 100644 examples/redis-unstable/src/latency.c delete mode 100644 examples/redis-unstable/src/latency.h delete mode 100644 examples/redis-unstable/src/lazyfree.c delete mode 100644 examples/redis-unstable/src/listpack.c delete mode 100644 examples/redis-unstable/src/listpack.h delete mode 100644 examples/redis-unstable/src/listpack_malloc.h delete mode 100644 examples/redis-unstable/src/localtime.c delete mode 100644 examples/redis-unstable/src/logreqres.c delete mode 100644 examples/redis-unstable/src/lolwut.c delete mode 100644 examples/redis-unstable/src/lolwut.h delete mode 100644 examples/redis-unstable/src/lolwut5.c delete mode 100644 examples/redis-unstable/src/lolwut6.c delete mode 100644 examples/redis-unstable/src/lolwut8.c delete mode 100644 examples/redis-unstable/src/lzf.h delete mode 100644 examples/redis-unstable/src/lzfP.h delete mode 100644 examples/redis-unstable/src/lzf_c.c delete mode 100644 examples/redis-unstable/src/lzf_d.c delete mode 100644 examples/redis-unstable/src/memory_prefetch.c delete mode 100644 examples/redis-unstable/src/memory_prefetch.h delete mode 100644 examples/redis-unstable/src/memtest.c delete mode 100755 examples/redis-unstable/src/mkreleasehdr.sh delete mode 100644 examples/redis-unstable/src/module.c delete mode 100644 examples/redis-unstable/src/modules/.gitignore delete mode 100644 examples/redis-unstable/src/modules/Makefile delete mode 100644 examples/redis-unstable/src/modules/helloacl.c delete mode 100644 examples/redis-unstable/src/modules/helloblock.c delete mode 100644 examples/redis-unstable/src/modules/hellocluster.c delete mode 100644 examples/redis-unstable/src/modules/hellodict.c delete mode 100644 examples/redis-unstable/src/modules/hellohook.c delete mode 100644 examples/redis-unstable/src/modules/hellotimer.c delete mode 100644 examples/redis-unstable/src/modules/hellotype.c delete mode 100644 examples/redis-unstable/src/modules/helloworld.c delete mode 100644 examples/redis-unstable/src/monotonic.c delete mode 100644 examples/redis-unstable/src/monotonic.h delete mode 100644 examples/redis-unstable/src/mstr.c delete mode 100644 examples/redis-unstable/src/mstr.h delete mode 100644 examples/redis-unstable/src/mt19937-64.c delete mode 100644 examples/redis-unstable/src/mt19937-64.h delete mode 100644 examples/redis-unstable/src/multi.c delete mode 100644 examples/redis-unstable/src/networking.c delete mode 100644 examples/redis-unstable/src/notify.c delete mode 100644 examples/redis-unstable/src/object.c delete mode 100644 examples/redis-unstable/src/object.h delete mode 100644 examples/redis-unstable/src/pqsort.c delete mode 100644 examples/redis-unstable/src/pqsort.h delete mode 100644 examples/redis-unstable/src/pubsub.c delete mode 100644 examples/redis-unstable/src/quicklist.c delete mode 100644 examples/redis-unstable/src/quicklist.h delete mode 100644 examples/redis-unstable/src/rand.c delete mode 100644 examples/redis-unstable/src/rand.h delete mode 100644 examples/redis-unstable/src/rax.c delete mode 100644 examples/redis-unstable/src/rax.h delete mode 100644 examples/redis-unstable/src/rax_malloc.h delete mode 100644 examples/redis-unstable/src/rdb.c delete mode 100644 examples/redis-unstable/src/rdb.h delete mode 100644 examples/redis-unstable/src/redis-benchmark.c delete mode 100644 examples/redis-unstable/src/redis-check-aof.c delete mode 100644 examples/redis-unstable/src/redis-check-rdb.c delete mode 100644 examples/redis-unstable/src/redis-cli.c delete mode 100755 examples/redis-unstable/src/redis-trib.rb delete mode 100644 examples/redis-unstable/src/redisassert.c delete mode 100644 examples/redis-unstable/src/redisassert.h delete mode 100644 examples/redis-unstable/src/redismodule.h delete mode 100644 examples/redis-unstable/src/release.c delete mode 100644 examples/redis-unstable/src/replication.c delete mode 100644 examples/redis-unstable/src/resp_parser.c delete mode 100644 examples/redis-unstable/src/resp_parser.h delete mode 100644 examples/redis-unstable/src/rio.c delete mode 100644 examples/redis-unstable/src/rio.h delete mode 100644 examples/redis-unstable/src/script.c delete mode 100644 examples/redis-unstable/src/script.h delete mode 100644 examples/redis-unstable/src/script_lua.c delete mode 100644 examples/redis-unstable/src/script_lua.h delete mode 100644 examples/redis-unstable/src/sds.c delete mode 100644 examples/redis-unstable/src/sds.h delete mode 100644 examples/redis-unstable/src/sdsalloc.h delete mode 100644 examples/redis-unstable/src/sentinel.c delete mode 100644 examples/redis-unstable/src/server.c delete mode 100644 examples/redis-unstable/src/server.h delete mode 100644 examples/redis-unstable/src/setcpuaffinity.c delete mode 100644 examples/redis-unstable/src/setproctitle.c delete mode 100644 examples/redis-unstable/src/sha1.c delete mode 100644 examples/redis-unstable/src/sha1.h delete mode 100644 examples/redis-unstable/src/sha256.c delete mode 100644 examples/redis-unstable/src/sha256.h delete mode 100644 examples/redis-unstable/src/siphash.c delete mode 100644 examples/redis-unstable/src/slowlog.c delete mode 100644 examples/redis-unstable/src/slowlog.h delete mode 100644 examples/redis-unstable/src/socket.c delete mode 100644 examples/redis-unstable/src/solarisfixes.h delete mode 100644 examples/redis-unstable/src/sort.c delete mode 100644 examples/redis-unstable/src/sparkline.c delete mode 100644 examples/redis-unstable/src/sparkline.h delete mode 100644 examples/redis-unstable/src/stream.h delete mode 100644 examples/redis-unstable/src/strl.c delete mode 100644 examples/redis-unstable/src/syncio.c delete mode 100644 examples/redis-unstable/src/syscheck.c delete mode 100644 examples/redis-unstable/src/syscheck.h delete mode 100644 examples/redis-unstable/src/t_hash.c delete mode 100644 examples/redis-unstable/src/t_list.c delete mode 100644 examples/redis-unstable/src/t_set.c delete mode 100644 examples/redis-unstable/src/t_stream.c delete mode 100644 examples/redis-unstable/src/t_string.c delete mode 100644 examples/redis-unstable/src/t_zset.c delete mode 100644 examples/redis-unstable/src/testhelp.h delete mode 100644 examples/redis-unstable/src/threads_mngr.c delete mode 100644 examples/redis-unstable/src/threads_mngr.h delete mode 100644 examples/redis-unstable/src/timeout.c delete mode 100644 examples/redis-unstable/src/tls.c delete mode 100644 examples/redis-unstable/src/tracking.c delete mode 100644 examples/redis-unstable/src/tsan.sup delete mode 100644 examples/redis-unstable/src/unix.c delete mode 100644 examples/redis-unstable/src/util.c delete mode 100644 examples/redis-unstable/src/util.h delete mode 100644 examples/redis-unstable/src/valgrind.sup delete mode 100644 examples/redis-unstable/src/version.h delete mode 100644 examples/redis-unstable/src/ziplist.c delete mode 100644 examples/redis-unstable/src/ziplist.h delete mode 100644 examples/redis-unstable/src/zipmap.c delete mode 100644 examples/redis-unstable/src/zipmap.h delete mode 100644 examples/redis-unstable/src/zmalloc.c delete mode 100644 examples/redis-unstable/src/zmalloc.h (limited to 'examples/redis-unstable/src') diff --git a/examples/redis-unstable/src/.gitignore b/examples/redis-unstable/src/.gitignore deleted file mode 100644 index aee7aac..0000000 --- a/examples/redis-unstable/src/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.gcda -*.gcno -*.gcov -redis.info -lcov-html diff --git a/examples/redis-unstable/src/Makefile b/examples/redis-unstable/src/Makefile deleted file mode 100644 index 24b9a39..0000000 --- a/examples/redis-unstable/src/Makefile +++ /dev/null @@ -1,563 +0,0 @@ -# Redis Makefile -# Copyright (c) 2011-Present, Redis Ltd. -# All rights reserved. -# -# Licensed under your choice of (a) the Redis Source Available License 2.0 -# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the -# GNU Affero General Public License v3 (AGPLv3). -# -# The Makefile composes the final FINAL_CFLAGS and FINAL_LDFLAGS using -# what is needed for Redis plus the standard CFLAGS and LDFLAGS passed. -# However when building the dependencies (Jemalloc, Lua, Hiredis, ...) -# CFLAGS and LDFLAGS are propagated to the dependencies, so to pass -# flags only to be used when compiling / linking Redis itself REDIS_CFLAGS -# and REDIS_LDFLAGS are used instead (this is the case of 'make gcov'). -# -# Dependencies are stored in the Makefile.dep file. To rebuild this file -# Just use 'make dep', but this is only needed by developers. - -release_hdr := $(shell sh -c './mkreleasehdr.sh') -uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') -uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') -CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1')) - -# Optimization flags. To override, the OPTIMIZATION variable can be passed, but -# some automatic defaults are added to it. To specify optimization flags -# explicitly without any defaults added, pass the OPT variable instead. -OPTIMIZATION?=-O3 -ifeq ($(OPTIMIZATION),-O3) - ifeq (clang,$(CLANG)) - OPTIMIZATION+=-flto - else - OPTIMIZATION+=-flto=auto - endif -endif -ifneq ($(OPTIMIZATION),-O0) - OPTIMIZATION+=-fno-omit-frame-pointer -endif -DEPENDENCY_TARGETS=hiredis linenoise lua hdr_histogram fpconv fast_float xxhash -NODEPS:=clean distclean - -# Default settings -STD=-pedantic -DREDIS_STATIC='' - -# Use -Wno-c11-extensions on clang, either where explicitly used or on -# platforms we can assume it's being used. -ifeq (clang,$(CLANG)) - STD+=-Wno-c11-extensions -else -ifneq (,$(findstring FreeBSD,$(uname_S))) - STD+=-Wno-c11-extensions -endif -endif -WARN=-Wall -W -Wno-missing-field-initializers -Werror=deprecated-declarations -Wstrict-prototypes -OPT=$(OPTIMIZATION) - -SKIP_VEC_SETS?=no -# Detect if the compiler supports C11 _Atomic. -# NUMBER_SIGN_CHAR is a workaround to support both GNU Make 4.3 and older versions. -NUMBER_SIGN_CHAR := \# -C11_ATOMIC := $(shell sh -c 'echo "$(NUMBER_SIGN_CHAR)include " > foo.c; \ - $(CC) -std=gnu11 -c foo.c -o foo.o > /dev/null 2>&1; \ - if [ -f foo.o ]; then echo "yes"; rm foo.o; fi; rm foo.c') -ifeq ($(C11_ATOMIC),yes) - STD+=-std=gnu11 -else - SKIP_VEC_SETS=yes - STD+=-std=c99 -endif - -PREFIX?=/usr/local -INSTALL_BIN=$(PREFIX)/bin -INSTALL=install -PKG_CONFIG?=pkg-config - -ifndef PYTHON -PYTHON := $(shell which python3 || which python) -endif - -# Default allocator defaults to Jemalloc on Linux and libc otherwise -MALLOC=libc -ifeq ($(uname_S),Linux) - MALLOC=jemalloc -endif - -# To get ARM stack traces if Redis crashes we need a special C flag. -ifneq (,$(filter aarch64 armv%,$(uname_M))) - CFLAGS+=-funwind-tables -endif - -# Backwards compatibility for selecting an allocator -ifeq ($(USE_TCMALLOC),yes) - MALLOC=tcmalloc -endif - -ifeq ($(USE_TCMALLOC_MINIMAL),yes) - MALLOC=tcmalloc_minimal -endif - -ifeq ($(USE_JEMALLOC),yes) - MALLOC=jemalloc -endif - -ifeq ($(USE_JEMALLOC),no) - MALLOC=libc -endif - -ifdef SANITIZER -ifeq ($(SANITIZER),address) - MALLOC=libc - CFLAGS+=-fsanitize=address -fno-sanitize-recover=all -fno-omit-frame-pointer - LDFLAGS+=-fsanitize=address -else -ifeq ($(SANITIZER),undefined) - MALLOC=libc - CFLAGS+=-fsanitize=undefined -fno-sanitize-recover=all -fno-omit-frame-pointer - LDFLAGS+=-fsanitize=undefined -else -ifeq ($(SANITIZER),thread) - CFLAGS+=-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer - LDFLAGS+=-fsanitize=thread -else -ifeq ($(SANITIZER),memory) -ifeq (clang, $(CLANG)) - export CXX:=clang - export LD:=clang - MALLOC=libc # MSan provides its own allocator so make sure not to use jemalloc as they clash - CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer - LDFLAGS+=-fsanitize=memory -else - $(error "MemorySanitizer needs to be compiled and linked with clang. Please use CC=clang") -endif -else - $(error "unknown sanitizer=${SANITIZER}") -endif -endif -endif -endif -endif - -# Special case of forcing defrag to run even though we have no Jemlloc support -ifeq ($(DEBUG_DEFRAG), force) - CFLAGS +=-DDEBUG_DEFRAG_FORCE -else ifeq ($(DEBUG_DEFRAG), fully) - CFLAGS +=-DDEBUG_DEFRAG_FORCE -DDEBUG_DEFRAG_FULLY -endif - -# Override default settings if possible --include .make-settings - -FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) -FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(REDIS_LDFLAGS) $(DEBUG) -FINAL_LIBS=-lm -lstdc++ -DEBUG=-g -ggdb - -# Linux ARM32 needs -latomic at linking time -ifneq (,$(findstring armv,$(uname_M))) - FINAL_LIBS+=-latomic -endif - -ifeq ($(uname_S),SunOS) - # SunOS - ifeq ($(findstring -m32,$(FINAL_CFLAGS)),) - CFLAGS+=-m64 - endif - ifeq ($(findstring -m32,$(FINAL_LDFLAGS)),) - LDFLAGS+=-m64 - endif - DEBUG=-g - DEBUG_FLAGS=-g - export CFLAGS LDFLAGS DEBUG DEBUG_FLAGS - INSTALL=cp -pf - FINAL_CFLAGS+= -D__EXTENSIONS__ -D_XPG6 - FINAL_LIBS+= -ldl -lnsl -lsocket -lresolv -lpthread -lrt - ifeq ($(USE_BACKTRACE),yes) - FINAL_CFLAGS+= -DUSE_BACKTRACE - endif -else -ifeq ($(uname_S),Darwin) - # Darwin - FINAL_LIBS+= -ldl - # Homebrew's OpenSSL is not linked to /usr/local to avoid - # conflicts with the system's LibreSSL installation so it - # must be referenced explicitly during build. -ifeq ($(uname_M),arm64) - # Homebrew arm64 uses /opt/homebrew as HOMEBREW_PREFIX - OPENSSL_PREFIX?=/opt/homebrew/opt/openssl -else - # Homebrew x86/ppc uses /usr/local as HOMEBREW_PREFIX - OPENSSL_PREFIX?=/usr/local/opt/openssl -endif -else -ifeq ($(uname_S),AIX) - # AIX - FINAL_LDFLAGS+= -Wl,-bexpall - FINAL_LIBS+=-ldl -pthread -lcrypt -lbsd -else -ifeq ($(uname_S),OpenBSD) - # OpenBSD - FINAL_LIBS+= -lpthread - ifeq ($(USE_BACKTRACE),yes) - FINAL_CFLAGS+= -DUSE_BACKTRACE -I/usr/local/include - FINAL_LDFLAGS+= -L/usr/local/lib - FINAL_LIBS+= -lexecinfo - endif - -else -ifeq ($(uname_S),NetBSD) - # NetBSD - FINAL_LIBS+= -lpthread - ifeq ($(USE_BACKTRACE),yes) - FINAL_CFLAGS+= -DUSE_BACKTRACE -I/usr/pkg/include - FINAL_LDFLAGS+= -L/usr/pkg/lib - FINAL_LIBS+= -lexecinfo - endif -else -ifeq ($(uname_S),FreeBSD) - # FreeBSD - FINAL_LIBS+= -lpthread -lexecinfo -else -ifeq ($(uname_S),DragonFly) - # DragonFly - FINAL_LIBS+= -lpthread -lexecinfo -else -ifeq ($(uname_S),OpenBSD) - # OpenBSD - FINAL_LIBS+= -lpthread -lexecinfo -else -ifeq ($(uname_S),NetBSD) - # NetBSD - FINAL_LIBS+= -lpthread -lexecinfo -else -ifeq ($(uname_S),Haiku) - # Haiku - FINAL_CFLAGS+= -DBSD_SOURCE - FINAL_LDFLAGS+= -lbsd -lnetwork - FINAL_LIBS+= -lpthread -else - # All the other OSes (notably Linux) - FINAL_LDFLAGS+= -rdynamic - FINAL_LIBS+=-ldl -pthread -lrt -endif -endif -endif -endif -endif -endif -endif -endif -endif -endif - -ifdef OPENSSL_PREFIX - OPENSSL_CFLAGS=-I$(OPENSSL_PREFIX)/include - OPENSSL_LDFLAGS=-L$(OPENSSL_PREFIX)/lib - # Also export OPENSSL_PREFIX so it ends up in deps sub-Makefiles - export OPENSSL_PREFIX -endif - -# Include paths to dependencies -FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src -I../deps/hdr_histogram -I../deps/fpconv -I../deps/fast_float -I../deps/xxhash - -# Determine systemd support and/or build preference (defaulting to auto-detection) -BUILD_WITH_SYSTEMD=no -LIBSYSTEMD_LIBS=-lsystemd - -# If 'USE_SYSTEMD' in the environment is neither "no" nor "yes", try to -# auto-detect libsystemd's presence and link accordingly. -ifneq ($(USE_SYSTEMD),no) - LIBSYSTEMD_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libsystemd && echo $$?) -# If libsystemd cannot be detected, continue building without support for it -# (unless a later check tells us otherwise) -ifeq ($(LIBSYSTEMD_PKGCONFIG),0) - BUILD_WITH_SYSTEMD=yes - LIBSYSTEMD_LIBS=$(shell $(PKG_CONFIG) --libs libsystemd) -endif -endif - -# If 'USE_SYSTEMD' is set to "yes" use pkg-config if available or fall back to -# default -lsystemd. -ifeq ($(USE_SYSTEMD),yes) - BUILD_WITH_SYSTEMD=yes -endif - -ifeq ($(BUILD_WITH_SYSTEMD),yes) - FINAL_LIBS+=$(LIBSYSTEMD_LIBS) - FINAL_CFLAGS+= -DHAVE_LIBSYSTEMD -endif - -ifeq ($(MALLOC),tcmalloc) - FINAL_CFLAGS+= -DUSE_TCMALLOC - FINAL_LIBS+= -ltcmalloc -endif - -ifeq ($(MALLOC),tcmalloc_minimal) - FINAL_CFLAGS+= -DUSE_TCMALLOC - FINAL_LIBS+= -ltcmalloc_minimal -endif - -ifeq ($(MALLOC),jemalloc) - DEPENDENCY_TARGETS+= jemalloc - FINAL_CFLAGS+= -DUSE_JEMALLOC -I../deps/jemalloc/include - FINAL_LIBS := ../deps/jemalloc/lib/libjemalloc.a $(FINAL_LIBS) -endif - -# LIBSSL & LIBCRYPTO -LIBSSL_LIBS= -LIBSSL_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libssl && echo $$?) -ifeq ($(LIBSSL_PKGCONFIG),0) - LIBSSL_LIBS=$(shell $(PKG_CONFIG) --libs libssl) -else - LIBSSL_LIBS=-lssl -endif -LIBCRYPTO_LIBS= -LIBCRYPTO_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libcrypto && echo $$?) -ifeq ($(LIBCRYPTO_PKGCONFIG),0) - LIBCRYPTO_LIBS=$(shell $(PKG_CONFIG) --libs libcrypto) -else - LIBCRYPTO_LIBS=-lcrypto -endif - -BUILD_NO:=0 -BUILD_YES:=1 -BUILD_MODULE:=2 -ifeq ($(BUILD_TLS),yes) - FINAL_CFLAGS+=-DUSE_OPENSSL=$(BUILD_YES) $(OPENSSL_CFLAGS) -DBUILD_TLS_MODULE=$(BUILD_NO) - FINAL_LDFLAGS+=$(OPENSSL_LDFLAGS) - FINAL_LIBS += ../deps/hiredis/libhiredis_ssl.a $(LIBSSL_LIBS) $(LIBCRYPTO_LIBS) -endif - -TLS_MODULE= -TLS_MODULE_NAME:=redis-tls$(PROG_SUFFIX).so -TLS_MODULE_CFLAGS:=$(FINAL_CFLAGS) -ifeq ($(BUILD_TLS),module) - FINAL_CFLAGS+=-DUSE_OPENSSL=$(BUILD_MODULE) $(OPENSSL_CFLAGS) - TLS_CLIENT_LIBS = ../deps/hiredis/libhiredis_ssl.a $(LIBSSL_LIBS) $(LIBCRYPTO_LIBS) - TLS_MODULE=$(TLS_MODULE_NAME) - TLS_MODULE_CFLAGS+=-DUSE_OPENSSL=$(BUILD_MODULE) $(OPENSSL_CFLAGS) -DBUILD_TLS_MODULE=$(BUILD_MODULE) -endif - -ifneq ($(SKIP_VEC_SETS),yes) - vpath %.c ../modules/vector-sets - REDIS_VEC_SETS_OBJ=hnsw.o vset.o vset_config.o - FINAL_CFLAGS+=-DINCLUDE_VEC_SETS=1 -endif - -ifndef V - define MAKE_INSTALL - @printf ' %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$(1)$(ENDCOLOR) 1>&2 - @$(INSTALL) $(1) $(2) - endef -else - define MAKE_INSTALL - $(INSTALL) $(1) $(2) - endef -endif - -REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS) -REDIS_LD=$(QUIET_LINK)$(CC) $(FINAL_LDFLAGS) -REDIS_INSTALL=$(QUIET_INSTALL)$(INSTALL) - -CCCOLOR="\033[34m" -LINKCOLOR="\033[34;1m" -SRCCOLOR="\033[33m" -BINCOLOR="\033[37;1m" -MAKECOLOR="\033[32;1m" -ENDCOLOR="\033[0m" - -ifndef V -QUIET_CC = @printf ' %b %b\n' $(CCCOLOR)CC$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR) 1>&2; -QUIET_GEN = @printf ' %b %b\n' $(CCCOLOR)GEN$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR) 1>&2; -QUIET_LINK = @printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) 1>&2; -QUIET_INSTALL = @printf ' %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) 1>&2; -endif - -ifneq (, $(findstring LOG_REQ_RES, $(REDIS_CFLAGS))) - COMMANDS_DEF_FILENAME=commands_with_reply_schema - GEN_COMMANDS_FLAGS=--with-reply-schema -else - COMMANDS_DEF_FILENAME=commands - GEN_COMMANDS_FLAGS= -endif - -REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) -REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=threads_mngr.o memory_prefetch.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o entry.o kvstore.o fwtree.o estore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_asm.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o lolwut8.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o keymeta.o chk.o hotkeys.o -REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o -REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) -REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o -REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX) -REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX) -ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ))) - -all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) $(TLS_MODULE) module_tests - @echo "" - @echo "Hint: It's a good idea to run 'make test' ;)" - @echo "" - -Makefile.dep: - -$(REDIS_CC) -MM $(ALL_SOURCES) > Makefile.dep 2> /dev/null || true - -ifeq (0, $(words $(findstring $(MAKECMDGOALS), $(NODEPS)))) --include Makefile.dep -endif - -.PHONY: all - -module_tests: $(REDIS_SERVER_NAME) - $(MAKE) -C ../tests/modules - -.PHONY: module_tests - -persist-settings: distclean - echo STD=$(STD) >> .make-settings - echo WARN=$(WARN) >> .make-settings - echo OPT=$(OPT) >> .make-settings - echo MALLOC=$(MALLOC) >> .make-settings - echo BUILD_TLS=$(BUILD_TLS) >> .make-settings - echo USE_SYSTEMD=$(USE_SYSTEMD) >> .make-settings - echo CFLAGS=$(CFLAGS) >> .make-settings - echo LDFLAGS=$(LDFLAGS) >> .make-settings - echo REDIS_CFLAGS=$(REDIS_CFLAGS) >> .make-settings - echo REDIS_LDFLAGS=$(REDIS_LDFLAGS) >> .make-settings - echo PREV_FINAL_CFLAGS=$(FINAL_CFLAGS) >> .make-settings - echo PREV_FINAL_LDFLAGS=$(FINAL_LDFLAGS) >> .make-settings - -(cd ../deps && $(MAKE) $(DEPENDENCY_TARGETS)) - -.PHONY: persist-settings - -# Prerequisites target -.make-prerequisites: - @touch $@ - -# Clean everything, persist settings and build dependencies if anything changed -ifneq ($(strip $(PREV_FINAL_CFLAGS)), $(strip $(FINAL_CFLAGS))) -.make-prerequisites: persist-settings -endif - -ifneq ($(strip $(PREV_FINAL_LDFLAGS)), $(strip $(FINAL_LDFLAGS))) -.make-prerequisites: persist-settings -endif - -# redis-server -$(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/fast_float/libfast_float.a ../deps/xxhash/libxxhash.a $(FINAL_LIBS) - -# redis-sentinel -$(REDIS_SENTINEL_NAME): $(REDIS_SERVER_NAME) - $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) - -# redis-check-rdb -$(REDIS_CHECK_RDB_NAME): $(REDIS_SERVER_NAME) - $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_RDB_NAME) - -# redis-check-aof -$(REDIS_CHECK_AOF_NAME): $(REDIS_SERVER_NAME) - $(REDIS_INSTALL) $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME) - -# redis-tls.so -$(TLS_MODULE_NAME): $(REDIS_SERVER_NAME) - $(QUIET_CC)$(CC) -o $@ tls.c -shared -fPIC $(TLS_MODULE_CFLAGS) $(TLS_CLIENT_LIBS) - -# redis-cli -$(REDIS_CLI_NAME): $(REDIS_CLI_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/linenoise/linenoise.o ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS) $(TLS_CLIENT_LIBS) - -# redis-benchmark -$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ) - $(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS) $(TLS_CLIENT_LIBS) - -DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_VEC_SETS_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d) --include $(DEP) - -# Because the jemalloc.h header is generated as a part of the jemalloc build, -# building it should complete before building any other object. Instead of -# depending on a single artifact, build all dependencies first. -%.o: %.c .make-prerequisites - $(REDIS_CC) -MMD -o $@ -c $< - -# The following files are checked in and don't normally need to be rebuilt. They -# are built only if python is available and their prereqs are modified. -ifneq (,$(PYTHON)) -$(COMMANDS_DEF_FILENAME).def: commands/*.json ../utils/generate-command-code.py - $(QUIET_GEN)$(PYTHON) ../utils/generate-command-code.py $(GEN_COMMANDS_FLAGS) - -fmtargs.h: ../utils/generate-fmtargs.py - $(QUITE_GEN)sed '/Everything below this line/,$$d' $@ > $@.tmp - $(QUITE_GEN)$(PYTHON) ../utils/generate-fmtargs.py >> $@.tmp - $(QUITE_GEN)mv $@.tmp $@ -endif - -commands.c: $(COMMANDS_DEF_FILENAME).def - -clean: - rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep *.so - rm -f $(DEP) - -(cd ../tests/modules && $(MAKE) clean) - -.PHONY: clean - -distclean: clean - -(cd ../deps && $(MAKE) distclean) - -(cd modules && $(MAKE) clean) - -(cd ../tests/modules && $(MAKE) clean) - -(rm -f .make-*) - -.PHONY: distclean - -test: $(REDIS_SERVER_NAME) $(REDIS_CHECK_AOF_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) module_tests - @(cd ..; ./runtest) - -test-modules: $(REDIS_SERVER_NAME) - @(cd ..; ./runtest-moduleapi) - -test-sentinel: $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) - @(cd ..; ./runtest-sentinel) - -test-cluster: $(REDIS_SERVER_NAME) $(REDIS_CLI_NAME) - @(cd ..; ./runtest-cluster) - -check: test - -lcov: - @lcov --version - $(MAKE) gcov - @(set -e; cd ..; ./runtest) - @geninfo -o redis.info . - @genhtml --legend -o lcov-html redis.info - -.PHONY: lcov - -bench: $(REDIS_BENCHMARK_NAME) - ./$(REDIS_BENCHMARK_NAME) - -32bit: - @echo "" - @echo "WARNING: if it fails under Linux you probably need to install libc6-dev-i386" - @echo "" - $(MAKE) CFLAGS="-m32" LDFLAGS="-m32" SKIP_VEC_SETS="yes" - -gcov: - $(MAKE) REDIS_CFLAGS="-fprofile-arcs -ftest-coverage -DCOVERAGE_TEST" REDIS_LDFLAGS="-fprofile-arcs -ftest-coverage" - -noopt: - $(MAKE) OPTIMIZATION="-O0" - -valgrind: - $(MAKE) OPTIMIZATION="-O0" MALLOC="libc" - -helgrind: - $(MAKE) OPTIMIZATION="-O0" MALLOC="libc" CFLAGS="-D__ATOMIC_VAR_FORCE_SYNC_MACROS" REDIS_CFLAGS="-I/usr/local/include" REDIS_LDFLAGS="-L/usr/local/lib" - -install: all - @mkdir -p $(INSTALL_BIN) - $(call MAKE_INSTALL,$(REDIS_SERVER_NAME),$(INSTALL_BIN)) - $(call MAKE_INSTALL,$(REDIS_BENCHMARK_NAME),$(INSTALL_BIN)) - $(call MAKE_INSTALL,$(REDIS_CLI_NAME),$(INSTALL_BIN)) - @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_CHECK_RDB_NAME) - @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_CHECK_AOF_NAME) - @ln -sf $(REDIS_SERVER_NAME) $(INSTALL_BIN)/$(REDIS_SENTINEL_NAME) - -uninstall: - rm -f $(INSTALL_BIN)/{$(REDIS_SERVER_NAME),$(REDIS_BENCHMARK_NAME),$(REDIS_CLI_NAME),$(REDIS_CHECK_RDB_NAME),$(REDIS_CHECK_AOF_NAME),$(REDIS_SENTINEL_NAME)} diff --git a/examples/redis-unstable/src/acl.c b/examples/redis-unstable/src/acl.c deleted file mode 100644 index 37d504d..0000000 --- a/examples/redis-unstable/src/acl.c +++ /dev/null @@ -1,3313 +0,0 @@ -/* - * Copyright (c) 2018-Present, Redis Ltd. - * All rights reserved. - * - * Copyright (c) 2024-present, Valkey contributors. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "cluster.h" -#include "sha256.h" -#include -#include - -/* ============================================================================= - * Global state for ACLs - * ==========================================================================*/ - -rax *Users; /* Table mapping usernames to user structures. */ - -user *DefaultUser; /* Global reference to the default user. - Every new connection is associated to it, if no - AUTH or HELLO is used to authenticate with a - different user. */ - -list *UsersToLoad; /* This is a list of users found in the configuration file - that we'll need to load in the final stage of Redis - initialization, after all the modules are already - loaded. Every list element is a NULL terminated - array of SDS pointers: the first is the user name, - all the remaining pointers are ACL rules in the same - format as ACLSetUser(). */ -list *ACLLog; /* Our security log, the user is able to inspect that - using the ACL LOG command .*/ - -long long ACLLogEntryCount = 0; /* Number of ACL log entries created */ - -static rax *commandId = NULL; /* Command name to id mapping */ - -static unsigned long nextid = 0; /* Next command id that has not been assigned */ - -#define ACL_MAX_CATEGORIES 64 /* Maximum number of command categories */ - -struct ACLCategoryItem { - char *name; - uint64_t flag; -} ACLDefaultCommandCategories[] = { /* See redis.conf for details on each category. */ - {"keyspace", ACL_CATEGORY_KEYSPACE}, - {"read", ACL_CATEGORY_READ}, - {"write", ACL_CATEGORY_WRITE}, - {"set", ACL_CATEGORY_SET}, - {"sortedset", ACL_CATEGORY_SORTEDSET}, - {"list", ACL_CATEGORY_LIST}, - {"hash", ACL_CATEGORY_HASH}, - {"string", ACL_CATEGORY_STRING}, - {"bitmap", ACL_CATEGORY_BITMAP}, - {"hyperloglog", ACL_CATEGORY_HYPERLOGLOG}, - {"geo", ACL_CATEGORY_GEO}, - {"stream", ACL_CATEGORY_STREAM}, - {"pubsub", ACL_CATEGORY_PUBSUB}, - {"admin", ACL_CATEGORY_ADMIN}, - {"fast", ACL_CATEGORY_FAST}, - {"slow", ACL_CATEGORY_SLOW}, - {"blocking", ACL_CATEGORY_BLOCKING}, - {"dangerous", ACL_CATEGORY_DANGEROUS}, - {"connection", ACL_CATEGORY_CONNECTION}, - {"transaction", ACL_CATEGORY_TRANSACTION}, - {"scripting", ACL_CATEGORY_SCRIPTING}, - {NULL,0} /* Terminator. */ -}; - -static struct ACLCategoryItem *ACLCommandCategories = NULL; -static size_t nextCommandCategory = 0; /* Index of the next command category to be added */ - -/* Implements the ability to add to the list of ACL categories at runtime. Since each ACL category - * also requires a bit in the acl_categories flag, there is a limit to the number that can be added. - * The new ACL categories occupy the remaining bits of acl_categories flag, other than the bits - * occupied by the default ACL command categories. - * - * The optional `flag` argument allows the assignment of the `acl_categories` flag bit to the ACL category. - * When adding a new category, except for the default ACL command categories, this arguments should be `0` - * to allow the function to assign the next available `acl_categories` flag bit to the new ACL category. - * - * returns 1 -> Added, 0 -> Failed (out of space) - * - * This function is present here to gain access to the ACLCommandCategories array and add a new ACL category. - */ -int ACLAddCommandCategory(const char *name, uint64_t flag) { - if (nextCommandCategory >= ACL_MAX_CATEGORIES) return 0; - ACLCommandCategories[nextCommandCategory].name = zstrdup(name); - ACLCommandCategories[nextCommandCategory].flag = flag != 0 ? flag : (1ULL<>4)]; - hex[j*2+1] = cset[(hash[j]&0xF)]; - } - return sdsnewlen(hex,HASH_PASSWORD_LEN); -} - -/* Given a hash and the hash length, returns C_OK if it is a valid password - * hash, or C_ERR otherwise. */ -int ACLCheckPasswordHash(unsigned char *hash, int hashlen) { - if (hashlen != HASH_PASSWORD_LEN) { - return C_ERR; - } - - /* Password hashes can only be characters that represent - * hexadecimal values, which are numbers and lowercase - * characters 'a' through 'f'. */ - for(int i = 0; i < HASH_PASSWORD_LEN; i++) { - char c = hash[i]; - if ((c < 'a' || c > 'f') && (c < '0' || c > '9')) { - return C_ERR; - } - } - return C_OK; -} - -/* ============================================================================= - * Low level ACL API - * ==========================================================================*/ - -/* Return 1 if the specified string contains spaces or null characters. - * We do this for usernames and key patterns for simpler rewriting of - * ACL rules, presentation on ACL list, and to avoid subtle security bugs - * that may arise from parsing the rules in presence of escapes. - * The function returns 0 if the string has no spaces. */ -int ACLStringHasSpaces(const char *s, size_t len) { - for (size_t i = 0; i < len; i++) { - if (isspace(s[i]) || s[i] == 0) return 1; - } - return 0; -} - -/* Given the category name the command returns the corresponding flag, or - * zero if there is no match. */ -uint64_t ACLGetCommandCategoryFlagByName(const char *name) { - for (int j = 0; ACLCommandCategories[j].flag != 0; j++) { - if (!strcasecmp(name,ACLCommandCategories[j].name)) { - return ACLCommandCategories[j].flag; - } - } - return 0; /* No match. */ -} - -/* Method for searching for a user within a list of user definitions. The - * list contains an array of user arguments, and we are only - * searching the first argument, the username, for a match. */ -int ACLListMatchLoadedUser(void *definition, void *user) { - sds *user_definition = definition; - return sdscmp(user_definition[0], user) == 0; -} - -/* Method for passwords/pattern comparison used for the user->passwords list - * so that we can search for items with listSearchKey(). */ -int ACLListMatchSds(void *a, void *b) { - return sdscmp(a,b) == 0; -} - -/* Method to free list elements from ACL users password/patterns lists. */ -void ACLListFreeSds(void *item) { - sdsfreegeneric(item); -} - -/* Method to duplicate list elements from ACL users password/patterns lists. */ -void *ACLListDupSds(void *item) { - return sdsdup(item); -} - -/* Structure used for handling key patterns with different key - * based permissions. */ -typedef struct { - int flags; /* The ACL key permission types for this key pattern */ - sds pattern; /* The pattern to match keys against */ -} keyPattern; - -/* Create a new key pattern. */ -keyPattern *ACLKeyPatternCreate(sds pattern, int flags) { - keyPattern *new = (keyPattern *) zmalloc(sizeof(keyPattern)); - new->pattern = pattern; - new->flags = flags; - return new; -} - -/* Free a key pattern and internal structures. */ -void ACLKeyPatternFree(keyPattern *pattern) { - sdsfree(pattern->pattern); - zfree(pattern); -} - -/* Method for passwords/pattern comparison used for the user->passwords list - * so that we can search for items with listSearchKey(). */ -int ACLListMatchKeyPattern(void *a, void *b) { - return sdscmp(((keyPattern *) a)->pattern,((keyPattern *) b)->pattern) == 0; -} - -/* Method to free list elements from ACL users password/patterns lists. */ -void ACLListFreeKeyPattern(void *item) { - ACLKeyPatternFree(item); -} - -/* Method to duplicate list elements from ACL users password/patterns lists. */ -void *ACLListDupKeyPattern(void *item) { - keyPattern *old = (keyPattern *) item; - return ACLKeyPatternCreate(sdsdup(old->pattern), old->flags); -} - -/* Append the string representation of a key pattern onto the - * provided base string. */ -sds sdsCatPatternString(sds base, keyPattern *pat) { - if (pat->flags == ACL_ALL_PERMISSION) { - base = sdscatlen(base,"~",1); - } else if (pat->flags == ACL_READ_PERMISSION) { - base = sdscatlen(base,"%R~",3); - } else if (pat->flags == ACL_WRITE_PERMISSION) { - base = sdscatlen(base,"%W~",3); - } else { - serverPanic("Invalid key pattern flag detected"); - } - return sdscatsds(base, pat->pattern); -} - -/* Create an empty selector with the provided set of initial - * flags. The selector will be default have no permissions. */ -aclSelector *ACLCreateSelector(int flags) { - aclSelector *selector = zmalloc(sizeof(aclSelector)); - selector->flags = flags | server.acl_pubsub_default; - selector->patterns = listCreate(); - selector->channels = listCreate(); - selector->allowed_firstargs = NULL; - selector->command_rules = sdsempty(); - - listSetMatchMethod(selector->patterns,ACLListMatchKeyPattern); - listSetFreeMethod(selector->patterns,ACLListFreeKeyPattern); - listSetDupMethod(selector->patterns,ACLListDupKeyPattern); - listSetMatchMethod(selector->channels,ACLListMatchSds); - listSetFreeMethod(selector->channels,ACLListFreeSds); - listSetDupMethod(selector->channels,ACLListDupSds); - memset(selector->allowed_commands,0,sizeof(selector->allowed_commands)); - - return selector; -} - -/* Cleanup the provided selector, including all interior structures. */ -void ACLFreeSelector(aclSelector *selector) { - listRelease(selector->patterns); - listRelease(selector->channels); - sdsfree(selector->command_rules); - ACLResetFirstArgs(selector); - zfree(selector); -} - -/* Create an exact copy of the provided selector. */ -aclSelector *ACLCopySelector(aclSelector *src) { - aclSelector *dst = zmalloc(sizeof(aclSelector)); - dst->flags = src->flags; - dst->patterns = listDup(src->patterns); - dst->channels = listDup(src->channels); - dst->command_rules = sdsdup(src->command_rules); - memcpy(dst->allowed_commands,src->allowed_commands, - sizeof(dst->allowed_commands)); - dst->allowed_firstargs = NULL; - /* Copy the allowed first-args array of array of SDS strings. */ - if (src->allowed_firstargs) { - for (int j = 0; j < USER_COMMAND_BITS_COUNT; j++) { - if (!(src->allowed_firstargs[j])) continue; - for (int i = 0; src->allowed_firstargs[j][i]; i++) { - ACLAddAllowedFirstArg(dst, j, src->allowed_firstargs[j][i]); - } - } - } - return dst; -} - -/* List method for freeing a selector */ -void ACLListFreeSelector(void *a) { - ACLFreeSelector((aclSelector *) a); -} - -/* List method for duplicating a selector */ -void *ACLListDuplicateSelector(void *src) { - return ACLCopySelector((aclSelector *)src); -} - -/* All users have an implicit root selector which - * provides backwards compatibility to the old ACLs- - * permissions. */ -aclSelector *ACLUserGetRootSelector(user *u) { - serverAssert(listLength(u->selectors)); - aclSelector *s = (aclSelector *) listNodeValue(listFirst(u->selectors)); - serverAssert(s->flags & SELECTOR_FLAG_ROOT); - return s; -} - -/* Create a new user with the specified name, store it in the list - * of users (the Users global radix tree), and returns a reference to - * the structure representing the user. - * - * If the user with such name already exists NULL is returned. */ -user *ACLCreateUser(const char *name, size_t namelen) { - if (raxFind(Users,(unsigned char*)name,namelen,NULL)) return NULL; - user *u = zmalloc(sizeof(*u)); - u->name = sdsnewlen(name,namelen); - atomicSet(u->flags, USER_FLAG_DISABLED | USER_FLAG_SANITIZE_PAYLOAD); - u->passwords = listCreate(); - u->acl_string = NULL; - listSetMatchMethod(u->passwords,ACLListMatchSds); - listSetFreeMethod(u->passwords,ACLListFreeSds); - listSetDupMethod(u->passwords,ACLListDupSds); - - u->selectors = listCreate(); - listSetFreeMethod(u->selectors,ACLListFreeSelector); - listSetDupMethod(u->selectors,ACLListDuplicateSelector); - - /* Add the initial root selector */ - aclSelector *s = ACLCreateSelector(SELECTOR_FLAG_ROOT); - listAddNodeHead(u->selectors, s); - - raxInsert(Users,(unsigned char*)name,namelen,u,NULL); - return u; -} - -/* This function should be called when we need an unlinked "fake" user - * we can use in order to validate ACL rules or for other similar reasons. - * The user will not get linked to the Users radix tree. The returned - * user should be released with ACLFreeUser() as usually. */ -user *ACLCreateUnlinkedUser(void) { - char username[64]; - for (int j = 0; ; j++) { - snprintf(username,sizeof(username),"__fakeuser:%d__",j); - user *fakeuser = ACLCreateUser(username,strlen(username)); - if (fakeuser == NULL) continue; - int retval = raxRemove(Users,(unsigned char*) username, - strlen(username),NULL); - serverAssert(retval != 0); - return fakeuser; - } -} - -/* Release the memory used by the user structure. Note that this function - * will not remove the user from the Users global radix tree. */ -void ACLFreeUser(user *u) { - sdsfree(u->name); - if (u->acl_string) { - decrRefCount(u->acl_string); - u->acl_string = NULL; - } - listRelease(u->passwords); - listRelease(u->selectors); - zfree(u); -} - -/* Generic version of ACLFreeUser. */ -void ACLFreeUserGeneric(void *u) { - ACLFreeUser((user *)u); -} - -/* When a user is deleted we need to cycle the active - * connections in order to kill all the pending ones that - * are authenticated with such user. */ -void ACLFreeUserAndKillClients(user *u) { - listIter li; - listNode *ln; - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - client *c = listNodeValue(ln); - if (c->user == u) { - /* We'll free the connection asynchronously, so - * in theory to set a different user is not needed. - * However if there are bugs in Redis, soon or later - * this may result in some security hole: it's much - * more defensive to set the default user and put - * it in non authenticated mode. */ - deauthenticateAndCloseClient(c); - } - } - ACLFreeUser(u); -} - -/* Copy the user ACL rules from the source user 'src' to the destination - * user 'dst' so that at the end of the process they'll have exactly the - * same rules (but the names will continue to be the original ones). */ -void ACLCopyUser(user *dst, user *src) { - listRelease(dst->passwords); - listRelease(dst->selectors); - dst->passwords = listDup(src->passwords); - dst->selectors = listDup(src->selectors); - dst->flags = src->flags; - if (dst->acl_string) { - decrRefCount(dst->acl_string); - } - dst->acl_string = src->acl_string; - if (dst->acl_string) { - /* if src is NULL, we set it to NULL, if not, need to increment reference count */ - incrRefCount(dst->acl_string); - } -} - -/* Given a command ID, this function set by reference 'word' and 'bit' - * so that user->allowed_commands[word] will address the right word - * where the corresponding bit for the provided ID is stored, and - * so that user->allowed_commands[word]&bit will identify that specific - * bit. The function returns C_ERR in case the specified ID overflows - * the bitmap in the user representation. */ -int ACLGetCommandBitCoordinates(uint64_t id, uint64_t *word, uint64_t *bit) { - if (id >= USER_COMMAND_BITS_COUNT) return C_ERR; - *word = id / sizeof(uint64_t) / 8; - *bit = 1ULL << (id % (sizeof(uint64_t) * 8)); - return C_OK; -} - -/* Check if the specified command bit is set for the specified user. - * The function returns 1 is the bit is set or 0 if it is not. - * Note that this function does not check the ALLCOMMANDS flag of the user - * but just the lowlevel bitmask. - * - * If the bit overflows the user internal representation, zero is returned - * in order to disallow the execution of the command in such edge case. */ -int ACLGetSelectorCommandBit(const aclSelector *selector, unsigned long id) { - uint64_t word, bit; - if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return 0; - return (selector->allowed_commands[word] & bit) != 0; -} - -/* When +@all or allcommands is given, we set a reserved bit as well that we - * can later test, to see if the user has the right to execute "future commands", - * that is, commands loaded later via modules. */ -int ACLSelectorCanExecuteFutureCommands(aclSelector *selector) { - return ACLGetSelectorCommandBit(selector,USER_COMMAND_BITS_COUNT-1); -} - -/* Set the specified command bit for the specified user to 'value' (0 or 1). - * If the bit overflows the user internal representation, no operation - * is performed. As a side effect of calling this function with a value of - * zero, the user flag ALLCOMMANDS is cleared since it is no longer possible - * to skip the command bit explicit test. */ -void ACLSetSelectorCommandBit(aclSelector *selector, unsigned long id, int value) { - uint64_t word, bit; - if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return; - if (value) { - selector->allowed_commands[word] |= bit; - } else { - selector->allowed_commands[word] &= ~bit; - selector->flags &= ~SELECTOR_FLAG_ALLCOMMANDS; - } -} - -/* Remove a rule from the retained command rules. Always match rules - * verbatim, but also remove subcommand rules if we are adding or removing the - * entire command. */ -void ACLSelectorRemoveCommandRule(aclSelector *selector, sds new_rule) { - size_t new_len = sdslen(new_rule); - char *existing_rule = selector->command_rules; - - /* Loop over the existing rules, trying to find a rule that "matches" - * the new rule. If we find a match, then remove the command from the string by - * copying the later rules over it. */ - while(existing_rule[0]) { - /* The first character of the rule is +/-, which we don't need to compare. */ - char *copy_position = existing_rule; - existing_rule += 1; - - /* Assume a trailing space after a command is part of the command, like '+get ', so trim it - * as well if the command is removed. */ - char *rule_end = strchr(existing_rule, ' '); - if (!rule_end) { - /* This is the last rule, so move it to the end of the string. */ - rule_end = existing_rule + strlen(existing_rule); - - /* This approach can leave a trailing space if the last rule is removed, - * but only if it's not the first rule, so handle that case. */ - if (copy_position != selector->command_rules) copy_position -= 1; - } - char *copy_end = rule_end; - if (*copy_end == ' ') copy_end++; - - /* Exact match or the rule we are comparing is a subcommand denoted by '|' */ - size_t existing_len = rule_end - existing_rule; - if (!memcmp(existing_rule, new_rule, min(existing_len, new_len))) { - if ((existing_len == new_len) || (existing_len > new_len && (existing_rule[new_len]) == '|')) { - /* Copy the remaining rules starting at the next rule to replace the rule to be - * deleted, including the terminating NULL character. */ - memmove(copy_position, copy_end, strlen(copy_end) + 1); - existing_rule = copy_position; - continue; - } - } - existing_rule = copy_end; - } - - /* There is now extra padding at the end of the rules, so clean that up. */ - sdsupdatelen(selector->command_rules); -} - -/* This function is resopnsible for updating the command_rules struct so that relative ordering of - * commands and categories is maintained and can be reproduced without loss. */ -void ACLUpdateCommandRules(aclSelector *selector, const char *rule, int allow) { - sds new_rule = sdsnew(rule); - sdstolower(new_rule); - - ACLSelectorRemoveCommandRule(selector, new_rule); - if (sdslen(selector->command_rules)) selector->command_rules = sdscat(selector->command_rules, " "); - selector->command_rules = sdscatfmt(selector->command_rules, allow ? "+%S" : "-%S", new_rule); - sdsfree(new_rule); -} - -/* This function is used to allow/block a specific command. - * Allowing/blocking a container command also applies for its subcommands */ -void ACLChangeSelectorPerm(aclSelector *selector, struct redisCommand *cmd, int allow) { - unsigned long id = cmd->id; - ACLSetSelectorCommandBit(selector,id,allow); - ACLResetFirstArgsForCommand(selector,id); - if (cmd->subcommands_dict) { - dictEntry *de; - dictIterator di; - dictInitSafeIterator(&di, cmd->subcommands_dict); - while((de = dictNext(&di)) != NULL) { - struct redisCommand *sub = (struct redisCommand *)dictGetVal(de); - ACLSetSelectorCommandBit(selector,sub->id,allow); - } - dictResetIterator(&di); - } -} - -/* This is like ACLSetSelectorCommandBit(), but instead of setting the specified - * ID, it will check all the commands in the category specified as argument, - * and will set all the bits corresponding to such commands to the specified - * value. Since the category passed by the user may be non existing, the - * function returns C_ERR if the category was not found, or C_OK if it was - * found and the operation was performed. */ -void ACLSetSelectorCommandBitsForCategory(dict *commands, aclSelector *selector, uint64_t cflag, int value) { - dictIterator di; - dictEntry *de; - dictInitIterator(&di, commands); - while ((de = dictNext(&di)) != NULL) { - struct redisCommand *cmd = dictGetVal(de); - if (cmd->acl_categories & cflag) { - ACLChangeSelectorPerm(selector,cmd,value); - } - if (cmd->subcommands_dict) { - ACLSetSelectorCommandBitsForCategory(cmd->subcommands_dict, selector, cflag, value); - } - } - dictResetIterator(&di); -} - -/* This function is responsible for recomputing the command bits for all selectors of the existing users. - * It uses the 'command_rules', a string representation of the ordered categories and commands, - * to recompute the command bits. */ -void ACLRecomputeCommandBitsFromCommandRulesAllUsers(void) { - raxIterator ri; - raxStart(&ri,Users); - raxSeek(&ri,"^",NULL,0); - while(raxNext(&ri)) { - user *u = ri.data; - listIter li; - listNode *ln; - listRewind(u->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *selector = (aclSelector *) listNodeValue(ln); - int argc = 0; - sds *argv = sdssplitargs(selector->command_rules, &argc); - serverAssert(argv != NULL); - /* Checking selector's permissions for all commands to start with a clean state. */ - if (ACLSelectorCanExecuteFutureCommands(selector)) { - int res = ACLSetSelector(selector,"+@all",-1); - serverAssert(res == C_OK); - } else { - int res = ACLSetSelector(selector,"-@all",-1); - serverAssert(res == C_OK); - } - - /* Apply all of the commands and categories to this selector. */ - for(int i = 0; i < argc; i++) { - int res = ACLSetSelector(selector, argv[i], sdslen(argv[i])); - serverAssert(res == C_OK); - } - sdsfreesplitres(argv, argc); - } - } - raxStop(&ri); - -} - -int ACLSetSelectorCategory(aclSelector *selector, const char *category, int allow) { - uint64_t cflag = ACLGetCommandCategoryFlagByName(category + 1); - if (!cflag) return C_ERR; - - ACLUpdateCommandRules(selector, category, allow); - - /* Set the actual command bits on the selector. */ - ACLSetSelectorCommandBitsForCategory(server.orig_commands, selector, cflag, allow); - return C_OK; -} - -void ACLCountCategoryBitsForCommands(dict *commands, aclSelector *selector, unsigned long *on, unsigned long *off, uint64_t cflag) { - dictIterator di; - dictEntry *de; - dictInitIterator(&di, commands); - while ((de = dictNext(&di)) != NULL) { - struct redisCommand *cmd = dictGetVal(de); - if (cmd->acl_categories & cflag) { - if (ACLGetSelectorCommandBit(selector,cmd->id)) - (*on)++; - else - (*off)++; - } - if (cmd->subcommands_dict) { - ACLCountCategoryBitsForCommands(cmd->subcommands_dict, selector, on, off, cflag); - } - } - dictResetIterator(&di); -} - -/* Return the number of commands allowed (on) and denied (off) for the user 'u' - * in the subset of commands flagged with the specified category name. - * If the category name is not valid, C_ERR is returned, otherwise C_OK is - * returned and on and off are populated by reference. */ -int ACLCountCategoryBitsForSelector(aclSelector *selector, unsigned long *on, unsigned long *off, - const char *category) -{ - uint64_t cflag = ACLGetCommandCategoryFlagByName(category); - if (!cflag) return C_ERR; - - *on = *off = 0; - ACLCountCategoryBitsForCommands(server.orig_commands, selector, on, off, cflag); - return C_OK; -} - -/* This function returns an SDS string representing the specified selector ACL - * rules related to command execution, in the same format you could set them - * back using ACL SETUSER. The function will return just the set of rules needed - * to recreate the user commands bitmap, without including other user flags such - * as on/off, passwords and so forth. The returned string always starts with - * the +@all or -@all rule, depending on the user bitmap, and is followed, if - * needed, by the other rules needed to narrow or extend what the user can do. */ -sds ACLDescribeSelectorCommandRules(aclSelector *selector) { - sds rules = sdsempty(); - - /* We use this fake selector as a "sanity" check to make sure the rules - * we generate have the same bitmap as those on the current selector. */ - aclSelector *fake_selector = ACLCreateSelector(0); - - /* Here we want to understand if we should start with +@all or -@all. - * Note that when starting with +@all and subtracting, the user - * will be able to execute future commands, while -@all and adding will just - * allow the user the run the selected commands and/or categories. - * How do we test for that? We use the trick of a reserved command ID bit - * that is set only by +@all (and its alias "allcommands"). */ - if (ACLSelectorCanExecuteFutureCommands(selector)) { - rules = sdscat(rules,"+@all "); - ACLSetSelector(fake_selector,"+@all",-1); - } else { - rules = sdscat(rules,"-@all "); - ACLSetSelector(fake_selector,"-@all",-1); - } - - /* Apply all of the commands and categories to the fake selector. */ - int argc = 0; - sds *argv = sdssplitargs(selector->command_rules, &argc); - serverAssert(argv != NULL); - - for(int i = 0; i < argc; i++) { - int res = ACLSetSelector(fake_selector, argv[i], -1); - serverAssert(res == C_OK); - } - if (sdslen(selector->command_rules)) { - rules = sdscatfmt(rules, "%S ", selector->command_rules); - } - sdsfreesplitres(argv, argc); - - /* Trim the final useless space. */ - sdsrange(rules,0,-2); - - /* This is technically not needed, but we want to verify that now the - * predicted bitmap is exactly the same as the user bitmap, and abort - * otherwise, because aborting is better than a security risk in this - * code path. */ - if (memcmp(fake_selector->allowed_commands, - selector->allowed_commands, - sizeof(selector->allowed_commands)) != 0) - { - serverLog(LL_WARNING, - "CRITICAL ERROR: User ACLs don't match final bitmap: '%s'", - redactLogCstr(rules)); - serverPanic("No bitmap match in ACLDescribeSelectorCommandRules()"); - } - ACLFreeSelector(fake_selector); - return rules; -} - -sds ACLDescribeSelector(aclSelector *selector) { - listIter li; - listNode *ln; - sds res = sdsempty(); - /* Key patterns. */ - if (selector->flags & SELECTOR_FLAG_ALLKEYS) { - res = sdscatlen(res,"~* ",3); - } else { - listRewind(selector->patterns,&li); - while((ln = listNext(&li))) { - keyPattern *thispat = (keyPattern *)listNodeValue(ln); - res = sdsCatPatternString(res, thispat); - res = sdscatlen(res," ",1); - } - } - - /* Pub/sub channel patterns. */ - if (selector->flags & SELECTOR_FLAG_ALLCHANNELS) { - res = sdscatlen(res,"&* ",3); - } else { - res = sdscatlen(res,"resetchannels ",14); - listRewind(selector->channels,&li); - while((ln = listNext(&li))) { - sds thispat = listNodeValue(ln); - res = sdscatlen(res,"&",1); - res = sdscatsds(res,thispat); - res = sdscatlen(res," ",1); - } - } - - /* Command rules. */ - sds rules = ACLDescribeSelectorCommandRules(selector); - res = sdscatsds(res,rules); - sdsfree(rules); - return res; -} - -/* This is similar to ACLDescribeSelectorCommandRules(), however instead of - * describing just the user command rules, everything is described: user - * flags, keys, passwords and finally the command rules obtained via - * the ACLDescribeSelectorCommandRules() function. This is the function we call - * when we want to rewrite the configuration files describing ACLs and - * in order to show users with ACL LIST. */ -robj *ACLDescribeUser(user *u) { - if (u->acl_string) { - incrRefCount(u->acl_string); - return u->acl_string; - } - - sds res = sdsempty(); - - /* Flags. */ - for (int j = 0; ACLUserFlags[j].flag; j++) { - if (u->flags & ACLUserFlags[j].flag) { - res = sdscat(res,ACLUserFlags[j].name); - res = sdscatlen(res," ",1); - } - } - - /* Passwords. */ - listIter li; - listNode *ln; - listRewind(u->passwords,&li); - while((ln = listNext(&li))) { - sds thispass = listNodeValue(ln); - res = sdscatlen(res,"#",1); - res = sdscatsds(res,thispass); - res = sdscatlen(res," ",1); - } - - /* Selectors (Commands and keys) */ - listRewind(u->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *selector = (aclSelector *) listNodeValue(ln); - sds default_perm = ACLDescribeSelector(selector); - if (selector->flags & SELECTOR_FLAG_ROOT) { - res = sdscatfmt(res, "%s", default_perm); - } else { - res = sdscatfmt(res, " (%s)", default_perm); - } - sdsfree(default_perm); - } - - u->acl_string = createObject(OBJ_STRING, res); - /* because we are returning it, have to increase count */ - incrRefCount(u->acl_string); - - return u->acl_string; -} - -/* Get a command from the original command table, that is not affected - * by the command renaming operations: we base all the ACL work from that - * table, so that ACLs are valid regardless of command renaming. */ -struct redisCommand *ACLLookupCommand(const char *name) { - struct redisCommand *cmd; - sds sdsname = sdsnew(name); - cmd = lookupCommandBySdsLogic(server.orig_commands,sdsname); - sdsfree(sdsname); - return cmd; -} - -/* Flush the array of allowed first-args for the specified user - * and command ID. */ -void ACLResetFirstArgsForCommand(aclSelector *selector, unsigned long id) { - if (selector->allowed_firstargs && selector->allowed_firstargs[id]) { - for (int i = 0; selector->allowed_firstargs[id][i]; i++) - sdsfree(selector->allowed_firstargs[id][i]); - zfree(selector->allowed_firstargs[id]); - selector->allowed_firstargs[id] = NULL; - } -} - -/* Flush the entire table of first-args. This is useful on +@all, -@all - * or similar to return back to the minimal memory usage (and checks to do) - * for the user. */ -void ACLResetFirstArgs(aclSelector *selector) { - if (selector->allowed_firstargs == NULL) return; - for (int j = 0; j < USER_COMMAND_BITS_COUNT; j++) { - if (selector->allowed_firstargs[j]) { - for (int i = 0; selector->allowed_firstargs[j][i]; i++) - sdsfree(selector->allowed_firstargs[j][i]); - zfree(selector->allowed_firstargs[j]); - } - } - zfree(selector->allowed_firstargs); - selector->allowed_firstargs = NULL; -} - -/* Add a first-arg to the list of subcommands for the user 'u' and - * the command id specified. */ -void ACLAddAllowedFirstArg(aclSelector *selector, unsigned long id, const char *sub) { - /* If this is the first first-arg to be configured for - * this user, we have to allocate the first-args array. */ - if (selector->allowed_firstargs == NULL) { - selector->allowed_firstargs = zcalloc(USER_COMMAND_BITS_COUNT * sizeof(sds*)); - } - - /* We also need to enlarge the allocation pointing to the - * null terminated SDS array, to make space for this one. - * To start check the current size, and while we are here - * make sure the first-arg is not already specified inside. */ - long items = 0; - if (selector->allowed_firstargs[id]) { - while(selector->allowed_firstargs[id][items]) { - /* If it's already here do not add it again. */ - if (!strcasecmp(selector->allowed_firstargs[id][items],sub)) - return; - items++; - } - } - - /* Now we can make space for the new item (and the null term). */ - items += 2; - selector->allowed_firstargs[id] = zrealloc(selector->allowed_firstargs[id], sizeof(sds)*items); - selector->allowed_firstargs[id][items-2] = sdsnew(sub); - selector->allowed_firstargs[id][items-1] = NULL; -} - -/* Create an ACL selector from the given ACL operations, which should be - * a list of space separate ACL operations that starts and ends - * with parentheses. - * - * If any of the operations are invalid, NULL will be returned instead - * and errno will be set corresponding to the interior error. */ -aclSelector *aclCreateSelectorFromOpSet(const char *opset, size_t opsetlen) { - serverAssert(opset[0] == '(' && opset[opsetlen - 1] == ')'); - aclSelector *s = ACLCreateSelector(0); - - int argc = 0; - sds trimmed = sdsnewlen(opset + 1, opsetlen - 2); - sds *argv = sdssplitargs(trimmed, &argc); - for (int i = 0; i < argc; i++) { - if (ACLSetSelector(s, argv[i], sdslen(argv[i])) == C_ERR) { - ACLFreeSelector(s); - s = NULL; - goto cleanup; - } - } - -cleanup: - sdsfreesplitres(argv, argc); - sdsfree(trimmed); - return s; -} - -/* Set a selector's properties with the provided 'op'. - * - * + Allow the execution of that command. - * May be used with `|` for allowing subcommands (e.g "+config|get") - * - Disallow the execution of that command. - * May be used with `|` for blocking subcommands (e.g "-config|set") - * +@ Allow the execution of all the commands in such category - * with valid categories are like @admin, @set, @sortedset, ... - * and so forth, see the full list in the server.c file where - * the Redis command table is described and defined. - * The special category @all means all the commands, but currently - * present in the server, and that will be loaded in the future - * via modules. - * +|first-arg Allow a specific first argument of an otherwise - * disabled command. Note that this form is not - * allowed as negative like -SELECT|1, but - * only additive starting with "+". - * allcommands Alias for +@all. Note that it implies the ability to execute - * all the future commands loaded via the modules system. - * nocommands Alias for -@all. - * ~ Add a pattern of keys that can be mentioned as part of - * commands. For instance ~* allows all the keys. The pattern - * is a glob-style pattern like the one of KEYS. - * It is possible to specify multiple patterns. - * %R~ Add key read pattern that specifies which keys can be read - * from. - * %W~ Add key write pattern that specifies which keys can be - * written to. - * allkeys Alias for ~* - * resetkeys Flush the list of allowed keys patterns. - * & Add a pattern of channels that can be mentioned as part of - * Pub/Sub commands. For instance &* allows all the channels. The - * pattern is a glob-style pattern like the one of PSUBSCRIBE. - * It is possible to specify multiple patterns. - * allchannels Alias for &* - * resetchannels Flush the list of allowed channel patterns. - */ -int ACLSetSelector(aclSelector *selector, const char* op, size_t oplen) { - if (!strcasecmp(op,"allkeys") || - !strcasecmp(op,"~*")) - { - selector->flags |= SELECTOR_FLAG_ALLKEYS; - listEmpty(selector->patterns); - } else if (!strcasecmp(op,"resetkeys")) { - selector->flags &= ~SELECTOR_FLAG_ALLKEYS; - listEmpty(selector->patterns); - } else if (!strcasecmp(op,"allchannels") || - !strcasecmp(op,"&*")) - { - selector->flags |= SELECTOR_FLAG_ALLCHANNELS; - listEmpty(selector->channels); - } else if (!strcasecmp(op,"resetchannels")) { - selector->flags &= ~SELECTOR_FLAG_ALLCHANNELS; - listEmpty(selector->channels); - } else if (!strcasecmp(op,"allcommands") || - !strcasecmp(op,"+@all")) - { - memset(selector->allowed_commands,255,sizeof(selector->allowed_commands)); - selector->flags |= SELECTOR_FLAG_ALLCOMMANDS; - sdsclear(selector->command_rules); - ACLResetFirstArgs(selector); - } else if (!strcasecmp(op,"nocommands") || - !strcasecmp(op,"-@all")) - { - memset(selector->allowed_commands,0,sizeof(selector->allowed_commands)); - selector->flags &= ~SELECTOR_FLAG_ALLCOMMANDS; - sdsclear(selector->command_rules); - ACLResetFirstArgs(selector); - } else if (op[0] == '~' || op[0] == '%') { - if (selector->flags & SELECTOR_FLAG_ALLKEYS) { - errno = EEXIST; - return C_ERR; - } - int flags = 0; - size_t offset = 1; - if (op[0] == '%') { - int perm_ok = 1; - for (; offset < oplen; offset++) { - if (toupper(op[offset]) == 'R' && !(flags & ACL_READ_PERMISSION)) { - flags |= ACL_READ_PERMISSION; - } else if (toupper(op[offset]) == 'W' && !(flags & ACL_WRITE_PERMISSION)) { - flags |= ACL_WRITE_PERMISSION; - } else if (op[offset] == '~') { - offset++; - break; - } else { - perm_ok = 0; - break; - } - } - if (!flags || !perm_ok) { - errno = EINVAL; - return C_ERR; - } - } else { - flags = ACL_ALL_PERMISSION; - } - - if (ACLStringHasSpaces(op+offset,oplen-offset)) { - errno = EINVAL; - return C_ERR; - } - keyPattern *newpat = ACLKeyPatternCreate(sdsnewlen(op+offset,oplen-offset), flags); - listNode *ln = listSearchKey(selector->patterns,newpat); - /* Avoid re-adding the same key pattern multiple times. */ - if (ln == NULL) { - listAddNodeTail(selector->patterns,newpat); - } else { - ((keyPattern *)listNodeValue(ln))->flags |= flags; - ACLKeyPatternFree(newpat); - } - selector->flags &= ~SELECTOR_FLAG_ALLKEYS; - } else if (op[0] == '&') { - if (selector->flags & SELECTOR_FLAG_ALLCHANNELS) { - errno = EISDIR; - return C_ERR; - } - if (ACLStringHasSpaces(op+1,oplen-1)) { - errno = EINVAL; - return C_ERR; - } - sds newpat = sdsnewlen(op+1,oplen-1); - listNode *ln = listSearchKey(selector->channels,newpat); - /* Avoid re-adding the same channel pattern multiple times. */ - if (ln == NULL) - listAddNodeTail(selector->channels,newpat); - else - sdsfree(newpat); - selector->flags &= ~SELECTOR_FLAG_ALLCHANNELS; - } else if (op[0] == '+' && op[1] != '@') { - if (strrchr(op,'|') == NULL) { - struct redisCommand *cmd = ACLLookupCommand(op+1); - if (cmd == NULL) { - errno = ENOENT; - return C_ERR; - } - ACLChangeSelectorPerm(selector,cmd,1); - ACLUpdateCommandRules(selector,cmd->fullname,1); - } else { - /* Split the command and subcommand parts. */ - char *copy = zstrdup(op+1); - char *sub = strrchr(copy,'|'); - sub[0] = '\0'; - sub++; - - struct redisCommand *cmd = ACLLookupCommand(copy); - - /* Check if the command exists. We can't check the - * first-arg to see if it is valid. */ - if (cmd == NULL) { - zfree(copy); - errno = ENOENT; - return C_ERR; - } - - /* We do not support allowing first-arg of a subcommand */ - if (cmd->parent) { - zfree(copy); - errno = ECHILD; - return C_ERR; - } - - /* The subcommand cannot be empty, so things like DEBUG| - * are syntax errors of course. */ - if (strlen(sub) == 0) { - zfree(copy); - errno = EINVAL; - return C_ERR; - } - - if (cmd->subcommands_dict) { - /* If user is trying to allow a valid subcommand we can just add its unique ID */ - cmd = ACLLookupCommand(op+1); - if (cmd == NULL) { - zfree(copy); - errno = ENOENT; - return C_ERR; - } - ACLChangeSelectorPerm(selector,cmd,1); - } else { - /* If user is trying to use the ACL mech to block SELECT except SELECT 0 or - * block DEBUG except DEBUG OBJECT (DEBUG subcommands are not considered - * subcommands for now) we use the allowed_firstargs mechanism. */ - - /* Add the first-arg to the list of valid ones. */ - serverLog(LL_WARNING, "Deprecation warning: Allowing a first arg of an otherwise " - "blocked command is a misuse of ACL and may get disabled " - "in the future (offender: +%s)", redactLogCstr(op+1)); - ACLAddAllowedFirstArg(selector,cmd->id,sub); - } - ACLUpdateCommandRules(selector,op+1,1); - zfree(copy); - } - } else if (op[0] == '-' && op[1] != '@') { - struct redisCommand *cmd = ACLLookupCommand(op+1); - if (cmd == NULL) { - errno = ENOENT; - return C_ERR; - } - ACLChangeSelectorPerm(selector,cmd,0); - ACLUpdateCommandRules(selector,cmd->fullname,0); - } else if ((op[0] == '+' || op[0] == '-') && op[1] == '@') { - int bitval = op[0] == '+' ? 1 : 0; - if (ACLSetSelectorCategory(selector,op+1,bitval) == C_ERR) { - errno = ENOENT; - return C_ERR; - } - } else { - errno = EINVAL; - return C_ERR; - } - return C_OK; -} - -/* Set user properties according to the string "op". The following - * is a description of what different strings will do: - * - * on Enable the user: it is possible to authenticate as this user. - * off Disable the user: it's no longer possible to authenticate - * with this user, however the already authenticated connections - * will still work. - * skip-sanitize-payload RESTORE dump-payload sanitization is skipped. - * sanitize-payload RESTORE dump-payload is sanitized (default). - * > Add this password to the list of valid password for the user. - * For example >mypass will add "mypass" to the list. - * This directive clears the "nopass" flag (see later). - * # Add this password hash to the list of valid hashes for - * the user. This is useful if you have previously computed - * the hash, and don't want to store it in plaintext. - * This directive clears the "nopass" flag (see later). - * < Remove this password from the list of valid passwords. - * ! Remove this hashed password from the list of valid passwords. - * This is useful when you want to remove a password just by - * hash without knowing its plaintext version at all. - * nopass All the set passwords of the user are removed, and the user - * is flagged as requiring no password: it means that every - * password will work against this user. If this directive is - * used for the default user, every new connection will be - * immediately authenticated with the default user without - * any explicit AUTH command required. Note that the "resetpass" - * directive will clear this condition. - * resetpass Flush the list of allowed passwords. Moreover removes the - * "nopass" status. After "resetpass" the user has no associated - * passwords and there is no way to authenticate without adding - * some password (or setting it as "nopass" later). - * reset Performs the following actions: resetpass, resetkeys, resetchannels, - * allchannels (if acl-pubsub-default is set), off, clearselectors, -@all. - * The user returns to the same state it has immediately after its creation. - * () Create a new selector with the options specified within the - * parentheses and attach it to the user. Each option should be - * space separated. The first character must be ( and the last - * character must be ). - * clearselectors Remove all of the currently attached selectors. - * Note this does not change the "root" user permissions, - * which are the permissions directly applied onto the - * user (outside the parentheses). - * - * Selector options can also be specified by this function, in which case - * they update the root selector for the user. - * - * The 'op' string must be null terminated. The 'oplen' argument should - * specify the length of the 'op' string in case the caller requires to pass - * binary data (for instance the >password form may use a binary password). - * Otherwise the field can be set to -1 and the function will use strlen() - * to determine the length. - * - * The function returns C_OK if the action to perform was understood because - * the 'op' string made sense. Otherwise C_ERR is returned if the operation - * is unknown or has some syntax error. - * - * When an error is returned, errno is set to the following values: - * - * EINVAL: The specified opcode is not understood or the key/channel pattern is - * invalid (contains non allowed characters). - * ENOENT: The command name or command category provided with + or - is not - * known. - * EEXIST: You are adding a key pattern after "*" was already added. This is - * almost surely an error on the user side. - * EISDIR: You are adding a channel pattern after "*" was already added. This is - * almost surely an error on the user side. - * ENODEV: The password you are trying to remove from the user does not exist. - * EBADMSG: The hash you are trying to add is not a valid hash. - * ECHILD: Attempt to allow a specific first argument of a subcommand - */ -int ACLSetUser(user *u, const char *op, ssize_t oplen) { - /* as we are changing the ACL, the old generated string is now invalid */ - if (u->acl_string) { - decrRefCount(u->acl_string); - u->acl_string = NULL; - } - - if (oplen == -1) oplen = strlen(op); - if (oplen == 0) return C_OK; /* Empty string is a no-operation. */ - if (!strcasecmp(op,"on")) { - atomicSet(u->flags, (u->flags | USER_FLAG_ENABLED) & ~USER_FLAG_DISABLED); - } else if (!strcasecmp(op,"off")) { - atomicSet(u->flags, (u->flags | USER_FLAG_DISABLED) & ~USER_FLAG_ENABLED); - } else if (!strcasecmp(op,"skip-sanitize-payload")) { - atomicSet(u->flags, (u->flags | USER_FLAG_SANITIZE_PAYLOAD_SKIP) & ~USER_FLAG_SANITIZE_PAYLOAD); - } else if (!strcasecmp(op,"sanitize-payload")) { - atomicSet(u->flags, (u->flags | USER_FLAG_SANITIZE_PAYLOAD) & ~USER_FLAG_SANITIZE_PAYLOAD_SKIP); - } else if (!strcasecmp(op,"nopass")) { - atomicSet(u->flags, u->flags | USER_FLAG_NOPASS); - listEmpty(u->passwords); - } else if (!strcasecmp(op,"resetpass")) { - atomicSet(u->flags, u->flags & ~USER_FLAG_NOPASS); - listEmpty(u->passwords); - } else if (op[0] == '>' || op[0] == '#') { - sds newpass; - if (op[0] == '>') { - newpass = ACLHashPassword((unsigned char*)op+1,oplen-1); - } else { - if (ACLCheckPasswordHash((unsigned char*)op+1,oplen-1) == C_ERR) { - errno = EBADMSG; - return C_ERR; - } - newpass = sdsnewlen(op+1,oplen-1); - } - - listNode *ln = listSearchKey(u->passwords,newpass); - /* Avoid re-adding the same password multiple times. */ - if (ln == NULL) - listAddNodeTail(u->passwords,newpass); - else - sdsfree(newpass); - atomicSet(u->flags, u->flags & ~USER_FLAG_NOPASS); - } else if (op[0] == '<' || op[0] == '!') { - sds delpass; - if (op[0] == '<') { - delpass = ACLHashPassword((unsigned char*)op+1,oplen-1); - } else { - if (ACLCheckPasswordHash((unsigned char*)op+1,oplen-1) == C_ERR) { - errno = EBADMSG; - return C_ERR; - } - delpass = sdsnewlen(op+1,oplen-1); - } - listNode *ln = listSearchKey(u->passwords,delpass); - sdsfree(delpass); - if (ln) { - listDelNode(u->passwords,ln); - } else { - errno = ENODEV; - return C_ERR; - } - } else if (op[0] == '(' && op[oplen - 1] == ')') { - aclSelector *selector = aclCreateSelectorFromOpSet(op, oplen); - if (!selector) { - /* No errorno set, propagate it from interior error. */ - return C_ERR; - } - listAddNodeTail(u->selectors, selector); - return C_OK; - } else if (!strcasecmp(op,"clearselectors")) { - listIter li; - listNode *ln; - listRewind(u->selectors,&li); - /* There has to be a root selector */ - serverAssert(listNext(&li)); - while((ln = listNext(&li))) { - listDelNode(u->selectors, ln); - } - return C_OK; - } else if (!strcasecmp(op,"reset")) { - serverAssert(ACLSetUser(u,"resetpass",-1) == C_OK); - serverAssert(ACLSetUser(u,"resetkeys",-1) == C_OK); - serverAssert(ACLSetUser(u,"resetchannels",-1) == C_OK); - if (server.acl_pubsub_default & SELECTOR_FLAG_ALLCHANNELS) - serverAssert(ACLSetUser(u,"allchannels",-1) == C_OK); - serverAssert(ACLSetUser(u,"off",-1) == C_OK); - serverAssert(ACLSetUser(u,"sanitize-payload",-1) == C_OK); - serverAssert(ACLSetUser(u,"clearselectors",-1) == C_OK); - serverAssert(ACLSetUser(u,"-@all",-1) == C_OK); - } else { - aclSelector *selector = ACLUserGetRootSelector(u); - if (ACLSetSelector(selector, op, oplen) == C_ERR) { - return C_ERR; - } - } - return C_OK; -} - -/* Return a description of the error that occurred in ACLSetUser() according to - * the errno value set by the function on error. */ -const char *ACLSetUserStringError(void) { - const char *errmsg = "Wrong format"; - if (errno == ENOENT) - errmsg = "Unknown command or category name in ACL"; - else if (errno == EINVAL) - errmsg = "Syntax error"; - else if (errno == EEXIST) - errmsg = "Adding a pattern after the * pattern (or the " - "'allkeys' flag) is not valid and does not have any " - "effect. Try 'resetkeys' to start with an empty " - "list of patterns"; - else if (errno == EISDIR) - errmsg = "Adding a pattern after the * pattern (or the " - "'allchannels' flag) is not valid and does not have any " - "effect. Try 'resetchannels' to start with an empty " - "list of channels"; - else if (errno == ENODEV) - errmsg = "The password you are trying to remove from the user does " - "not exist"; - else if (errno == EBADMSG) - errmsg = "The password hash must be exactly 64 characters and contain " - "only lowercase hexadecimal characters"; - else if (errno == EALREADY) - errmsg = "Duplicate user found. A user can only be defined once in " - "config files"; - else if (errno == ECHILD) - errmsg = "Allowing first-arg of a subcommand is not supported"; - return errmsg; -} - -/* Create the default user, this has special permissions. */ -user *ACLCreateDefaultUser(void) { - user *new = ACLCreateUser("default",7); - ACLSetUser(new,"+@all",-1); - ACLSetUser(new,"~*",-1); - ACLSetUser(new,"&*",-1); - ACLSetUser(new,"on",-1); - ACLSetUser(new,"nopass",-1); - return new; -} - -/* Initialization of the ACL subsystem. */ -void ACLInit(void) { - Users = raxNew(); - UsersToLoad = listCreate(); - ACLInitCommandCategories(); - listSetMatchMethod(UsersToLoad, ACLListMatchLoadedUser); - ACLLog = listCreate(); - DefaultUser = ACLCreateDefaultUser(); -} - -/* Check the username and password pair and return C_OK if they are valid, - * otherwise C_ERR is returned and errno is set to: - * - * EINVAL: if the username-password do not match. - * ENOENT: if the specified user does not exist at all. - */ -int ACLCheckUserCredentials(robj *username, robj *password) { - user *u = ACLGetUserByName(username->ptr,sdslen(username->ptr)); - if (u == NULL) { - errno = ENOENT; - return C_ERR; - } - - /* Disabled users can't login. */ - if (u->flags & USER_FLAG_DISABLED) { - errno = EINVAL; - return C_ERR; - } - - /* If the user is configured to don't require any password, we - * are already fine here. */ - if (u->flags & USER_FLAG_NOPASS) return C_OK; - - /* Check all the user passwords for at least one to match. */ - listIter li; - listNode *ln; - listRewind(u->passwords,&li); - sds hashed = ACLHashPassword(password->ptr,sdslen(password->ptr)); - while((ln = listNext(&li))) { - sds thispass = listNodeValue(ln); - if (!time_independent_strcmp(hashed, thispass, HASH_PASSWORD_LEN)) { - sdsfree(hashed); - return C_OK; - } - } - sdsfree(hashed); - - /* If we reached this point, no password matched. */ - errno = EINVAL; - return C_ERR; -} - -/* If `err` is provided, this is added as an error reply to the client. - * Otherwise, the standard Auth error is added as a reply. */ -void addAuthErrReply(client *c, robj *err) { - if (clientHasPendingReplies(c)) return; - if (!err) { - addReplyError(c, "-WRONGPASS invalid username-password pair or user is disabled."); - return; - } - addReplyError(c, err->ptr); -} - -/* This is like ACLCheckUserCredentials(), however if the user/pass - * are correct, the connection is put in authenticated state and the - * connection user reference is populated. - * - * The return value is AUTH_OK on success (valid username / password pair) & AUTH_ERR otherwise. */ -int checkPasswordBasedAuth(client *c, robj *username, robj *password) { - if (ACLCheckUserCredentials(username,password) == C_OK) { - c->authenticated = 1; - c->user = ACLGetUserByName(username->ptr,sdslen(username->ptr)); - moduleNotifyUserChanged(c); - return AUTH_OK; - } else { - addACLLogEntry(c,ACL_DENIED_AUTH,(c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL,0,username->ptr,NULL); - return AUTH_ERR; - } -} - -/* Attempt authenticating the user - first through module based authentication, - * and then, if needed, with normal password based authentication. - * Returns one of the following codes: - * AUTH_OK - Indicates that authentication succeeded. - * AUTH_ERR - Indicates that authentication failed. - * AUTH_BLOCKED - Indicates module authentication is in progress through a blocking implementation. - */ -int ACLAuthenticateUser(client *c, robj *username, robj *password, robj **err) { - int result = checkModuleAuthentication(c, username, password, err); - /* If authentication was not handled by any Module, attempt normal password based auth. */ - if (result == AUTH_NOT_HANDLED) { - result = checkPasswordBasedAuth(c, username, password); - } - return result; -} - -/* For ACL purposes, every user has a bitmap with the commands that such - * user is allowed to execute. In order to populate the bitmap, every command - * should have an assigned ID (that is used to index the bitmap). This function - * creates such an ID: it uses sequential IDs, reusing the same ID for the same - * command name, so that a command retains the same ID in case of modules that - * are unloaded and later reloaded. - * - * The function does not take ownership of the 'cmdname' SDS string. - * */ -unsigned long ACLGetCommandID(sds cmdname) { - sds lowername = sdsdup(cmdname); - sdstolower(lowername); - if (commandId == NULL) commandId = raxNew(); - void *id; - if (raxFind(commandId,(unsigned char*)lowername,sdslen(lowername),&id)) { - sdsfree(lowername); - return (unsigned long)id; - } - raxInsert(commandId,(unsigned char*)lowername,strlen(lowername), - (void*)nextid,NULL); - sdsfree(lowername); - unsigned long thisid = nextid; - nextid++; - - /* We never assign the last bit in the user commands bitmap structure, - * this way we can later check if this bit is set, understanding if the - * current ACL for the user was created starting with a +@all to add all - * the possible commands and just subtracting other single commands or - * categories, or if, instead, the ACL was created just adding commands - * and command categories from scratch, not allowing future commands by - * default (loaded via modules). This is useful when rewriting the ACLs - * with ACL SAVE. */ - if (nextid == USER_COMMAND_BITS_COUNT-1) nextid++; - return thisid; -} - -/* Clear command id table and reset nextid to 0. */ -void ACLClearCommandID(void) { - if (commandId) raxFree(commandId); - commandId = NULL; - nextid = 0; -} - -/* Return an username by its name, or NULL if the user does not exist. */ -user *ACLGetUserByName(const char *name, size_t namelen) { - void *myuser = NULL; - raxFind(Users,(unsigned char*)name,namelen,&myuser); - return myuser; -} - -/* ============================================================================= - * ACL permission checks - * ==========================================================================*/ - -/* Check if the key can be accessed by the selector. - * - * If the selector can access the key, ACL_OK is returned, otherwise - * ACL_DENIED_KEY is returned. */ -static int ACLSelectorCheckKey(aclSelector *selector, const char *key, int keylen, int keyspec_flags) { - /* The selector can access any key */ - if (selector->flags & SELECTOR_FLAG_ALLKEYS) return ACL_OK; - - listIter li; - listNode *ln; - listRewind(selector->patterns,&li); - - int key_flags = 0; - if (keyspec_flags & CMD_KEY_ACCESS) key_flags |= ACL_READ_PERMISSION; - if (keyspec_flags & CMD_KEY_INSERT) key_flags |= ACL_WRITE_PERMISSION; - if (keyspec_flags & CMD_KEY_DELETE) key_flags |= ACL_WRITE_PERMISSION; - if (keyspec_flags & CMD_KEY_UPDATE) key_flags |= ACL_WRITE_PERMISSION; - - /* Is given key represent a prefix of a set of keys */ - int prefix = keyspec_flags & CMD_KEY_PREFIX; - - /* Test this key against every pattern. */ - while((ln = listNext(&li))) { - keyPattern *pattern = listNodeValue(ln); - if ((pattern->flags & key_flags) != key_flags) - continue; - size_t plen = sdslen(pattern->pattern); - if (prefix) { - if (prefixmatch(pattern->pattern,plen,key,keylen,0)) - return ACL_OK; - } else { - if (stringmatchlen(pattern->pattern, plen, key, keylen, 0)) - return ACL_OK; - } - } - return ACL_DENIED_KEY; -} - -/* Checks if the provided selector selector has access specified in flags - * to all keys in the keyspace. For example, CMD_KEY_READ access requires either - * '%R~*', '~*', or allkeys to be granted to the selector. Returns 1 if all - * the access flags are satisfied with this selector or 0 otherwise. - */ -static int ACLSelectorHasUnrestrictedKeyAccess(aclSelector *selector, int flags) { - /* The selector can access any key */ - if (selector->flags & SELECTOR_FLAG_ALLKEYS) return 1; - - listIter li; - listNode *ln; - listRewind(selector->patterns,&li); - - int access_flags = 0; - if (flags & CMD_KEY_ACCESS) access_flags |= ACL_READ_PERMISSION; - if (flags & CMD_KEY_INSERT) access_flags |= ACL_WRITE_PERMISSION; - if (flags & CMD_KEY_DELETE) access_flags |= ACL_WRITE_PERMISSION; - if (flags & CMD_KEY_UPDATE) access_flags |= ACL_WRITE_PERMISSION; - - /* Test this key against every pattern. */ - while((ln = listNext(&li))) { - keyPattern *pattern = listNodeValue(ln); - if ((pattern->flags & access_flags) != access_flags) - continue; - if (!strcmp(pattern->pattern,"*")) { - return 1; - } - } - return 0; -} - -/* Checks a channel against a provided list of channels. The is_pattern - * argument should only be used when subscribing (not when publishing) - * and controls whether the input channel is evaluated as a channel pattern - * (like in PSUBSCRIBE) or a plain channel name (like in SUBSCRIBE). - * - * Note that a plain channel name like in PUBLISH or SUBSCRIBE can be - * matched against ACL channel patterns, but the pattern provided in PSUBSCRIBE - * can only be matched as a literal against an ACL pattern (using plain string compare). */ -static int ACLCheckChannelAgainstList(list *reference, const char *channel, int channellen, int is_pattern) { - listIter li; - listNode *ln; - - listRewind(reference, &li); - while((ln = listNext(&li))) { - sds pattern = listNodeValue(ln); - size_t plen = sdslen(pattern); - /* Channel patterns are matched literally against the channels in - * the list. Regular channels perform pattern matching. */ - if ((is_pattern && !strcmp(pattern,channel)) || - (!is_pattern && stringmatchlen(pattern,plen,channel,channellen,0))) - { - return ACL_OK; - } - } - return ACL_DENIED_CHANNEL; -} - -/* To prevent duplicate calls to getKeysResult, a cache is maintained - * in between calls to the various selectors. */ -typedef struct { - int keys_init; - getKeysResult keys; -} aclKeyResultCache; - -void initACLKeyResultCache(aclKeyResultCache *cache) { - cache->keys_init = 0; -} - -void cleanupACLKeyResultCache(aclKeyResultCache *cache) { - if (cache->keys_init) getKeysFreeResult(&(cache->keys)); -} - -/* Check if the command is ready to be executed according to the - * ACLs associated with the specified selector. - * - * If the selector can execute the command ACL_OK is returned, otherwise - * ACL_DENIED_CMD, ACL_DENIED_KEY, or ACL_DENIED_CHANNEL is returned: the first in case the - * command cannot be executed because the selector is not allowed to run such - * command, the second and third if the command is denied because the selector is trying - * to access a key or channel that are not among the specified patterns. */ -static int ACLSelectorCheckCmd(aclSelector *selector, struct redisCommand *cmd, robj **argv, int argc, int *keyidxptr, aclKeyResultCache *cache) { - uint64_t id = cmd->id; - int ret; - if (!(selector->flags & SELECTOR_FLAG_ALLCOMMANDS) && !(cmd->flags & CMD_NO_AUTH)) { - /* If the bit is not set we have to check further, in case the - * command is allowed just with that specific first argument. */ - if (ACLGetSelectorCommandBit(selector,id) == 0) { - /* Check if the first argument matches. */ - if (argc < 2 || - selector->allowed_firstargs == NULL || - selector->allowed_firstargs[id] == NULL) - { - return ACL_DENIED_CMD; - } - - long subid = 0; - while (1) { - if (selector->allowed_firstargs[id][subid] == NULL) - return ACL_DENIED_CMD; - int idx = cmd->parent ? 2 : 1; - if (!strcasecmp(argv[idx]->ptr,selector->allowed_firstargs[id][subid])) - break; /* First argument match found. Stop here. */ - subid++; - } - } - } - - /* Check if the user can execute commands explicitly touching the keys - * mentioned in the command arguments. */ - if (!(selector->flags & SELECTOR_FLAG_ALLKEYS) && doesCommandHaveKeys(cmd)) { - if (!(cache->keys_init)) { - cache->keys = (getKeysResult) GETKEYS_RESULT_INIT; - getKeysFromCommandWithSpecs(cmd, argv, argc, GET_KEYSPEC_DEFAULT, &(cache->keys)); - cache->keys_init = 1; - } - getKeysResult *result = &(cache->keys); - keyReference *resultidx = result->keys; - for (int j = 0; j < result->numkeys; j++) { - int idx = resultidx[j].pos; - ret = ACLSelectorCheckKey(selector, argv[idx]->ptr, sdslen(argv[idx]->ptr), resultidx[j].flags); - if (ret != ACL_OK) { - if (keyidxptr) *keyidxptr = resultidx[j].pos; - return ret; - } - } - } - - /* Check if the user can execute commands explicitly touching the channels - * mentioned in the command arguments */ - const int channel_flags = CMD_CHANNEL_PUBLISH | CMD_CHANNEL_SUBSCRIBE; - if (!(selector->flags & SELECTOR_FLAG_ALLCHANNELS) && doesCommandHaveChannelsWithFlags(cmd, channel_flags)) { - getKeysResult channels = (getKeysResult) GETKEYS_RESULT_INIT; - getChannelsFromCommand(cmd, argv, argc, &channels); - keyReference *channelref = channels.keys; - for (int j = 0; j < channels.numkeys; j++) { - int idx = channelref[j].pos; - if (!(channelref[j].flags & channel_flags)) continue; - int is_pattern = channelref[j].flags & CMD_CHANNEL_PATTERN; - int ret = ACLCheckChannelAgainstList(selector->channels, argv[idx]->ptr, sdslen(argv[idx]->ptr), is_pattern); - if (ret != ACL_OK) { - if (keyidxptr) *keyidxptr = channelref[j].pos; - getKeysFreeResult(&channels); - return ret; - } - } - getKeysFreeResult(&channels); - } - return ACL_OK; -} - -/* Check if the key can be accessed by the client according to - * the ACLs associated with the specified user according to the - * keyspec access flags. - * - * If the user can access the key, ACL_OK is returned, otherwise - * ACL_DENIED_KEY is returned. */ -int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags) { - listIter li; - listNode *ln; - - /* If there is no associated user, the connection can run anything. */ - if (u == NULL) return ACL_OK; - - /* Check all of the selectors */ - listRewind(u->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - if (ACLSelectorCheckKey(s, key, keylen, flags) == ACL_OK) { - return ACL_OK; - } - } - return ACL_DENIED_KEY; -} - -/* Checks if the user can execute the given command with the added restriction - * it must also have the access specified in flags to any key in the key space. - * For example, CMD_KEY_READ access requires either '%R~*', '~*', or allkeys to be - * granted in addition to the access required by the command. Returns 1 - * if the user has access or 0 otherwise. - */ -int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags) { - listIter li; - listNode *ln; - int local_idxptr; - - /* If there is no associated user, the connection can run anything. */ - if (u == NULL) return 1; - - /* For multiple selectors, we cache the key result in between selector - * calls to prevent duplicate lookups. */ - aclKeyResultCache cache; - initACLKeyResultCache(&cache); - - /* Check each selector sequentially */ - listRewind(u->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache); - if (acl_retval == ACL_OK && ACLSelectorHasUnrestrictedKeyAccess(s, flags)) { - cleanupACLKeyResultCache(&cache); - return 1; - } - } - cleanupACLKeyResultCache(&cache); - return 0; -} - -/* Check if the channel can be accessed by the client according to - * the ACLs associated with the specified user. - * - * If the user can access the key, ACL_OK is returned, otherwise - * ACL_DENIED_CHANNEL is returned. */ -int ACLUserCheckChannelPerm(user *u, sds channel, int is_pattern) { - listIter li; - listNode *ln; - - /* If there is no associated user, the connection can run anything. */ - if (u == NULL) return ACL_OK; - - /* Check all of the selectors */ - listRewind(u->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - /* The selector can run any keys */ - if (s->flags & SELECTOR_FLAG_ALLCHANNELS) return ACL_OK; - - /* Otherwise, loop over the selectors list and check each channel */ - if (ACLCheckChannelAgainstList(s->channels, channel, sdslen(channel), is_pattern) == ACL_OK) { - return ACL_OK; - } - } - return ACL_DENIED_CHANNEL; -} - -/* Lower level API that checks if a specified user is able to execute a given command. - * - * If the command fails an ACL check, idxptr will be to set to the first argv entry that - * causes the failure, either 0 if the command itself fails or the idx of the key/channel - * that causes the failure */ -int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int *idxptr) { - listIter li; - listNode *ln; - - /* If there is no associated user, the connection can run anything. */ - if (u == NULL) return ACL_OK; - - /* Quick check if the user has all permissions, return early if so. */ - if (likely(listFirst(u->selectors) != NULL)) { - aclSelector *s = listNodeValue(listFirst(u->selectors)); - const uint32_t all_perms = SELECTOR_FLAG_ALLCOMMANDS | - SELECTOR_FLAG_ALLKEYS | - SELECTOR_FLAG_ALLCHANNELS; - if ((s->flags & all_perms) == all_perms) return ACL_OK; - } - - /* We have to pick a single error to log, the logic for picking is as follows: - * 1) If no selector can execute the command, return the command. - * 2) Return the last key or channel that no selector could match. */ - int relevant_error = ACL_DENIED_CMD; - int local_idxptr = 0, last_idx = 0; - - /* For multiple selectors, we cache the key result in between selector - * calls to prevent duplicate lookups. */ - aclKeyResultCache cache; - initACLKeyResultCache(&cache); - if (key_result) { - cache.keys = *key_result; - cache.keys_init = 1; - } - - /* Check each selector sequentially */ - listRewind(u->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache); - if (acl_retval == ACL_OK) { - if (!key_result) cleanupACLKeyResultCache(&cache); - return ACL_OK; - } - if (acl_retval > relevant_error || - (acl_retval == relevant_error && local_idxptr > last_idx)) - { - relevant_error = acl_retval; - last_idx = local_idxptr; - } - } - - *idxptr = last_idx; - if (!key_result) cleanupACLKeyResultCache(&cache); - return relevant_error; -} - -/* High level API for checking if a client can execute the queued up command */ -int ACLCheckAllPerm(client *c, int *idxptr) { - return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, getClientCachedKeyResult(c), idxptr); -} - -/* If 'new' can access all channels 'original' could then return NULL; - Otherwise return a list of channels that the new user can access */ -list *getUpcomingChannelList(user *new, user *original) { - listIter li, lpi; - listNode *ln, *lpn; - - /* Optimization: we check if any selector has all channel permissions. */ - listRewind(new->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - if (s->flags & SELECTOR_FLAG_ALLCHANNELS) return NULL; - } - - /* Next, check if the new list of channels - * is a strict superset of the original. This is done by - * created an "upcoming" list of all channels that are in - * the new user and checking each of the existing channels - * against it. */ - list *upcoming = listCreate(); - listRewind(new->selectors,&li); - while((ln = listNext(&li))) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - listRewind(s->channels, &lpi); - while((lpn = listNext(&lpi))) { - listAddNodeTail(upcoming, listNodeValue(lpn)); - } - } - - int match = 1; - listRewind(original->selectors,&li); - while((ln = listNext(&li)) && match) { - aclSelector *s = (aclSelector *) listNodeValue(ln); - /* If any of the original selectors has the all-channels permission, but - * the new ones don't (this is checked earlier in this function), then the - * new list is not a strict superset of the original. */ - if (s->flags & SELECTOR_FLAG_ALLCHANNELS) { - match = 0; - break; - } - listRewind(s->channels, &lpi); - while((lpn = listNext(&lpi)) && match) { - if (!listSearchKey(upcoming, listNodeValue(lpn))) { - match = 0; - break; - } - } - } - - if (match) { - /* All channels were matched, no need to kill clients. */ - listRelease(upcoming); - return NULL; - } - - return upcoming; -} - -/* Check if the client should be killed because it is subscribed to channels that were - * permitted in the past, are not in the `upcoming` channel list. */ -int ACLShouldKillPubsubClient(client *c, list *upcoming) { - robj *o; - int kill = 0; - - if (getClientType(c) == CLIENT_TYPE_PUBSUB) { - /* Check for pattern violations. */ - dictIterator di; - dictEntry *de; - dictInitIterator(&di, c->pubsub_patterns); - while (!kill && ((de = dictNext(&di)) != NULL)) { - o = dictGetKey(de); - int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 1); - kill = (res == ACL_DENIED_CHANNEL); - } - dictResetIterator(&di); - - /* Check for channel violations. */ - if (!kill) { - /* Check for global channels violation. */ - dictInitIterator(&di, c->pubsub_channels); - - while (!kill && ((de = dictNext(&di)) != NULL)) { - o = dictGetKey(de); - int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0); - kill = (res == ACL_DENIED_CHANNEL); - } - dictResetIterator(&di); - } - if (!kill) { - /* Check for shard channels violation. */ - dictInitIterator(&di, c->pubsubshard_channels); - while (!kill && ((de = dictNext(&di)) != NULL)) { - o = dictGetKey(de); - int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0); - kill = (res == ACL_DENIED_CHANNEL); - } - dictResetIterator(&di); - } - - if (kill) { - return 1; - } - } - return 0; -} - -/* Check if the user's existing pub/sub clients violate the ACL pub/sub - * permissions specified via the upcoming argument, and kill them if so. */ -void ACLKillPubsubClientsIfNeeded(user *new, user *original) { - /* Do nothing if there are no subscribers. */ - if (pubsubTotalSubscriptions() == 0) - return; - - list *channels = getUpcomingChannelList(new, original); - /* If the new user's pubsub permissions are a strict superset of the original, return early. */ - if (!channels) - return; - - listIter li; - listNode *ln; - - /* Permissions have changed, so we need to iterate through all - * the clients and disconnect those that are no longer valid. - * Scan all connected clients to find the user's pub/subs. */ - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - client *c = listNodeValue(ln); - if (c->user != original) - continue; - if (ACLShouldKillPubsubClient(c, channels)) - deauthenticateAndCloseClient(c); - } - - listRelease(channels); -} - -/* ============================================================================= - * ACL loading / saving functions - * ==========================================================================*/ - - -/* Selector definitions should be sent as a single argument, however - * we will be lenient and try to find selector definitions spread - * across multiple arguments since it makes for a simpler user experience - * for ACL SETUSER as well as when loading from conf files. - * - * This function takes in an array of ACL operators, excluding the username, - * and merges selector operations that are spread across multiple arguments. The return - * value is a new SDS array, with length set to the passed in merged_argc. Arguments - * that are untouched are still duplicated. If there is an unmatched parenthesis, NULL - * is returned and invalid_idx is set to the argument with the start of the opening - * parenthesis. */ -sds *ACLMergeSelectorArguments(sds *argv, int argc, int *merged_argc, int *invalid_idx) { - *merged_argc = 0; - int open_bracket_start = -1; - - sds *acl_args = (sds *) zmalloc(sizeof(sds) * argc); - - sds selector = NULL; - for (int j = 0; j < argc; j++) { - char *op = argv[j]; - - if (open_bracket_start == -1 && - (op[0] == '(' && op[sdslen(op) - 1] != ')')) { - selector = sdsdup(argv[j]); - open_bracket_start = j; - continue; - } - - if (open_bracket_start != -1) { - selector = sdscatfmt(selector, " %s", op); - if (op[sdslen(op) - 1] == ')') { - open_bracket_start = -1; - acl_args[*merged_argc] = selector; - (*merged_argc)++; - } - continue; - } - - acl_args[*merged_argc] = sdsdup(argv[j]); - (*merged_argc)++; - } - - if (open_bracket_start != -1) { - for (int i = 0; i < *merged_argc; i++) sdsfree(acl_args[i]); - zfree(acl_args); - sdsfree(selector); - if (invalid_idx) *invalid_idx = open_bracket_start; - return NULL; - } - - return acl_args; -} - -/* takes an acl string already split on spaces and adds it to the given user - * if the user object is NULL, will create a user with the given username - * - * Returns an error as an sds string if the ACL string is not parsable - */ -sds ACLStringSetUser(user *u, sds username, sds *argv, int argc) { - serverAssert(u != NULL || username != NULL); - - sds error = NULL; - - int merged_argc = 0, invalid_idx = 0; - sds *acl_args = ACLMergeSelectorArguments(argv, argc, &merged_argc, &invalid_idx); - - if (!acl_args) { - return sdscatfmt(sdsempty(), - "Unmatched parenthesis in acl selector starting " - "at '%s'.", (char *) argv[invalid_idx]); - } - - /* Create a temporary user to validate and stage all changes against - * before applying to an existing user or creating a new user. If all - * arguments are valid the user parameters will all be applied together. - * If there are any errors then none of the changes will be applied. */ - user *tempu = ACLCreateUnlinkedUser(); - if (u) { - ACLCopyUser(tempu, u); - } - - for (int j = 0; j < merged_argc; j++) { - if (ACLSetUser(tempu,acl_args[j],(ssize_t) sdslen(acl_args[j])) != C_OK) { - const char *errmsg = ACLSetUserStringError(); - error = sdscatfmt(sdsempty(), - "Error in ACL SETUSER modifier '%s': %s", - (char*)acl_args[j], errmsg); - goto cleanup; - } - } - - /* Existing pub/sub clients authenticated with the user may need to be - * disconnected if (some of) their channel permissions were revoked. */ - if (u) { - ACLKillPubsubClientsIfNeeded(tempu, u); - } - - /* Overwrite the user with the temporary user we modified above. */ - if (!u) { - u = ACLCreateUser(username,sdslen(username)); - } - serverAssert(u != NULL); - - ACLCopyUser(u, tempu); - -cleanup: - ACLFreeUser(tempu); - for (int i = 0; i < merged_argc; i++) { - sdsfree(acl_args[i]); - } - zfree(acl_args); - - return error; -} - -/* Given an argument vector describing a user in the form: - * - * user ... ACL rules and flags ... - * - * this function validates, and if the syntax is valid, appends - * the user definition to a list for later loading. - * - * The rules are tested for validity and if there obvious syntax errors - * the function returns C_ERR and does nothing, otherwise C_OK is returned - * and the user is appended to the list. - * - * Note that this function cannot stop in case of commands that are not found - * and, in that case, the error will be emitted later, because certain - * commands may be defined later once modules are loaded. - * - * When an error is detected and C_ERR is returned, the function populates - * by reference (if not set to NULL) the argc_err argument with the index - * of the argv vector that caused the error. */ -int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err) { - if (argc < 2 || strcasecmp(argv[0],"user")) { - if (argc_err) *argc_err = 0; - return C_ERR; - } - - if (listSearchKey(UsersToLoad, argv[1])) { - if (argc_err) *argc_err = 1; - errno = EALREADY; - return C_ERR; - } - - /* Merged selectors before trying to process */ - int merged_argc; - sds *acl_args = ACLMergeSelectorArguments(argv + 2, argc - 2, &merged_argc, argc_err); - - if (!acl_args) { - return C_ERR; - } - - /* Try to apply the user rules in a fake user to see if they - * are actually valid. */ - user *fakeuser = ACLCreateUnlinkedUser(); - - for (int j = 0; j < merged_argc; j++) { - if (ACLSetUser(fakeuser,acl_args[j],sdslen(acl_args[j])) == C_ERR) { - if (errno != ENOENT) { - ACLFreeUser(fakeuser); - if (argc_err) *argc_err = j; - for (int i = 0; i < merged_argc; i++) sdsfree(acl_args[i]); - zfree(acl_args); - return C_ERR; - } - } - } - - /* Rules look valid, let's append the user to the list. */ - sds *copy = zmalloc(sizeof(sds)*(merged_argc + 2)); - copy[0] = sdsdup(argv[1]); - for (int j = 0; j < merged_argc; j++) copy[j+1] = sdsdup(acl_args[j]); - copy[merged_argc + 1] = NULL; - listAddNodeTail(UsersToLoad,copy); - ACLFreeUser(fakeuser); - for (int i = 0; i < merged_argc; i++) sdsfree(acl_args[i]); - zfree(acl_args); - return C_OK; -} - -/* This function will load the configured users appended to the server - * configuration via ACLAppendUserForLoading(). On loading errors it will - * log an error and return C_ERR, otherwise C_OK will be returned. */ -int ACLLoadConfiguredUsers(void) { - listIter li; - listNode *ln; - listRewind(UsersToLoad,&li); - while ((ln = listNext(&li)) != NULL) { - sds *aclrules = listNodeValue(ln); - sds username = aclrules[0]; - - if (ACLStringHasSpaces(aclrules[0],sdslen(aclrules[0]))) { - serverLog(LL_WARNING,"Spaces not allowed in ACL usernames"); - return C_ERR; - } - - user *u = ACLCreateUser(username,sdslen(username)); - if (!u) { - /* Only valid duplicate user is the default one. */ - serverAssert(!strcmp(username, "default")); - u = ACLGetUserByName("default",7); - ACLSetUser(u,"reset",-1); - } - - /* Load every rule defined for this user. */ - for (int j = 1; aclrules[j]; j++) { - if (ACLSetUser(u,aclrules[j],sdslen(aclrules[j])) != C_OK) { - const char *errmsg = ACLSetUserStringError(); - serverLog(LL_WARNING,"Error loading ACL rule '%s' for " - "the user named '%s': %s", - redactLogCstr(aclrules[j]),redactLogCstr(aclrules[0]),errmsg); - return C_ERR; - } - } - - /* Having a disabled user in the configuration may be an error, - * warn about it without returning any error to the caller. */ - if (u->flags & USER_FLAG_DISABLED) { - serverLog(LL_NOTICE, "The user '%s' is disabled (there is no " - "'on' modifier in the user description). Make " - "sure this is not a configuration error.", - redactLogCstr(aclrules[0])); - } - } - return C_OK; -} - -/* This function loads the ACL from the specified filename: every line - * is validated and should be either empty or in the format used to specify - * users in the redis.conf configuration or in the ACL file, that is: - * - * user ... rules ... - * - * Note that this function considers comments starting with '#' as errors - * because the ACL file is meant to be rewritten, and comments would be - * lost after the rewrite. Yet empty lines are allowed to avoid being too - * strict. - * - * One important part of implementing ACL LOAD, that uses this function, is - * to avoid ending with broken rules if the ACL file is invalid for some - * reason, so the function will attempt to validate the rules before loading - * each user. For every line that will be found broken the function will - * collect an error message. - * - * IMPORTANT: If there is at least a single error, nothing will be loaded - * and the rules will remain exactly as they were. - * - * At the end of the process, if no errors were found in the whole file then - * NULL is returned. Otherwise an SDS string describing in a single line - * a description of all the issues found is returned. */ -sds ACLLoadFromFile(const char *filename) { - FILE *fp; - char buf[1024]; - - /* Open the ACL file. */ - if ((fp = fopen(filename,"r")) == NULL) { - sds errors = sdscatprintf(sdsempty(), - "Error loading ACLs, opening file '%s': %s", - filename, strerror(errno)); - return errors; - } - - /* Load the whole file as a single string in memory. */ - sds acls = sdsempty(); - while(fgets(buf,sizeof(buf),fp) != NULL) - acls = sdscat(acls,buf); - fclose(fp); - - /* Split the file into lines and attempt to load each line. */ - int totlines; - sds *lines, errors = sdsempty(); - lines = sdssplitlen(acls,strlen(acls),"\n",1,&totlines); - sdsfree(acls); - - /* We do all the loading in a fresh instance of the Users radix tree, - * so if there are errors loading the ACL file we can rollback to the - * old version. */ - rax *old_users = Users; - Users = raxNew(); - - /* Load each line of the file. */ - for (int i = 0; i < totlines; i++) { - sds *argv; - int argc; - int linenum = i+1; - - lines[i] = sdstrim(lines[i]," \t\r\n"); - - /* Skip blank lines */ - if (lines[i][0] == '\0') continue; - - /* Split into arguments */ - argv = sdssplitlen(lines[i],sdslen(lines[i])," ",1,&argc); - if (argv == NULL) { - errors = sdscatprintf(errors, - "%s:%d: unbalanced quotes in acl line. ", - server.acl_filename, linenum); - continue; - } - - /* Skip this line if the resulting command vector is empty. */ - if (argc == 0) { - sdsfreesplitres(argv,argc); - continue; - } - - /* The line should start with the "user" keyword. */ - if (strcmp(argv[0],"user") || argc < 2) { - errors = sdscatprintf(errors, - "%s:%d should start with user keyword followed " - "by the username. ", server.acl_filename, - linenum); - sdsfreesplitres(argv,argc); - continue; - } - - /* Spaces are not allowed in usernames. */ - if (ACLStringHasSpaces(argv[1],sdslen(argv[1]))) { - errors = sdscatprintf(errors, - "'%s:%d: username '%s' contains invalid characters. ", - server.acl_filename, linenum, argv[1]); - sdsfreesplitres(argv,argc); - continue; - } - - user *u = ACLCreateUser(argv[1],sdslen(argv[1])); - - /* If the user already exists we assume it's an error and abort. */ - if (!u) { - errors = sdscatprintf(errors,"WARNING: Duplicate user '%s' found on line %d. ", argv[1], linenum); - sdsfreesplitres(argv,argc); - continue; - } - - /* Finally process the options and validate they can - * be cleanly applied to the user. If any option fails - * to apply, the other values won't be applied since - * all the pending changes will get dropped. */ - int merged_argc; - sds *acl_args = ACLMergeSelectorArguments(argv + 2, argc - 2, &merged_argc, NULL); - if (!acl_args) { - errors = sdscatprintf(errors, - "%s:%d: Unmatched parenthesis in selector definition.", - server.acl_filename, linenum); - } - - int syntax_error = 0; - for (int j = 0; j < merged_argc; j++) { - acl_args[j] = sdstrim(acl_args[j],"\t\r\n"); - if (ACLSetUser(u,acl_args[j],sdslen(acl_args[j])) != C_OK) { - const char *errmsg = ACLSetUserStringError(); - if (errno == ENOENT) { - /* For missing commands, we print out more information since - * it shouldn't contain any sensitive information. */ - errors = sdscatprintf(errors, - "%s:%d: Error in applying operation '%s': %s. ", - server.acl_filename, linenum, acl_args[j], errmsg); - } else if (syntax_error == 0) { - /* For all other errors, only print out the first error encountered - * since it might affect future operations. */ - errors = sdscatprintf(errors, - "%s:%d: %s. ", - server.acl_filename, linenum, errmsg); - syntax_error = 1; - } - } - } - - for (int i = 0; i < merged_argc; i++) sdsfree(acl_args[i]); - zfree(acl_args); - - /* Apply the rule to the new users set only if so far there - * are no errors, otherwise it's useless since we are going - * to discard the new users set anyway. */ - if (sdslen(errors) != 0) { - sdsfreesplitres(argv,argc); - continue; - } - - sdsfreesplitres(argv,argc); - } - - sdsfreesplitres(lines,totlines); - - /* Check if we found errors and react accordingly. */ - if (sdslen(errors) == 0) { - /* The default user pointer is referenced in different places: instead - * of replacing such occurrences it is much simpler to copy the new - * default user configuration in the old one. */ - user *new_default = ACLGetUserByName("default",7); - if (!new_default) { - new_default = ACLCreateDefaultUser(); - } - - ACLCopyUser(DefaultUser,new_default); - ACLFreeUser(new_default); - raxInsert(Users,(unsigned char*)"default",7,DefaultUser,NULL); - raxRemove(old_users,(unsigned char*)"default",7,NULL); - - /* If there are some subscribers, we need to check if we need to drop some clients. */ - rax *user_channels = NULL; - if (pubsubTotalSubscriptions() > 0) { - user_channels = raxNew(); - } - - listIter li; - listNode *ln; - - listRewind(server.clients,&li); - while ((ln = listNext(&li)) != NULL) { - client *c = listNodeValue(ln); - /* a MASTER client can do everything (and user = NULL) so we can skip it */ - if (c->flags & CLIENT_MASTER) - continue; - user *original = c->user; - list *channels = NULL; - user *new = ACLGetUserByName(c->user->name, sdslen(c->user->name)); - if (new && user_channels) { - if (!raxFind(user_channels, (unsigned char*)(new->name), sdslen(new->name), (void**)&channels)) { - channels = getUpcomingChannelList(new, original); - raxInsert(user_channels, (unsigned char*)(new->name), sdslen(new->name), channels, NULL); - } - } - /* When the new channel list is NULL, it means the new user's channel list is a superset of the old user's list. */ - if (!new || (channels && ACLShouldKillPubsubClient(c, channels))) { - deauthenticateAndCloseClient(c); - continue; - } - c->user = new; - } - - if (user_channels) - raxFreeWithCallback(user_channels, listReleaseGeneric); - raxFreeWithCallback(old_users, ACLFreeUserGeneric); - sdsfree(errors); - return NULL; - } else { - raxFreeWithCallback(Users, ACLFreeUserGeneric); - Users = old_users; - errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed"); - return errors; - } -} - -/* Generate a copy of the ACLs currently in memory in the specified filename. - * Returns C_OK on success or C_ERR if there was an error during the I/O. - * When C_ERR is returned a log is produced with hints about the issue. */ -int ACLSaveToFile(const char *filename) { - sds acl = sdsempty(); - int fd = -1; - sds tmpfilename = NULL; - int retval = C_ERR; - - /* Let's generate an SDS string containing the new version of the - * ACL file. */ - raxIterator ri; - raxStart(&ri,Users); - raxSeek(&ri,"^",NULL,0); - while(raxNext(&ri)) { - user *u = ri.data; - /* Return information in the configuration file format. */ - sds user = sdsnew("user "); - user = sdscatsds(user,u->name); - user = sdscatlen(user," ",1); - robj *descr = ACLDescribeUser(u); - user = sdscatsds(user,descr->ptr); - decrRefCount(descr); - acl = sdscatsds(acl,user); - acl = sdscatlen(acl,"\n",1); - sdsfree(user); - } - raxStop(&ri); - - /* Create a temp file with the new content. */ - tmpfilename = sdsnew(filename); - tmpfilename = sdscatfmt(tmpfilename,".tmp-%i-%I", - (int) getpid(),commandTimeSnapshot()); - if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) { - serverLog(LL_WARNING,"Opening temp ACL file for ACL SAVE: %s", - strerror(errno)); - goto cleanup; - } - - /* Write it. */ - size_t offset = 0; - while (offset < sdslen(acl)) { - ssize_t written_bytes = write(fd,acl + offset,sdslen(acl) - offset); - if (written_bytes <= 0) { - if (errno == EINTR) continue; - serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s", - strerror(errno)); - goto cleanup; - } - offset += written_bytes; - } - if (redis_fsync(fd) == -1) { - serverLog(LL_WARNING,"Syncing ACL file for ACL SAVE: %s", - strerror(errno)); - goto cleanup; - } - close(fd); fd = -1; - - /* Let's replace the new file with the old one. */ - if (rename(tmpfilename,filename) == -1) { - serverLog(LL_WARNING,"Renaming ACL file for ACL SAVE: %s", - strerror(errno)); - goto cleanup; - } - if (fsyncFileDir(filename) == -1) { - serverLog(LL_WARNING,"Syncing ACL directory for ACL SAVE: %s", - strerror(errno)); - goto cleanup; - } - sdsfree(tmpfilename); tmpfilename = NULL; - retval = C_OK; /* If we reached this point, everything is fine. */ - -cleanup: - if (fd != -1) close(fd); - if (tmpfilename) unlink(tmpfilename); - sdsfree(tmpfilename); - sdsfree(acl); - return retval; -} - -/* This function is called once the server is already running, modules are - * loaded, and we are ready to start, in order to load the ACLs either from - * the pending list of users defined in redis.conf, or from the ACL file. - * The function will just exit with an error if the user is trying to mix - * both the loading methods. */ -void ACLLoadUsersAtStartup(void) { - if (server.acl_filename[0] != '\0' && listLength(UsersToLoad) != 0) { - serverLog(LL_WARNING, - "Configuring Redis with users defined in redis.conf and at " - "the same setting an ACL file path is invalid. This setup " - "is very likely to lead to configuration errors and security " - "holes, please define either an ACL file or declare users " - "directly in your redis.conf, but not both."); - exit(1); - } - - if (ACLLoadConfiguredUsers() == C_ERR) { - serverLog(LL_WARNING, - "Critical error while loading ACLs. Exiting."); - exit(1); - } - - if (server.acl_filename[0] != '\0') { - sds errors = ACLLoadFromFile(server.acl_filename); - if (errors) { - serverLog(LL_WARNING, - "Aborting Redis startup because of ACL errors: %s", errors); - sdsfree(errors); - exit(1); - } - } -} - -/* ============================================================================= - * ACL log - * ==========================================================================*/ - -#define ACL_LOG_GROUPING_MAX_TIME_DELTA 60000 - -/* This structure defines an entry inside the ACL log. */ -typedef struct ACLLogEntry { - uint64_t count; /* Number of times this happened recently. */ - int reason; /* Reason for denying the command. ACL_DENIED_*. */ - int context; /* Toplevel, Lua or MULTI/EXEC? ACL_LOG_CTX_*. */ - sds object; /* The key name or command name. */ - sds username; /* User the client is authenticated with. */ - mstime_t ctime; /* Milliseconds time of last update to this entry. */ - sds cinfo; /* Client info (last client if updated). */ - long long entry_id; /* The pair (entry_id, timestamp_created) is a unique identifier of this entry - * in case the node dies and is restarted, it can detect that if it's a new series. */ - mstime_t timestamp_created; /* UNIX time in milliseconds at the time of this entry's creation. */ -} ACLLogEntry; - -/* This function will check if ACL entries 'a' and 'b' are similar enough - * that we should actually update the existing entry in our ACL log instead - * of creating a new one. */ -int ACLLogMatchEntry(ACLLogEntry *a, ACLLogEntry *b) { - if (a->reason != b->reason) return 0; - if (a->context != b->context) return 0; - mstime_t delta = a->ctime - b->ctime; - if (delta < 0) delta = -delta; - if (delta > ACL_LOG_GROUPING_MAX_TIME_DELTA) return 0; - if (sdscmp(a->object,b->object) != 0) return 0; - if (sdscmp(a->username,b->username) != 0) return 0; - return 1; -} - -/* Release an ACL log entry. */ -void ACLFreeLogEntry(void *leptr) { - ACLLogEntry *le = leptr; - sdsfree(le->object); - sdsfree(le->username); - sdsfree(le->cinfo); - zfree(le); -} - -/* Update the relevant counter by the reason */ -void ACLUpdateInfoMetrics(int reason){ - if (reason == ACL_DENIED_AUTH) { - server.acl_info.user_auth_failures++; - } else if (reason == ACL_DENIED_CMD) { - server.acl_info.invalid_cmd_accesses++; - } else if (reason == ACL_DENIED_KEY) { - server.acl_info.invalid_key_accesses++; - } else if (reason == ACL_DENIED_CHANNEL) { - server.acl_info.invalid_channel_accesses++; - } else if (reason == ACL_INVALID_TLS_CERT_AUTH) { - server.acl_info.acl_access_denied_tls_cert++; - } else { - serverPanic("Unknown ACL_DENIED encoding"); - } -} - -static void trimACLLogEntriesToMaxLen(void) { - while(listLength(ACLLog) > server.acllog_max_len) { - listNode *ln = listLast(ACLLog); - ACLLogEntry *le = listNodeValue(ln); - ACLFreeLogEntry(le); - listDelNode(ACLLog,ln); - } -} - -/* Adds a new entry in the ACL log, making sure to delete the old entry - * if we reach the maximum length allowed for the log. This function attempts - * to find similar entries in the current log in order to bump the counter of - * the log entry instead of creating many entries for very similar ACL - * rules issues. - * - * The argpos argument is used when the reason is ACL_DENIED_KEY or - * ACL_DENIED_CHANNEL, since it allows the function to log the key or channel - * name that caused the problem. - * - * The last 2 arguments are a manual override to be used, instead of any of the automatic - * ones which depend on the client and reason arguments (use NULL for default). - * - * If `object` is not NULL, this functions takes over it. - */ -void addACLLogEntry(client *c, int reason, int context, int argpos, sds username, sds object) { - /* Update ACL info metrics */ - ACLUpdateInfoMetrics(reason); - - if (server.acllog_max_len == 0) { - trimACLLogEntriesToMaxLen(); - return; - } - - /* Create a new entry. */ - struct ACLLogEntry *le = zmalloc(sizeof(*le)); - le->count = 1; - le->reason = reason; - le->username = sdsdup(username ? username : c->user->name); - le->ctime = commandTimeSnapshot(); - le->entry_id = ACLLogEntryCount; - le->timestamp_created = le->ctime; - - if (object) { - le->object = object; - } else { - switch(reason) { - case ACL_DENIED_CMD: le->object = sdsdup(c->cmd->fullname); break; - case ACL_DENIED_KEY: le->object = sdsdup(c->argv[argpos]->ptr); break; - case ACL_DENIED_CHANNEL: le->object = sdsdup(c->argv[argpos]->ptr); break; - case ACL_DENIED_AUTH: le->object = sdsdup(c->argv[0]->ptr); break; - default: le->object = sdsempty(); - } - } - - /* if we have a real client from the network, use it (could be missing on module timers) */ - client *realclient = server.current_client? server.current_client : c; - - le->cinfo = catClientInfoString(sdsempty(),realclient); - le->context = context; - - /* Try to match this entry with past ones, to see if we can just - * update an existing entry instead of creating a new one. */ - long toscan = 10; /* Do a limited work trying to find duplicated. */ - listIter li; - listNode *ln; - listRewind(ACLLog,&li); - ACLLogEntry *match = NULL; - while (toscan-- && (ln = listNext(&li)) != NULL) { - ACLLogEntry *current = listNodeValue(ln); - if (ACLLogMatchEntry(current,le)) { - match = current; - listDelNode(ACLLog,ln); - listAddNodeHead(ACLLog,current); - break; - } - } - - /* If there is a match update the entry, otherwise add it as a - * new one. */ - if (match) { - /* We update a few fields of the existing entry and bump the - * counter of events for this entry. */ - sdsfree(match->cinfo); - match->cinfo = le->cinfo; - match->ctime = le->ctime; - match->count++; - - /* Release the old entry. */ - le->cinfo = NULL; - ACLFreeLogEntry(le); - } else { - /* Add it to our list of entries. We'll have to trim the list - * to its maximum size. */ - ACLLogEntryCount++; /* Incrementing the entry_id count to make each record in the log unique. */ - listAddNodeHead(ACLLog, le); - trimACLLogEntriesToMaxLen(); - } -} - -sds getAclErrorMessage(int acl_res, user *user, struct redisCommand *cmd, sds errored_val, int verbose) { - switch (acl_res) { - case ACL_DENIED_CMD: - return sdscatfmt(sdsempty(), "User %S has no permissions to run " - "the '%S' command", user->name, cmd->fullname); - case ACL_DENIED_KEY: - if (verbose) { - return sdscatfmt(sdsempty(), "User %S has no permissions to access " - "the '%S' key", user->name, errored_val); - } else { - return sdsnew("No permissions to access a key"); - } - case ACL_DENIED_CHANNEL: - if (verbose) { - return sdscatfmt(sdsempty(), "User %S has no permissions to access " - "the '%S' channel", user->name, errored_val); - } else { - return sdsnew("No permissions to access a channel"); - } - } - serverPanic("Reached deadcode on getAclErrorMessage"); -} - -/* ============================================================================= - * ACL related commands - * ==========================================================================*/ - -/* ACL CAT category */ -void aclCatWithFlags(client *c, dict *commands, uint64_t cflag, int *arraylen) { - dictEntry *de; - dictIterator di; - dictInitIterator(&di, commands); - while ((de = dictNext(&di)) != NULL) { - struct redisCommand *cmd = dictGetVal(de); - if (cmd->acl_categories & cflag) { - addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname)); - (*arraylen)++; - } - - if (cmd->subcommands_dict) { - aclCatWithFlags(c, cmd->subcommands_dict, cflag, arraylen); - } - } - dictResetIterator(&di); -} - -/* Add the formatted response from a single selector to the ACL GETUSER - * response. This function returns the number of fields added. - * - * Setting verbose to 1 means that the full qualifier for key and channel - * permissions are shown. - */ -int aclAddReplySelectorDescription(client *c, aclSelector *s) { - listIter li; - listNode *ln; - - /* Commands */ - addReplyBulkCString(c,"commands"); - sds cmddescr = ACLDescribeSelectorCommandRules(s); - addReplyBulkSds(c,cmddescr); - - /* Key patterns */ - addReplyBulkCString(c,"keys"); - if (s->flags & SELECTOR_FLAG_ALLKEYS) { - addReplyBulkCBuffer(c,"~*",2); - } else { - sds dsl = sdsempty(); - listRewind(s->patterns,&li); - while((ln = listNext(&li))) { - keyPattern *thispat = (keyPattern *) listNodeValue(ln); - if (ln != listFirst(s->patterns)) dsl = sdscat(dsl, " "); - dsl = sdsCatPatternString(dsl, thispat); - } - addReplyBulkSds(c, dsl); - } - - /* Pub/sub patterns */ - addReplyBulkCString(c,"channels"); - if (s->flags & SELECTOR_FLAG_ALLCHANNELS) { - addReplyBulkCBuffer(c,"&*",2); - } else { - sds dsl = sdsempty(); - listRewind(s->channels,&li); - while((ln = listNext(&li))) { - sds thispat = listNodeValue(ln); - if (ln != listFirst(s->channels)) dsl = sdscat(dsl, " "); - dsl = sdscatfmt(dsl, "&%S", thispat); - } - addReplyBulkSds(c, dsl); - } - return 3; -} - -/* ACL -- show and modify the configuration of ACL users. - * ACL HELP - * ACL LOAD - * ACL SAVE - * ACL LIST - * ACL USERS - * ACL CAT [] - * ACL SETUSER ... acl rules ... - * ACL DELUSER [...] - * ACL GETUSER - * ACL GENPASS [] - * ACL WHOAMI - * ACL LOG [ | RESET] - */ -void aclCommand(client *c) { - char *sub = c->argv[1]->ptr; - if (!strcasecmp(sub,"setuser") && c->argc >= 3) { - /* Initially redact all of the arguments to not leak any information - * about the user. */ - for (int j = 2; j < c->argc; j++) { - redactClientCommandArgument(c, j); - } - - sds username = c->argv[2]->ptr; - /* Check username validity. */ - if (ACLStringHasSpaces(username,sdslen(username))) { - addReplyError(c, "Usernames can't contain spaces or null characters"); - return; - } - - user *u = ACLGetUserByName(username,sdslen(username)); - - sds *temp_argv = zmalloc(c->argc * sizeof(sds)); - for (int i = 3; i < c->argc; i++) temp_argv[i-3] = c->argv[i]->ptr; - - sds error = ACLStringSetUser(u, username, temp_argv, c->argc - 3); - zfree(temp_argv); - if (error == NULL) { - addReply(c,shared.ok); - } else { - addReplyErrorSdsSafe(c, error); - } - return; - } else if (!strcasecmp(sub,"deluser") && c->argc >= 3) { - /* Initially redact all the arguments to not leak any information - * about the users. */ - for (int j = 2; j < c->argc; j++) redactClientCommandArgument(c, j); - - int deleted = 0; - for (int j = 2; j < c->argc; j++) { - sds username = c->argv[j]->ptr; - if (!strcmp(username,"default")) { - addReplyError(c,"The 'default' user cannot be removed"); - return; - } - } - - for (int j = 2; j < c->argc; j++) { - sds username = c->argv[j]->ptr; - user *u; - if (raxRemove(Users,(unsigned char*)username, - sdslen(username), - (void**)&u)) - { - ACLFreeUserAndKillClients(u); - deleted++; - } - } - addReplyLongLong(c,deleted); - } else if (!strcasecmp(sub,"getuser") && c->argc == 3) { - /* Redact the username to not leak any information about the user. */ - redactClientCommandArgument(c, 2); - - user *u = ACLGetUserByName(c->argv[2]->ptr,sdslen(c->argv[2]->ptr)); - if (u == NULL) { - addReplyNull(c); - return; - } - - void *ufields = addReplyDeferredLen(c); - int fields = 3; - - /* Flags */ - addReplyBulkCString(c,"flags"); - void *deflen = addReplyDeferredLen(c); - int numflags = 0; - for (int j = 0; ACLUserFlags[j].flag; j++) { - if (u->flags & ACLUserFlags[j].flag) { - addReplyBulkCString(c,ACLUserFlags[j].name); - numflags++; - } - } - setDeferredSetLen(c,deflen,numflags); - - /* Passwords */ - addReplyBulkCString(c,"passwords"); - addReplyArrayLen(c,listLength(u->passwords)); - listIter li; - listNode *ln; - listRewind(u->passwords,&li); - while((ln = listNext(&li))) { - sds thispass = listNodeValue(ln); - addReplyBulkCBuffer(c,thispass,sdslen(thispass)); - } - /* Include the root selector at the top level for backwards compatibility */ - fields += aclAddReplySelectorDescription(c, ACLUserGetRootSelector(u)); - - /* Describe all of the selectors on this user, including duplicating the root selector */ - addReplyBulkCString(c,"selectors"); - addReplyArrayLen(c, listLength(u->selectors) - 1); - listRewind(u->selectors,&li); - serverAssert(listNext(&li)); - while((ln = listNext(&li))) { - void *slen = addReplyDeferredLen(c); - int sfields = aclAddReplySelectorDescription(c, (aclSelector *)listNodeValue(ln)); - setDeferredMapLen(c, slen, sfields); - } - setDeferredMapLen(c, ufields, fields); - } else if ((!strcasecmp(sub,"list") || !strcasecmp(sub,"users")) && - c->argc == 2) - { - int justnames = !strcasecmp(sub,"users"); - addReplyArrayLen(c,raxSize(Users)); - raxIterator ri; - raxStart(&ri,Users); - raxSeek(&ri,"^",NULL,0); - while(raxNext(&ri)) { - user *u = ri.data; - if (justnames) { - addReplyBulkCBuffer(c,u->name,sdslen(u->name)); - } else { - /* Return information in the configuration file format. */ - sds config = sdsnew("user "); - config = sdscatsds(config,u->name); - config = sdscatlen(config," ",1); - robj *descr = ACLDescribeUser(u); - config = sdscatsds(config,descr->ptr); - decrRefCount(descr); - addReplyBulkSds(c,config); - } - } - raxStop(&ri); - } else if (!strcasecmp(sub,"whoami") && c->argc == 2) { - if (c->user != NULL) { - addReplyBulkCBuffer(c,c->user->name,sdslen(c->user->name)); - } else { - addReplyNull(c); - } - } else if (server.acl_filename[0] == '\0' && - (!strcasecmp(sub,"load") || !strcasecmp(sub,"save"))) - { - addReplyError(c,"This Redis instance is not configured to use an ACL file. You may want to specify users via the ACL SETUSER command and then issue a CONFIG REWRITE (assuming you have a Redis configuration file set) in order to store users in the Redis configuration."); - return; - } else if (!strcasecmp(sub,"load") && c->argc == 2) { - sds errors = ACLLoadFromFile(server.acl_filename); - if (errors == NULL) { - addReply(c,shared.ok); - } else { - addReplyError(c,errors); - sdsfree(errors); - } - } else if (!strcasecmp(sub,"save") && c->argc == 2) { - if (ACLSaveToFile(server.acl_filename) == C_OK) { - addReply(c,shared.ok); - } else { - addReplyError(c,"There was an error trying to save the ACLs. " - "Please check the server logs for more " - "information"); - } - } else if (!strcasecmp(sub,"cat") && c->argc == 2) { - void *dl = addReplyDeferredLen(c); - int j; - for (j = 0; ACLCommandCategories[j].flag != 0; j++) - addReplyBulkCString(c,ACLCommandCategories[j].name); - setDeferredArrayLen(c,dl,j); - } else if (!strcasecmp(sub,"cat") && c->argc == 3) { - uint64_t cflag = ACLGetCommandCategoryFlagByName(c->argv[2]->ptr); - if (cflag == 0) { - addReplyErrorFormat(c, "Unknown category '%.128s'", (char*)c->argv[2]->ptr); - return; - } - int arraylen = 0; - void *dl = addReplyDeferredLen(c); - aclCatWithFlags(c, server.orig_commands, cflag, &arraylen); - setDeferredArrayLen(c,dl,arraylen); - } else if (!strcasecmp(sub,"genpass") && (c->argc == 2 || c->argc == 3)) { - #define GENPASS_MAX_BITS 4096 - char pass[GENPASS_MAX_BITS/8*2]; /* Hex representation. */ - long bits = 256; /* By default generate 256 bits passwords. */ - - if (c->argc == 3 && getLongFromObjectOrReply(c,c->argv[2],&bits,NULL) - != C_OK) return; - - if (bits <= 0 || bits > GENPASS_MAX_BITS) { - addReplyErrorFormat(c, - "ACL GENPASS argument must be the number of " - "bits for the output password, a positive number " - "up to %d",GENPASS_MAX_BITS); - return; - } - - long chars = (bits+3)/4; /* Round to number of characters to emit. */ - getRandomHexChars(pass,chars); - addReplyBulkCBuffer(c,pass,chars); - } else if (!strcasecmp(sub,"log") && (c->argc == 2 || c->argc ==3)) { - long count = 10; /* Number of entries to emit by default. */ - - /* Parse the only argument that LOG may have: it could be either - * the number of entries the user wants to display, or alternatively - * the "RESET" command in order to flush the old entries. */ - if (c->argc == 3) { - if (!strcasecmp(c->argv[2]->ptr,"reset")) { - listSetFreeMethod(ACLLog,ACLFreeLogEntry); - listEmpty(ACLLog); - listSetFreeMethod(ACLLog,NULL); - addReply(c,shared.ok); - return; - } else if (getLongFromObjectOrReply(c,c->argv[2],&count,NULL) - != C_OK) - { - return; - } - if (count < 0) count = 0; - } - - /* Fix the count according to the number of entries we got. */ - if ((size_t)count > listLength(ACLLog)) - count = listLength(ACLLog); - - addReplyArrayLen(c,count); - listIter li; - listNode *ln; - listRewind(ACLLog,&li); - mstime_t now = commandTimeSnapshot(); - while (count-- && (ln = listNext(&li)) != NULL) { - ACLLogEntry *le = listNodeValue(ln); - addReplyMapLen(c,10); - addReplyBulkCString(c,"count"); - addReplyLongLong(c,le->count); - - addReplyBulkCString(c,"reason"); - char *reasonstr; - switch(le->reason) { - case ACL_DENIED_CMD: reasonstr="command"; break; - case ACL_DENIED_KEY: reasonstr="key"; break; - case ACL_DENIED_CHANNEL: reasonstr="channel"; break; - case ACL_DENIED_AUTH: reasonstr="auth"; break; - case ACL_INVALID_TLS_CERT_AUTH: reasonstr = "tls-cert"; break; - default: reasonstr="unknown"; - } - addReplyBulkCString(c,reasonstr); - - addReplyBulkCString(c,"context"); - char *ctxstr; - switch(le->context) { - case ACL_LOG_CTX_TOPLEVEL: ctxstr="toplevel"; break; - case ACL_LOG_CTX_MULTI: ctxstr="multi"; break; - case ACL_LOG_CTX_LUA: ctxstr="lua"; break; - case ACL_LOG_CTX_MODULE: ctxstr="module"; break; - default: ctxstr="unknown"; - } - addReplyBulkCString(c,ctxstr); - - addReplyBulkCString(c,"object"); - addReplyBulkCBuffer(c,le->object,sdslen(le->object)); - addReplyBulkCString(c,"username"); - addReplyBulkCBuffer(c,le->username,sdslen(le->username)); - addReplyBulkCString(c,"age-seconds"); - double age = (double)(now - le->ctime)/1000; - addReplyDouble(c,age); - addReplyBulkCString(c,"client-info"); - addReplyBulkCBuffer(c,le->cinfo,sdslen(le->cinfo)); - addReplyBulkCString(c, "entry-id"); - addReplyLongLong(c, le->entry_id); - addReplyBulkCString(c, "timestamp-created"); - addReplyLongLong(c, le->timestamp_created); - addReplyBulkCString(c, "timestamp-last-updated"); - addReplyLongLong(c, le->ctime); - } - } else if (!strcasecmp(sub,"dryrun") && c->argc >= 4) { - struct redisCommand *cmd; - user *u = ACLGetUserByName(c->argv[2]->ptr,sdslen(c->argv[2]->ptr)); - if (u == NULL) { - addReplyErrorFormat(c, "User '%s' not found", (char *)c->argv[2]->ptr); - return; - } - - if ((cmd = lookupCommand(c->argv + 3, c->argc - 3)) == NULL) { - addReplyErrorFormat(c, "Command '%s' not found", (char *)c->argv[3]->ptr); - return; - } - - if ((cmd->arity > 0 && cmd->arity != c->argc-3) || - (c->argc-3 < -cmd->arity)) - { - addReplyErrorFormat(c,"wrong number of arguments for '%s' command", cmd->fullname); - return; - } - - int idx; - int result = ACLCheckAllUserCommandPerm(u, cmd, c->argv + 3, c->argc - 3, NULL, &idx); - if (result != ACL_OK) { - sds err = getAclErrorMessage(result, u, cmd, c->argv[idx+3]->ptr, 1); - addReplyBulkSds(c, err); - return; - } - - addReply(c,shared.ok); - } else if (c->argc == 2 && !strcasecmp(sub,"help")) { - const char *help[] = { -"CAT []", -" List all commands that belong to , or all command categories", -" when no category is specified.", -"DELUSER [ ...]", -" Delete a list of users.", -"DRYRUN [ ...]", -" Returns whether the user can execute the given command without executing the command.", -"GETUSER ", -" Get the user's details.", -"GENPASS []", -" Generate a secure 256-bit user password. The optional `bits` argument can", -" be used to specify a different size.", -"LIST", -" Show users details in config file format.", -"LOAD", -" Reload users from the ACL file.", -"LOG [ | RESET]", -" Show the ACL log entries.", -"SAVE", -" Save the current config to the ACL file.", -"SETUSER [ ...]", -" Create or modify a user with the specified attributes.", -"USERS", -" List all the registered usernames.", -"WHOAMI", -" Return the current connection username.", -NULL - }; - addReplyHelp(c,help); - } else { - addReplySubcommandSyntaxError(c); - } -} - -void addReplyCommandCategories(client *c, struct redisCommand *cmd) { - int flagcount = 0; - void *flaglen = addReplyDeferredLen(c); - for (int j = 0; ACLCommandCategories[j].flag != 0; j++) { - if (cmd->acl_categories & ACLCommandCategories[j].flag) { - addReplyStatusFormat(c, "@%s", ACLCommandCategories[j].name); - flagcount++; - } - } - setDeferredSetLen(c, flaglen, flagcount); -} - -/* When successful, initiates an internal connection, that is able to execute - * internal commands (see CMD_INTERNAL). */ -static void internalAuth(client *c) { - if (!server.cluster_enabled) { - addReplyError(c, "Cannot authenticate as an internal connection on non-cluster instances"); - return; - } - - sds password = c->argv[2]->ptr; - - /* Get internal secret. */ - size_t len = -1; - const char *internal_secret = clusterGetSecret(&len); - if (sdslen(password) != len) { - addReplyError(c, "-WRONGPASS invalid internal password"); - return; - } - if (!time_independent_strcmp((char *)internal_secret, (char *)password, len)) { - c->flags |= CLIENT_INTERNAL; - /* No further authentication is needed. */ - c->authenticated = 1; - /* Set the user to the unrestricted user, if it is not already set (default). */ - if (c->user != NULL) { - c->user = NULL; - moduleNotifyUserChanged(c); - } - addReply(c, shared.ok); - } else { - addReplyError(c, "-WRONGPASS invalid internal password"); - } -} - -/* AUTH - * AUTH (Redis >= 6.0 form) - * - * When the user is omitted it means that we are trying to authenticate - * against the default user. */ -void authCommand(client *c) { - /* Only two or three argument forms are allowed. */ - if (c->argc > 3) { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - /* Always redact the second argument */ - redactClientCommandArgument(c, 1); - - /* Handle the two different forms here. The form with two arguments - * will just use "default" as username. */ - robj *username, *password; - if (c->argc == 2) { - /* Mimic the old behavior of giving an error for the two argument - * form if no password is configured. */ - if (DefaultUser->flags & USER_FLAG_NOPASS) { - addReplyError(c,"AUTH called without any password " - "configured for the default user. Are you sure " - "your configuration is correct?"); - return; - } - - username = shared.default_username; - password = c->argv[1]; - } else { - username = c->argv[1]; - password = c->argv[2]; - redactClientCommandArgument(c, 2); - - /* Handle internal authentication commands. - * Note: No user-defined ACL user can have this username (no spaces - * allowed), thus no conflicts with ACL possible. */ - if (!strcmp(username->ptr, "internal connection")) { - internalAuth(c); - return; - } - } - - robj *err = NULL; - int result = ACLAuthenticateUser(c, username, password, &err); - if (result == AUTH_OK) { - addReply(c, shared.ok); - } else if (result == AUTH_ERR) { - addAuthErrReply(c, err); - } - if (err) decrRefCount(err); -} - -/* Set the password for the "default" ACL user. This implements supports for - * requirepass config, so passing in NULL will set the user to be nopass. */ -void ACLUpdateDefaultUserPassword(sds password) { - ACLSetUser(DefaultUser,"resetpass",-1); - if (password) { - sds aclop = sdscatlen(sdsnew(">"), password, sdslen(password)); - ACLSetUser(DefaultUser,aclop,sdslen(aclop)); - sdsfree(aclop); - } else { - ACLSetUser(DefaultUser,"nopass",-1); - } -} diff --git a/examples/redis-unstable/src/adlist.c b/examples/redis-unstable/src/adlist.c deleted file mode 100644 index d7ca5fb..0000000 --- a/examples/redis-unstable/src/adlist.c +++ /dev/null @@ -1,395 +0,0 @@ -/* adlist.c - A generic doubly linked list implementation - * - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - - -#include -#include "adlist.h" -#include "zmalloc.h" - -/* Create a new list. The created list can be freed with - * listRelease(), but private value of every node need to be freed - * by the user before to call listRelease(), or by setting a free method using - * listSetFreeMethod. - * - * On error, NULL is returned. Otherwise the pointer to the new list. */ -list *listCreate(void) -{ - struct list *list; - - if ((list = zmalloc(sizeof(*list))) == NULL) - return NULL; - list->head = list->tail = NULL; - list->len = 0; - list->dup = NULL; - list->free = NULL; - list->match = NULL; - return list; -} - -/* Remove all the elements from the list without destroying the list itself. */ -void listEmpty(list *list) -{ - unsigned long len; - listNode *current, *next; - - current = list->head; - len = list->len; - while(len--) { - next = current->next; - if (list->free) list->free(current->value); - zfree(current); - current = next; - } - list->head = list->tail = NULL; - list->len = 0; -} - -/* Free the whole list. - * - * This function can't fail. */ -void listRelease(list *list) -{ - if (!list) - return; - listEmpty(list); - zfree(list); -} - -/* Generic version of listRelease. */ -void listReleaseGeneric(void *list) { - listRelease((struct list*)list); -} - -/* Add a new node to the list, to head, containing the specified 'value' - * pointer as value. - * - * On error, NULL is returned and no operation is performed (i.e. the - * list remains unaltered). - * On success the 'list' pointer you pass to the function is returned. */ -list *listAddNodeHead(list *list, void *value) -{ - listNode *node; - - if ((node = zmalloc(sizeof(*node))) == NULL) - return NULL; - node->value = value; - listLinkNodeHead(list, node); - return list; -} - -/* - * Add a node that has already been allocated to the head of list - */ -void listLinkNodeHead(list* list, listNode *node) { - if (list->len == 0) { - list->head = list->tail = node; - node->prev = node->next = NULL; - } else { - node->prev = NULL; - node->next = list->head; - list->head->prev = node; - list->head = node; - } - list->len++; -} - -/* Add a new node to the list, to tail, containing the specified 'value' - * pointer as value. - * - * On error, NULL is returned and no operation is performed (i.e. the - * list remains unaltered). - * On success the 'list' pointer you pass to the function is returned. */ -list *listAddNodeTail(list *list, void *value) -{ - listNode *node; - - if ((node = zmalloc(sizeof(*node))) == NULL) - return NULL; - node->value = value; - listLinkNodeTail(list, node); - return list; -} - -/* - * Add a node that has already been allocated to the tail of list - */ -void listLinkNodeTail(list *list, listNode *node) { - if (list->len == 0) { - list->head = list->tail = node; - node->prev = node->next = NULL; - } else { - node->prev = list->tail; - node->next = NULL; - list->tail->next = node; - list->tail = node; - } - list->len++; -} - -list *listInsertNode(list *list, listNode *old_node, void *value, int after) { - listNode *node; - - if ((node = zmalloc(sizeof(*node))) == NULL) - return NULL; - node->value = value; - if (after) { - node->prev = old_node; - node->next = old_node->next; - if (list->tail == old_node) { - list->tail = node; - } - } else { - node->next = old_node; - node->prev = old_node->prev; - if (list->head == old_node) { - list->head = node; - } - } - if (node->prev != NULL) { - node->prev->next = node; - } - if (node->next != NULL) { - node->next->prev = node; - } - list->len++; - return list; -} - -/* Remove the specified node from the specified list. - * The node is freed. If free callback is provided the value is freed as well. - * - * This function can't fail. */ -void listDelNode(list *list, listNode *node) -{ - listUnlinkNode(list, node); - if (list->free) list->free(node->value); - zfree(node); -} - -/* - * Remove the specified node from the list without freeing it. - */ -void listUnlinkNode(list *list, listNode *node) { - if (node->prev) - node->prev->next = node->next; - else - list->head = node->next; - if (node->next) - node->next->prev = node->prev; - else - list->tail = node->prev; - - node->next = NULL; - node->prev = NULL; - - list->len--; -} - -/* Returns a list iterator 'iter'. After the initialization every - * call to listNext() will return the next element of the list. - * - * This function can't fail. */ -void listInitIterator(listIter *iter, list *list, int direction) -{ - if (direction == AL_START_HEAD) - iter->next = list->head; - else - iter->next = list->tail; - iter->direction = direction; -} - -/* Create an iterator in the list private iterator structure */ -void listRewind(list *list, listIter *li) { - li->next = list->head; - li->direction = AL_START_HEAD; -} - -void listRewindTail(list *list, listIter *li) { - li->next = list->tail; - li->direction = AL_START_TAIL; -} - -/* Return the next element of an iterator. - * It's valid to remove the currently returned element using - * listDelNode(), but not to remove other elements. - * - * The function returns a pointer to the next element of the list, - * or NULL if there are no more elements, so the classical usage - * pattern is: - * - * iter = listGetIterator(list,); - * while ((node = listNext(iter)) != NULL) { - * doSomethingWith(listNodeValue(node)); - * } - * - * */ -listNode *listNext(listIter *iter) -{ - listNode *current = iter->next; - - if (current != NULL) { - if (iter->direction == AL_START_HEAD) - iter->next = current->next; - else - iter->next = current->prev; - } - return current; -} - -/* Duplicate the whole list. On out of memory NULL is returned. - * On success a copy of the original list is returned. - * - * The 'Dup' method set with listSetDupMethod() function is used - * to copy the node value. Otherwise the same pointer value of - * the original node is used as value of the copied node. - * - * The original list both on success or error is never modified. */ -list *listDup(list *orig) -{ - list *copy; - listIter iter; - listNode *node; - - if ((copy = listCreate()) == NULL) - return NULL; - copy->dup = orig->dup; - copy->free = orig->free; - copy->match = orig->match; - listRewind(orig, &iter); - while((node = listNext(&iter)) != NULL) { - void *value; - - if (copy->dup) { - value = copy->dup(node->value); - if (value == NULL) { - listRelease(copy); - return NULL; - } - } else { - value = node->value; - } - - if (listAddNodeTail(copy, value) == NULL) { - /* Free value if dup succeed but listAddNodeTail failed. */ - if (copy->free) copy->free(value); - - listRelease(copy); - return NULL; - } - } - return copy; -} - -/* Search the list for a node matching a given key. - * The match is performed using the 'match' method - * set with listSetMatchMethod(). If no 'match' method - * is set, the 'value' pointer of every node is directly - * compared with the 'key' pointer. - * - * On success the first matching node pointer is returned - * (search starts from head). If no matching node exists - * NULL is returned. */ -listNode *listSearchKey(list *list, void *key) -{ - listIter iter; - listNode *node; - - listRewind(list, &iter); - while((node = listNext(&iter)) != NULL) { - if (list->match) { - if (list->match(node->value, key)) { - return node; - } - } else { - if (key == node->value) { - return node; - } - } - } - return NULL; -} - -/* Return the element at the specified zero-based index - * where 0 is the head, 1 is the element next to head - * and so on. Negative integers are used in order to count - * from the tail, -1 is the last element, -2 the penultimate - * and so on. If the index is out of range NULL is returned. */ -listNode *listIndex(list *list, long index) { - listNode *n; - - if (index < 0) { - index = (-index)-1; - n = list->tail; - while(index-- && n) n = n->prev; - } else { - n = list->head; - while(index-- && n) n = n->next; - } - return n; -} - -/* Rotate the list removing the tail node and inserting it to the head. */ -void listRotateTailToHead(list *list) { - if (listLength(list) <= 1) return; - - /* Detach current tail */ - listNode *tail = list->tail; - list->tail = tail->prev; - list->tail->next = NULL; - /* Move it as head */ - list->head->prev = tail; - tail->prev = NULL; - tail->next = list->head; - list->head = tail; -} - -/* Rotate the list removing the head node and inserting it to the tail. */ -void listRotateHeadToTail(list *list) { - if (listLength(list) <= 1) return; - - listNode *head = list->head; - /* Detach current head */ - list->head = head->next; - list->head->prev = NULL; - /* Move it as tail */ - list->tail->next = head; - head->next = NULL; - head->prev = list->tail; - list->tail = head; -} - -/* Add all the elements of the list 'o' at the end of the - * list 'l'. The list 'other' remains empty but otherwise valid. */ -void listJoin(list *l, list *o) { - if (o->len == 0) return; - - o->head->prev = l->tail; - - if (l->tail) - l->tail->next = o->head; - else - l->head = o->head; - - l->tail = o->tail; - l->len += o->len; - - /* Setup other as an empty list. */ - o->head = o->tail = NULL; - o->len = 0; -} - -/* Initializes the node's value and sets its pointers - * so that it is initially not a member of any list. - */ -void listInitNode(listNode *node, void *value) { - node->prev = NULL; - node->next = NULL; - node->value = value; -} diff --git a/examples/redis-unstable/src/adlist.h b/examples/redis-unstable/src/adlist.h deleted file mode 100644 index bb0eed1..0000000 --- a/examples/redis-unstable/src/adlist.h +++ /dev/null @@ -1,80 +0,0 @@ -/* adlist.h - A generic doubly linked list implementation - * - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#ifndef __ADLIST_H__ -#define __ADLIST_H__ - -/* Node, List, and Iterator are the only data structures used currently. */ - -typedef struct listNode { - struct listNode *prev; - struct listNode *next; - void *value; -} listNode; - -typedef struct listIter { - listNode *next; - int direction; -} listIter; - -typedef struct list { - listNode *head; - listNode *tail; - void *(*dup)(void *ptr); - void (*free)(void *ptr); - int (*match)(void *ptr, void *key); - unsigned long len; -} list; - -/* Functions implemented as macros */ -#define listLength(l) ((l)->len) -#define listFirst(l) ((l)->head) -#define listLast(l) ((l)->tail) -#define listPrevNode(n) ((n)->prev) -#define listNextNode(n) ((n)->next) -#define listNodeValue(n) ((n)->value) - -#define listSetDupMethod(l,m) ((l)->dup = (m)) -#define listSetFreeMethod(l,m) ((l)->free = (m)) -#define listSetMatchMethod(l,m) ((l)->match = (m)) - -#define listGetDupMethod(l) ((l)->dup) -#define listGetFreeMethod(l) ((l)->free) -#define listGetMatchMethod(l) ((l)->match) - -/* Prototypes */ -list *listCreate(void); -void listRelease(list *list); -void listReleaseGeneric(void *list); -void listEmpty(list *list); -list *listAddNodeHead(list *list, void *value); -list *listAddNodeTail(list *list, void *value); -list *listInsertNode(list *list, listNode *old_node, void *value, int after); -void listDelNode(list *list, listNode *node); -void listInitIterator(listIter *iter, list *list, int direction); -listNode *listNext(listIter *iter); -list *listDup(list *orig); -listNode *listSearchKey(list *list, void *key); -listNode *listIndex(list *list, long index); -void listRewind(list *list, listIter *li); -void listRewindTail(list *list, listIter *li); -void listRotateTailToHead(list *list); -void listRotateHeadToTail(list *list); -void listJoin(list *l, list *o); -void listInitNode(listNode *node, void *value); -void listLinkNodeHead(list *list, listNode *node); -void listLinkNodeTail(list *list, listNode *node); -void listUnlinkNode(list *list, listNode *node); - -/* Directions for iterators */ -#define AL_START_HEAD 0 -#define AL_START_TAIL 1 - -#endif /* __ADLIST_H__ */ diff --git a/examples/redis-unstable/src/ae.c b/examples/redis-unstable/src/ae.c deleted file mode 100644 index 733c88d..0000000 --- a/examples/redis-unstable/src/ae.c +++ /dev/null @@ -1,511 +0,0 @@ -/* A simple event-driven programming library. Originally I wrote this code - * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated - * it in form of a library for easy reuse. - * - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "ae.h" -#include "anet.h" -#include "redisassert.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zmalloc.h" -#include "config.h" - -/* Include the best multiplexing layer supported by this system. - * The following should be ordered by performances, descending. */ -#ifdef HAVE_EVPORT -#include "ae_evport.c" -#else - #ifdef HAVE_EPOLL - #include "ae_epoll.c" - #else - #ifdef HAVE_KQUEUE - #include "ae_kqueue.c" - #else - #include "ae_select.c" - #endif - #endif -#endif - -#define INITIAL_EVENT 1024 -aeEventLoop *aeCreateEventLoop(int setsize) { - aeEventLoop *eventLoop; - int i; - - monotonicInit(); /* just in case the calling app didn't initialize */ - - if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err; - eventLoop->nevents = setsize < INITIAL_EVENT ? setsize : INITIAL_EVENT; - eventLoop->events = zmalloc(sizeof(aeFileEvent)*eventLoop->nevents); - eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*eventLoop->nevents); - if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err; - eventLoop->setsize = setsize; - eventLoop->timeEventHead = NULL; - eventLoop->timeEventNextId = 0; - eventLoop->stop = 0; - eventLoop->maxfd = -1; - eventLoop->beforesleep = NULL; - eventLoop->aftersleep = NULL; - eventLoop->flags = 0; - memset(eventLoop->privdata, 0, sizeof(eventLoop->privdata)); - if (aeApiCreate(eventLoop) == -1) goto err; - /* Events with mask == AE_NONE are not set. So let's initialize the - * vector with it. */ - for (i = 0; i < eventLoop->nevents; i++) - eventLoop->events[i].mask = AE_NONE; - return eventLoop; - -err: - if (eventLoop) { - zfree(eventLoop->events); - zfree(eventLoop->fired); - zfree(eventLoop); - } - return NULL; -} - -/* Return the current set size. */ -int aeGetSetSize(aeEventLoop *eventLoop) { - return eventLoop->setsize; -} - -/* - * Tell the event processing to change the wait timeout as soon as possible. - * - * Note: it just means you turn on/off the global AE_DONT_WAIT. - */ -void aeSetDontWait(aeEventLoop *eventLoop, int noWait) { - if (noWait) - eventLoop->flags |= AE_DONT_WAIT; - else - eventLoop->flags &= ~AE_DONT_WAIT; -} - -/* Resize the maximum set size of the event loop. - * If the requested set size is smaller than the current set size, but - * there is already a file descriptor in use that is >= the requested - * set size minus one, AE_ERR is returned and the operation is not - * performed at all. - * - * Otherwise AE_OK is returned and the operation is successful. */ -int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) { - if (setsize == eventLoop->setsize) return AE_OK; - if (eventLoop->maxfd >= setsize) return AE_ERR; - if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR; - - eventLoop->setsize = setsize; - - /* If the current allocated space is larger than the requested size, - * we need to shrink it to the requested size. */ - if (setsize < eventLoop->nevents) { - eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize); - eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize); - eventLoop->nevents = setsize; - } - return AE_OK; -} - -void aeDeleteEventLoop(aeEventLoop *eventLoop) { - aeApiFree(eventLoop); - zfree(eventLoop->events); - zfree(eventLoop->fired); - - /* Free the time events list. */ - aeTimeEvent *next_te, *te = eventLoop->timeEventHead; - while (te) { - next_te = te->next; - if (te->finalizerProc) - te->finalizerProc(eventLoop, te->clientData); - zfree(te); - te = next_te; - } - zfree(eventLoop); -} - -void aeStop(aeEventLoop *eventLoop) { - eventLoop->stop = 1; -} - -int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask, - aeFileProc *proc, void *clientData) -{ - if (fd >= eventLoop->setsize) { - errno = ERANGE; - return AE_ERR; - } - - /* Resize the events and fired arrays if the file - * descriptor exceeds the current number of events. */ - if (unlikely(fd >= eventLoop->nevents)) { - int newnevents = eventLoop->nevents; - newnevents = (newnevents * 2 > fd + 1) ? newnevents * 2 : fd + 1; - newnevents = (newnevents > eventLoop->setsize) ? eventLoop->setsize : newnevents; - eventLoop->events = zrealloc(eventLoop->events, sizeof(aeFileEvent) * newnevents); - eventLoop->fired = zrealloc(eventLoop->fired, sizeof(aeFiredEvent) * newnevents); - - /* Initialize new slots with an AE_NONE mask */ - for (int i = eventLoop->nevents; i < newnevents; i++) - eventLoop->events[i].mask = AE_NONE; - eventLoop->nevents = newnevents; - } - - aeFileEvent *fe = &eventLoop->events[fd]; - - if (aeApiAddEvent(eventLoop, fd, mask) == -1) - return AE_ERR; - fe->mask |= mask; - if (mask & AE_READABLE) fe->rfileProc = proc; - if (mask & AE_WRITABLE) fe->wfileProc = proc; - fe->clientData = clientData; - if (fd > eventLoop->maxfd) - eventLoop->maxfd = fd; - return AE_OK; -} - -void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) -{ - if (fd >= eventLoop->setsize) return; - aeFileEvent *fe = &eventLoop->events[fd]; - if (fe->mask == AE_NONE) return; - - /* We want to always remove AE_BARRIER if set when AE_WRITABLE - * is removed. */ - if (mask & AE_WRITABLE) mask |= AE_BARRIER; - - aeApiDelEvent(eventLoop, fd, mask); - fe->mask = fe->mask & (~mask); - if (fd == eventLoop->maxfd && fe->mask == AE_NONE) { - /* Update the max fd */ - int j; - - for (j = eventLoop->maxfd-1; j >= 0; j--) - if (eventLoop->events[j].mask != AE_NONE) break; - eventLoop->maxfd = j; - } -} - -void *aeGetFileClientData(aeEventLoop *eventLoop, int fd) { - if (fd >= eventLoop->setsize) return NULL; - aeFileEvent *fe = &eventLoop->events[fd]; - if (fe->mask == AE_NONE) return NULL; - - return fe->clientData; -} - -int aeGetFileEvents(aeEventLoop *eventLoop, int fd) { - if (fd >= eventLoop->setsize) return 0; - aeFileEvent *fe = &eventLoop->events[fd]; - - return fe->mask; -} - -long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds, - aeTimeProc *proc, void *clientData, - aeEventFinalizerProc *finalizerProc) -{ - long long id = eventLoop->timeEventNextId++; - aeTimeEvent *te; - - te = zmalloc(sizeof(*te)); - if (te == NULL) return AE_ERR; - te->id = id; - te->when = getMonotonicUs() + milliseconds * 1000; - te->timeProc = proc; - te->finalizerProc = finalizerProc; - te->clientData = clientData; - te->prev = NULL; - te->next = eventLoop->timeEventHead; - te->refcount = 0; - if (te->next) - te->next->prev = te; - eventLoop->timeEventHead = te; - return id; -} - -int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id) -{ - aeTimeEvent *te = eventLoop->timeEventHead; - while(te) { - if (te->id == id) { - te->id = AE_DELETED_EVENT_ID; - return AE_OK; - } - te = te->next; - } - return AE_ERR; /* NO event with the specified ID found */ -} - -/* How many microseconds until the first timer should fire. - * If there are no timers, -1 is returned. - * - * Note that's O(N) since time events are unsorted. - * Possible optimizations (not needed by Redis so far, but...): - * 1) Insert the event in order, so that the nearest is just the head. - * Much better but still insertion or deletion of timers is O(N). - * 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)). - */ -static int64_t usUntilEarliestTimer(aeEventLoop *eventLoop) { - aeTimeEvent *te = eventLoop->timeEventHead; - if (te == NULL) return -1; - - aeTimeEvent *earliest = NULL; - while (te) { - if ((!earliest || te->when < earliest->when) && te->id != AE_DELETED_EVENT_ID) - earliest = te; - te = te->next; - } - - monotime now = getMonotonicUs(); - return (now >= earliest->when) ? 0 : earliest->when - now; -} - -/* Process time events */ -static int processTimeEvents(aeEventLoop *eventLoop) { - int processed = 0; - aeTimeEvent *te; - long long maxId; - - te = eventLoop->timeEventHead; - maxId = eventLoop->timeEventNextId-1; - monotime now = getMonotonicUs(); - while(te) { - long long id; - - /* Remove events scheduled for deletion. */ - if (te->id == AE_DELETED_EVENT_ID) { - aeTimeEvent *next = te->next; - /* If a reference exists for this timer event, - * don't free it. This is currently incremented - * for recursive timerProc calls */ - if (te->refcount) { - te = next; - continue; - } - if (te->prev) - te->prev->next = te->next; - else - eventLoop->timeEventHead = te->next; - if (te->next) - te->next->prev = te->prev; - if (te->finalizerProc) { - te->finalizerProc(eventLoop, te->clientData); - now = getMonotonicUs(); - } - zfree(te); - te = next; - continue; - } - - /* Make sure we don't process time events created by time events in - * this iteration. Note that this check is currently useless: we always - * add new timers on the head, however if we change the implementation - * detail, this check may be useful again: we keep it here for future - * defense. */ - if (te->id > maxId) { - te = te->next; - continue; - } - - if (te->when <= now) { - int retval; - - id = te->id; - te->refcount++; - retval = te->timeProc(eventLoop, id, te->clientData); - te->refcount--; - processed++; - now = getMonotonicUs(); - if (retval != AE_NOMORE) { - te->when = now + (monotime)retval * 1000; - } else { - te->id = AE_DELETED_EVENT_ID; - } - } - te = te->next; - } - return processed; -} - -/* Process every pending file event, then every pending time event - * (that may be registered by file event callbacks just processed). - * Without special flags the function sleeps until some file event - * fires, or when the next time event occurs (if any). - * - * If flags is 0, the function does nothing and returns. - * if flags has AE_ALL_EVENTS set, all the kind of events are processed. - * if flags has AE_FILE_EVENTS set, file events are processed. - * if flags has AE_TIME_EVENTS set, time events are processed. - * if flags has AE_DONT_WAIT set, the function returns ASAP once all - * the events that can be handled without a wait are processed. - * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called. - * if flags has AE_CALL_BEFORE_SLEEP set, the beforesleep callback is called. - * - * The function returns the number of events processed. */ -int aeProcessEvents(aeEventLoop *eventLoop, int flags) -{ - int processed = 0, numevents; - - /* Nothing to do? return ASAP */ - if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0; - - /* Note that we want to call aeApiPoll() even if there are no - * file events to process as long as we want to process time - * events, in order to sleep until the next time event is ready - * to fire. */ - if (eventLoop->maxfd != -1 || - ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) { - int j; - struct timeval tv, *tvp = NULL; /* NULL means infinite wait. */ - int64_t usUntilTimer; - - if (eventLoop->beforesleep != NULL && (flags & AE_CALL_BEFORE_SLEEP)) - eventLoop->beforesleep(eventLoop); - - /* The eventLoop->flags may be changed inside beforesleep. - * So we should check it after beforesleep be called. At the same time, - * the parameter flags always should have the highest priority. - * That is to say, once the parameter flag is set to AE_DONT_WAIT, - * no matter what value eventLoop->flags is set to, we should ignore it. */ - if ((flags & AE_DONT_WAIT) || (eventLoop->flags & AE_DONT_WAIT)) { - tv.tv_sec = tv.tv_usec = 0; - tvp = &tv; - } else if (flags & AE_TIME_EVENTS) { - usUntilTimer = usUntilEarliestTimer(eventLoop); - if (usUntilTimer >= 0) { - tv.tv_sec = usUntilTimer / 1000000; - tv.tv_usec = usUntilTimer % 1000000; - tvp = &tv; - } - } - /* Call the multiplexing API, will return only on timeout or when - * some event fires. */ - numevents = aeApiPoll(eventLoop, tvp); - - /* Don't process file events if not requested. */ - if (!(flags & AE_FILE_EVENTS)) { - numevents = 0; - } - - /* After sleep callback. */ - if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) - eventLoop->aftersleep(eventLoop); - - for (j = 0; j < numevents; j++) { - int fd = eventLoop->fired[j].fd; - aeFileEvent *fe = &eventLoop->events[fd]; - int mask = eventLoop->fired[j].mask; - int fired = 0; /* Number of events fired for current fd. */ - - /* Normally we execute the readable event first, and the writable - * event later. This is useful as sometimes we may be able - * to serve the reply of a query immediately after processing the - * query. - * - * However if AE_BARRIER is set in the mask, our application is - * asking us to do the reverse: never fire the writable event - * after the readable. In such a case, we invert the calls. - * This is useful when, for instance, we want to do things - * in the beforeSleep() hook, like fsyncing a file to disk, - * before replying to a client. */ - int invert = fe->mask & AE_BARRIER; - - /* Note the "fe->mask & mask & ..." code: maybe an already - * processed event removed an element that fired and we still - * didn't processed, so we check if the event is still valid. - * - * Fire the readable event if the call sequence is not - * inverted. */ - if (!invert && fe->mask & mask & AE_READABLE) { - fe->rfileProc(eventLoop,fd,fe->clientData,mask); - fired++; - fe = &eventLoop->events[fd]; /* Refresh in case of resize. */ - } - - /* Fire the writable event. */ - if (fe->mask & mask & AE_WRITABLE) { - if (!fired || fe->wfileProc != fe->rfileProc) { - fe->wfileProc(eventLoop,fd,fe->clientData,mask); - fired++; - } - } - - /* If we have to invert the call, fire the readable event now - * after the writable one. */ - if (invert) { - fe = &eventLoop->events[fd]; /* Refresh in case of resize. */ - if ((fe->mask & mask & AE_READABLE) && - (!fired || fe->wfileProc != fe->rfileProc)) - { - fe->rfileProc(eventLoop,fd,fe->clientData,mask); - fired++; - } - } - - processed++; - } - } - /* Check time events */ - if (flags & AE_TIME_EVENTS) - processed += processTimeEvents(eventLoop); - - return processed; /* return the number of processed file/time events */ -} - -/* Wait for milliseconds until the given file descriptor becomes - * writable/readable/exception */ -int aeWait(int fd, int mask, long long milliseconds) { - struct pollfd pfd; - int retmask = 0, retval; - - memset(&pfd, 0, sizeof(pfd)); - pfd.fd = fd; - if (mask & AE_READABLE) pfd.events |= POLLIN; - if (mask & AE_WRITABLE) pfd.events |= POLLOUT; - - if ((retval = poll(&pfd, 1, milliseconds))== 1) { - if (pfd.revents & POLLIN) retmask |= AE_READABLE; - if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE; - if (pfd.revents & POLLERR) retmask |= AE_WRITABLE; - if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE; - return retmask; - } else { - return retval; - } -} - -void aeMain(aeEventLoop *eventLoop) { - eventLoop->stop = 0; - while (!eventLoop->stop) { - aeProcessEvents(eventLoop, AE_ALL_EVENTS| - AE_CALL_BEFORE_SLEEP| - AE_CALL_AFTER_SLEEP); - } -} - -char *aeGetApiName(void) { - return aeApiName(); -} - -void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) { - eventLoop->beforesleep = beforesleep; -} - -void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) { - eventLoop->aftersleep = aftersleep; -} diff --git a/examples/redis-unstable/src/ae.h b/examples/redis-unstable/src/ae.h deleted file mode 100644 index 996d48b..0000000 --- a/examples/redis-unstable/src/ae.h +++ /dev/null @@ -1,118 +0,0 @@ -/* A simple event-driven programming library. Originally I wrote this code - * for the Jim's event-loop (Jim is a Tcl interpreter) but later translated - * it in form of a library for easy reuse. - * - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#ifndef __AE_H__ -#define __AE_H__ - -#include "monotonic.h" - -#define AE_OK 0 -#define AE_ERR -1 - -#define AE_NONE 0 /* No events registered. */ -#define AE_READABLE 1 /* Fire when descriptor is readable. */ -#define AE_WRITABLE 2 /* Fire when descriptor is writable. */ -#define AE_BARRIER 4 /* With WRITABLE, never fire the event if the - READABLE event already fired in the same event - loop iteration. Useful when you want to persist - things to disk before sending replies, and want - to do that in a group fashion. */ - -#define AE_FILE_EVENTS (1<<0) -#define AE_TIME_EVENTS (1<<1) -#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS) -#define AE_DONT_WAIT (1<<2) -#define AE_CALL_BEFORE_SLEEP (1<<3) -#define AE_CALL_AFTER_SLEEP (1<<4) - -#define AE_NOMORE -1 -#define AE_DELETED_EVENT_ID -1 - -/* Macros */ -#define AE_NOTUSED(V) ((void) V) - -struct aeEventLoop; - -/* Types and data structures */ -typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask); -typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData); -typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData); -typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop); - -/* File event structure */ -typedef struct aeFileEvent { - int mask; /* one of AE_(READABLE|WRITABLE|BARRIER) */ - aeFileProc *rfileProc; - aeFileProc *wfileProc; - void *clientData; -} aeFileEvent; - -/* Time event structure */ -typedef struct aeTimeEvent { - long long id; /* time event identifier. */ - monotime when; - aeTimeProc *timeProc; - aeEventFinalizerProc *finalizerProc; - void *clientData; - struct aeTimeEvent *prev; - struct aeTimeEvent *next; - int refcount; /* refcount to prevent timer events from being - * freed in recursive time event calls. */ -} aeTimeEvent; - -/* A fired event */ -typedef struct aeFiredEvent { - int fd; - int mask; -} aeFiredEvent; - -/* State of an event based program */ -typedef struct aeEventLoop { - int maxfd; /* highest file descriptor currently registered */ - int setsize; /* max number of file descriptors tracked */ - long long timeEventNextId; - int nevents; /* Size of Registered events */ - aeFileEvent *events; /* Registered events */ - aeFiredEvent *fired; /* Fired events */ - aeTimeEvent *timeEventHead; - int stop; - void *apidata; /* This is used for polling API specific data */ - aeBeforeSleepProc *beforesleep; - aeBeforeSleepProc *aftersleep; - int flags; - void *privdata[2]; -} aeEventLoop; - -/* Prototypes */ -aeEventLoop *aeCreateEventLoop(int setsize); -void aeDeleteEventLoop(aeEventLoop *eventLoop); -void aeStop(aeEventLoop *eventLoop); -int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask, - aeFileProc *proc, void *clientData); -void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask); -int aeGetFileEvents(aeEventLoop *eventLoop, int fd); -void *aeGetFileClientData(aeEventLoop *eventLoop, int fd); -long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds, - aeTimeProc *proc, void *clientData, - aeEventFinalizerProc *finalizerProc); -int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id); -int aeProcessEvents(aeEventLoop *eventLoop, int flags); -int aeWait(int fd, int mask, long long milliseconds); -void aeMain(aeEventLoop *eventLoop); -char *aeGetApiName(void); -void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep); -void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep); -int aeGetSetSize(aeEventLoop *eventLoop); -int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); -void aeSetDontWait(aeEventLoop *eventLoop, int noWait); - -#endif diff --git a/examples/redis-unstable/src/ae_epoll.c b/examples/redis-unstable/src/ae_epoll.c deleted file mode 100644 index 6b91661..0000000 --- a/examples/redis-unstable/src/ae_epoll.c +++ /dev/null @@ -1,119 +0,0 @@ -/* Linux epoll(2) based ae.c module - * - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - - -#include - -typedef struct aeApiState { - int epfd; - struct epoll_event *events; -} aeApiState; - -static int aeApiCreate(aeEventLoop *eventLoop) { - aeApiState *state = zmalloc(sizeof(aeApiState)); - - if (!state) return -1; - state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize); - if (!state->events) { - zfree(state); - return -1; - } - state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */ - if (state->epfd == -1) { - zfree(state->events); - zfree(state); - return -1; - } - anetCloexec(state->epfd); - eventLoop->apidata = state; - return 0; -} - -static int aeApiResize(aeEventLoop *eventLoop, int setsize) { - aeApiState *state = eventLoop->apidata; - - state->events = zrealloc(state->events, sizeof(struct epoll_event)*setsize); - return 0; -} - -static void aeApiFree(aeEventLoop *eventLoop) { - aeApiState *state = eventLoop->apidata; - - close(state->epfd); - zfree(state->events); - zfree(state); -} - -static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - struct epoll_event ee = {0}; /* avoid valgrind warning */ - /* If the fd was already monitored for some event, we need a MOD - * operation. Otherwise we need an ADD operation. */ - int op = eventLoop->events[fd].mask == AE_NONE ? - EPOLL_CTL_ADD : EPOLL_CTL_MOD; - - ee.events = 0; - mask |= eventLoop->events[fd].mask; /* Merge old events */ - if (mask & AE_READABLE) ee.events |= EPOLLIN; - if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; - ee.data.fd = fd; - if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1; - return 0; -} - -static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) { - aeApiState *state = eventLoop->apidata; - struct epoll_event ee = {0}; /* avoid valgrind warning */ - int mask = eventLoop->events[fd].mask & (~delmask); - - ee.events = 0; - if (mask & AE_READABLE) ee.events |= EPOLLIN; - if (mask & AE_WRITABLE) ee.events |= EPOLLOUT; - ee.data.fd = fd; - if (mask != AE_NONE) { - epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee); - } else { - /* Note, Kernel < 2.6.9 requires a non null event pointer even for - * EPOLL_CTL_DEL. */ - epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee); - } -} - -static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { - aeApiState *state = eventLoop->apidata; - int retval, numevents = 0; - - retval = epoll_wait(state->epfd,state->events,eventLoop->setsize, - tvp ? (tvp->tv_sec*1000 + (tvp->tv_usec + 999)/1000) : -1); - if (retval > 0) { - int j; - - numevents = retval; - for (j = 0; j < numevents; j++) { - int mask = 0; - struct epoll_event *e = state->events+j; - - if (e->events & EPOLLIN) mask |= AE_READABLE; - if (e->events & EPOLLOUT) mask |= AE_WRITABLE; - if (e->events & EPOLLERR) mask |= AE_WRITABLE|AE_READABLE; - if (e->events & EPOLLHUP) mask |= AE_WRITABLE|AE_READABLE; - eventLoop->fired[j].fd = e->data.fd; - eventLoop->fired[j].mask = mask; - } - } else if (retval == -1 && errno != EINTR) { - panic("aeApiPoll: epoll_wait, %s", strerror(errno)); - } - - return numevents; -} - -static char *aeApiName(void) { - return "epoll"; -} diff --git a/examples/redis-unstable/src/ae_evport.c b/examples/redis-unstable/src/ae_evport.c deleted file mode 100644 index 2598ca0..0000000 --- a/examples/redis-unstable/src/ae_evport.c +++ /dev/null @@ -1,323 +0,0 @@ -/* ae.c module for illumos event ports. - * - * Copyright (c) 2012, Joyent, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include -#include -#include - -#include -#include - -#include - -static int evport_debug = 0; - -/* - * This file implements the ae API using event ports, present on Solaris-based - * systems since Solaris 10. Using the event port interface, we associate file - * descriptors with the port. Each association also includes the set of poll(2) - * events that the consumer is interested in (e.g., POLLIN and POLLOUT). - * - * There's one tricky piece to this implementation: when we return events via - * aeApiPoll, the corresponding file descriptors become dissociated from the - * port. This is necessary because poll events are level-triggered, so if the - * fd didn't become dissociated, it would immediately fire another event since - * the underlying state hasn't changed yet. We must re-associate the file - * descriptor, but only after we know that our caller has actually read from it. - * The ae API does not tell us exactly when that happens, but we do know that - * it must happen by the time aeApiPoll is called again. Our solution is to - * keep track of the last fds returned by aeApiPoll and re-associate them next - * time aeApiPoll is invoked. - * - * To summarize, in this module, each fd association is EITHER (a) represented - * only via the in-kernel association OR (b) represented by pending_fds and - * pending_masks. (b) is only true for the last fds we returned from aeApiPoll, - * and only until we enter aeApiPoll again (at which point we restore the - * in-kernel association). - */ -#define MAX_EVENT_BATCHSZ 512 - -typedef struct aeApiState { - int portfd; /* event port */ - uint_t npending; /* # of pending fds */ - int pending_fds[MAX_EVENT_BATCHSZ]; /* pending fds */ - int pending_masks[MAX_EVENT_BATCHSZ]; /* pending fds' masks */ -} aeApiState; - -static int aeApiCreate(aeEventLoop *eventLoop) { - int i; - aeApiState *state = zmalloc(sizeof(aeApiState)); - if (!state) return -1; - - state->portfd = port_create(); - if (state->portfd == -1) { - zfree(state); - return -1; - } - anetCloexec(state->portfd); - - state->npending = 0; - - for (i = 0; i < MAX_EVENT_BATCHSZ; i++) { - state->pending_fds[i] = -1; - state->pending_masks[i] = AE_NONE; - } - - eventLoop->apidata = state; - return 0; -} - -static int aeApiResize(aeEventLoop *eventLoop, int setsize) { - (void) eventLoop; - (void) setsize; - /* Nothing to resize here. */ - return 0; -} - -static void aeApiFree(aeEventLoop *eventLoop) { - aeApiState *state = eventLoop->apidata; - - close(state->portfd); - zfree(state); -} - -static int aeApiLookupPending(aeApiState *state, int fd) { - uint_t i; - - for (i = 0; i < state->npending; i++) { - if (state->pending_fds[i] == fd) - return (i); - } - - return (-1); -} - -/* - * Helper function to invoke port_associate for the given fd and mask. - */ -static int aeApiAssociate(const char *where, int portfd, int fd, int mask) { - int events = 0; - int rv, err; - - if (mask & AE_READABLE) - events |= POLLIN; - if (mask & AE_WRITABLE) - events |= POLLOUT; - - if (evport_debug) - fprintf(stderr, "%s: port_associate(%d, 0x%x) = ", where, fd, events); - - rv = port_associate(portfd, PORT_SOURCE_FD, fd, events, - (void *)(uintptr_t)mask); - err = errno; - - if (evport_debug) - fprintf(stderr, "%d (%s)\n", rv, rv == 0 ? "no error" : strerror(err)); - - if (rv == -1) { - fprintf(stderr, "%s: port_associate: %s\n", where, strerror(err)); - - if (err == EAGAIN) - fprintf(stderr, "aeApiAssociate: event port limit exceeded."); - } - - return rv; -} - -static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - int fullmask, pfd; - - if (evport_debug) - fprintf(stderr, "aeApiAddEvent: fd %d mask 0x%x\n", fd, mask); - - /* - * Since port_associate's "events" argument replaces any existing events, we - * must be sure to include whatever events are already associated when - * we call port_associate() again. - */ - fullmask = mask | eventLoop->events[fd].mask; - pfd = aeApiLookupPending(state, fd); - - if (pfd != -1) { - /* - * This fd was recently returned from aeApiPoll. It should be safe to - * assume that the consumer has processed that poll event, but we play - * it safer by simply updating pending_mask. The fd will be - * re-associated as usual when aeApiPoll is called again. - */ - if (evport_debug) - fprintf(stderr, "aeApiAddEvent: adding to pending fd %d\n", fd); - state->pending_masks[pfd] |= fullmask; - return 0; - } - - return (aeApiAssociate("aeApiAddEvent", state->portfd, fd, fullmask)); -} - -static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - int fullmask, pfd; - - if (evport_debug) - fprintf(stderr, "del fd %d mask 0x%x\n", fd, mask); - - pfd = aeApiLookupPending(state, fd); - - if (pfd != -1) { - if (evport_debug) - fprintf(stderr, "deleting event from pending fd %d\n", fd); - - /* - * This fd was just returned from aeApiPoll, so it's not currently - * associated with the port. All we need to do is update - * pending_mask appropriately. - */ - state->pending_masks[pfd] &= ~mask; - - if (state->pending_masks[pfd] == AE_NONE) - state->pending_fds[pfd] = -1; - - return; - } - - /* - * The fd is currently associated with the port. Like with the add case - * above, we must look at the full mask for the file descriptor before - * updating that association. We don't have a good way of knowing what the - * events are without looking into the eventLoop state directly. We rely on - * the fact that our caller has already updated the mask in the eventLoop. - */ - - /* We always remove the specified events from the current mask, - * regardless of whether eventLoop->events[fd].mask has been updated yet. */ - fullmask = eventLoop->events[fd].mask & ~mask; - if (fullmask == AE_NONE) { - /* - * We're removing *all* events, so use port_dissociate to remove the - * association completely. Failure here indicates a bug. - */ - if (evport_debug) - fprintf(stderr, "aeApiDelEvent: port_dissociate(%d)\n", fd); - - if (port_dissociate(state->portfd, PORT_SOURCE_FD, fd) != 0) { - perror("aeApiDelEvent: port_dissociate"); - abort(); /* will not return */ - } - } else if (aeApiAssociate("aeApiDelEvent", state->portfd, fd, - fullmask) != 0) { - /* - * ENOMEM is a potentially transient condition, but the kernel won't - * generally return it unless things are really bad. EAGAIN indicates - * we've reached a resource limit, for which it doesn't make sense to - * retry (counter-intuitively). All other errors indicate a bug. In any - * of these cases, the best we can do is to abort. - */ - abort(); /* will not return */ - } -} - -static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { - aeApiState *state = eventLoop->apidata; - struct timespec timeout, *tsp; - uint_t mask, i; - uint_t nevents; - port_event_t event[MAX_EVENT_BATCHSZ]; - - /* - * If we've returned fd events before, we must re-associate them with the - * port now, before calling port_get(). See the block comment at the top of - * this file for an explanation of why. - */ - for (i = 0; i < state->npending; i++) { - if (state->pending_fds[i] == -1) - /* This fd has since been deleted. */ - continue; - - if (aeApiAssociate("aeApiPoll", state->portfd, - state->pending_fds[i], state->pending_masks[i]) != 0) { - /* See aeApiDelEvent for why this case is fatal. */ - abort(); - } - - state->pending_masks[i] = AE_NONE; - state->pending_fds[i] = -1; - } - - state->npending = 0; - - if (tvp != NULL) { - timeout.tv_sec = tvp->tv_sec; - timeout.tv_nsec = tvp->tv_usec * 1000; - tsp = &timeout; - } else { - tsp = NULL; - } - - /* - * port_getn can return with errno == ETIME having returned some events (!). - * So if we get ETIME, we check nevents, too. - */ - nevents = 1; - if (port_getn(state->portfd, event, MAX_EVENT_BATCHSZ, &nevents, - tsp) == -1 && (errno != ETIME || nevents == 0)) { - if (errno == ETIME || errno == EINTR) - return 0; - - /* Any other error indicates a bug. */ - panic("aeApiPoll: port_getn, %s", strerror(errno)); - } - - state->npending = nevents; - - for (i = 0; i < nevents; i++) { - mask = 0; - if (event[i].portev_events & POLLIN) - mask |= AE_READABLE; - if (event[i].portev_events & POLLOUT) - mask |= AE_WRITABLE; - - eventLoop->fired[i].fd = event[i].portev_object; - eventLoop->fired[i].mask = mask; - - if (evport_debug) - fprintf(stderr, "aeApiPoll: fd %d mask 0x%x\n", - (int)event[i].portev_object, mask); - - state->pending_fds[i] = event[i].portev_object; - state->pending_masks[i] = (uintptr_t)event[i].portev_user; - } - - return nevents; -} - -static char *aeApiName(void) { - return "evport"; -} diff --git a/examples/redis-unstable/src/ae_kqueue.c b/examples/redis-unstable/src/ae_kqueue.c deleted file mode 100644 index ec10a5e..0000000 --- a/examples/redis-unstable/src/ae_kqueue.c +++ /dev/null @@ -1,183 +0,0 @@ -/* Kqueue(2)-based ae.c module - * - * Copyright (C) 2009 Harish Mallipeddi - harish.mallipeddi@gmail.com - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Redis nor the names of its contributors may be used - * to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include -#include -#include - -typedef struct aeApiState { - int kqfd; - struct kevent *events; - - /* Events mask for merge read and write event. - * To reduce memory consumption, we use 2 bits to store the mask - * of an event, so that 1 byte will store the mask of 4 events. */ - char *eventsMask; -} aeApiState; - -#define EVENT_MASK_MALLOC_SIZE(sz) (((sz) + 3) / 4) -#define EVENT_MASK_OFFSET(fd) ((fd) % 4 * 2) -#define EVENT_MASK_ENCODE(fd, mask) (((mask) & 0x3) << EVENT_MASK_OFFSET(fd)) - -static inline int getEventMask(const char *eventsMask, int fd) { - return (eventsMask[fd/4] >> EVENT_MASK_OFFSET(fd)) & 0x3; -} - -static inline void addEventMask(char *eventsMask, int fd, int mask) { - eventsMask[fd/4] |= EVENT_MASK_ENCODE(fd, mask); -} - -static inline void resetEventMask(char *eventsMask, int fd) { - eventsMask[fd/4] &= ~EVENT_MASK_ENCODE(fd, 0x3); -} - -static int aeApiCreate(aeEventLoop *eventLoop) { - aeApiState *state = zmalloc(sizeof(aeApiState)); - - if (!state) return -1; - state->events = zmalloc(sizeof(struct kevent)*eventLoop->setsize); - if (!state->events) { - zfree(state); - return -1; - } - state->kqfd = kqueue(); - if (state->kqfd == -1) { - zfree(state->events); - zfree(state); - return -1; - } - anetCloexec(state->kqfd); - state->eventsMask = zmalloc(EVENT_MASK_MALLOC_SIZE(eventLoop->setsize)); - memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(eventLoop->setsize)); - eventLoop->apidata = state; - return 0; -} - -static int aeApiResize(aeEventLoop *eventLoop, int setsize) { - aeApiState *state = eventLoop->apidata; - - state->events = zrealloc(state->events, sizeof(struct kevent)*setsize); - state->eventsMask = zrealloc(state->eventsMask, EVENT_MASK_MALLOC_SIZE(setsize)); - memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(setsize)); - return 0; -} - -static void aeApiFree(aeEventLoop *eventLoop) { - aeApiState *state = eventLoop->apidata; - - close(state->kqfd); - zfree(state->events); - zfree(state->eventsMask); - zfree(state); -} - -static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - struct kevent evs[2]; - int nch = 0; - - if (mask & AE_READABLE) EV_SET(evs + nch++, fd, EVFILT_READ, EV_ADD, 0, 0, NULL); - if (mask & AE_WRITABLE) EV_SET(evs + nch++, fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); - - return kevent(state->kqfd, evs, nch, NULL, 0, NULL); -} - -static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - struct kevent evs[2]; - int nch = 0; - - if (mask & AE_READABLE) EV_SET(evs + nch++, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); - if (mask & AE_WRITABLE) EV_SET(evs + nch++, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); - - kevent(state->kqfd, evs, nch, NULL, 0, NULL); -} - -static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { - aeApiState *state = eventLoop->apidata; - int retval, numevents = 0; - - if (tvp != NULL) { - struct timespec timeout; - timeout.tv_sec = tvp->tv_sec; - timeout.tv_nsec = tvp->tv_usec * 1000; - retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize, - &timeout); - } else { - retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize, - NULL); - } - - if (retval > 0) { - int j; - - /* Normally we execute the read event first and then the write event. - * When the barrier is set, we will do it reverse. - * - * However, under kqueue, read and write events would be separate - * events, which would make it impossible to control the order of - * reads and writes. So we store the event's mask we've got and merge - * the same fd events later. */ - for (j = 0; j < retval; j++) { - struct kevent *e = state->events+j; - int fd = e->ident; - int mask = 0; - - if (e->filter == EVFILT_READ) mask = AE_READABLE; - else if (e->filter == EVFILT_WRITE) mask = AE_WRITABLE; - addEventMask(state->eventsMask, fd, mask); - } - - /* Re-traversal to merge read and write events, and set the fd's mask to - * 0 so that events are not added again when the fd is encountered again. */ - numevents = 0; - for (j = 0; j < retval; j++) { - struct kevent *e = state->events+j; - int fd = e->ident; - int mask = getEventMask(state->eventsMask, fd); - - if (mask) { - eventLoop->fired[numevents].fd = fd; - eventLoop->fired[numevents].mask = mask; - resetEventMask(state->eventsMask, fd); - numevents++; - } - } - } else if (retval == -1 && errno != EINTR) { - panic("aeApiPoll: kevent, %s", strerror(errno)); - } - - return numevents; -} - -static char *aeApiName(void) { - return "kqueue"; -} diff --git a/examples/redis-unstable/src/ae_select.c b/examples/redis-unstable/src/ae_select.c deleted file mode 100644 index 208cc32..0000000 --- a/examples/redis-unstable/src/ae_select.c +++ /dev/null @@ -1,90 +0,0 @@ -/* Select()-based ae.c module. - * - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - - -#include -#include - -typedef struct aeApiState { - fd_set rfds, wfds; - /* We need to have a copy of the fd sets as it's not safe to reuse - * FD sets after select(). */ - fd_set _rfds, _wfds; -} aeApiState; - -static int aeApiCreate(aeEventLoop *eventLoop) { - aeApiState *state = zmalloc(sizeof(aeApiState)); - - if (!state) return -1; - FD_ZERO(&state->rfds); - FD_ZERO(&state->wfds); - eventLoop->apidata = state; - return 0; -} - -static int aeApiResize(aeEventLoop *eventLoop, int setsize) { - AE_NOTUSED(eventLoop); - /* Just ensure we have enough room in the fd_set type. */ - if (setsize >= FD_SETSIZE) return -1; - return 0; -} - -static void aeApiFree(aeEventLoop *eventLoop) { - zfree(eventLoop->apidata); -} - -static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - - if (mask & AE_READABLE) FD_SET(fd,&state->rfds); - if (mask & AE_WRITABLE) FD_SET(fd,&state->wfds); - return 0; -} - -static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { - aeApiState *state = eventLoop->apidata; - - if (mask & AE_READABLE) FD_CLR(fd,&state->rfds); - if (mask & AE_WRITABLE) FD_CLR(fd,&state->wfds); -} - -static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) { - aeApiState *state = eventLoop->apidata; - int retval, j, numevents = 0; - - memcpy(&state->_rfds,&state->rfds,sizeof(fd_set)); - memcpy(&state->_wfds,&state->wfds,sizeof(fd_set)); - - retval = select(eventLoop->maxfd+1, - &state->_rfds,&state->_wfds,NULL,tvp); - if (retval > 0) { - for (j = 0; j <= eventLoop->maxfd; j++) { - int mask = 0; - aeFileEvent *fe = &eventLoop->events[j]; - - if (fe->mask == AE_NONE) continue; - if (fe->mask & AE_READABLE && FD_ISSET(j,&state->_rfds)) - mask |= AE_READABLE; - if (fe->mask & AE_WRITABLE && FD_ISSET(j,&state->_wfds)) - mask |= AE_WRITABLE; - eventLoop->fired[numevents].fd = j; - eventLoop->fired[numevents].mask = mask; - numevents++; - } - } else if (retval == -1 && errno != EINTR) { - panic("aeApiPoll: select, %s", strerror(errno)); - } - - return numevents; -} - -static char *aeApiName(void) { - return "select"; -} diff --git a/examples/redis-unstable/src/anet.c b/examples/redis-unstable/src/anet.c deleted file mode 100644 index 8b7b91e..0000000 --- a/examples/redis-unstable/src/anet.c +++ /dev/null @@ -1,812 +0,0 @@ -/* anet.c -- Basic TCP socket stuff made a bit less boring - * - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "fmacros.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "anet.h" -#include "config.h" -#include "util.h" - -#define UNUSED(x) (void)(x) - -static void anetSetError(char *err, const char *fmt, ...) -{ - va_list ap; - - if (!err) return; - va_start(ap, fmt); - vsnprintf(err, ANET_ERR_LEN, fmt, ap); - va_end(ap); -} - -int anetGetError(int fd) { - int sockerr = 0; - socklen_t errlen = sizeof(sockerr); - - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sockerr, &errlen) == -1) - sockerr = errno; - return sockerr; -} - -int anetSetBlock(char *err, int fd, int non_block) { - int flags; - - /* Set the socket blocking (if non_block is zero) or non-blocking. - * Note that fcntl(2) for F_GETFL and F_SETFL can't be - * interrupted by a signal. */ - if ((flags = fcntl(fd, F_GETFL)) == -1) { - anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno)); - return ANET_ERR; - } - - /* Check if this flag has been set or unset, if so, - * then there is no need to call fcntl to set/unset it again. */ - if (!!(flags & O_NONBLOCK) == !!non_block) - return ANET_OK; - - if (non_block) - flags |= O_NONBLOCK; - else - flags &= ~O_NONBLOCK; - - if (fcntl(fd, F_SETFL, flags) == -1) { - anetSetError(err, "fcntl(F_SETFL,O_NONBLOCK): %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -} - -int anetNonBlock(char *err, int fd) { - return anetSetBlock(err,fd,1); -} - -int anetBlock(char *err, int fd) { - return anetSetBlock(err,fd,0); -} - -/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks. - * This function should be invoked for fd's on specific places - * where fork + execve system calls are called. */ -int anetCloexec(int fd) { - int r; - int flags; - - do { - r = fcntl(fd, F_GETFD); - } while (r == -1 && errno == EINTR); - - if (r == -1 || (r & FD_CLOEXEC)) - return r; - - flags = r | FD_CLOEXEC; - - do { - r = fcntl(fd, F_SETFD, flags); - } while (r == -1 && errno == EINTR); - - return r; -} - -/* Enable TCP keep-alive mechanism to detect dead peers, - * TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT will be set accordingly. */ -int anetKeepAlive(char *err, int fd, int interval) -{ - int enabled = 1; - if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, sizeof(enabled))) - { - anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno)); - return ANET_ERR; - } - - int idle; - int intvl; - int cnt; - - /* There are platforms that are expected to support the full mechanism of TCP keep-alive, - * we want the compiler to emit warnings of unused variables if the preprocessor directives - * somehow fail, and other than those platforms, just omit these warnings if they happen. - */ -#if !(defined(_AIX) || defined(__APPLE__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__illumos__) || defined(__linux__) || \ - defined(__NetBSD__) || defined(__sun)) - UNUSED(interval); - UNUSED(idle); - UNUSED(intvl); - UNUSED(cnt); -#endif - -#ifdef __sun - /* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual - * compared to other Unix-like systems. - * Thus, we need to specialize it on Solaris. - * - * There are two keep-alive mechanisms on Solaris: - * - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours. - * If the peer does not respond to the probe within eight minutes, the TCP connection is aborted. - * You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD - * in milliseconds or TCP_KEEPIDLE in seconds. - * The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds. - * The maximum is ten days, while the default is two hours. If you receive no response to the probe, - * you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection. - * The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and - * abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval. - * The default is eight minutes. - * - * - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set. - * The time between each consequent probes is set by TCP_KEEPINTVL in seconds. - * The minimum value is ten seconds. The maximum is ten days, while the default is two hours. - * The TCP connection will be aborted after certain amount of probes, which is set by TCP_KEEPCNT, without receiving response. - */ - - idle = interval; - if (idle < 10) idle = 10; // kernel expects at least 10 seconds - if (idle > 10*24*60*60) idle = 10*24*60*60; // kernel expects at most 10 days - - /* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris - * until version 11.4, but let's take a chance here. */ -#if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT) - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) { - anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); - return ANET_ERR; - } - - intvl = idle/3; - if (intvl < 10) intvl = 10; /* kernel expects at least 10 seconds */ - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) { - anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); - return ANET_ERR; - } - - cnt = 3; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) { - anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); - return ANET_ERR; - } -#else - /* Fall back to the first implementation of tcp-alive mechanism for older Solaris, - * simulate the tcp-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`. - */ - idle *= 1000; // kernel expects milliseconds - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle))) { - anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); - return ANET_ERR; - } - - /* Note that the consequent probes will not be sent at equal intervals on Solaris, - * but will be sent using the exponential backoff algorithm. */ - int time_to_abort = idle; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort))) { - anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); - return ANET_ERR; - } -#endif - - return ANET_OK; - -#endif - -#ifdef TCP_KEEPIDLE - /* Default settings are more or less garbage, with the keepalive time - * set to 7200 by default on Linux and other Unix-like systems. - * Modify settings to make the feature actually useful. */ - - /* Send first probe after interval. */ - idle = interval; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) { - anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); - return ANET_ERR; - } -#elif defined(TCP_KEEPALIVE) - /* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */ - idle = interval; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &idle, sizeof(idle))) { - anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno)); - return ANET_ERR; - } -#endif - -#ifdef TCP_KEEPINTVL - /* Send next probes after the specified interval. Note that we set the - * delay as interval / 3, as we send three probes before detecting - * an error (see the next setsockopt call). */ - intvl = interval/3; - if (intvl == 0) intvl = 1; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) { - anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); - return ANET_ERR; - } -#endif - -#ifdef TCP_KEEPCNT - /* Consider the socket in error state after three we send three ACK - * probes without getting a reply. */ - cnt = 3; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) { - anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); - return ANET_ERR; - } -#endif - - return ANET_OK; -} - -static int anetSetTcpNoDelay(char *err, int fd, int val) -{ - if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) == -1) - { - anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -} - -int anetEnableTcpNoDelay(char *err, int fd) -{ - return anetSetTcpNoDelay(err, fd, 1); -} - -int anetDisableTcpNoDelay(char *err, int fd) -{ - return anetSetTcpNoDelay(err, fd, 0); -} - -/* Set the socket send timeout (SO_SNDTIMEO socket option) to the specified - * number of milliseconds, or disable it if the 'ms' argument is zero. */ -int anetSendTimeout(char *err, int fd, long long ms) { - struct timeval tv; - - tv.tv_sec = ms/1000; - tv.tv_usec = (ms%1000)*1000; - if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) { - anetSetError(err, "setsockopt SO_SNDTIMEO: %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -} - -/* Set the socket receive timeout (SO_RCVTIMEO socket option) to the specified - * number of milliseconds, or disable it if the 'ms' argument is zero. */ -int anetRecvTimeout(char *err, int fd, long long ms) { - struct timeval tv; - - tv.tv_sec = ms/1000; - tv.tv_usec = (ms%1000)*1000; - if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) { - anetSetError(err, "setsockopt SO_RCVTIMEO: %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -} - -/* Resolve the hostname "host" and set the string representation of the - * IP address into the buffer pointed by "ipbuf". - * - * If flags is set to ANET_IP_ONLY the function only resolves hostnames - * that are actually already IPv4 or IPv6 addresses. This turns the function - * into a validating / normalizing function. - * - * If the flag ANET_PREFER_IPV4 is set, IPv4 is preferred over IPv6. - * If the flag ANET_PREFER_IPV6 is set, IPv6 is preferred over IPv4. - * */ -int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, - int flags) -{ - struct addrinfo hints, *info; - int rv; - - memset(&hints,0,sizeof(hints)); - if (flags & ANET_IP_ONLY) hints.ai_flags = AI_NUMERICHOST; - hints.ai_family = AF_UNSPEC; - if (flags & ANET_PREFER_IPV4 && !(flags & ANET_PREFER_IPV6)) { - hints.ai_family = AF_INET; - } else if (flags & ANET_PREFER_IPV6 && !(flags & ANET_PREFER_IPV4)) { - hints.ai_family = AF_INET6; - } - hints.ai_socktype = SOCK_STREAM; /* specify socktype to avoid dups */ - - rv = getaddrinfo(host, NULL, &hints, &info); - if (rv != 0 && hints.ai_family != AF_UNSPEC) { - /* Try the other IP version. */ - hints.ai_family = (hints.ai_family == AF_INET) ? AF_INET6 : AF_INET; - rv = getaddrinfo(host, NULL, &hints, &info); - } - if (rv != 0) { - anetSetError(err, "%s", gai_strerror(rv)); - return ANET_ERR; - } - if (info->ai_family == AF_INET) { - struct sockaddr_in *sa = (struct sockaddr_in *)info->ai_addr; - inet_ntop(AF_INET, &(sa->sin_addr), ipbuf, ipbuf_len); - } else { - struct sockaddr_in6 *sa = (struct sockaddr_in6 *)info->ai_addr; - inet_ntop(AF_INET6, &(sa->sin6_addr), ipbuf, ipbuf_len); - } - - freeaddrinfo(info); - return ANET_OK; -} - -static int anetSetReuseAddr(char *err, int fd) { - int yes = 1; - /* Make sure connection-intensive things like the redis benchmark - * will be able to close/open sockets a zillion of times */ - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) == -1) { - anetSetError(err, "setsockopt SO_REUSEADDR: %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -} - -static int anetCreateSocket(char *err, int domain) { - int s; - if ((s = socket(domain, SOCK_STREAM, 0)) == -1) { - anetSetError(err, "creating socket: %s", strerror(errno)); - return ANET_ERR; - } - - /* Make sure connection-intensive things like the redis benchmark - * will be able to close/open sockets a zillion of times */ - if (anetSetReuseAddr(err,s) == ANET_ERR) { - close(s); - return ANET_ERR; - } - return s; -} - -#define ANET_CONNECT_NONE 0 -#define ANET_CONNECT_NONBLOCK 1 -#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */ -static int anetTcpGenericConnect(char *err, const char *addr, int port, - const char *source_addr, int flags) -{ - int s = ANET_ERR, rv; - char portstr[6]; /* strlen("65535") + 1; */ - struct addrinfo hints, *servinfo, *bservinfo, *p, *b; - - snprintf(portstr,sizeof(portstr),"%d",port); - memset(&hints,0,sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - - if ((rv = getaddrinfo(addr,portstr,&hints,&servinfo)) != 0) { - anetSetError(err, "%s", gai_strerror(rv)); - return ANET_ERR; - } - for (p = servinfo; p != NULL; p = p->ai_next) { - /* Try to create the socket and to connect it. - * If we fail in the socket() call, or on connect(), we retry with - * the next entry in servinfo. */ - if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1) - continue; - if (anetSetReuseAddr(err,s) == ANET_ERR) goto error; - if (flags & ANET_CONNECT_NONBLOCK && anetNonBlock(err,s) != ANET_OK) - goto error; - if (source_addr) { - int bound = 0; - /* Using getaddrinfo saves us from self-determining IPv4 vs IPv6 */ - if ((rv = getaddrinfo(source_addr, NULL, &hints, &bservinfo)) != 0) - { - anetSetError(err, "%s", gai_strerror(rv)); - goto error; - } - for (b = bservinfo; b != NULL; b = b->ai_next) { - if (bind(s,b->ai_addr,b->ai_addrlen) != -1) { - bound = 1; - break; - } - } - freeaddrinfo(bservinfo); - if (!bound) { - anetSetError(err, "bind: %s", strerror(errno)); - goto error; - } - } - if (connect(s,p->ai_addr,p->ai_addrlen) == -1) { - /* If the socket is non-blocking, it is ok for connect() to - * return an EINPROGRESS error here. */ - if (errno == EINPROGRESS && flags & ANET_CONNECT_NONBLOCK) - goto end; - close(s); - s = ANET_ERR; - continue; - } - - /* If we ended an iteration of the for loop without errors, we - * have a connected socket. Let's return to the caller. */ - goto end; - } - if (p == NULL) - anetSetError(err, "creating socket: %s", strerror(errno)); - -error: - if (s != ANET_ERR) { - close(s); - s = ANET_ERR; - } - -end: - freeaddrinfo(servinfo); - - /* Handle best effort binding: if a binding address was used, but it is - * not possible to create a socket, try again without a binding address. */ - if (s == ANET_ERR && source_addr && (flags & ANET_CONNECT_BE_BINDING)) { - return anetTcpGenericConnect(err,addr,port,NULL,flags); - } else { - return s; - } -} - -int anetTcpNonBlockConnect(char *err, const char *addr, int port) -{ - return anetTcpGenericConnect(err,addr,port,NULL,ANET_CONNECT_NONBLOCK); -} - -int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port, - const char *source_addr) -{ - return anetTcpGenericConnect(err,addr,port,source_addr, - ANET_CONNECT_NONBLOCK|ANET_CONNECT_BE_BINDING); -} - -int anetUnixGenericConnect(char *err, const char *path, int flags) -{ - int s; - struct sockaddr_un sa; - - if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR) - return ANET_ERR; - - sa.sun_family = AF_LOCAL; - redis_strlcpy(sa.sun_path,path,sizeof(sa.sun_path)); - if (flags & ANET_CONNECT_NONBLOCK) { - if (anetNonBlock(err,s) != ANET_OK) { - close(s); - return ANET_ERR; - } - } - if (connect(s,(struct sockaddr*)&sa,sizeof(sa)) == -1) { - if (errno == EINPROGRESS && - flags & ANET_CONNECT_NONBLOCK) - return s; - - anetSetError(err, "connect: %s", strerror(errno)); - close(s); - return ANET_ERR; - } - return s; -} - -static int anetListen(char *err, int s, struct sockaddr *sa, socklen_t len, int backlog, mode_t perm) { - if (bind(s,sa,len) == -1) { - anetSetError(err, "bind: %s", strerror(errno)); - close(s); - return ANET_ERR; - } - - if (sa->sa_family == AF_LOCAL && perm) - chmod(((struct sockaddr_un *) sa)->sun_path, perm); - - if (listen(s, backlog) == -1) { - anetSetError(err, "listen: %s", strerror(errno)); - close(s); - return ANET_ERR; - } - return ANET_OK; -} - -static int anetV6Only(char *err, int s) { - int yes = 1; - if (setsockopt(s,IPPROTO_IPV6,IPV6_V6ONLY,&yes,sizeof(yes)) == -1) { - anetSetError(err, "setsockopt: %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -} - -static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog) -{ - int s = -1, rv; - char _port[6]; /* strlen("65535") */ - struct addrinfo hints, *servinfo, *p; - - snprintf(_port,6,"%d",port); - memset(&hints,0,sizeof(hints)); - hints.ai_family = af; - hints.ai_socktype = SOCK_STREAM; - hints.ai_flags = AI_PASSIVE; /* No effect if bindaddr != NULL */ - if (bindaddr && !strcmp("*", bindaddr)) - bindaddr = NULL; - if (af == AF_INET6 && bindaddr && !strcmp("::*", bindaddr)) - bindaddr = NULL; - - if ((rv = getaddrinfo(bindaddr,_port,&hints,&servinfo)) != 0) { - anetSetError(err, "%s", gai_strerror(rv)); - return ANET_ERR; - } - for (p = servinfo; p != NULL; p = p->ai_next) { - if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1) - continue; - - if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error; - if (anetSetReuseAddr(err,s) == ANET_ERR) goto error; - if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog,0) == ANET_ERR) s = ANET_ERR; - goto end; - } - if (p == NULL) { - anetSetError(err, "unable to bind socket, errno: %d", errno); - goto error; - } - -error: - if (s != -1) close(s); - s = ANET_ERR; -end: - freeaddrinfo(servinfo); - return s; -} - -int anetTcpServer(char *err, int port, char *bindaddr, int backlog) -{ - return _anetTcpServer(err, port, bindaddr, AF_INET, backlog); -} - -int anetTcp6Server(char *err, int port, char *bindaddr, int backlog) -{ - return _anetTcpServer(err, port, bindaddr, AF_INET6, backlog); -} - -int anetUnixServer(char *err, char *path, mode_t perm, int backlog) -{ - int s; - struct sockaddr_un sa; - - if (strlen(path) > sizeof(sa.sun_path)-1) { - anetSetError(err,"unix socket path too long (%zu), must be under %zu", strlen(path), sizeof(sa.sun_path)); - return ANET_ERR; - } - if ((s = anetCreateSocket(err,AF_LOCAL)) == ANET_ERR) - return ANET_ERR; - - memset(&sa,0,sizeof(sa)); - sa.sun_family = AF_LOCAL; - redis_strlcpy(sa.sun_path,path,sizeof(sa.sun_path)); - if (anetListen(err,s,(struct sockaddr*)&sa,sizeof(sa),backlog,perm) == ANET_ERR) - return ANET_ERR; - return s; -} - -/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC. - * returns the new socket FD, or -1 on error. */ -static int anetGenericAccept(char *err, int s, struct sockaddr *sa, socklen_t *len) { - int fd; - do { - /* Use the accept4() call on linux to simultaneously accept and - * set a socket as non-blocking. */ -#ifdef HAVE_ACCEPT4 - fd = accept4(s, sa, len, SOCK_NONBLOCK | SOCK_CLOEXEC); -#else - fd = accept(s,sa,len); -#endif - } while(fd == -1 && errno == EINTR); - if (fd == -1) { - anetSetError(err, "accept: %s", strerror(errno)); - return ANET_ERR; - } -#ifndef HAVE_ACCEPT4 - if (anetCloexec(fd) == -1) { - anetSetError(err, "anetCloexec: %s", strerror(errno)); - close(fd); - return ANET_ERR; - } - if (anetNonBlock(err, fd) != ANET_OK) { - close(fd); - return ANET_ERR; - } -#endif - return fd; -} - -/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC. - * returns the new socket FD, or -1 on error. */ -int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port) { - int fd; - struct sockaddr_storage sa; - socklen_t salen = sizeof(sa); - if ((fd = anetGenericAccept(err,serversock,(struct sockaddr*)&sa,&salen)) == ANET_ERR) - return ANET_ERR; - - if (sa.ss_family == AF_INET) { - struct sockaddr_in *s = (struct sockaddr_in *)&sa; - if (ip) inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len); - if (port) *port = ntohs(s->sin_port); - } else { - struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa; - if (ip) inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len); - if (port) *port = ntohs(s->sin6_port); - } - return fd; -} - -/* Accept a connection and also make sure the socket is non-blocking, and CLOEXEC. - * returns the new socket FD, or -1 on error. */ -int anetUnixAccept(char *err, int s) { - int fd; - struct sockaddr_un sa; - socklen_t salen = sizeof(sa); - if ((fd = anetGenericAccept(err,s,(struct sockaddr*)&sa,&salen)) == ANET_ERR) - return ANET_ERR; - - return fd; -} - -int anetFdToString(int fd, char *ip, size_t ip_len, int *port, int remote) { - struct sockaddr_storage sa; - socklen_t salen = sizeof(sa); - - if (remote) { - if (getpeername(fd, (struct sockaddr *)&sa, &salen) == -1) goto error; - } else { - if (getsockname(fd, (struct sockaddr *)&sa, &salen) == -1) goto error; - } - - if (sa.ss_family == AF_INET) { - struct sockaddr_in *s = (struct sockaddr_in *)&sa; - if (ip) { - if (inet_ntop(AF_INET,(void*)&(s->sin_addr),ip,ip_len) == NULL) - goto error; - } - if (port) *port = ntohs(s->sin_port); - } else if (sa.ss_family == AF_INET6) { - struct sockaddr_in6 *s = (struct sockaddr_in6 *)&sa; - if (ip) { - if (inet_ntop(AF_INET6,(void*)&(s->sin6_addr),ip,ip_len) == NULL) - goto error; - } - if (port) *port = ntohs(s->sin6_port); - } else if (sa.ss_family == AF_UNIX) { - if (ip) { - int res = snprintf(ip, ip_len, "/unixsocket"); - if (res < 0 || (unsigned int) res >= ip_len) goto error; - } - if (port) *port = 0; - } else { - goto error; - } - return 0; - -error: - if (ip) { - if (ip_len >= 2) { - ip[0] = '?'; - ip[1] = '\0'; - } else if (ip_len == 1) { - ip[0] = '\0'; - } - } - if (port) *port = 0; - return -1; -} - -/* Create a pipe buffer with given flags for read end and write end. - * Note that it supports the file flags defined by pipe2() and fcntl(F_SETFL), - * and one of the use cases is O_CLOEXEC|O_NONBLOCK. */ -int anetPipe(int fds[2], int read_flags, int write_flags) { - int pipe_flags = 0; -#if defined(__linux__) || defined(__FreeBSD__) - /* When possible, try to leverage pipe2() to apply flags that are common to both ends. - * There is no harm to set O_CLOEXEC to prevent fd leaks. */ - pipe_flags = O_CLOEXEC | (read_flags & write_flags); - if (pipe2(fds, pipe_flags)) { - /* Fail on real failures, and fallback to simple pipe if pipe2 is unsupported. */ - if (errno != ENOSYS && errno != EINVAL) - return -1; - pipe_flags = 0; - } else { - /* If the flags on both ends are identical, no need to do anything else. */ - if ((O_CLOEXEC | read_flags) == (O_CLOEXEC | write_flags)) - return 0; - /* Clear the flags which have already been set using pipe2. */ - read_flags &= ~pipe_flags; - write_flags &= ~pipe_flags; - } -#endif - - /* When we reach here with pipe_flags of 0, it means pipe2 failed (or was not attempted), - * so we try to use pipe. Otherwise, we skip and proceed to set specific flags below. */ - if (pipe_flags == 0 && pipe(fds)) - return -1; - - /* File descriptor flags. - * Currently, only one such flag is defined: FD_CLOEXEC, the close-on-exec flag. */ - if (read_flags & O_CLOEXEC) - if (fcntl(fds[0], F_SETFD, FD_CLOEXEC)) - goto error; - if (write_flags & O_CLOEXEC) - if (fcntl(fds[1], F_SETFD, FD_CLOEXEC)) - goto error; - - /* File status flags after clearing the file descriptor flag O_CLOEXEC. */ - read_flags &= ~O_CLOEXEC; - if (read_flags) - if (fcntl(fds[0], F_SETFL, read_flags)) - goto error; - write_flags &= ~O_CLOEXEC; - if (write_flags) - if (fcntl(fds[1], F_SETFL, write_flags)) - goto error; - - return 0; - -error: - close(fds[0]); - close(fds[1]); - return -1; -} - -int anetSetSockMarkId(char *err, int fd, uint32_t id) { -#ifdef HAVE_SOCKOPTMARKID - if (setsockopt(fd, SOL_SOCKET, SOCKOPTMARKID, (void *)&id, sizeof(id)) == -1) { - anetSetError(err, "setsockopt: %s", strerror(errno)); - return ANET_ERR; - } - return ANET_OK; -#else - UNUSED(fd); - UNUSED(id); - anetSetError(err,"anetSetSockMarkid unsupported on this platform"); - return ANET_OK; -#endif -} - -int anetIsFifo(char *filepath) { - struct stat sb; - if (stat(filepath, &sb) == -1) return 0; - return S_ISFIFO(sb.st_mode); -} - -/* This function must be called after accept4() fails. It returns 1 if 'err' - * indicates accepted connection faced an error, and it's okay to continue - * accepting next connection by calling accept4() again. Other errors either - * indicate programming errors, e.g. calling accept() on a closed fd or indicate - * a resource limit has been reached, e.g. -EMFILE, open fd limit has been - * reached. In the latter case, caller might wait until resources are available. - * See accept4() documentation for details. */ -int anetAcceptFailureNeedsRetry(int err) { - if (err == ECONNABORTED) - return 1; - -#if defined(__linux__) - /* For details, see 'Error Handling' section on - * https://man7.org/linux/man-pages/man2/accept.2.html */ - if (err == ENETDOWN || err == EPROTO || err == ENOPROTOOPT || - err == EHOSTDOWN || err == ENONET || err == EHOSTUNREACH || - err == EOPNOTSUPP || err == ENETUNREACH) - { - return 1; - } -#endif - return 0; -} diff --git a/examples/redis-unstable/src/anet.h b/examples/redis-unstable/src/anet.h deleted file mode 100644 index 1d3aec9..0000000 --- a/examples/redis-unstable/src/anet.h +++ /dev/null @@ -1,58 +0,0 @@ -/* anet.c -- Basic TCP socket stuff made a bit less boring - * - * Copyright (c) 2006-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#ifndef ANET_H -#define ANET_H - -#include - -#define ANET_OK 0 -#define ANET_ERR -1 -#define ANET_ERR_LEN 256 - -/* Flags used with certain functions. */ -#define ANET_NONE 0 -#define ANET_IP_ONLY (1<<0) -#define ANET_PREFER_IPV4 (1<<1) -#define ANET_PREFER_IPV6 (1<<2) - -#if defined(__sun) || defined(_AIX) -#define AF_LOCAL AF_UNIX -#endif - -#ifdef _AIX -#undef ip_len -#endif - -int anetTcpNonBlockConnect(char *err, const char *addr, int port); -int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port, const char *source_addr); -int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags); -int anetTcpServer(char *err, int port, char *bindaddr, int backlog); -int anetTcp6Server(char *err, int port, char *bindaddr, int backlog); -int anetUnixServer(char *err, char *path, mode_t perm, int backlog); -int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port); -int anetUnixAccept(char *err, int serversock); -int anetNonBlock(char *err, int fd); -int anetBlock(char *err, int fd); -int anetCloexec(int fd); -int anetEnableTcpNoDelay(char *err, int fd); -int anetDisableTcpNoDelay(char *err, int fd); -int anetSendTimeout(char *err, int fd, long long ms); -int anetRecvTimeout(char *err, int fd, long long ms); -int anetFdToString(int fd, char *ip, size_t ip_len, int *port, int remote); -int anetKeepAlive(char *err, int fd, int interval); -int anetFormatAddr(char *fmt, size_t fmt_len, char *ip, int port); -int anetPipe(int fds[2], int read_flags, int write_flags); -int anetSetSockMarkId(char *err, int fd, uint32_t id); -int anetGetError(int fd); -int anetIsFifo(char *filepath); -int anetAcceptFailureNeedsRetry(int err); - -#endif diff --git a/examples/redis-unstable/src/aof.c b/examples/redis-unstable/src/aof.c deleted file mode 100644 index 3ace670..0000000 --- a/examples/redis-unstable/src/aof.c +++ /dev/null @@ -1,2921 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "bio.h" -#include "rio.h" -#include "functions.h" -#include "cluster_asm.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -void freeClientArgv(client *c); -off_t getAppendOnlyFileSize(sds filename, int *status); -off_t getBaseAndIncrAppendOnlyFilesSize(aofManifest *am, int *status); -int getBaseAndIncrAppendOnlyFilesNum(aofManifest *am); -int aofFileExist(char *filename); -int rewriteAppendOnlyFile(char *filename); -aofManifest *aofLoadManifestFromFile(sds am_filepath); -void aofManifestFreeAndUpdate(aofManifest *am); -void aof_background_fsync_and_close(int fd); - -/* When we call 'startAppendOnly', we will create a temp INCR AOF, and rename - * it to the real INCR AOF name when the AOFRW is done, so if want to know the - * accurate start offset of the INCR AOF, we need to record it when we create - * the temp INCR AOF. This variable is used to record the start offset, and - * set the start offset of the real INCR AOF when the AOFRW is done. */ -static long long tempIncAofStartReplOffset = 0; - -/* ---------------------------------------------------------------------------- - * AOF Manifest file implementation. - * - * The following code implements the read/write logic of AOF manifest file, which - * is used to track and manage all AOF files. - * - * Append-only files consist of three types: - * - * BASE: Represents a Redis snapshot from the time of last AOF rewrite. The manifest - * file contains at most a single BASE file, which will always be the first file in the - * list. - * - * INCR: Represents all write commands executed by Redis following the last successful - * AOF rewrite. In some cases it is possible to have several ordered INCR files. For - * example: - * - During an on-going AOF rewrite - * - After an AOF rewrite was aborted/failed, and before the next one succeeded. - * - * HISTORY: After a successful rewrite, the previous BASE and INCR become HISTORY files. - * They will be automatically removed unless garbage collection is disabled. - * - * The following is a possible AOF manifest file content: - * - * file appendonly.aof.2.base.rdb seq 2 type b - * file appendonly.aof.1.incr.aof seq 1 type h - * file appendonly.aof.2.incr.aof seq 2 type h - * file appendonly.aof.3.incr.aof seq 3 type h - * file appendonly.aof.4.incr.aof seq 4 type i - * file appendonly.aof.5.incr.aof seq 5 type i - * ------------------------------------------------------------------------- */ - -/* Naming rules. */ -#define BASE_FILE_SUFFIX ".base" -#define INCR_FILE_SUFFIX ".incr" -#define RDB_FORMAT_SUFFIX ".rdb" -#define AOF_FORMAT_SUFFIX ".aof" -#define MANIFEST_NAME_SUFFIX ".manifest" -#define TEMP_FILE_NAME_PREFIX "temp-" - -/* AOF manifest key. */ -#define AOF_MANIFEST_KEY_FILE_NAME "file" -#define AOF_MANIFEST_KEY_FILE_SEQ "seq" -#define AOF_MANIFEST_KEY_FILE_TYPE "type" -#define AOF_MANIFEST_KEY_FILE_STARTOFFSET "startoffset" -#define AOF_MANIFEST_KEY_FILE_ENDOFFSET "endoffset" - -/* Create an empty aofInfo. */ -aofInfo *aofInfoCreate(void) { - aofInfo *ai = zcalloc(sizeof(aofInfo)); - ai->start_offset = -1; - ai->end_offset = -1; - return ai; -} - -/* Free the aofInfo structure (pointed to by ai) and its embedded file_name. */ -void aofInfoFree(aofInfo *ai) { - serverAssert(ai != NULL); - if (ai->file_name) sdsfree(ai->file_name); - zfree(ai); -} - -/* Deep copy an aofInfo. */ -aofInfo *aofInfoDup(aofInfo *orig) { - serverAssert(orig != NULL); - aofInfo *ai = aofInfoCreate(); - ai->file_name = sdsdup(orig->file_name); - ai->file_seq = orig->file_seq; - ai->file_type = orig->file_type; - ai->start_offset = orig->start_offset; - ai->end_offset = orig->end_offset; - return ai; -} - -/* Format aofInfo as a string and it will be a line in the manifest. - * - * When update this format, make sure to update redis-check-aof as well. */ -sds aofInfoFormat(sds buf, aofInfo *ai) { - sds filename_repr = NULL; - - if (sdsneedsrepr(ai->file_name)) - filename_repr = sdscatrepr(sdsempty(), ai->file_name, sdslen(ai->file_name)); - - sds ret = sdscatprintf(buf, "%s %s %s %lld %s %c", - AOF_MANIFEST_KEY_FILE_NAME, filename_repr ? filename_repr : ai->file_name, - AOF_MANIFEST_KEY_FILE_SEQ, ai->file_seq, - AOF_MANIFEST_KEY_FILE_TYPE, ai->file_type); - - if (ai->start_offset != -1) { - ret = sdscatprintf(ret, " %s %lld", AOF_MANIFEST_KEY_FILE_STARTOFFSET, ai->start_offset); - if (ai->end_offset != -1) { - ret = sdscatprintf(ret, " %s %lld", AOF_MANIFEST_KEY_FILE_ENDOFFSET, ai->end_offset); - } - } - - ret = sdscatlen(ret, "\n", 1); - sdsfree(filename_repr); - - return ret; -} - -/* Method to free AOF list elements. */ -void aofListFree(void *item) { - aofInfo *ai = (aofInfo *)item; - aofInfoFree(ai); -} - -/* Method to duplicate AOF list elements. */ -void *aofListDup(void *item) { - return aofInfoDup(item); -} - -/* Create an empty aofManifest, which will be called in `aofLoadManifestFromDisk`. */ -aofManifest *aofManifestCreate(void) { - aofManifest *am = zcalloc(sizeof(aofManifest)); - am->incr_aof_list = listCreate(); - am->history_aof_list = listCreate(); - listSetFreeMethod(am->incr_aof_list, aofListFree); - listSetDupMethod(am->incr_aof_list, aofListDup); - listSetFreeMethod(am->history_aof_list, aofListFree); - listSetDupMethod(am->history_aof_list, aofListDup); - return am; -} - -/* Free the aofManifest structure (pointed to by am) and its embedded members. */ -void aofManifestFree(aofManifest *am) { - if (am->base_aof_info) aofInfoFree(am->base_aof_info); - if (am->incr_aof_list) listRelease(am->incr_aof_list); - if (am->history_aof_list) listRelease(am->history_aof_list); - zfree(am); -} - -sds getAofManifestFileName(void) { - return sdscatprintf(sdsempty(), "%s%s", server.aof_filename, - MANIFEST_NAME_SUFFIX); -} - -sds getTempAofManifestFileName(void) { - return sdscatprintf(sdsempty(), "%s%s%s", TEMP_FILE_NAME_PREFIX, - server.aof_filename, MANIFEST_NAME_SUFFIX); -} - -sds appendAofInfoFromList(sds buf, list *aofList) { - listNode *ln; - listIter li; - - listRewind(aofList, &li); - while ((ln = listNext(&li)) != NULL) { - aofInfo *ai = (aofInfo*)ln->value; - buf = aofInfoFormat(buf, ai); - } - - return buf; -} - -/* Returns the string representation of aofManifest pointed to by am. - * - * The string is multiple lines separated by '\n', and each line represents - * an AOF file. - * - * Each line is space delimited and contains 6 fields, as follows: - * "file" [filename] "seq" [sequence] "type" [type] - * - * Where "file", "seq" and "type" are keywords that describe the next value, - * [filename] and [sequence] describe file name and order, and [type] is one - * of 'b' (base), 'h' (history) or 'i' (incr). - * - * The base file, if exists, will always be first, followed by history files, - * and incremental files. - */ -sds getAofManifestAsString(aofManifest *am) { - serverAssert(am != NULL); - - sds buf = sdsempty(); - - /* 1. Add BASE File information, it is always at the beginning - * of the manifest file. */ - if (am->base_aof_info) { - buf = aofInfoFormat(buf, am->base_aof_info); - } - - /* 2. Add HISTORY type AOF information. */ - buf = appendAofInfoFromList(buf, am->history_aof_list); - - /* 3. Add INCR type AOF information. */ - buf = appendAofInfoFromList(buf, am->incr_aof_list); - - return buf; -} - -/* Load the manifest information from the disk to `server.aof_manifest` - * when the Redis server start. - * - * During loading, this function does strict error checking and will abort - * the entire Redis server process on error (I/O error, invalid format, etc.) - * - * If the AOF directory or manifest file do not exist, this will be ignored - * in order to support seamless upgrades from previous versions which did not - * use them. - */ -void aofLoadManifestFromDisk(void) { - server.aof_manifest = aofManifestCreate(); - if (!dirExists(server.aof_dirname)) { - serverLog(LL_DEBUG, "The AOF directory %s doesn't exist", server.aof_dirname); - return; - } - - sds am_name = getAofManifestFileName(); - sds am_filepath = makePath(server.aof_dirname, am_name); - if (!fileExist(am_filepath)) { - serverLog(LL_DEBUG, "The AOF manifest file %s doesn't exist", am_name); - sdsfree(am_name); - sdsfree(am_filepath); - return; - } - - aofManifest *am = aofLoadManifestFromFile(am_filepath); - if (am) aofManifestFreeAndUpdate(am); - sdsfree(am_name); - sdsfree(am_filepath); -} - -/* Generic manifest loading function, used in `aofLoadManifestFromDisk` and redis-check-aof tool. */ -#define MANIFEST_MAX_LINE 1024 -aofManifest *aofLoadManifestFromFile(sds am_filepath) { - const char *err = NULL; - long long maxseq = 0; - - aofManifest *am = aofManifestCreate(); - FILE *fp = fopen(am_filepath, "r"); - if (fp == NULL) { - serverLog(LL_WARNING, "Fatal error: can't open the AOF manifest " - "file %s for reading: %s", am_filepath, strerror(errno)); - exit(1); - } - - char buf[MANIFEST_MAX_LINE+1]; - sds *argv = NULL; - int argc; - aofInfo *ai = NULL; - - sds line = NULL; - int linenum = 0; - - while (1) { - if (fgets(buf, MANIFEST_MAX_LINE+1, fp) == NULL) { - if (feof(fp)) { - if (linenum == 0) { - err = "Found an empty AOF manifest"; - goto loaderr; - } else { - break; - } - } else { - err = "Read AOF manifest failed"; - goto loaderr; - } - } - - linenum++; - - /* Skip comments lines */ - if (buf[0] == '#') continue; - - if (strchr(buf, '\n') == NULL) { - err = "The AOF manifest file contains too long line"; - goto loaderr; - } - - line = sdstrim(sdsnew(buf), " \t\r\n"); - if (!sdslen(line)) { - err = "Invalid AOF manifest file format"; - goto loaderr; - } - - argv = sdssplitargs(line, &argc); - /* 'argc < 6' was done for forward compatibility. */ - if (argv == NULL || argc < 6 || (argc % 2)) { - err = "Invalid AOF manifest file format"; - goto loaderr; - } - - ai = aofInfoCreate(); - for (int i = 0; i < argc; i += 2) { - if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_NAME)) { - ai->file_name = sdsnew(argv[i+1]); - if (!pathIsBaseName(ai->file_name)) { - err = "File can't be a path, just a filename"; - goto loaderr; - } - } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_SEQ)) { - ai->file_seq = atoll(argv[i+1]); - } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_TYPE)) { - ai->file_type = (argv[i+1])[0]; - } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_STARTOFFSET)) { - ai->start_offset = atoll(argv[i+1]); - } else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_FILE_ENDOFFSET)) { - ai->end_offset = atoll(argv[i+1]); - } - /* else if (!strcasecmp(argv[i], AOF_MANIFEST_KEY_OTHER)) {} */ - } - - /* We have to make sure we load all the information. */ - if (!ai->file_name || !ai->file_seq || !ai->file_type) { - err = "Invalid AOF manifest file format"; - goto loaderr; - } - - sdsfreesplitres(argv, argc); - argv = NULL; - - if (ai->file_type == AOF_FILE_TYPE_BASE) { - if (am->base_aof_info) { - err = "Found duplicate base file information"; - goto loaderr; - } - am->base_aof_info = ai; - am->curr_base_file_seq = ai->file_seq; - } else if (ai->file_type == AOF_FILE_TYPE_HIST) { - listAddNodeTail(am->history_aof_list, ai); - } else if (ai->file_type == AOF_FILE_TYPE_INCR) { - if (ai->file_seq <= maxseq) { - err = "Found a non-monotonic sequence number"; - goto loaderr; - } - listAddNodeTail(am->incr_aof_list, ai); - am->curr_incr_file_seq = ai->file_seq; - maxseq = ai->file_seq; - } else { - err = "Unknown AOF file type"; - goto loaderr; - } - - sdsfree(line); - line = NULL; - ai = NULL; - } - - fclose(fp); - return am; - -loaderr: - /* Sanitizer suppression: may report a false positive if we goto loaderr - * and exit(1) without freeing these allocations. */ - if (argv) sdsfreesplitres(argv, argc); - if (ai) aofInfoFree(ai); - - serverLog(LL_WARNING, "\n*** FATAL AOF MANIFEST FILE ERROR ***\n"); - if (line) { - serverLog(LL_WARNING, "Reading the manifest file, at line %d\n", linenum); - serverLog(LL_WARNING, ">>> '%s'\n", line); - } - serverLog(LL_WARNING, "%s\n", err); - exit(1); -} - -/* Deep copy an aofManifest from orig. - * - * In `backgroundRewriteDoneHandler` and `openNewIncrAofForAppend`, we will - * first deep copy a temporary AOF manifest from the `server.aof_manifest` and - * try to modify it. Once everything is modified, we will atomically make the - * `server.aof_manifest` point to this temporary aof_manifest. - */ -aofManifest *aofManifestDup(aofManifest *orig) { - serverAssert(orig != NULL); - aofManifest *am = zcalloc(sizeof(aofManifest)); - - am->curr_base_file_seq = orig->curr_base_file_seq; - am->curr_incr_file_seq = orig->curr_incr_file_seq; - am->dirty = orig->dirty; - - if (orig->base_aof_info) { - am->base_aof_info = aofInfoDup(orig->base_aof_info); - } - - am->incr_aof_list = listDup(orig->incr_aof_list); - am->history_aof_list = listDup(orig->history_aof_list); - serverAssert(am->incr_aof_list != NULL); - serverAssert(am->history_aof_list != NULL); - return am; -} - -/* Change the `server.aof_manifest` pointer to 'am' and free the previous - * one if we have. */ -void aofManifestFreeAndUpdate(aofManifest *am) { - serverAssert(am != NULL); - if (server.aof_manifest) aofManifestFree(server.aof_manifest); - server.aof_manifest = am; -} - -/* Called in `backgroundRewriteDoneHandler` to get a new BASE file - * name, and mark the previous (if we have) BASE file as HISTORY type. - * - * BASE file naming rules: `server.aof_filename`.seq.base.format - * - * for example: - * appendonly.aof.1.base.aof (server.aof_use_rdb_preamble is no) - * appendonly.aof.1.base.rdb (server.aof_use_rdb_preamble is yes) - */ -sds getNewBaseFileNameAndMarkPreAsHistory(aofManifest *am) { - serverAssert(am != NULL); - if (am->base_aof_info) { - serverAssert(am->base_aof_info->file_type == AOF_FILE_TYPE_BASE); - am->base_aof_info->file_type = AOF_FILE_TYPE_HIST; - listAddNodeHead(am->history_aof_list, am->base_aof_info); - } - - char *format_suffix = server.aof_use_rdb_preamble ? - RDB_FORMAT_SUFFIX:AOF_FORMAT_SUFFIX; - - aofInfo *ai = aofInfoCreate(); - ai->file_name = sdscatprintf(sdsempty(), "%s.%lld%s%s", server.aof_filename, - ++am->curr_base_file_seq, BASE_FILE_SUFFIX, format_suffix); - ai->file_seq = am->curr_base_file_seq; - ai->file_type = AOF_FILE_TYPE_BASE; - am->base_aof_info = ai; - am->dirty = 1; - return am->base_aof_info->file_name; -} - -/* Get a new INCR type AOF name. - * - * INCR AOF naming rules: `server.aof_filename`.seq.incr.aof - * - * for example: - * appendonly.aof.1.incr.aof - */ -sds getNewIncrAofName(aofManifest *am, long long start_reploff) { - aofInfo *ai = aofInfoCreate(); - ai->file_type = AOF_FILE_TYPE_INCR; - ai->file_name = sdscatprintf(sdsempty(), "%s.%lld%s%s", server.aof_filename, - ++am->curr_incr_file_seq, INCR_FILE_SUFFIX, AOF_FORMAT_SUFFIX); - ai->file_seq = am->curr_incr_file_seq; - ai->start_offset = start_reploff; - listAddNodeTail(am->incr_aof_list, ai); - am->dirty = 1; - return ai->file_name; -} - -/* Get temp INCR type AOF name. */ -sds getTempIncrAofName(void) { - return sdscatprintf(sdsempty(), "%s%s%s", TEMP_FILE_NAME_PREFIX, server.aof_filename, - INCR_FILE_SUFFIX); -} - -/* Get the last INCR AOF name or create a new one. */ -sds getLastIncrAofName(aofManifest *am) { - serverAssert(am != NULL); - - /* If 'incr_aof_list' is empty, just create a new one. */ - if (!listLength(am->incr_aof_list)) { - return getNewIncrAofName(am, server.master_repl_offset); - } - - /* Or return the last one. */ - listNode *lastnode = listIndex(am->incr_aof_list, -1); - aofInfo *ai = listNodeValue(lastnode); - return ai->file_name; -} - -/* Called in `backgroundRewriteDoneHandler`. when AOFRW success, This - * function will change the AOF file type in 'incr_aof_list' from - * AOF_FILE_TYPE_INCR to AOF_FILE_TYPE_HIST, and move them to the - * 'history_aof_list'. - */ -void markRewrittenIncrAofAsHistory(aofManifest *am) { - serverAssert(am != NULL); - if (!listLength(am->incr_aof_list)) { - return; - } - - listNode *ln; - listIter li; - - listRewindTail(am->incr_aof_list, &li); - - /* "server.aof_fd != -1" means AOF enabled, then we must skip the - * last AOF, because this file is our currently writing. */ - if (server.aof_fd != -1) { - ln = listNext(&li); - serverAssert(ln != NULL); - } - - /* Move aofInfo from 'incr_aof_list' to 'history_aof_list'. */ - while ((ln = listNext(&li)) != NULL) { - aofInfo *ai = (aofInfo*)ln->value; - serverAssert(ai->file_type == AOF_FILE_TYPE_INCR); - - aofInfo *hai = aofInfoDup(ai); - hai->file_type = AOF_FILE_TYPE_HIST; - listAddNodeHead(am->history_aof_list, hai); - listDelNode(am->incr_aof_list, ln); - } - - am->dirty = 1; -} - -/* Write the formatted manifest string to disk. */ -int writeAofManifestFile(sds buf) { - int ret = C_OK; - ssize_t nwritten; - int len; - - sds am_name = getAofManifestFileName(); - sds am_filepath = makePath(server.aof_dirname, am_name); - sds tmp_am_name = getTempAofManifestFileName(); - sds tmp_am_filepath = makePath(server.aof_dirname, tmp_am_name); - - int fd = open(tmp_am_filepath, O_WRONLY|O_TRUNC|O_CREAT, 0644); - if (fd == -1) { - serverLog(LL_WARNING, "Can't open the AOF manifest file %s: %s", - tmp_am_name, strerror(errno)); - - ret = C_ERR; - goto cleanup; - } - - len = sdslen(buf); - while(len) { - nwritten = write(fd, buf, len); - - if (nwritten < 0) { - if (errno == EINTR) continue; - - serverLog(LL_WARNING, "Error trying to write the temporary AOF manifest file %s: %s", - tmp_am_name, strerror(errno)); - - ret = C_ERR; - goto cleanup; - } - - len -= nwritten; - buf += nwritten; - } - - if (redis_fsync(fd) == -1) { - serverLog(LL_WARNING, "Fail to fsync the temp AOF file %s: %s.", - tmp_am_name, strerror(errno)); - - ret = C_ERR; - goto cleanup; - } - - if (rename(tmp_am_filepath, am_filepath) != 0) { - serverLog(LL_WARNING, - "Error trying to rename the temporary AOF manifest file %s into %s: %s", - tmp_am_name, am_name, strerror(errno)); - - ret = C_ERR; - goto cleanup; - } - - /* Also sync the AOF directory as new AOF files may be added in the directory */ - if (fsyncFileDir(am_filepath) == -1) { - serverLog(LL_WARNING, "Fail to fsync AOF directory %s: %s.", - am_filepath, strerror(errno)); - - ret = C_ERR; - goto cleanup; - } - -cleanup: - if (fd != -1) close(fd); - sdsfree(am_name); - sdsfree(am_filepath); - sdsfree(tmp_am_name); - sdsfree(tmp_am_filepath); - return ret; -} - -/* Persist the aofManifest information pointed to by am to disk. */ -int persistAofManifest(aofManifest *am) { - if (am->dirty == 0) { - return C_OK; - } - - sds amstr = getAofManifestAsString(am); - int ret = writeAofManifestFile(amstr); - sdsfree(amstr); - if (ret == C_OK) am->dirty = 0; - return ret; -} - -/* Called in `loadAppendOnlyFiles` when we upgrade from a old version redis. - * - * 1) Create AOF directory use 'server.aof_dirname' as the name. - * 2) Use 'server.aof_filename' to construct a BASE type aofInfo and add it to - * aofManifest, then persist the manifest file to AOF directory. - * 3) Move the old AOF file (server.aof_filename) to AOF directory. - * - * If any of the above steps fails or crash occurs, this will not cause any - * problems, and redis will retry the upgrade process when it restarts. - */ -void aofUpgradePrepare(aofManifest *am) { - serverAssert(!aofFileExist(server.aof_filename)); - - /* Create AOF directory use 'server.aof_dirname' as the name. */ - if (dirCreateIfMissing(server.aof_dirname) == -1) { - serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s", - server.aof_dirname, strerror(errno)); - exit(1); - } - - /* Manually construct a BASE type aofInfo and add it to aofManifest. */ - if (am->base_aof_info) aofInfoFree(am->base_aof_info); - aofInfo *ai = aofInfoCreate(); - ai->file_name = sdsnew(server.aof_filename); - ai->file_seq = 1; - ai->file_type = AOF_FILE_TYPE_BASE; - am->base_aof_info = ai; - am->curr_base_file_seq = 1; - am->dirty = 1; - - /* Persist the manifest file to AOF directory. */ - if (persistAofManifest(am) != C_OK) { - exit(1); - } - - /* Move the old AOF file to AOF directory. */ - sds aof_filepath = makePath(server.aof_dirname, server.aof_filename); - if (rename(server.aof_filename, aof_filepath) == -1) { - serverLog(LL_WARNING, - "Error trying to move the old AOF file %s into dir %s: %s", - server.aof_filename, - server.aof_dirname, - strerror(errno)); - sdsfree(aof_filepath); - exit(1); - } - sdsfree(aof_filepath); - - serverLog(LL_NOTICE, "Successfully migrated an old-style AOF file (%s) into the AOF directory (%s).", - server.aof_filename, server.aof_dirname); -} - -/* When AOFRW success, the previous BASE and INCR AOFs will - * become HISTORY type and be moved into 'history_aof_list'. - * - * The function will traverse the 'history_aof_list' and submit - * the delete task to the bio thread. - */ -int aofDelHistoryFiles(void) { - if (server.aof_manifest == NULL || - server.aof_disable_auto_gc == 1 || - !listLength(server.aof_manifest->history_aof_list)) - { - return C_OK; - } - - listNode *ln; - listIter li; - - listRewind(server.aof_manifest->history_aof_list, &li); - while ((ln = listNext(&li)) != NULL) { - aofInfo *ai = (aofInfo*)ln->value; - serverAssert(ai->file_type == AOF_FILE_TYPE_HIST); - serverLog(LL_NOTICE, "Removing the history file %s in the background", ai->file_name); - sds aof_filepath = makePath(server.aof_dirname, ai->file_name); - bg_unlink(aof_filepath); - sdsfree(aof_filepath); - listDelNode(server.aof_manifest->history_aof_list, ln); - } - - server.aof_manifest->dirty = 1; - return persistAofManifest(server.aof_manifest); -} - -/* Used to clean up temp INCR AOF when AOFRW fails. */ -void aofDelTempIncrAofFile(void) { - sds aof_filename = getTempIncrAofName(); - sds aof_filepath = makePath(server.aof_dirname, aof_filename); - serverLog(LL_NOTICE, "Removing the temp incr aof file %s in the background", aof_filename); - bg_unlink(aof_filepath); - sdsfree(aof_filepath); - sdsfree(aof_filename); - return; -} - -/* Called after `loadDataFromDisk` when redis start. If `server.aof_state` is - * 'AOF_ON', It will do three things: - * 1. Force create a BASE file when redis starts with an empty dataset - * 2. Open the last opened INCR type AOF for writing, If not, create a new one - * 3. Synchronously update the manifest file to the disk - * - * If any of the above steps fails, the redis process will exit. - */ -void aofOpenIfNeededOnServerStart(void) { - if (server.aof_state != AOF_ON) { - return; - } - - serverAssert(server.aof_manifest != NULL); - serverAssert(server.aof_fd == -1); - - if (dirCreateIfMissing(server.aof_dirname) == -1) { - serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s", - server.aof_dirname, strerror(errno)); - exit(1); - } - - /* If we start with an empty dataset, we will force create a BASE file. */ - size_t incr_aof_len = listLength(server.aof_manifest->incr_aof_list); - if (!server.aof_manifest->base_aof_info && !incr_aof_len) { - sds base_name = getNewBaseFileNameAndMarkPreAsHistory(server.aof_manifest); - sds base_filepath = makePath(server.aof_dirname, base_name); - if (rewriteAppendOnlyFile(base_filepath) != C_OK) { - exit(1); - } - sdsfree(base_filepath); - serverLog(LL_NOTICE, "Creating AOF base file %s on server start", - base_name); - } - - /* Because we will 'exit(1)' if open AOF or persistent manifest fails, so - * we don't need atomic modification here. */ - sds aof_name = getLastIncrAofName(server.aof_manifest); - - /* Here we should use 'O_APPEND' flag. */ - sds aof_filepath = makePath(server.aof_dirname, aof_name); - server.aof_fd = open(aof_filepath, O_WRONLY|O_APPEND|O_CREAT, 0644); - sdsfree(aof_filepath); - if (server.aof_fd == -1) { - serverLog(LL_WARNING, "Can't open the append-only file %s: %s", - aof_name, strerror(errno)); - exit(1); - } - - /* Persist our changes. */ - int ret = persistAofManifest(server.aof_manifest); - if (ret != C_OK) { - exit(1); - } - - server.aof_last_incr_size = getAppendOnlyFileSize(aof_name, NULL); - server.aof_last_incr_fsync_offset = server.aof_last_incr_size; - - if (incr_aof_len) { - serverLog(LL_NOTICE, "Opening AOF incr file %s on server start", aof_name); - } else { - serverLog(LL_NOTICE, "Creating AOF incr file %s on server start", aof_name); - } -} - -int aofFileExist(char *filename) { - sds file_path = makePath(server.aof_dirname, filename); - int ret = fileExist(file_path); - sdsfree(file_path); - return ret; -} - -/* Called in `rewriteAppendOnlyFileBackground`. If `server.aof_state` - * is 'AOF_ON', It will do two things: - * 1. Open a new INCR type AOF for writing - * 2. Synchronously update the manifest file to the disk - * - * The above two steps of modification are atomic, that is, if - * any step fails, the entire operation will rollback and returns - * C_ERR, and if all succeeds, it returns C_OK. - * - * If `server.aof_state` is 'AOF_WAIT_REWRITE', It will open a temporary INCR AOF - * file to accumulate data during AOF_WAIT_REWRITE, and it will eventually be - * renamed in the `backgroundRewriteDoneHandler` and written to the manifest file. - * */ -int openNewIncrAofForAppend(void) { - serverAssert(server.aof_manifest != NULL); - int newfd = -1; - aofManifest *temp_am = NULL; - sds new_aof_name = NULL; - - /* Only open new INCR AOF when AOF enabled. */ - if (server.aof_state == AOF_OFF) return C_OK; - - /* Open new AOF. */ - if (server.aof_state == AOF_WAIT_REWRITE) { - /* Use a temporary INCR AOF file to accumulate data during AOF_WAIT_REWRITE. */ - new_aof_name = getTempIncrAofName(); - tempIncAofStartReplOffset = server.master_repl_offset; - } else { - /* Dup a temp aof_manifest to modify. */ - temp_am = aofManifestDup(server.aof_manifest); - new_aof_name = sdsdup(getNewIncrAofName(temp_am, server.master_repl_offset)); - } - sds new_aof_filepath = makePath(server.aof_dirname, new_aof_name); - newfd = open(new_aof_filepath, O_WRONLY|O_TRUNC|O_CREAT, 0644); - sdsfree(new_aof_filepath); - if (newfd == -1) { - serverLog(LL_WARNING, "Can't open the append-only file %s: %s", - new_aof_name, strerror(errno)); - goto cleanup; - } - - if (temp_am) { - /* Persist AOF Manifest. */ - if (persistAofManifest(temp_am) == C_ERR) { - goto cleanup; - } - } - - serverLog(LL_NOTICE, "Creating AOF incr file %s on background rewrite", - new_aof_name); - sdsfree(new_aof_name); - - /* If reaches here, we can safely modify the `server.aof_manifest` - * and `server.aof_fd`. */ - - /* fsync and close old aof_fd if needed. In fsync everysec it's ok to delay - * the fsync as long as we grantee it happens, and in fsync always the file - * is already synced at this point so fsync doesn't matter. */ - if (server.aof_fd != -1) { - aof_background_fsync_and_close(server.aof_fd); - server.aof_last_fsync = server.mstime; - } - server.aof_fd = newfd; - - /* Reset the aof_last_incr_size. */ - server.aof_last_incr_size = 0; - /* Reset the aof_last_incr_fsync_offset. */ - server.aof_last_incr_fsync_offset = 0; - /* Update `server.aof_manifest`. */ - if (temp_am) aofManifestFreeAndUpdate(temp_am); - return C_OK; - -cleanup: - if (new_aof_name) sdsfree(new_aof_name); - if (newfd != -1) close(newfd); - if (temp_am) aofManifestFree(temp_am); - return C_ERR; -} - -/* When we close gracefully the AOF file, we have the chance to persist the - * end replication offset of current INCR AOF. */ -void updateCurIncrAofEndOffset(void) { - if (server.aof_state != AOF_ON) return; - serverAssert(server.aof_manifest != NULL); - - if (listLength(server.aof_manifest->incr_aof_list) == 0) return; - aofInfo *ai = listNodeValue(listLast(server.aof_manifest->incr_aof_list)); - ai->end_offset = server.master_repl_offset; - server.aof_manifest->dirty = 1; - /* It doesn't matter if the persistence fails since this information is not - * critical, we can get an approximate value by start offset plus file size. */ - persistAofManifest(server.aof_manifest); -} - -/* After loading AOF data, we need to update the `server.master_repl_offset` - * based on the information of the last INCR AOF, to avoid the rollback of - * the start offset of new INCR AOF. */ -void updateReplOffsetAndResetEndOffset(void) { - if (server.aof_state != AOF_ON) return; - serverAssert(server.aof_manifest != NULL); - - /* If the INCR file has an end offset, we directly use it, and clear it - * to avoid the next time we load the manifest file, we will use the same - * offset, but the real offset may have advanced. */ - if (listLength(server.aof_manifest->incr_aof_list) == 0) return; - aofInfo *ai = listNodeValue(listLast(server.aof_manifest->incr_aof_list)); - if (ai->end_offset != -1) { - server.master_repl_offset = ai->end_offset; - ai->end_offset = -1; - server.aof_manifest->dirty = 1; - /* We must update the end offset of INCR file correctly, otherwise we - * may keep wrong information in the manifest file, since we continue - * to append data to the same INCR file. */ - if (persistAofManifest(server.aof_manifest) != AOF_OK) - exit(1); - } else { - /* If the INCR file doesn't have an end offset, we need to calculate - * the replication offset by the start offset plus the file size. */ - server.master_repl_offset = (ai->start_offset == -1 ? 0 : ai->start_offset) + - getAppendOnlyFileSize(ai->file_name, NULL); - } -} - -/* Whether to limit the execution of Background AOF rewrite. - * - * At present, if AOFRW fails, redis will automatically retry. If it continues - * to fail, we may get a lot of very small INCR files. so we need an AOFRW - * limiting measure. - * - * We can't directly use `server.aof_current_size` and `server.aof_last_incr_size`, - * because there may be no new writes after AOFRW fails. - * - * So, we use time delay to achieve our goal. When AOFRW fails, we delay the execution - * of the next AOFRW by 1 minute. If the next AOFRW also fails, it will be delayed by 2 - * minutes. The next is 4, 8, 16, the maximum delay is 60 minutes (1 hour). - * - * During the limit period, we can still use the 'bgrewriteaof' command to execute AOFRW - * immediately. - * - * Return 1 means that AOFRW is limited and cannot be executed. 0 means that we can execute - * AOFRW, which may be that we have reached the 'next_rewrite_time' or the number of INCR - * AOFs has not reached the limit threshold. - * */ -#define AOF_REWRITE_LIMITE_THRESHOLD 3 -#define AOF_REWRITE_LIMITE_MAX_MINUTES 60 /* 1 hour */ -int aofRewriteLimited(void) { - static int next_delay_minutes = 0; - static time_t next_rewrite_time = 0; - - if (server.stat_aofrw_consecutive_failures < AOF_REWRITE_LIMITE_THRESHOLD) { - /* We may be recovering from limited state, so reset all states. */ - next_delay_minutes = 0; - next_rewrite_time = 0; - return 0; - } - - /* if it is in the limiting state, then check if the next_rewrite_time is reached */ - if (next_rewrite_time != 0) { - if (server.unixtime < next_rewrite_time) { - return 1; - } else { - next_rewrite_time = 0; - return 0; - } - } - - next_delay_minutes = (next_delay_minutes == 0) ? 1 : (next_delay_minutes * 2); - if (next_delay_minutes > AOF_REWRITE_LIMITE_MAX_MINUTES) { - next_delay_minutes = AOF_REWRITE_LIMITE_MAX_MINUTES; - } - - next_rewrite_time = server.unixtime + next_delay_minutes * 60; - serverLog(LL_WARNING, - "Background AOF rewrite has repeatedly failed and triggered the limit, will retry in %d minutes", next_delay_minutes); - return 1; -} - -/* ---------------------------------------------------------------------------- - * AOF file implementation - * ------------------------------------------------------------------------- */ - -/* Return true if an AOf fsync is currently already in progress in a - * BIO thread. */ -int aofFsyncInProgress(void) { - /* Note that we don't care about aof_background_fsync_and_close because - * server.aof_fd has been replaced by the new INCR AOF file fd, - * see openNewIncrAofForAppend. */ - return bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; -} - -/* Starts a background task that performs fsync() against the specified - * file descriptor (the one of the AOF file) in another thread. */ -void aof_background_fsync(int fd) { - bioCreateFsyncJob(fd, server.master_repl_offset, 1); -} - -/* Close the fd on the basis of aof_background_fsync. */ -void aof_background_fsync_and_close(int fd) { - bioCreateCloseAofJob(fd, server.master_repl_offset, 1); -} - -/* Kills an AOFRW child process if exists */ -void killAppendOnlyChild(void) { - int statloc; - /* No AOFRW child? return. */ - if (server.child_type != CHILD_TYPE_AOF) return; - /* Kill AOFRW child, wait for child exit. */ - serverLog(LL_NOTICE,"Killing running AOF rewrite child: %ld", - (long) server.child_pid); - if (kill(server.child_pid,SIGUSR1) != -1) { - while(waitpid(-1, &statloc, 0) != server.child_pid); - } - aofRemoveTempFile(server.child_pid); - resetChildState(); - server.aof_rewrite_time_start = -1; -} - -/* Called when the user switches from "appendonly yes" to "appendonly no" - * at runtime using the CONFIG command. */ -void stopAppendOnly(void) { - serverAssert(server.aof_state != AOF_OFF); - flushAppendOnlyFile(1); - if (redis_fsync(server.aof_fd) == -1) { - serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno)); - } else { - server.aof_last_fsync = server.mstime; - } - close(server.aof_fd); - updateCurIncrAofEndOffset(); - - server.aof_fd = -1; - server.aof_selected_db = -1; - server.aof_state = AOF_OFF; - server.aof_rewrite_scheduled = 0; - server.aof_last_incr_size = 0; - server.aof_last_incr_fsync_offset = 0; - server.fsynced_reploff = -1; - atomicSet(server.fsynced_reploff_pending, 0); - killAppendOnlyChild(); - sdsfree(server.aof_buf); - server.aof_buf = sdsempty(); -} - -/* Called when the user switches from "appendonly no" to "appendonly yes" - * at runtime using the CONFIG command. */ -int startAppendOnly(void) { - serverAssert(server.aof_state == AOF_OFF); - - server.aof_state = AOF_WAIT_REWRITE; - if (hasActiveChildProcess() && server.child_type != CHILD_TYPE_AOF) { - server.aof_rewrite_scheduled = 1; - serverLog(LL_NOTICE,"AOF was enabled but there is already another background operation. An AOF background was scheduled to start when possible."); - } else if (server.in_exec){ - server.aof_rewrite_scheduled = 1; - serverLog(LL_NOTICE,"AOF was enabled during a transaction. An AOF background was scheduled to start when possible."); - } else { - /* If there is a pending AOF rewrite, we need to switch it off and - * start a new one: the old one cannot be reused because it is not - * accumulating the AOF buffer. */ - if (server.child_type == CHILD_TYPE_AOF) { - serverLog(LL_NOTICE,"AOF was enabled but there is already an AOF rewriting in background. Stopping background AOF and starting a rewrite now."); - killAppendOnlyChild(); - } - - if (rewriteAppendOnlyFileBackground() == C_ERR) { - server.aof_state = AOF_OFF; - serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error."); - return C_ERR; - } - } - server.aof_last_fsync = server.mstime; - /* If AOF fsync error in bio job, we just ignore it and log the event. */ - int aof_bio_fsync_status; - atomicGet(server.aof_bio_fsync_status, aof_bio_fsync_status); - if (aof_bio_fsync_status == C_ERR) { - serverLog(LL_WARNING, - "AOF reopen, just ignore the AOF fsync error in bio job"); - atomicSet(server.aof_bio_fsync_status,C_OK); - } - - /* If AOF was in error state, we just ignore it and log the event. */ - if (server.aof_last_write_status == C_ERR) { - serverLog(LL_WARNING,"AOF reopen, just ignore the last error."); - server.aof_last_write_status = C_OK; - } - return C_OK; -} - -void startAppendOnlyWithRetry(void) { - unsigned int tries, max_tries = 10; - for (tries = 0; tries < max_tries; ++tries) { - if (startAppendOnly() == C_OK) - break; - serverLog(LL_WARNING, "Failed to enable AOF! Trying it again in one second."); - sleep(1); - } - if (tries == max_tries) { - serverLog(LL_WARNING, "FATAL: AOF can't be turned on. Exiting now."); - exit(1); - } -} - -/* Called after "appendonly" config is changed. */ -void applyAppendOnlyConfig(void) { - if (!server.aof_enabled && server.aof_state != AOF_OFF) { - stopAppendOnly(); - } else if (server.aof_enabled && server.aof_state == AOF_OFF) { - startAppendOnlyWithRetry(); - } -} - -/* This is a wrapper to the write syscall in order to retry on short writes - * or if the syscall gets interrupted. It could look strange that we retry - * on short writes given that we are writing to a block device: normally if - * the first call is short, there is a end-of-space condition, so the next - * is likely to fail. However apparently in modern systems this is no longer - * true, and in general it looks just more resilient to retry the write. If - * there is an actual error condition we'll get it at the next try. */ -ssize_t aofWrite(int fd, const char *buf, size_t len) { - ssize_t nwritten = 0, totwritten = 0; - - while(len) { - nwritten = write(fd, buf, len); - - if (nwritten < 0) { - if (errno == EINTR) continue; - return totwritten ? totwritten : -1; - } - - len -= nwritten; - buf += nwritten; - totwritten += nwritten; - } - - return totwritten; -} - -/* Write the append only file buffer on disk. - * - * Since we are required to write the AOF before replying to the client, - * and the only way the client socket can get a write is entering when - * the event loop, we accumulate all the AOF writes in a memory - * buffer and write it on disk using this function just before entering - * the event loop again. - * - * About the 'force' argument: - * - * When the fsync policy is set to 'everysec' we may delay the flush if there - * is still an fsync() going on in the background thread, since for instance - * on Linux write(2) will be blocked by the background fsync anyway. - * When this happens we remember that there is some aof buffer to be - * flushed ASAP, and will try to do that in the serverCron() function. - * - * However if force is set to 1 we'll write regardless of the background - * fsync. */ -#define AOF_WRITE_LOG_ERROR_RATE 30 /* Seconds between errors logging. */ -void flushAppendOnlyFile(int force) { - ssize_t nwritten; - int sync_in_progress = 0; - mstime_t latency; - - if (sdslen(server.aof_buf) == 0) { - if (server.aof_last_incr_fsync_offset == server.aof_last_incr_size) { - /* All data is fsync'd already: Update fsynced_reploff_pending just in case. - * This is needed to avoid a WAITAOF hang in case a module used RM_Call - * with the NO_AOF flag, in which case master_repl_offset will increase but - * fsynced_reploff_pending won't be updated (because there's no reason, from - * the AOF POV, to call fsync) and then WAITAOF may wait on the higher offset - * (which contains data that was only propagated to replicas, and not to AOF) */ - if (!aofFsyncInProgress()) - atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); - } else { - /* Check if we need to do fsync even the aof buffer is empty, - * because previously in AOF_FSYNC_EVERYSEC mode, fsync is - * called only when aof buffer is not empty, so if users - * stop write commands before fsync called in one second, - * the data in page cache cannot be flushed in time. */ - if (server.aof_fsync == AOF_FSYNC_EVERYSEC && - server.mstime - server.aof_last_fsync >= 1000 && - !(sync_in_progress = aofFsyncInProgress())) - goto try_fsync; - - /* Check if we need to do fsync even the aof buffer is empty, - * the reason is described in the previous AOF_FSYNC_EVERYSEC block, - * and AOF_FSYNC_ALWAYS is also checked here to handle a case where - * aof_fsync is changed from everysec to always. */ - if (server.aof_fsync == AOF_FSYNC_ALWAYS) - goto try_fsync; - } - return; - } - - if (server.aof_fsync == AOF_FSYNC_EVERYSEC) - sync_in_progress = aofFsyncInProgress(); - - if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { - /* With this append fsync policy we do background fsyncing. - * If the fsync is still in progress we can try to delay - * the write for a couple of seconds. */ - if (sync_in_progress) { - if (server.aof_flush_postponed_start == 0) { - /* No previous write postponing, remember that we are - * postponing the flush and return. */ - server.aof_flush_postponed_start = server.mstime; - return; - } else if (server.mstime - server.aof_flush_postponed_start < 2000) { - /* We were already waiting for fsync to finish, but for less - * than two seconds this is still ok. Postpone again. */ - return; - } - /* Otherwise fall through, and go write since we can't wait - * over two seconds. */ - server.aof_delayed_fsync++; - serverLog(LL_NOTICE,"Asynchronous AOF fsync is taking too long (disk is busy?). Writing the AOF buffer without waiting for fsync to complete, this may slow down Redis."); - } - } - /* We want to perform a single write. This should be guaranteed atomic - * at least if the filesystem we are writing is a real physical one. - * While this will save us against the server being killed I don't think - * there is much to do about the whole server stopping for power problems - * or alike */ - - if (server.aof_flush_sleep && sdslen(server.aof_buf)) { - usleep(server.aof_flush_sleep); - } - - latencyStartMonitor(latency); - nwritten = aofWrite(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); - latencyEndMonitor(latency); - /* We want to capture different events for delayed writes: - * when the delay happens with a pending fsync, or with a saving child - * active, and when the above two conditions are missing. - * We also use an additional event name to save all samples which is - * useful for graphing / monitoring purposes. */ - if (sync_in_progress) { - latencyAddSampleIfNeeded("aof-write-pending-fsync",latency); - } else if (hasActiveChildProcess()) { - latencyAddSampleIfNeeded("aof-write-active-child",latency); - } else { - latencyAddSampleIfNeeded("aof-write-alone",latency); - } - latencyAddSampleIfNeeded("aof-write",latency); - - /* We performed the write so reset the postponed flush sentinel to zero. */ - server.aof_flush_postponed_start = 0; - - if (nwritten != (ssize_t)sdslen(server.aof_buf)) { - static time_t last_write_error_log = 0; - int can_log = 0; - - /* Limit logging rate to 1 line per AOF_WRITE_LOG_ERROR_RATE seconds. */ - if ((server.unixtime - last_write_error_log) > AOF_WRITE_LOG_ERROR_RATE) { - can_log = 1; - last_write_error_log = server.unixtime; - } - - /* Log the AOF write error and record the error code. */ - if (nwritten == -1) { - if (can_log) { - serverLog(LL_WARNING,"Error writing to the AOF file: %s", - strerror(errno)); - } - server.aof_last_write_errno = errno; - } else { - if (can_log) { - serverLog(LL_WARNING,"Short write while writing to " - "the AOF file: (nwritten=%lld, " - "expected=%lld)", - (long long)nwritten, - (long long)sdslen(server.aof_buf)); - } - - if (ftruncate(server.aof_fd, server.aof_last_incr_size) == -1) { - if (can_log) { - serverLog(LL_WARNING, "Could not remove short write " - "from the append-only file. Redis may refuse " - "to load the AOF the next time it starts. " - "ftruncate: %s", strerror(errno)); - } - } else { - /* If the ftruncate() succeeded we can set nwritten to - * -1 since there is no longer partial data into the AOF. */ - nwritten = -1; - } - server.aof_last_write_errno = ENOSPC; - } - - /* Handle the AOF write error. */ - if (server.aof_fsync == AOF_FSYNC_ALWAYS) { - /* We can't recover when the fsync policy is ALWAYS since the reply - * for the client is already in the output buffers (both writes and - * reads), and the changes to the db can't be rolled back. Since we - * have a contract with the user that on acknowledged or observed - * writes are is synced on disk, we must exit. */ - serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting..."); - exit(1); - } else { - /* Recover from failed write leaving data into the buffer. However - * set an error to stop accepting writes as long as the error - * condition is not cleared. */ - server.aof_last_write_status = C_ERR; - - /* Trim the sds buffer if there was a partial write, and there - * was no way to undo it with ftruncate(2). */ - if (nwritten > 0) { - server.aof_current_size += nwritten; - server.aof_last_incr_size += nwritten; - sdsrange(server.aof_buf,nwritten,-1); - } - return; /* We'll try again on the next call... */ - } - } else { - /* Successful write(2). If AOF was in error state, restore the - * OK state and log the event. */ - if (server.aof_last_write_status == C_ERR) { - serverLog(LL_NOTICE, - "AOF write error looks solved, Redis can write again."); - server.aof_last_write_status = C_OK; - } - } - server.aof_current_size += nwritten; - server.aof_last_incr_size += nwritten; - - /* Re-use AOF buffer when it is small enough. The maximum comes from the - * arena size of 4k minus some overhead (but is otherwise arbitrary). */ - if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) { - sdsclear(server.aof_buf); - } else { - sdsfree(server.aof_buf); - server.aof_buf = sdsempty(); - } - -try_fsync: - /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are - * children doing I/O in the background. */ - if (server.aof_no_fsync_on_rewrite && hasActiveChildProcess()) - return; - - /* Perform the fsync if needed. */ - if (server.aof_fsync == AOF_FSYNC_ALWAYS) { - /* redis_fsync is defined as fdatasync() for Linux in order to avoid - * flushing metadata. */ - latencyStartMonitor(latency); - /* Let's try to get this data on the disk. To guarantee data safe when - * the AOF fsync policy is 'always', we should exit if failed to fsync - * AOF (see comment next to the exit(1) after write error above). */ - if (redis_fsync(server.aof_fd) == -1) { - serverLog(LL_WARNING,"Can't persist AOF for fsync error when the " - "AOF fsync policy is 'always': %s. Exiting...", strerror(errno)); - exit(1); - } - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("aof-fsync-always",latency); - server.aof_last_incr_fsync_offset = server.aof_last_incr_size; - server.aof_last_fsync = server.mstime; - atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); - } else if (server.aof_fsync == AOF_FSYNC_EVERYSEC && - server.mstime - server.aof_last_fsync >= 1000) { - if (!sync_in_progress) { - aof_background_fsync(server.aof_fd); - server.aof_last_incr_fsync_offset = server.aof_last_incr_size; - } - server.aof_last_fsync = server.mstime; - } -} - -sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) { - char buf[32]; - int len, j; - robj *o; - - buf[0] = '*'; - len = 1+ll2string(buf+1,sizeof(buf)-1,argc); - buf[len++] = '\r'; - buf[len++] = '\n'; - dst = sdscatlen(dst,buf,len); - - for (j = 0; j < argc; j++) { - o = getDecodedObject(argv[j]); - buf[0] = '$'; - len = 1+ll2string(buf+1,sizeof(buf)-1,sdslen(o->ptr)); - buf[len++] = '\r'; - buf[len++] = '\n'; - dst = sdscatlen(dst,buf,len); - dst = sdscatlen(dst,o->ptr,sdslen(o->ptr)); - dst = sdscatlen(dst,"\r\n",2); - decrRefCount(o); - } - return dst; -} - -/* Generate a piece of timestamp annotation for AOF if current record timestamp - * in AOF is not equal server unix time. If we specify 'force' argument to 1, - * we would generate one without check, currently, it is useful in AOF rewriting - * child process which always needs to record one timestamp at the beginning of - * rewriting AOF. - * - * Timestamp annotation format is "#TS:${timestamp}\r\n". "TS" is short of - * timestamp and this method could save extra bytes in AOF. */ -sds genAofTimestampAnnotationIfNeeded(int force) { - sds ts = NULL; - - if (force || server.aof_cur_timestamp < server.unixtime) { - server.aof_cur_timestamp = force ? time(NULL) : server.unixtime; - ts = sdscatfmt(sdsempty(), "#TS:%I\r\n", server.aof_cur_timestamp); - serverAssert(sdslen(ts) <= AOF_ANNOTATION_LINE_MAX_LEN); - } - return ts; -} - -/* Write the given command to the aof file. - * dictid - dictionary id the command should be applied to, - * this is used in order to decide if a `select` command - * should also be written to the aof. Value of -1 means - * to avoid writing `select` command in any case. - * argv - The command to write to the aof. - * argc - Number of values in argv - */ -void feedAppendOnlyFile(int dictid, robj **argv, int argc) { - sds buf = sdsempty(); - - serverAssert(dictid == -1 || (dictid >= 0 && dictid < server.dbnum)); - - /* Feed timestamp if needed */ - if (server.aof_timestamp_enabled) { - sds ts = genAofTimestampAnnotationIfNeeded(0); - if (ts != NULL) { - buf = sdscatsds(buf, ts); - sdsfree(ts); - } - } - - /* The DB this command was targeting is not the same as the last command - * we appended. To issue a SELECT command is needed. */ - if (dictid != -1 && dictid != server.aof_selected_db) { - char seldb[64]; - - snprintf(seldb,sizeof(seldb),"%d",dictid); - buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", - (unsigned long)strlen(seldb),seldb); - server.aof_selected_db = dictid; - } - - /* All commands should be propagated the same way in AOF as in replication. - * No need for AOF-specific translation. */ - buf = catAppendOnlyGenericCommand(buf,argc,argv); - - /* Append to the AOF buffer. This will be flushed on disk just before - * of re-entering the event loop, so before the client will get a - * positive reply about the operation performed. */ - if (server.aof_state == AOF_ON || - (server.aof_state == AOF_WAIT_REWRITE && server.child_type == CHILD_TYPE_AOF)) - { - server.aof_buf = sdscatlen(server.aof_buf, buf, sdslen(buf)); - } - - sdsfree(buf); -} - -/* ---------------------------------------------------------------------------- - * AOF loading - * ------------------------------------------------------------------------- */ - -/* In Redis commands are always executed in the context of a client, so in - * order to load the append only file we need to create a fake client. */ -struct client *createAOFClient(void) { - struct client *c = createClient(NULL); - - c->id = CLIENT_ID_AOF; /* So modules can identify it's the AOF client. */ - - /* - * The AOF client should never be blocked (unlike master - * replication connection). - * This is because blocking the AOF client might cause - * deadlock (because potentially no one will unblock it). - * Also, if the AOF client will be blocked just for - * background processing there is a chance that the - * command execution order will be violated. - */ - c->flags = CLIENT_DENY_BLOCKING; - - /* We set the fake client as a slave waiting for the synchronization - * so that Redis will not try to send replies to this client. */ - c->replstate = SLAVE_STATE_WAIT_BGSAVE_START; - return c; -} - -static int truncateAppendOnlyFile(char *filename, off_t valid_up_to) { - if (valid_up_to == -1) { - serverLog(LL_WARNING,"Last valid command offset is invalid"); - return 0; - } - - if (truncate(filename, valid_up_to) == -1) { - serverLog(LL_WARNING,"Error truncating the AOF file %s: %s", - filename, strerror(errno)); - return 0; - } - - /* Make sure the AOF file descriptor points to the end of the - * file after the truncate call. */ - if (server.aof_fd != -1 && lseek(server.aof_fd, 0, SEEK_END) == -1) { - serverLog(LL_WARNING,"Can't seek the end of the AOF file %s: %s", - filename, strerror(errno)); - return 0; - } - - return 1; /* Success */ -} - -/* Replay an append log file. On success AOF_OK or AOF_TRUNCATED is returned, - * otherwise, one of the following is returned: - * AOF_OPEN_ERR: Failed to open the AOF file. - * AOF_NOT_EXIST: AOF file doesn't exist. - * AOF_EMPTY: The AOF file is empty (nothing to load). - * AOF_FAILED: Failed to load the AOF file. */ -int loadSingleAppendOnlyFile(char *filename) { - struct client *fakeClient; - struct redis_stat sb; - int old_aof_state = server.aof_state; - long loops = 0; - off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */ - off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */ - off_t last_progress_report_size = 0; - int ret = AOF_OK; - - sds aof_filepath = makePath(server.aof_dirname, filename); - FILE *fp = fopen(aof_filepath, "r"); - if (fp == NULL) { - int en = errno; - if (redis_stat(aof_filepath, &sb) == 0 || errno != ENOENT) { - serverLog(LL_WARNING,"Fatal error: can't open the append log file %s for reading: %s", filename, strerror(en)); - sdsfree(aof_filepath); - return AOF_OPEN_ERR; - } else { - serverLog(LL_WARNING,"The append log file %s doesn't exist: %s", filename, strerror(errno)); - sdsfree(aof_filepath); - return AOF_NOT_EXIST; - } - } - - if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { - fclose(fp); - sdsfree(aof_filepath); - return AOF_EMPTY; - } - - /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI - * to the same file we're about to read. */ - server.aof_state = AOF_OFF; - - client *old_cur_client = server.current_client; - client *old_exec_client = server.executing_client; - fakeClient = createAOFClient(); - server.current_client = server.executing_client = fakeClient; - - /* Check if the AOF file is in RDB format (it may be RDB encoded base AOF - * or old style RDB-preamble AOF). In that case we need to load the RDB file - * and later continue loading the AOF tail if it is an old style RDB-preamble AOF. */ - char sig[5]; /* "REDIS" */ - if (fread(sig,1,5,fp) != 5 || memcmp(sig,"REDIS",5) != 0) { - /* Not in RDB format, seek back at 0 offset. */ - if (fseek(fp,0,SEEK_SET) == -1) goto readerr; - } else { - /* RDB format. Pass loading the RDB functions. */ - rio rdb; - int old_style = !strcmp(filename, server.aof_filename); - if (old_style) - serverLog(LL_NOTICE, "Reading RDB preamble from AOF file..."); - else - serverLog(LL_NOTICE, "Reading RDB base file on AOF loading..."); - - if (fseek(fp,0,SEEK_SET) == -1) goto readerr; - rioInitWithFile(&rdb,fp); - if (rdbLoadRio(&rdb,RDBFLAGS_AOF_PREAMBLE,NULL) != C_OK) { - if (old_style) - serverLog(LL_WARNING, "Error reading the RDB preamble of the AOF file %s, AOF loading aborted", filename); - else - serverLog(LL_WARNING, "Error reading the RDB base file %s, AOF loading aborted", filename); - - ret = AOF_FAILED; - goto cleanup; - } else { - loadingAbsProgress(ftello(fp)); - last_progress_report_size = ftello(fp); - if (old_style) serverLog(LL_NOTICE, "Reading the remaining AOF tail..."); - } - } - - /* Read the actual AOF file, in REPL format, command by command. */ - while(1) { - int argc, j; - unsigned long len; - robj **argv; - char buf[AOF_ANNOTATION_LINE_MAX_LEN]; - sds argsds; - struct redisCommand *cmd; - - /* Serve the clients from time to time */ - if (!(loops++ % 1024)) { - off_t progress_delta = ftello(fp) - last_progress_report_size; - loadingIncrProgress(progress_delta); - last_progress_report_size += progress_delta; - processEventsWhileBlocked(); - processModuleLoadingProgressEvent(1); - } - if (fgets(buf,sizeof(buf),fp) == NULL) { - if (feof(fp)) { - break; - } else { - goto readerr; - } - } - if (buf[0] == '#') continue; /* Skip annotations */ - if (buf[0] != '*') goto fmterr; - if (buf[1] == '\0') goto readerr; - argc = atoi(buf+1); - if (argc < 1) goto fmterr; - if ((size_t)argc > SIZE_MAX / sizeof(robj*)) goto fmterr; - - /* Load the next command in the AOF as our fake client - * argv. */ - argv = zmalloc(sizeof(robj*)*argc); - fakeClient->argc = argc; - fakeClient->argv = argv; - fakeClient->argv_len = argc; - - for (j = 0; j < argc; j++) { - /* Parse the argument len. */ - char *readres = fgets(buf,sizeof(buf),fp); - if (readres == NULL || buf[0] != '$') { - fakeClient->argc = j; /* Free up to j-1. */ - freeClientArgv(fakeClient); - if (readres == NULL) - goto readerr; - else - goto fmterr; - } - len = strtol(buf+1,NULL,10); - - /* Read it into a string object. */ - argsds = sdsnewlen(SDS_NOINIT,len); - if (len && fread(argsds,len,1,fp) == 0) { - sdsfree(argsds); - fakeClient->argc = j; /* Free up to j-1. */ - freeClientArgv(fakeClient); - goto readerr; - } - argv[j] = createObject(OBJ_STRING,argsds); - - /* Discard CRLF. */ - if (fread(buf,2,1,fp) == 0) { - fakeClient->argc = j+1; /* Free up to j. */ - freeClientArgv(fakeClient); - goto readerr; - } - } - - /* Command lookup */ - cmd = lookupCommand(argv,argc); - if (!cmd) { - serverLog(LL_WARNING, - "Unknown command '%s' reading the append only file %s", - (char*)argv[0]->ptr, filename); - freeClientArgv(fakeClient); - ret = AOF_FAILED; - goto cleanup; - } - - if (cmd->proc == multiCommand) valid_before_multi = valid_up_to; - - /* Run the command in the context of a fake client */ - fakeClient->cmd = fakeClient->lastcmd = cmd; - if (fakeClient->flags & CLIENT_MULTI && - fakeClient->cmd->proc != execCommand) - { - /* queueMultiCommand requires a pendingCommand, so we create a "fake" one here - * for it to consume */ - pendingCommand *pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); - addPendingCommand(&fakeClient->pending_cmds, pcmd); - - pcmd->argc = argc; - pcmd->argv_len = argc; - pcmd->argv = argv; - pcmd->cmd = cmd; - - /* Note: we don't have to attempt calling evalGetCommandFlags, - * since this is AOF, the checks in processCommand are not made - * anyway.*/ - queueMultiCommand(fakeClient, cmd->flags); - } else { - cmd->proc(fakeClient); - fakeClient->all_argv_len_sum = 0; /* Otherwise no one cleans this up and we reach cleanup with it non-zero */ - } - - /* The fake client should not have a reply */ - serverAssert(fakeClient->bufpos == 0 && - listLength(fakeClient->reply) == 0); - - /* The fake client should never get blocked */ - serverAssert((fakeClient->flags & CLIENT_BLOCKED) == 0); - - /* Clean up. Command code may have changed argv/argc so we use the - * argv/argc of the client instead of the local variables. */ - freeClientArgv(fakeClient); - if (server.aof_load_truncated || server.aof_load_corrupt_tail_max_size) valid_up_to = ftello(fp); - if (server.key_load_delay) - debugDelay(server.key_load_delay); - } - - /* This point can only be reached when EOF is reached without errors. - * If the client is in the middle of a MULTI/EXEC, handle it as it was - * a short read, even if technically the protocol is correct: we want - * to remove the unprocessed tail and continue. */ - if (fakeClient->flags & CLIENT_MULTI) { - serverLog(LL_WARNING, - "Revert incomplete MULTI/EXEC transaction in AOF file %s", filename); - valid_up_to = valid_before_multi; - goto uxeof; - } - -loaded_ok: /* DB loaded, cleanup and return success (AOF_OK or AOF_TRUNCATED). */ - loadingIncrProgress(ftello(fp) - last_progress_report_size); - server.aof_state = old_aof_state; - goto cleanup; - -readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */ - if (!feof(fp)) { - serverLog(LL_WARNING,"Unrecoverable error reading the append only file %s: %s", filename, strerror(errno)); - ret = AOF_FAILED; - goto cleanup; - } - -uxeof: /* Unexpected AOF end of file. */ - if (server.aof_load_truncated) { - serverLog(LL_WARNING,"!!! Warning: short read while loading the AOF file %s!!!", filename); - serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu !!!", - filename, (unsigned long long) valid_up_to); - if (truncateAppendOnlyFile(aof_filepath, valid_up_to)) { - serverLog(LL_WARNING, "AOF %s loaded anyway because aof-load-truncated is enabled", aof_filepath); - ret = AOF_TRUNCATED; - goto loaded_ok; - } - } - serverLog(LL_WARNING, "Unexpected end of file reading the append only file %s. You can: " - "1) Make a backup of your AOF file, then use ./redis-check-aof --fix . " - "2) Alternatively you can set the 'aof-load-truncated' configuration option to yes and restart the server.", filename); - ret = AOF_FAILED; - goto cleanup; - -fmterr: /* Format error. */ - /* fmterr may be caused by accidentally machine shutdown, so if the broken tail - * is less than a specified size, try to recover it automatically */ - if (server.aof_load_corrupt_tail_max_size && sb.st_size - valid_up_to < server.aof_load_corrupt_tail_max_size) { - serverLog(LL_WARNING,"!!! Warning: corrupt AOF file tail!!!"); - serverLog(LL_WARNING,"!!! Truncating the AOF %s at offset %llu (remaining %llu) !!!", - aof_filepath, (unsigned long long) valid_up_to, (unsigned long long) sb.st_size - valid_up_to); - if (truncateAppendOnlyFile(aof_filepath, valid_up_to)) { - serverLog(LL_WARNING, "AOF %s loaded anyway because aof-load-corrupt-tail-max-size is enabled", aof_filepath); - ret = AOF_BROKEN_RECOVERED; - goto loaded_ok; - } - } - serverLog(LL_WARNING, "Bad file format reading the append only file %s at offset %llu. \ - make a backup of your AOF file, then use ./redis-check-aof --fix . \ - Alternatively you can set the 'aof-load-corrupt-tail-max-size' configuration option to %llu and restart the server.", - aof_filepath, (unsigned long long)valid_up_to, (unsigned long long) sb.st_size - valid_up_to); - ret = AOF_FAILED; - /* fall through to cleanup. */ - -cleanup: - if (fakeClient) freeClient(fakeClient); - server.current_client = old_cur_client; - server.executing_client = old_exec_client; - int fd = dup(fileno(fp)); - fclose(fp); - /* Reclaim page cache memory used by the AOF file in background. */ - if (fd >= 0) bioCreateCloseJob(fd, 0, 1); - sdsfree(aof_filepath); - return ret; -} - -/* Load the AOF files according the aofManifest pointed by am. */ -int loadAppendOnlyFiles(aofManifest *am) { - serverAssert(am != NULL); - int status, ret = AOF_OK; - long long start; - off_t total_size = 0, base_size = 0; - sds aof_name; - int total_num, aof_num = 0, last_file; - - /* If the 'server.aof_filename' file exists in dir, we may be starting - * from an old redis version. We will use enter upgrade mode in three situations. - * - * 1. If the 'server.aof_dirname' directory not exist - * 2. If the 'server.aof_dirname' directory exists but the manifest file is missing - * 3. If the 'server.aof_dirname' directory exists and the manifest file it contains - * has only one base AOF record, and the file name of this base AOF is 'server.aof_filename', - * and the 'server.aof_filename' file not exist in 'server.aof_dirname' directory - * */ - if (fileExist(server.aof_filename)) { - if (!dirExists(server.aof_dirname) || - (am->base_aof_info == NULL && listLength(am->incr_aof_list) == 0) || - (am->base_aof_info != NULL && listLength(am->incr_aof_list) == 0 && - !strcmp(am->base_aof_info->file_name, server.aof_filename) && !aofFileExist(server.aof_filename))) - { - aofUpgradePrepare(am); - } - } - - if (am->base_aof_info == NULL && listLength(am->incr_aof_list) == 0) { - return AOF_NOT_EXIST; - } - - total_num = getBaseAndIncrAppendOnlyFilesNum(am); - serverAssert(total_num > 0); - - /* Here we calculate the total size of all BASE and INCR files in - * advance, it will be set to `server.loading_total_bytes`. */ - total_size = getBaseAndIncrAppendOnlyFilesSize(am, &status); - if (status != AOF_OK) { - /* If an AOF exists in the manifest but not on the disk, we consider this to be a fatal error. */ - if (status == AOF_NOT_EXIST) status = AOF_FAILED; - - return status; - } else if (total_size == 0) { - return AOF_EMPTY; - } - - startLoading(total_size, RDBFLAGS_AOF_PREAMBLE, 0); - - /* Load BASE AOF if needed. */ - if (am->base_aof_info) { - serverAssert(am->base_aof_info->file_type == AOF_FILE_TYPE_BASE); - aof_name = (char*)am->base_aof_info->file_name; - updateLoadingFileName(aof_name); - base_size = getAppendOnlyFileSize(aof_name, NULL); - last_file = ++aof_num == total_num; - start = ustime(); - ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)) { - serverLog(LL_NOTICE, "DB loaded from base file %s: %.3f seconds", - aof_name, (float)(ustime()-start)/1000000); - } - - /* If the truncated file is not the last file, we consider this to be a fatal error. */ - if ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && !last_file) { - ret = AOF_FAILED; - serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); - } - - if (ret == AOF_OPEN_ERR || ret == AOF_FAILED) { - goto cleanup; - } - } - - /* Load INCR AOFs if needed. */ - if (listLength(am->incr_aof_list)) { - listNode *ln; - listIter li; - - listRewind(am->incr_aof_list, &li); - while ((ln = listNext(&li)) != NULL) { - aofInfo *ai = (aofInfo*)ln->value; - serverAssert(ai->file_type == AOF_FILE_TYPE_INCR); - aof_name = (char*)ai->file_name; - updateLoadingFileName(aof_name); - last_file = ++aof_num == total_num; - start = ustime(); - ret = loadSingleAppendOnlyFile(aof_name); - if (ret == AOF_OK || ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && last_file)) { - serverLog(LL_NOTICE, "DB loaded from incr file %s: %.3f seconds", - aof_name, (float)(ustime()-start)/1000000); - } - - /* We know that (at least) one of the AOF files has data (total_size > 0), - * so empty incr AOF file doesn't count as a AOF_EMPTY result */ - if (ret == AOF_EMPTY) ret = AOF_OK; - - /* If the truncated file is not the last file, we consider this to be a fatal error. */ - if ((ret == AOF_TRUNCATED || ret == AOF_BROKEN_RECOVERED) && !last_file) { - ret = AOF_FAILED; - serverLog(LL_WARNING, "Fatal error: the truncated file is not the last file"); - } - - if (ret == AOF_OPEN_ERR || ret == AOF_FAILED) { - goto cleanup; - } - } - } - - server.aof_current_size = total_size; - /* Ideally, the aof_rewrite_base_size variable should hold the size of the - * AOF when the last rewrite ended, this should include the size of the - * incremental file that was created during the rewrite since otherwise we - * risk the next automatic rewrite to happen too soon (or immediately if - * auto-aof-rewrite-percentage is low). However, since we do not persist - * aof_rewrite_base_size information anywhere, we initialize it on restart - * to the size of BASE AOF file. This might cause the first AOFRW to be - * executed early, but that shouldn't be a problem since everything will be - * fine after the first AOFRW. */ - server.aof_rewrite_base_size = base_size; - -cleanup: - stopLoading(ret == AOF_OK || ret == AOF_TRUNCATED); - return ret; -} - -/* ---------------------------------------------------------------------------- - * AOF rewrite - * ------------------------------------------------------------------------- */ - -/* Delegate writing an object to writing a bulk string or bulk long long. - * This is not placed in rio.c since that adds the server.h dependency. */ -int rioWriteBulkObject(rio *r, robj *obj) { - /* Avoid using getDecodedObject to help copy-on-write (we are often - * in a child process when this function is called). */ - if (obj->encoding == OBJ_ENCODING_INT) { - return rioWriteBulkLongLong(r,(long)obj->ptr); - } else if (sdsEncodedObject(obj)) { - return rioWriteBulkString(r,obj->ptr,sdslen(obj->ptr)); - } else { - serverPanic("Unknown string encoding"); - } -} - -/* Emit the commands needed to rebuild a list object. - * The function returns 0 on error, 1 on success. */ -int rewriteListObject(rio *r, robj *key, robj *o) { - long long count = 0, items = listTypeLength(o); - - listTypeIterator li; - listTypeEntry entry; - listTypeInitIterator(&li, o, 0, LIST_TAIL); - while (listTypeNext(&li, &entry)) { - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - if (!rioWriteBulkCount(r,'*',2+cmd_items) || - !rioWriteBulkString(r,"RPUSH",5) || - !rioWriteBulkObject(r,key)) - { - listTypeResetIterator(&li); - return 0; - } - } - - unsigned char *vstr; - size_t vlen; - long long lval; - vstr = listTypeGetValue(&entry,&vlen,&lval); - if (vstr) { - if (!rioWriteBulkString(r,(char*)vstr,vlen)) { - listTypeResetIterator(&li); - return 0; - } - } else { - if (!rioWriteBulkLongLong(r,lval)) { - listTypeResetIterator(&li); - return 0; - } - } - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - listTypeResetIterator(&li); - return 1; -} - -/* Emit the commands needed to rebuild a set object. - * The function returns 0 on error, 1 on success. */ -int rewriteSetObject(rio *r, robj *key, robj *o) { - long long count = 0, items = setTypeSize(o); - setTypeIterator si; - char *str; - size_t len; - int64_t llval; - setTypeInitIterator(&si, o); - while (setTypeNext(&si, &str, &len, &llval) != -1) { - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - if (!rioWriteBulkCount(r,'*',2+cmd_items) || - !rioWriteBulkString(r,"SADD",4) || - !rioWriteBulkObject(r,key)) - { - setTypeResetIterator(&si); - return 0; - } - } - size_t written = str ? - rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval); - if (!written) { - setTypeResetIterator(&si); - return 0; - } - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - setTypeResetIterator(&si); - return 1; -} - -/* Emit the commands needed to rebuild a sorted set object. - * The function returns 0 on error, 1 on success. */ -int rewriteSortedSetObject(rio *r, robj *key, robj *o) { - long long count = 0, items = zsetLength(o); - - if (o->encoding == OBJ_ENCODING_LISTPACK) { - unsigned char *zl = o->ptr; - unsigned char *eptr, *sptr; - unsigned char *vstr; - unsigned int vlen; - long long vll; - double score; - - eptr = lpSeek(zl,0); - serverAssert(eptr != NULL); - sptr = lpNext(zl,eptr); - serverAssert(sptr != NULL); - - while (eptr != NULL) { - vstr = lpGetValue(eptr,&vlen,&vll); - score = zzlGetScore(sptr); - - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - - if (!rioWriteBulkCount(r,'*',2+cmd_items*2) || - !rioWriteBulkString(r,"ZADD",4) || - !rioWriteBulkObject(r,key)) - { - return 0; - } - } - if (!rioWriteBulkDouble(r,score)) return 0; - if (vstr != NULL) { - if (!rioWriteBulkString(r,(char*)vstr,vlen)) return 0; - } else { - if (!rioWriteBulkLongLong(r,vll)) return 0; - } - zzlNext(zl,&eptr,&sptr); - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { - zset *zs = o->ptr; - dictIterator di; - dictEntry *de; - - dictInitIterator(&di, zs->dict); - while((de = dictNext(&di)) != NULL) { - zskiplistNode *znode = dictGetKey(de); - sds ele = zslGetNodeElement(znode); - double score = znode->score; - - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - - if (!rioWriteBulkCount(r,'*',2+cmd_items*2) || - !rioWriteBulkString(r,"ZADD",4) || - !rioWriteBulkObject(r,key)) - { - dictResetIterator(&di); - return 0; - } - } - if (!rioWriteBulkDouble(r,score) || - !rioWriteBulkString(r,ele,sdslen(ele))) - { - dictResetIterator(&di); - return 0; - } - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - dictResetIterator(&di); - } else { - serverPanic("Unknown sorted zset encoding"); - } - return 1; -} - -/* Write either the key or the value of the currently selected item of a hash. - * The 'hi' argument passes a valid Redis hash iterator. - * The 'what' filed specifies if to write a key or a value and can be - * either OBJ_HASH_KEY or OBJ_HASH_VALUE. - * - * The function returns 0 on error, non-zero on success. */ -static int rioWriteHashIteratorCursor(rio *r, hashTypeIterator *hi, int what) { - if ((hi->encoding == OBJ_ENCODING_LISTPACK) || (hi->encoding == OBJ_ENCODING_LISTPACK_EX)) { - unsigned char *vstr = NULL; - unsigned int vlen = UINT_MAX; - long long vll = LLONG_MAX; - - hashTypeCurrentFromListpack(hi, what, &vstr, &vlen, &vll, NULL); - if (vstr) - return rioWriteBulkString(r, (char*)vstr, vlen); - else - return rioWriteBulkLongLong(r, vll); - } else if (hi->encoding == OBJ_ENCODING_HT) { - char *str; - size_t len; - hashTypeCurrentFromHashTable(hi, what, &str, &len, NULL); - return rioWriteBulkString(r, str, len); - } - - serverPanic("Unknown hash encoding"); - return 0; -} - -/* Emit the commands needed to rebuild a hash object. - * The function returns 0 on error, 1 on success. */ -int rewriteHashObject(rio *r, robj *key, robj *o) { - int res = 0; /*fail*/ - - hashTypeIterator hi; - long long count = 0, items = hashTypeLength(o, 0); - - int isHFE = hashTypeGetMinExpire(o, 0) != EB_EXPIRE_TIME_INVALID; - hashTypeInitIterator(&hi, o); - - if (!isHFE) { - while (hashTypeNext(&hi, 0) != C_ERR) { - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - if (!rioWriteBulkCount(r, '*', 2 + cmd_items * 2) || - !rioWriteBulkString(r, "HMSET", 5) || - !rioWriteBulkObject(r, key)) - goto reHashEnd; - } - - if (!rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_KEY) || - !rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_VALUE)) - goto reHashEnd; - - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; - } - } else { - while (hashTypeNext(&hi, 0) != C_ERR) { - - char hmsetCmd[] = "*4\r\n$5\r\nHMSET\r\n"; - if ( (!rioWrite(r, hmsetCmd, sizeof(hmsetCmd) - 1)) || - (!rioWriteBulkObject(r, key)) || - (!rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_KEY)) || - (!rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_VALUE)) ) - goto reHashEnd; - - if (hi.expire_time != EB_EXPIRE_TIME_INVALID) { - char cmd[] = "*6\r\n$10\r\nHPEXPIREAT\r\n"; - if ( (!rioWrite(r, cmd, sizeof(cmd) - 1)) || - (!rioWriteBulkObject(r, key)) || - (!rioWriteBulkLongLong(r, hi.expire_time)) || - (!rioWriteBulkString(r, "FIELDS", 6)) || - (!rioWriteBulkString(r, "1", 1)) || - (!rioWriteHashIteratorCursor(r, &hi, OBJ_HASH_KEY)) ) - goto reHashEnd; - } - } - } - - res = 1; /* success */ - -reHashEnd: - hashTypeResetIterator(&hi); - return res; -} - -/* Helper for rewriteStreamObject() that generates a bulk string into the - * AOF representing the ID 'id'. */ -int rioWriteBulkStreamID(rio *r,streamID *id) { - int retval; - - sds replyid = sdscatfmt(sdsempty(),"%U-%U",id->ms,id->seq); - retval = rioWriteBulkString(r,replyid,sdslen(replyid)); - sdsfree(replyid); - return retval; -} - -/* Helper for rewriteStreamObject(): emit the XCLAIM needed in order to - * add the message described by 'nack' having the id 'rawid', into the pending - * list of the specified consumer. All this in the context of the specified - * key and group. */ -int rioWriteStreamPendingEntry(rio *r, robj *key, const char *groupname, size_t groupname_len, streamConsumer *consumer, unsigned char *rawid, streamNACK *nack) { - /* XCLAIM 0 TIME - RETRYCOUNT JUSTID FORCE. */ - streamID id; - streamDecodeID(rawid,&id); - if (rioWriteBulkCount(r,'*',12) == 0) return 0; - if (rioWriteBulkString(r,"XCLAIM",6) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkString(r,groupname,groupname_len) == 0) return 0; - if (rioWriteBulkString(r,consumer->name,sdslen(consumer->name)) == 0) return 0; - if (rioWriteBulkString(r,"0",1) == 0) return 0; - if (rioWriteBulkStreamID(r,&id) == 0) return 0; - if (rioWriteBulkString(r,"TIME",4) == 0) return 0; - if (rioWriteBulkLongLong(r,nack->delivery_time) == 0) return 0; - if (rioWriteBulkString(r,"RETRYCOUNT",10) == 0) return 0; - if (rioWriteBulkLongLong(r,nack->delivery_count) == 0) return 0; - if (rioWriteBulkString(r,"JUSTID",6) == 0) return 0; - if (rioWriteBulkString(r,"FORCE",5) == 0) return 0; - return 1; -} - -/* Helper for rewriteStreamObject(): emit the XGROUP CREATECONSUMER is - * needed in order to create consumers that do not have any pending entries. - * All this in the context of the specified key and group. */ -int rioWriteStreamEmptyConsumer(rio *r, robj *key, const char *groupname, size_t groupname_len, streamConsumer *consumer) { - /* XGROUP CREATECONSUMER */ - if (rioWriteBulkCount(r,'*',5) == 0) return 0; - if (rioWriteBulkString(r,"XGROUP",6) == 0) return 0; - if (rioWriteBulkString(r,"CREATECONSUMER",14) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkString(r,groupname,groupname_len) == 0) return 0; - if (rioWriteBulkString(r,consumer->name,sdslen(consumer->name)) == 0) return 0; - return 1; -} - -/* Emit the commands needed to rebuild a stream object. - * The function returns 0 on error, 1 on success. */ -int rewriteStreamObject(rio *r, robj *key, robj *o) { - stream *s = o->ptr; - streamIterator si; - streamIteratorStart(&si,s,NULL,NULL,0); - streamID id; - int64_t numfields; - - if (s->length) { - /* Reconstruct the stream data using XADD commands. */ - while(streamIteratorGetID(&si,&id,&numfields)) { - /* Emit a two elements array for each item. The first is - * the ID, the second is an array of field-value pairs. */ - - /* Emit the XADD ...fields... command. */ - if (!rioWriteBulkCount(r,'*',3+numfields*2) || - !rioWriteBulkString(r,"XADD",4) || - !rioWriteBulkObject(r,key) || - !rioWriteBulkStreamID(r,&id)) - { - streamIteratorStop(&si); - return 0; - } - while(numfields--) { - unsigned char *field, *value; - int64_t field_len, value_len; - streamIteratorGetField(&si,&field,&value,&field_len,&value_len); - if (!rioWriteBulkString(r,(char*)field,field_len) || - !rioWriteBulkString(r,(char*)value,value_len)) - { - streamIteratorStop(&si); - return 0; - } - } - } - } else { - /* Use the XADD MAXLEN 0 trick to generate an empty stream if - * the key we are serializing is an empty string, which is possible - * for the Stream type. */ - id.ms = 0; id.seq = 1; - if (!rioWriteBulkCount(r,'*',7) || - !rioWriteBulkString(r,"XADD",4) || - !rioWriteBulkObject(r,key) || - !rioWriteBulkString(r,"MAXLEN",6) || - !rioWriteBulkString(r,"0",1) || - !rioWriteBulkStreamID(r,&id) || - !rioWriteBulkString(r,"x",1) || - !rioWriteBulkString(r,"y",1)) - { - streamIteratorStop(&si); - return 0; - } - } - - /* Append XSETID after XADD, make sure lastid is correct, - * in case of XDEL lastid. */ - if (!rioWriteBulkCount(r,'*',7) || - !rioWriteBulkString(r,"XSETID",6) || - !rioWriteBulkObject(r,key) || - !rioWriteBulkStreamID(r,&s->last_id) || - !rioWriteBulkString(r,"ENTRIESADDED",12) || - !rioWriteBulkLongLong(r,s->entries_added) || - !rioWriteBulkString(r,"MAXDELETEDID",12) || - !rioWriteBulkStreamID(r,&s->max_deleted_entry_id)) - { - streamIteratorStop(&si); - return 0; - } - - - /* Create all the stream consumer groups. */ - if (s->cgroups) { - raxIterator ri; - raxStart(&ri,s->cgroups); - raxSeek(&ri,"^",NULL,0); - while(raxNext(&ri)) { - streamCG *group = ri.data; - /* Emit the XGROUP CREATE in order to create the group. */ - if (!rioWriteBulkCount(r,'*',7) || - !rioWriteBulkString(r,"XGROUP",6) || - !rioWriteBulkString(r,"CREATE",6) || - !rioWriteBulkObject(r,key) || - !rioWriteBulkString(r,(char*)ri.key,ri.key_len) || - !rioWriteBulkStreamID(r,&group->last_id) || - !rioWriteBulkString(r,"ENTRIESREAD",11) || - !rioWriteBulkLongLong(r,group->entries_read)) - { - raxStop(&ri); - streamIteratorStop(&si); - return 0; - } - - /* Generate XCLAIMs for each consumer that happens to - * have pending entries. Empty consumers would be generated with - * XGROUP CREATECONSUMER. */ - raxIterator ri_cons; - raxStart(&ri_cons,group->consumers); - raxSeek(&ri_cons,"^",NULL,0); - while(raxNext(&ri_cons)) { - streamConsumer *consumer = ri_cons.data; - /* If there are no pending entries, just emit XGROUP CREATECONSUMER */ - if (raxSize(consumer->pel) == 0) { - if (rioWriteStreamEmptyConsumer(r,key,(char*)ri.key, - ri.key_len,consumer) == 0) - { - raxStop(&ri_cons); - raxStop(&ri); - streamIteratorStop(&si); - return 0; - } - continue; - } - /* For the current consumer, iterate all the PEL entries - * to emit the XCLAIM protocol. */ - raxIterator ri_pel; - raxStart(&ri_pel,consumer->pel); - raxSeek(&ri_pel,"^",NULL,0); - while(raxNext(&ri_pel)) { - streamNACK *nack = ri_pel.data; - if (rioWriteStreamPendingEntry(r,key,(char*)ri.key, - ri.key_len,consumer, - ri_pel.key,nack) == 0) - { - raxStop(&ri_pel); - raxStop(&ri_cons); - raxStop(&ri); - streamIteratorStop(&si); - return 0; - } - } - raxStop(&ri_pel); - } - raxStop(&ri_cons); - } - raxStop(&ri); - } - - streamIteratorStop(&si); - return 1; -} - -/* Call the module type callback in order to rewrite a data type - * that is exported by a module and is not handled by Redis itself. - * The function returns 0 on error, 1 on success. */ -int rewriteModuleObject(rio *r, robj *key, robj *o, int dbid) { - RedisModuleIO io; - moduleValue *mv = o->ptr; - moduleType *mt = mv->type; - moduleInitIOContext(&io, &mt->entity, r, key, dbid); - mt->aof_rewrite(&io,key,mv->value); - if (io.ctx) { - moduleFreeContext(io.ctx); - zfree(io.ctx); - } - return io.error ? 0 : 1; -} - -static int rewriteFunctions(rio *aof) { - dict *functions = functionsLibGet(); - dictIterator iter; - dictEntry *entry = NULL; - dictInitIterator(&iter, functions); - while ((entry = dictNext(&iter))) { - functionLibInfo *li = dictGetVal(entry); - if (rioWrite(aof, "*3\r\n", 4) == 0) goto werr; - char function_load[] = "$8\r\nFUNCTION\r\n$4\r\nLOAD\r\n"; - if (rioWrite(aof, function_load, sizeof(function_load) - 1) == 0) goto werr; - if (rioWriteBulkString(aof, li->code, sdslen(li->code)) == 0) goto werr; - } - dictResetIterator(&iter); - return 1; - -werr: - dictResetIterator(&iter); - return 0; -} - -int rewriteObject(rio *r, robj *key, robj *o, int dbid, long long expiretime) { - /* Save the key and associated value */ - if (o->type == OBJ_STRING) { - /* Emit a SET command */ - static const char cmd[]="*3\r\n$3\r\nSET\r\n"; - if (rioWrite(r,cmd,sizeof(cmd)-1) == 0) return C_ERR; - /* Key and value */ - if (rioWriteBulkObject(r,key) == 0) return C_ERR; - if (rioWriteBulkObject(r,o) == 0) return C_ERR; - } else if (o->type == OBJ_LIST) { - if (rewriteListObject(r,key,o) == 0) return C_ERR; - } else if (o->type == OBJ_SET) { - if (rewriteSetObject(r,key,o) == 0) return C_ERR; - } else if (o->type == OBJ_ZSET) { - if (rewriteSortedSetObject(r,key,o) == 0) return C_ERR; - } else if (o->type == OBJ_HASH) { - if (rewriteHashObject(r,key,o) == 0) return C_ERR; - } else if (o->type == OBJ_STREAM) { - if (rewriteStreamObject(r,key,o) == 0) return C_ERR; - } else if (o->type == OBJ_MODULE) { - if (rewriteModuleObject(r,key,o,dbid) == 0) return C_ERR; - } else { - serverPanic("Unknown object type"); - } - - /* Save the expire time */ - if (expiretime != -1) { - static const char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n"; - if (rioWrite(r,cmd,sizeof(cmd)-1) == 0) return C_ERR; - if (rioWriteBulkObject(r,key) == 0) return C_ERR; - if (rioWriteBulkLongLong(r,expiretime) == 0) return C_ERR; - } - - /* If modules metadata is available */ - if ((getModuleMetaBits(o->metabits)) && (keyMetaOnAof(r, key, o, dbid) == 0)) - return C_ERR; - - return C_OK; -} - -int rewriteAppendOnlyFileRio(rio *aof) { - dictEntry *de; - int j; - long key_count = 0; - long long updated_time = 0; - unsigned long long skipped = 0; - kvstoreIterator kvs_it; - - /* Record timestamp at the beginning of rewriting AOF. */ - if (server.aof_timestamp_enabled) { - sds ts = genAofTimestampAnnotationIfNeeded(1); - if (rioWrite(aof,ts,sdslen(ts)) == 0) { sdsfree(ts); goto werr; } - sdsfree(ts); - } - - if (rewriteFunctions(aof) == 0) goto werr; - - for (j = 0; j < server.dbnum; j++) { - char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; - redisDb *db = server.db + j; - if (kvstoreSize(db->keys) == 0) continue; - - /* SELECT the new DB */ - if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; - if (rioWriteBulkLongLong(aof,j) == 0) goto werr; - - kvstoreIteratorInit(&kvs_it, db->keys); - /* Iterate this DB writing every entry */ - while((de = kvstoreIteratorNext(&kvs_it)) != NULL) { - long long expiretime; - size_t aof_bytes_before_key = aof->processed_bytes; - - /* Get the value object (of type kvobj) */ - kvobj *o = dictGetKV(de); - - /* Get the expire time */ - expiretime = kvobjGetExpire(o); - - /* Skip keys that are being trimmed */ - if (server.cluster_enabled) { - int curr_slot = kvstoreIteratorGetCurrentDictIndex(&kvs_it); - if (isSlotInTrimJob(curr_slot)) { - skipped++; - continue; - } - } - - /* Set on stack string object for key */ - robj key; - initStaticStringObject(key, kvobjGetKey(o)); - - if (rewriteObject(aof, &key, o, j, expiretime) == C_ERR) goto werr2; - - /* In fork child process, we can try to release memory back to the - * OS and possibly avoid or decrease COW. We give the dismiss - * mechanism a hint about an estimated size of the object we stored. */ - size_t dump_size = aof->processed_bytes - aof_bytes_before_key; - if (server.in_fork_child) dismissObject(o, dump_size); - - /* Update info every 1 second (approximately). - * in order to avoid calling mstime() on each iteration, we will - * check the diff every 1024 keys */ - if ((key_count++ & 1023) == 0) { - long long now = mstime(); - if (now - updated_time >= 1000) { - sendChildInfo(CHILD_INFO_TYPE_CURRENT_INFO, key_count, "AOF rewrite"); - updated_time = now; - } - } - - /* Delay before next key if required (for testing) */ - if (server.rdb_key_save_delay) - debugDelay(server.rdb_key_save_delay); - } - kvstoreIteratorReset(&kvs_it); - } - serverLog(LL_NOTICE, "AOF rewrite done, %ld keys saved, %llu keys skipped.", key_count, skipped); - return C_OK; - -werr2: - kvstoreIteratorReset(&kvs_it); -werr: - return C_ERR; -} - -/* Write a sequence of commands able to fully rebuild the dataset into - * "filename". Used both by REWRITEAOF and BGREWRITEAOF. - * - * In order to minimize the number of commands needed in the rewritten - * log Redis uses variadic commands when possible, such as RPUSH, SADD - * and ZADD. However at max AOF_REWRITE_ITEMS_PER_CMD items per time - * are inserted using a single command. */ -int rewriteAppendOnlyFile(char *filename) { - rio aof; - FILE *fp = NULL; - char tmpfile[256]; - - /* Note that we have to use a different temp name here compared to the - * one used by rewriteAppendOnlyFileBackground() function. */ - snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); - fp = fopen(tmpfile,"w"); - if (!fp) { - serverLog(LL_WARNING, "Opening the temp file for AOF rewrite in rewriteAppendOnlyFile(): %s", strerror(errno)); - return C_ERR; - } - - rioInitWithFile(&aof,fp); - - if (server.aof_rewrite_incremental_fsync) { - rioSetAutoSync(&aof,REDIS_AUTOSYNC_BYTES); - rioSetReclaimCache(&aof,1); - } - - startSaving(RDBFLAGS_AOF_PREAMBLE); - - if (server.aof_use_rdb_preamble) { - int error; - if (rdbSaveRio(SLAVE_REQ_NONE,&aof,&error,RDBFLAGS_AOF_PREAMBLE,NULL) == C_ERR) { - errno = error; - goto werr; - } - } else { - if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr; - } - - /* Make sure data will not remain on the OS's output buffers */ - if (fflush(fp)) goto werr; - if (fsync(fileno(fp))) goto werr; - if (reclaimFilePageCache(fileno(fp), 0, 0) == -1) { - /* A minor error. Just log to know what happens */ - serverLog(LL_NOTICE,"Unable to reclaim page cache: %s", strerror(errno)); - } - if (fclose(fp)) { fp = NULL; goto werr; } - fp = NULL; - - /* Use RENAME to make sure the DB file is changed atomically only - * if the generate DB file is ok. */ - if (rename(tmpfile,filename) == -1) { - serverLog(LL_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno)); - unlink(tmpfile); - stopSaving(0); - return C_ERR; - } - stopSaving(1); - - return C_OK; - -werr: - serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); - if (fp) fclose(fp); - unlink(tmpfile); - stopSaving(0); - return C_ERR; -} -/* ---------------------------------------------------------------------------- - * AOF background rewrite - * ------------------------------------------------------------------------- */ - -/* This is how rewriting of the append only file in background works: - * - * 1) The user calls BGREWRITEAOF - * 2) Redis calls this function, that forks(): - * 2a) the child rewrite the append only file in a temp file. - * 2b) the parent open a new INCR AOF file to continue writing. - * 3) When the child finished '2a' exists. - * 4) The parent will trap the exit code, if it's OK, it will: - * 4a) get a new BASE file name and mark the previous (if we have) as the HISTORY type - * 4b) rename(2) the temp file in new BASE file name - * 4c) mark the rewritten INCR AOFs as history type - * 4d) persist AOF manifest file - * 4e) Delete the history files use bio - */ -int rewriteAppendOnlyFileBackground(void) { - pid_t childpid; - - if (hasActiveChildProcess()) return C_ERR; - - if (dirCreateIfMissing(server.aof_dirname) == -1) { - serverLog(LL_WARNING, "Can't open or create append-only dir %s: %s", - server.aof_dirname, strerror(errno)); - server.aof_lastbgrewrite_status = C_ERR; - return C_ERR; - } - - /* We set aof_selected_db to -1 in order to force the next call to the - * feedAppendOnlyFile() to issue a SELECT command. */ - server.aof_selected_db = -1; - flushAppendOnlyFile(1); - if (openNewIncrAofForAppend() != C_OK) { - server.aof_lastbgrewrite_status = C_ERR; - return C_ERR; - } - - if (server.aof_state == AOF_WAIT_REWRITE) { - /* Wait for all bio jobs related to AOF to drain. This prevents a race - * between updates to `fsynced_reploff_pending` of the worker thread, belonging - * to the previous AOF, and the new one. This concern is specific for a full - * sync scenario where we don't wanna risk the ACKed replication offset - * jumping backwards or forward when switching to a different master. */ - bioDrainWorker(BIO_AOF_FSYNC); - - /* Set the initial repl_offset, which will be applied to fsynced_reploff - * when AOFRW finishes (after possibly being updated by a bio thread) */ - atomicSet(server.fsynced_reploff_pending, server.master_repl_offset); - server.fsynced_reploff = 0; - } - - server.stat_aof_rewrites++; - - if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) { - char tmpfile[256]; - - /* Child */ - redisSetProcTitle("redis-aof-rewrite"); - redisSetCpuAffinity(server.aof_rewrite_cpulist); - snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); - if (rewriteAppendOnlyFile(tmpfile) == C_OK) { - serverLog(LL_NOTICE, - "Successfully created the temporary AOF base file %s", tmpfile); - sendChildCowInfo(CHILD_INFO_TYPE_AOF_COW_SIZE, "AOF rewrite"); - exitFromChild(0, 0); - } else { - exitFromChild(1, 0); - } - } else { - /* Parent */ - if (childpid == -1) { - server.aof_lastbgrewrite_status = C_ERR; - serverLog(LL_WARNING, - "Can't rewrite append only file in background: fork: %s", - strerror(errno)); - return C_ERR; - } - serverLog(LL_NOTICE, - "Background append only file rewriting started by pid %ld",(long) childpid); - server.aof_rewrite_scheduled = 0; - server.aof_rewrite_time_start = time(NULL); - return C_OK; - } - return C_OK; /* unreached */ -} - -void bgrewriteaofCommand(client *c) { - if (server.child_type == CHILD_TYPE_AOF) { - addReplyError(c,"Background append only file rewriting already in progress"); - } else if (hasActiveChildProcess() || server.in_exec) { - server.aof_rewrite_scheduled = 1; - /* When manually triggering AOFRW we reset the count - * so that it can be executed immediately. */ - server.stat_aofrw_consecutive_failures = 0; - addReplyStatus(c,"Background append only file rewriting scheduled"); - } else if (rewriteAppendOnlyFileBackground() == C_OK) { - addReplyStatus(c,"Background append only file rewriting started"); - } else { - addReplyError(c,"Can't execute an AOF background rewriting. " - "Please check the server logs for more information."); - } -} - -void aofRemoveTempFile(pid_t childpid) { - char tmpfile[256]; - - snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) childpid); - bg_unlink(tmpfile); - - snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) childpid); - bg_unlink(tmpfile); -} - -/* Get size of an AOF file. - * The status argument is an optional output argument to be filled with - * one of the AOF_ status values. */ -off_t getAppendOnlyFileSize(sds filename, int *status) { - struct redis_stat sb; - off_t size; - mstime_t latency; - - sds aof_filepath = makePath(server.aof_dirname, filename); - latencyStartMonitor(latency); - if (redis_stat(aof_filepath, &sb) == -1) { - if (status) *status = errno == ENOENT ? AOF_NOT_EXIST : AOF_OPEN_ERR; - serverLog(LL_WARNING, "Unable to obtain the AOF file %s length. stat: %s", - filename, strerror(errno)); - size = 0; - } else { - if (status) *status = AOF_OK; - size = sb.st_size; - } - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("aof-fstat", latency); - sdsfree(aof_filepath); - return size; -} - -/* Get size of all AOF files referred by the manifest (excluding history). - * The status argument is an output argument to be filled with - * one of the AOF_ status values. */ -off_t getBaseAndIncrAppendOnlyFilesSize(aofManifest *am, int *status) { - off_t size = 0; - listNode *ln; - listIter li; - - if (am->base_aof_info) { - serverAssert(am->base_aof_info->file_type == AOF_FILE_TYPE_BASE); - - size += getAppendOnlyFileSize(am->base_aof_info->file_name, status); - if (*status != AOF_OK) return 0; - } - - listRewind(am->incr_aof_list, &li); - while ((ln = listNext(&li)) != NULL) { - aofInfo *ai = (aofInfo*)ln->value; - serverAssert(ai->file_type == AOF_FILE_TYPE_INCR); - size += getAppendOnlyFileSize(ai->file_name, status); - if (*status != AOF_OK) return 0; - } - - return size; -} - -int getBaseAndIncrAppendOnlyFilesNum(aofManifest *am) { - int num = 0; - if (am->base_aof_info) num++; - if (am->incr_aof_list) num += listLength(am->incr_aof_list); - return num; -} - -/* A background append only file rewriting (BGREWRITEAOF) terminated its work. - * Handle this. */ -void backgroundRewriteDoneHandler(int exitcode, int bysignal) { - if (!bysignal && exitcode == 0) { - char tmpfile[256]; - long long now = ustime(); - sds new_base_filepath = NULL; - sds new_incr_filepath = NULL; - aofManifest *temp_am; - mstime_t latency; - - serverLog(LL_NOTICE, - "Background AOF rewrite terminated with success"); - - snprintf(tmpfile, 256, "temp-rewriteaof-bg-%d.aof", - (int)server.child_pid); - - serverAssert(server.aof_manifest != NULL); - - /* Dup a temporary aof_manifest for subsequent modifications. */ - temp_am = aofManifestDup(server.aof_manifest); - - /* Get a new BASE file name and mark the previous (if we have) - * as the HISTORY type. */ - sds new_base_filename = getNewBaseFileNameAndMarkPreAsHistory(temp_am); - serverAssert(new_base_filename != NULL); - new_base_filepath = makePath(server.aof_dirname, new_base_filename); - - /* Rename the temporary aof file to 'new_base_filename'. */ - latencyStartMonitor(latency); - if (rename(tmpfile, new_base_filepath) == -1) { - serverLog(LL_WARNING, - "Error trying to rename the temporary AOF base file %s into %s: %s", - tmpfile, - new_base_filepath, - strerror(errno)); - aofManifestFree(temp_am); - sdsfree(new_base_filepath); - server.aof_lastbgrewrite_status = C_ERR; - server.stat_aofrw_consecutive_failures++; - goto cleanup; - } - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("aof-rename", latency); - serverLog(LL_NOTICE, - "Successfully renamed the temporary AOF base file %s into %s", tmpfile, new_base_filename); - - /* Rename the temporary incr aof file to 'new_incr_filename'. */ - if (server.aof_state == AOF_WAIT_REWRITE) { - /* Get temporary incr aof name. */ - sds temp_incr_aof_name = getTempIncrAofName(); - sds temp_incr_filepath = makePath(server.aof_dirname, temp_incr_aof_name); - /* Get next new incr aof name. */ - sds new_incr_filename = getNewIncrAofName(temp_am, tempIncAofStartReplOffset); - new_incr_filepath = makePath(server.aof_dirname, new_incr_filename); - latencyStartMonitor(latency); - if (rename(temp_incr_filepath, new_incr_filepath) == -1) { - serverLog(LL_WARNING, - "Error trying to rename the temporary AOF incr file %s into %s: %s", - temp_incr_filepath, - new_incr_filepath, - strerror(errno)); - bg_unlink(new_base_filepath); - sdsfree(new_base_filepath); - aofManifestFree(temp_am); - sdsfree(temp_incr_filepath); - sdsfree(new_incr_filepath); - sdsfree(temp_incr_aof_name); - server.aof_lastbgrewrite_status = C_ERR; - server.stat_aofrw_consecutive_failures++; - goto cleanup; - } - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("aof-rename", latency); - serverLog(LL_NOTICE, - "Successfully renamed the temporary AOF incr file %s into %s", temp_incr_aof_name, new_incr_filename); - sdsfree(temp_incr_filepath); - sdsfree(temp_incr_aof_name); - } - - /* Change the AOF file type in 'incr_aof_list' from AOF_FILE_TYPE_INCR - * to AOF_FILE_TYPE_HIST, and move them to the 'history_aof_list'. */ - markRewrittenIncrAofAsHistory(temp_am); - - /* Persist our modifications. */ - if (persistAofManifest(temp_am) == C_ERR) { - bg_unlink(new_base_filepath); - aofManifestFree(temp_am); - sdsfree(new_base_filepath); - if (new_incr_filepath) { - bg_unlink(new_incr_filepath); - sdsfree(new_incr_filepath); - } - server.aof_lastbgrewrite_status = C_ERR; - server.stat_aofrw_consecutive_failures++; - goto cleanup; - } - sdsfree(new_base_filepath); - if (new_incr_filepath) sdsfree(new_incr_filepath); - - /* We can safely let `server.aof_manifest` point to 'temp_am' and free the previous one. */ - aofManifestFreeAndUpdate(temp_am); - - if (server.aof_state != AOF_OFF) { - /* AOF enabled. */ - server.aof_current_size = getAppendOnlyFileSize(new_base_filename, NULL) + server.aof_last_incr_size; - server.aof_rewrite_base_size = server.aof_current_size; - } - - /* We don't care about the return value of `aofDelHistoryFiles`, because the history - * deletion failure will not cause any problems. */ - aofDelHistoryFiles(); - - server.aof_lastbgrewrite_status = C_OK; - server.stat_aofrw_consecutive_failures = 0; - - serverLog(LL_NOTICE, "Background AOF rewrite finished successfully"); - /* Change state from WAIT_REWRITE to ON if needed */ - if (server.aof_state == AOF_WAIT_REWRITE) { - server.aof_state = AOF_ON; - - /* Update the fsynced replication offset that just now become valid. - * This could either be the one we took in startAppendOnly, or a - * newer one set by the bio thread. */ - long long fsynced_reploff_pending; - atomicGet(server.fsynced_reploff_pending, fsynced_reploff_pending); - server.fsynced_reploff = fsynced_reploff_pending; - } - - serverLog(LL_VERBOSE, - "Background AOF rewrite signal handler took %lldus", ustime()-now); - } else if (!bysignal && exitcode != 0) { - server.aof_lastbgrewrite_status = C_ERR; - server.stat_aofrw_consecutive_failures++; - - serverLog(LL_WARNING, - "Background AOF rewrite terminated with error"); - } else { - /* SIGUSR1 is whitelisted, so we have a way to kill a child without - * triggering an error condition. */ - if (bysignal != SIGUSR1) { - server.aof_lastbgrewrite_status = C_ERR; - server.stat_aofrw_consecutive_failures++; - } - - serverLog(LL_WARNING, - "Background AOF rewrite terminated by signal %d", bysignal); - } - -cleanup: - aofRemoveTempFile(server.child_pid); - /* Clear AOF buffer and delete temp incr aof for next rewrite. */ - if (server.aof_state == AOF_WAIT_REWRITE) { - sdsfree(server.aof_buf); - server.aof_buf = sdsempty(); - aofDelTempIncrAofFile(); - } - server.aof_rewrite_time_last = time(NULL)-server.aof_rewrite_time_start; - server.aof_rewrite_time_start = -1; - /* Schedule a new rewrite if we are waiting for it to switch the AOF ON. */ - if (server.aof_state == AOF_WAIT_REWRITE) - server.aof_rewrite_scheduled = 1; -} diff --git a/examples/redis-unstable/src/asciilogo.h b/examples/redis-unstable/src/asciilogo.h deleted file mode 100644 index 73a9977..0000000 --- a/examples/redis-unstable/src/asciilogo.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -const char *ascii_logo = -" _._ \n" -" _.-``__ ''-._ \n" -" _.-`` `. `_. ''-._ Redis Open Source \n" -" .-`` .-```. ```\\/ _.,_ ''-._ %s (%s/%d) %s bit\n" -" ( ' , .-` | `, ) Running in %s mode\n" -" |`-._`-...-` __...-.``-._|'` _.-'| Port: %d\n" -" | `-._ `._ / _.-' | PID: %ld\n" -" `-._ `-._ `-./ _.-' _.-' \n" -" |`-._`-._ `-.__.-' _.-'_.-'| \n" -" | `-._`-._ _.-'_.-' | https://redis.io \n" -" `-._ `-._`-.__.-'_.-' _.-' \n" -" |`-._`-._ `-.__.-' _.-'_.-'| \n" -" | `-._`-._ _.-'_.-' | \n" -" `-._ `-._`-.__.-'_.-' _.-' \n" -" `-._ `-.__.-' _.-' \n" -" `-._ _.-' \n" -" `-.__.-' \n\n"; diff --git a/examples/redis-unstable/src/atomicvar.h b/examples/redis-unstable/src/atomicvar.h deleted file mode 100644 index 3c332ee..0000000 --- a/examples/redis-unstable/src/atomicvar.h +++ /dev/null @@ -1,186 +0,0 @@ -/* This file implements atomic counters using c11 _Atomic, __atomic or __sync - * macros if available, otherwise we will throw an error when compile. - * - * The exported interface is composed of the following macros: - * - * atomicIncr(var,count) -- Increment the atomic counter - * atomicGetIncr(var,oldvalue_var,count) -- Get and increment the atomic counter - * atomicIncrGet(var,newvalue_var,count) -- Increment and get the atomic counter new value - * atomicDecr(var,count) -- Decrement the atomic counter - * atomicGet(var,dstvar) -- Fetch the atomic counter value - * atomicSet(var,value) -- Set the atomic counter value - * atomicGetWithSync(var,value) -- 'atomicGet' with inter-thread synchronization - * atomicSetWithSync(var,value) -- 'atomicSet' with inter-thread synchronization - * atomicCompareExchange(type,var,expected_var,desired) -- Compare and exchange (CAS) operation - * - * Atomic operations on flags. - * Flag type can be int, long, long long or their unsigned counterparts. - * The value of the flag can be 1 or 0. - * - * atomicFlagGetSet(var,oldvalue_var) -- Get and set the atomic counter value - * - * NOTE1: __atomic* and _Atomic implementations can be actually elaborated to support any value by changing the - * hardcoded new value passed to __atomic_exchange* from 1 to @param count - * i.e oldvalue_var = atomic_exchange_explicit(&var, count). - * However, in order to be compatible with the __sync functions family, we can use only 0 and 1. - * The only exchange alternative suggested by __sync is __sync_lock_test_and_set, - * But as described by the gnu manual for __sync_lock_test_and_set(): - * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html - * "A target may support reduced functionality here by which the only valid value to store is the immediate constant 1. The exact value - * actually stored in *ptr is implementation defined." - * Hence, we can't rely on it for a any value other than 1. - * We eventually chose to implement this method with __sync_val_compare_and_swap since it satisfies functionality needed for atomicFlagGetSet - * (if the flag was 0 -> set to 1, if it's already 1 -> do nothing, but the final result is that the flag is set), - * and also it has a full barrier (__sync_lock_test_and_set has acquire barrier). - * - * NOTE2: Unlike other atomic type, which aren't guaranteed to be lock free, c11 atomic_flag does. - * To check whether a type is lock free, atomic_is_lock_free() can be used. - * It can be considered to limit the flag type to atomic_flag to improve performance. - * - * Never use return value from the macros, instead use the AtomicGetIncr() - * if you need to get the current value and increment it atomically, like - * in the following example: - * - * long oldvalue; - * atomicGetIncr(myvar,oldvalue,1); - * doSomethingWith(oldvalue); - * - * ---------------------------------------------------------------------------- - * - * Copyright (c) 2015-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include -#include "config.h" - -#ifndef __ATOMIC_VAR_H -#define __ATOMIC_VAR_H - -/* Define redisAtomic for atomic variable. */ -#define redisAtomic - -/* To test Redis with Helgrind (a Valgrind tool) it is useful to define - * the following macro, so that __sync macros are used: those can be detected - * by Helgrind (even if they are less efficient) so that no false positive - * is reported. */ -// #define __ATOMIC_VAR_FORCE_SYNC_MACROS - -/* There will be many false positives if we test Redis with Helgrind, since - * Helgrind can't understand we have imposed ordering on the program, so - * we use macros in helgrind.h to tell Helgrind inter-thread happens-before - * relationship explicitly for avoiding false positives. - * - * For more details, please see: valgrind/helgrind.h and - * https://www.valgrind.org/docs/manual/hg-manual.html#hg-manual.effective-use - * - * These macros take effect only when 'make helgrind', and you must first - * install Valgrind in the default path configuration. */ -#ifdef __ATOMIC_VAR_FORCE_SYNC_MACROS -#include -#else -#define ANNOTATE_HAPPENS_BEFORE(v) ((void) v) -#define ANNOTATE_HAPPENS_AFTER(v) ((void) v) -#endif - -#if !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && defined(__STDC_VERSION__) && \ - (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) -/* Use '_Atomic' keyword if the compiler supports. */ -#undef redisAtomic -#define redisAtomic _Atomic -/* Implementation using _Atomic in C11. */ - -#include -#define atomicIncr(var,count) atomic_fetch_add_explicit(&var,(count),memory_order_relaxed) -#define atomicGetIncr(var,oldvalue_var,count) do { \ - oldvalue_var = atomic_fetch_add_explicit(&var,(count),memory_order_relaxed); \ -} while(0) -#define atomicIncrGet(var, newvalue_var, count) \ - newvalue_var = atomicIncr(var,count) + count -#define atomicDecr(var,count) atomic_fetch_sub_explicit(&var,(count),memory_order_relaxed) -#define atomicGet(var,dstvar) do { \ - dstvar = atomic_load_explicit(&var,memory_order_relaxed); \ -} while(0) -#define atomicSet(var,value) atomic_store_explicit(&var,value,memory_order_relaxed) -#define atomicGetWithSync(var,dstvar) do { \ - dstvar = atomic_load_explicit(&var,memory_order_seq_cst); \ -} while(0) -#define atomicSetWithSync(var,value) \ - atomic_store_explicit(&var,value,memory_order_seq_cst) -#define atomicCompareExchange(type,var,expected_var,desired) \ - atomic_compare_exchange_weak_explicit(&var,&expected_var,desired,memory_order_relaxed,memory_order_relaxed) -#define atomicFlagGetSet(var,oldvalue_var) \ - oldvalue_var = atomic_exchange_explicit(&var,1,memory_order_relaxed) -#define REDIS_ATOMIC_API "c11-builtin" - -#elif !defined(__ATOMIC_VAR_FORCE_SYNC_MACROS) && \ - (!defined(__clang__) || !defined(__APPLE__) || __apple_build_version__ > 4210057) && \ - defined(__ATOMIC_RELAXED) && defined(__ATOMIC_SEQ_CST) -/* Implementation using __atomic macros. */ - -#define atomicIncr(var,count) __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) -#define atomicIncrGet(var, newvalue_var, count) \ - newvalue_var = __atomic_add_fetch(&var,(count),__ATOMIC_RELAXED) -#define atomicGetIncr(var,oldvalue_var,count) do { \ - oldvalue_var = __atomic_fetch_add(&var,(count),__ATOMIC_RELAXED); \ -} while(0) -#define atomicDecr(var,count) __atomic_sub_fetch(&var,(count),__ATOMIC_RELAXED) -#define atomicGet(var,dstvar) do { \ - dstvar = __atomic_load_n(&var,__ATOMIC_RELAXED); \ -} while(0) -#define atomicSet(var,value) __atomic_store_n(&var,value,__ATOMIC_RELAXED) -#define atomicGetWithSync(var,dstvar) do { \ - dstvar = __atomic_load_n(&var,__ATOMIC_SEQ_CST); \ -} while(0) -#define atomicSetWithSync(var,value) \ - __atomic_store_n(&var,value,__ATOMIC_SEQ_CST) -#define atomicCompareExchange(type,var,expected_var,desired) \ - __atomic_compare_exchange_n(&var,&expected_var,desired,1,__ATOMIC_RELAXED,__ATOMIC_RELAXED) -#define atomicFlagGetSet(var,oldvalue_var) \ - oldvalue_var = __atomic_exchange_n(&var,1,__ATOMIC_RELAXED) -#define REDIS_ATOMIC_API "atomic-builtin" - -#elif defined(HAVE_ATOMIC) -/* Implementation using __sync macros. */ - -#define atomicIncr(var,count) __sync_add_and_fetch(&var,(count)) -#define atomicIncrGet(var, newvalue_var, count) \ - newvalue_var = __sync_add_and_fetch(&var,(count)) -#define atomicGetIncr(var,oldvalue_var,count) do { \ - oldvalue_var = __sync_fetch_and_add(&var,(count)); \ -} while(0) -#define atomicDecr(var,count) __sync_sub_and_fetch(&var,(count)) -#define atomicGet(var,dstvar) do { \ - dstvar = __sync_sub_and_fetch(&var,0); \ -} while(0) -#define atomicSet(var,value) do { \ - while(!__sync_bool_compare_and_swap(&var,var,value)); \ -} while(0) -/* Actually the builtin issues a full memory barrier by default. */ -#define atomicGetWithSync(var,dstvar) do { \ - dstvar = __sync_sub_and_fetch(&var,0,__sync_synchronize); \ - ANNOTATE_HAPPENS_AFTER(&var); \ -} while(0) -#define atomicSetWithSync(var,value) do { \ - ANNOTATE_HAPPENS_BEFORE(&var); \ - while(!__sync_bool_compare_and_swap(&var,var,value,__sync_synchronize)); \ -} while(0) -#define atomicCompareExchange(type,var,expected_var,desired) ({ \ - type _old = __sync_val_compare_and_swap(&var,expected_var,desired); \ - int _success = (_old == expected_var); \ - if (!_success) expected_var = _old; \ - _success; \ -}) -#define atomicFlagGetSet(var,oldvalue_var) \ - oldvalue_var = __sync_val_compare_and_swap(&var,0,1) -#define REDIS_ATOMIC_API "sync-builtin" - -#else -#error "Unable to determine atomic operations for your platform" - -#endif -#endif /* __ATOMIC_VAR_H */ diff --git a/examples/redis-unstable/src/bio.c b/examples/redis-unstable/src/bio.c deleted file mode 100644 index 4d36e3e..0000000 --- a/examples/redis-unstable/src/bio.c +++ /dev/null @@ -1,445 +0,0 @@ -/* Background I/O service for Redis. - * - * This file implements operations that we need to perform in the background. - * Currently there are 3 operations: - * 1) a background close(2) system call. This is needed when the process is - * the last owner of a reference to a file closing it means unlinking it, and - * the deletion of the file is slow, blocking the server. - * 2) AOF fsync - * 3) lazyfree of memory - * - * In the future we'll either continue implementing new things we need or - * we'll switch to libeio. However there are probably long term uses for this - * file as we may want to put Redis specific background tasks here. - * - * DESIGN - * ------ - * - * The design is simple: We have a structure representing a job to perform, - * and several worker threads and job queues. Every job type is assigned to - * a specific worker thread, and a single worker may handle several different - * job types. - * Every thread waits for new jobs in its queue, and processes every job - * sequentially. - * - * Jobs handled by the same worker are guaranteed to be processed from the - * least-recently-inserted to the most-recently-inserted (older jobs processed - * first). - * - * To let the creator of the job to be notified about the completion of the - * operation, it will need to submit additional dummy job, coined as - * completion job request that will be written back eventually, by the - * background thread, into completion job response queue. This notification - * layout can simplify flows that might submit more than one job, such as - * in case of FLUSHALL which for a single command submits multiple jobs. It - * is also correct because jobs are processed in FIFO fashion. - * - * ---------------------------------------------------------------------------- - * - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "bio.h" -#include - -static char* bio_worker_title[] = { - "bio_close_file", - "bio_aof", - "bio_lazy_free", -}; - -#define BIO_WORKER_NUM (sizeof(bio_worker_title) / sizeof(*bio_worker_title)) - -static unsigned int bio_job_to_worker[] = { - [BIO_CLOSE_FILE] = 0, - [BIO_AOF_FSYNC] = 1, - [BIO_CLOSE_AOF] = 1, - [BIO_LAZY_FREE] = 2, - [BIO_COMP_RQ_CLOSE_FILE] = 0, - [BIO_COMP_RQ_AOF_FSYNC] = 1, - [BIO_COMP_RQ_LAZY_FREE] = 2 -}; - -static pthread_t bio_threads[BIO_WORKER_NUM]; -static pthread_mutex_t bio_mutex[BIO_WORKER_NUM]; -static pthread_cond_t bio_newjob_cond[BIO_WORKER_NUM]; -static list *bio_jobs[BIO_WORKER_NUM]; -static unsigned long bio_jobs_counter[BIO_NUM_OPS] = {0}; - -/* The bio_comp_list is used to hold completion job responses and to handover - * to main thread to callback as notification for job completion. Main - * thread will be triggered to read the list by signaling via writing to a pipe */ -static list *bio_comp_list; -static pthread_mutex_t bio_mutex_comp; -static int job_comp_pipe[2]; /* Pipe used to awake the event loop */ - -typedef struct bio_comp_item { - comp_fn *func; /* callback after completion job will be processed */ - uint64_t arg; /* user data to be passed to the function */ - void *ptr; /* user pointer to be passed to the function */ -} bio_comp_item; - -/* This structure represents a background Job. It is only used locally to this - * file as the API does not expose the internals at all. */ -typedef union bio_job { - struct { - int type; /* Job-type tag. This needs to appear as the first element in all union members. */ - } header; - - /* Job specific arguments.*/ - struct { - int type; - int fd; /* Fd for file based background jobs */ - long long offset; /* A job-specific offset, if applicable */ - unsigned need_fsync:1; /* A flag to indicate that a fsync is required before - * the file is closed. */ - unsigned need_reclaim_cache:1; /* A flag to indicate that reclaim cache is required before - * the file is closed. */ - } fd_args; - - struct { - int type; - lazy_free_fn *free_fn; /* Function that will free the provided arguments */ - void *free_args[]; /* List of arguments to be passed to the free function */ - } free_args; - struct { - int type; /* header */ - comp_fn *fn; /* callback. Handover to main thread to cb as notify for job completion */ - uint64_t arg; /* callback arguments */ - void *ptr; /* callback pointer */ - } comp_rq; -} bio_job; - -void *bioProcessBackgroundJobs(void *arg); -void bioPipeReadJobCompList(aeEventLoop *el, int fd, void *privdata, int mask); - -/* Make sure we have enough stack to perform all the things we do in the - * main thread. */ -#define REDIS_THREAD_STACK_SIZE (1024*1024*4) - -/* Initialize the background system, spawning the thread. */ -void bioInit(void) { - pthread_attr_t attr; - pthread_t thread; - size_t stacksize; - unsigned long j; - - /* Initialization of state vars and objects */ - for (j = 0; j < BIO_WORKER_NUM; j++) { - pthread_mutex_init(&bio_mutex[j],NULL); - pthread_cond_init(&bio_newjob_cond[j],NULL); - bio_jobs[j] = listCreate(); - } - - /* init jobs comp responses */ - bio_comp_list = listCreate(); - pthread_mutex_init(&bio_mutex_comp, NULL); - - /* Create a pipe for background thread to be able to wake up the redis main thread. - * Make the pipe non blocking. This is just a best effort aware mechanism - * and we do not want to block not in the read nor in the write half. - * Enable close-on-exec flag on pipes in case of the fork-exec system calls in - * sentinels or redis servers. */ - if (anetPipe(job_comp_pipe, O_CLOEXEC|O_NONBLOCK, O_CLOEXEC|O_NONBLOCK) == -1) { - serverLog(LL_WARNING, - "Can't create the pipe for bio thread: %s", strerror(errno)); - exit(1); - } - - /* Register a readable event for the pipe used to awake the event loop on job completion */ - if (aeCreateFileEvent(server.el, job_comp_pipe[0], AE_READABLE, - bioPipeReadJobCompList, NULL) == AE_ERR) { - serverPanic("Error registering the readable event for the bio pipe."); - } - - /* Set the stack size as by default it may be small in some system */ - pthread_attr_init(&attr); - pthread_attr_getstacksize(&attr,&stacksize); - if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */ - while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2; - pthread_attr_setstacksize(&attr, stacksize); - - /* Ready to spawn our threads. We use the single argument the thread - * function accepts in order to pass the job ID the thread is - * responsible for. */ - for (j = 0; j < BIO_WORKER_NUM; j++) { - int err = pthread_create(&thread,&attr,bioProcessBackgroundJobs, (void*) j); - if (err) { - serverLog(LL_WARNING, "Fatal: Can't initialize Background Jobs. Error message: %s", strerror(err)); - exit(1); - } - bio_threads[j] = thread; - } -} - -void bioSubmitJob(int type, bio_job *job) { - job->header.type = type; - unsigned long worker = bio_job_to_worker[type]; - pthread_mutex_lock(&bio_mutex[worker]); - listAddNodeTail(bio_jobs[worker],job); - bio_jobs_counter[type]++; - pthread_cond_signal(&bio_newjob_cond[worker]); - pthread_mutex_unlock(&bio_mutex[worker]); -} - -void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...) { - va_list valist; - /* Allocate memory for the job structure and all required - * arguments */ - bio_job *job = zmalloc(sizeof(*job) + sizeof(void *) * (arg_count)); - job->free_args.free_fn = free_fn; - - va_start(valist, arg_count); - for (int i = 0; i < arg_count; i++) { - job->free_args.free_args[i] = va_arg(valist, void *); - } - va_end(valist); - bioSubmitJob(BIO_LAZY_FREE, job); -} - -void bioCreateCompRq(bio_worker_t assigned_worker, comp_fn *func, uint64_t user_data, void *user_ptr) { - int type; - switch (assigned_worker) { - case BIO_WORKER_CLOSE_FILE: - type = BIO_COMP_RQ_CLOSE_FILE; - break; - case BIO_WORKER_AOF_FSYNC: - type = BIO_COMP_RQ_AOF_FSYNC; - break; - case BIO_WORKER_LAZY_FREE: - type = BIO_COMP_RQ_LAZY_FREE; - break; - default: - serverPanic("Invalid worker type in bioCreateCompRq()."); - } - - bio_job *job = zmalloc(sizeof(*job)); - job->comp_rq.fn = func; - job->comp_rq.arg = user_data; - job->comp_rq.ptr = user_ptr; - bioSubmitJob(type, job); -} - -void bioCreateCloseJob(int fd, int need_fsync, int need_reclaim_cache) { - bio_job *job = zmalloc(sizeof(*job)); - job->fd_args.fd = fd; - job->fd_args.need_fsync = need_fsync; - job->fd_args.need_reclaim_cache = need_reclaim_cache; - - bioSubmitJob(BIO_CLOSE_FILE, job); -} - -void bioCreateCloseAofJob(int fd, long long offset, int need_reclaim_cache) { - bio_job *job = zmalloc(sizeof(*job)); - job->fd_args.fd = fd; - job->fd_args.offset = offset; - job->fd_args.need_fsync = 1; - job->fd_args.need_reclaim_cache = need_reclaim_cache; - - bioSubmitJob(BIO_CLOSE_AOF, job); -} - -void bioCreateFsyncJob(int fd, long long offset, int need_reclaim_cache) { - bio_job *job = zmalloc(sizeof(*job)); - job->fd_args.fd = fd; - job->fd_args.offset = offset; - job->fd_args.need_reclaim_cache = need_reclaim_cache; - - bioSubmitJob(BIO_AOF_FSYNC, job); -} - -void *bioProcessBackgroundJobs(void *arg) { - bio_job *job; - unsigned long worker = (unsigned long) arg; - sigset_t sigset; - - /* Check that the worker is within the right interval. */ - serverAssert(worker < BIO_WORKER_NUM); - - redis_set_thread_title(bio_worker_title[worker]); - - redisSetCpuAffinity(server.bio_cpulist); - - makeThreadKillable(); - - pthread_mutex_lock(&bio_mutex[worker]); - /* Block SIGALRM so we are sure that only the main thread will - * receive the watchdog signal. */ - sigemptyset(&sigset); - sigaddset(&sigset, SIGALRM); - int err = pthread_sigmask(SIG_BLOCK, &sigset, NULL); - if (err) - serverLog(LL_WARNING, - "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(err)); - - while(1) { - listNode *ln; - - /* The loop always starts with the lock hold. */ - if (listLength(bio_jobs[worker]) == 0) { - pthread_cond_wait(&bio_newjob_cond[worker], &bio_mutex[worker]); - continue; - } - /* Get the job from the queue. */ - ln = listFirst(bio_jobs[worker]); - job = ln->value; - /* It is now possible to unlock the background system as we know have - * a stand alone job structure to process.*/ - pthread_mutex_unlock(&bio_mutex[worker]); - - /* Process the job accordingly to its type. */ - int job_type = job->header.type; - - if (job_type == BIO_CLOSE_FILE) { - if (job->fd_args.need_fsync && - redis_fsync(job->fd_args.fd) == -1 && - errno != EBADF && errno != EINVAL) - { - serverLog(LL_WARNING, "Fail to fsync the AOF file: %s",strerror(errno)); - } - if (job->fd_args.need_reclaim_cache) { - if (reclaimFilePageCache(job->fd_args.fd, 0, 0) == -1) { - serverLog(LL_NOTICE,"Unable to reclaim page cache: %s", strerror(errno)); - } - } - close(job->fd_args.fd); - } else if (job_type == BIO_AOF_FSYNC || job_type == BIO_CLOSE_AOF) { - /* The fd may be closed by main thread and reused for another - * socket, pipe, or file. We just ignore these errno because - * aof fsync did not really fail. */ - if (redis_fsync(job->fd_args.fd) == -1 && - errno != EBADF && errno != EINVAL) - { - int last_status; - atomicGet(server.aof_bio_fsync_status,last_status); - atomicSet(server.aof_bio_fsync_status,C_ERR); - atomicSet(server.aof_bio_fsync_errno,errno); - if (last_status == C_OK) { - serverLog(LL_WARNING, - "Fail to fsync the AOF file: %s",strerror(errno)); - } - } else { - atomicSet(server.aof_bio_fsync_status,C_OK); - atomicSet(server.fsynced_reploff_pending, job->fd_args.offset); - } - - if (job->fd_args.need_reclaim_cache) { - if (reclaimFilePageCache(job->fd_args.fd, 0, 0) == -1) { - serverLog(LL_NOTICE,"Unable to reclaim page cache: %s", strerror(errno)); - } - } - if (job_type == BIO_CLOSE_AOF) - close(job->fd_args.fd); - } else if (job_type == BIO_LAZY_FREE) { - job->free_args.free_fn(job->free_args.free_args); - } else if ((job_type == BIO_COMP_RQ_CLOSE_FILE) || - (job_type == BIO_COMP_RQ_AOF_FSYNC) || - (job_type == BIO_COMP_RQ_LAZY_FREE)) { - bio_comp_item *comp_rsp = zmalloc(sizeof(bio_comp_item)); - comp_rsp->func = job->comp_rq.fn; - comp_rsp->arg = job->comp_rq.arg; - comp_rsp->ptr = job->comp_rq.ptr; - - /* just write it to completion job responses */ - pthread_mutex_lock(&bio_mutex_comp); - listAddNodeTail(bio_comp_list, comp_rsp); - pthread_mutex_unlock(&bio_mutex_comp); - - if (write(job_comp_pipe[1],"A",1) != 1) { - /* Pipe is non-blocking, write() may fail if it's full. */ - } - } else { - serverPanic("Wrong job type in bioProcessBackgroundJobs()."); - } - zfree(job); - - /* Lock again before reiterating the loop, if there are no longer - * jobs to process we'll block again in pthread_cond_wait(). */ - pthread_mutex_lock(&bio_mutex[worker]); - listDelNode(bio_jobs[worker], ln); - bio_jobs_counter[job_type]--; - pthread_cond_signal(&bio_newjob_cond[worker]); - } -} - -/* Return the number of pending jobs of the specified type. */ -unsigned long bioPendingJobsOfType(int type) { - unsigned int worker = bio_job_to_worker[type]; - - pthread_mutex_lock(&bio_mutex[worker]); - unsigned long val = bio_jobs_counter[type]; - pthread_mutex_unlock(&bio_mutex[worker]); - - return val; -} - -/* Wait for the job queue of the worker for jobs of specified type to become empty. */ -void bioDrainWorker(int job_type) { - unsigned long worker = bio_job_to_worker[job_type]; - - pthread_mutex_lock(&bio_mutex[worker]); - while (listLength(bio_jobs[worker]) > 0) { - pthread_cond_wait(&bio_newjob_cond[worker], &bio_mutex[worker]); - } - pthread_mutex_unlock(&bio_mutex[worker]); -} - -/* Kill the running bio threads in an unclean way. This function should be - * used only when it's critical to stop the threads for some reason. - * Currently Redis does this only on crash (for instance on SIGSEGV) in order - * to perform a fast memory check without other threads messing with memory. */ -void bioKillThreads(void) { - int err; - unsigned long j; - - for (j = 0; j < BIO_WORKER_NUM; j++) { - if (bio_threads[j] == pthread_self()) continue; - if (bio_threads[j] && pthread_cancel(bio_threads[j]) == 0) { - if ((err = pthread_join(bio_threads[j],NULL)) != 0) { - serverLog(LL_WARNING, - "Bio worker thread #%lu can not be joined: %s", - j, strerror(err)); - } else { - serverLog(LL_WARNING, - "Bio worker thread #%lu terminated",j); - } - } - } -} - -void bioPipeReadJobCompList(aeEventLoop *el, int fd, void *privdata, int mask) { - UNUSED(el); - UNUSED(mask); - UNUSED(privdata); - - char buf[128]; - list *tmp_list = NULL; - - while (read(fd, buf, sizeof(buf)) == sizeof(buf)); - - /* Handle event loop events if pipe was written from event loop API */ - pthread_mutex_lock(&bio_mutex_comp); - if (listLength(bio_comp_list)) { - tmp_list = bio_comp_list; - bio_comp_list = listCreate(); - } - pthread_mutex_unlock(&bio_mutex_comp); - - if (!tmp_list) return; - - /* callback to all job completions */ - while (listLength(tmp_list)) { - listNode *ln = listFirst(tmp_list); - bio_comp_item *rsp = ln->value; - listDelNode(tmp_list, ln); - rsp->func(rsp->arg, rsp->ptr); - zfree(rsp); - } - listRelease(tmp_list); -} diff --git a/examples/redis-unstable/src/bio.h b/examples/redis-unstable/src/bio.h deleted file mode 100644 index 615cf45..0000000 --- a/examples/redis-unstable/src/bio.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#ifndef __BIO_H -#define __BIO_H - -typedef void lazy_free_fn(void *args[]); -typedef void comp_fn(uint64_t user_data, void *user_ptr); - -typedef enum bio_worker_t { - BIO_WORKER_CLOSE_FILE = 0, - BIO_WORKER_AOF_FSYNC, - BIO_WORKER_LAZY_FREE, - BIO_WORKER_NUM -} bio_worker_t; - -/* Background job opcodes */ -typedef enum bio_job_type_t { - BIO_CLOSE_FILE = 0, /* Deferred close(2) syscall. */ - BIO_AOF_FSYNC, /* Deferred AOF fsync. */ - BIO_LAZY_FREE, /* Deferred objects freeing. */ - BIO_CLOSE_AOF, - BIO_COMP_RQ_CLOSE_FILE, /* Job completion request, registered on close-file worker's queue */ - BIO_COMP_RQ_AOF_FSYNC, /* Job completion request, registered on aof-fsync worker's queue */ - BIO_COMP_RQ_LAZY_FREE, /* Job completion request, registered on lazy-free worker's queue */ - BIO_NUM_OPS -} bio_job_type_t; - -/* Exported API */ -void bioInit(void); -unsigned long bioPendingJobsOfType(int type); -void bioDrainWorker(int job_type); -void bioKillThreads(void); -void bioCreateCloseJob(int fd, int need_fsync, int need_reclaim_cache); -void bioCreateCloseAofJob(int fd, long long offset, int need_reclaim_cache); -void bioCreateFsyncJob(int fd, long long offset, int need_reclaim_cache); -void bioCreateLazyFreeJob(lazy_free_fn free_fn, int arg_count, ...); -void bioCreateCompRq(bio_worker_t assigned_worker, comp_fn *func, uint64_t user_data, void *user_ptr); - - -#endif diff --git a/examples/redis-unstable/src/bitops.c b/examples/redis-unstable/src/bitops.c deleted file mode 100644 index 7a3d9f9..0000000 --- a/examples/redis-unstable/src/bitops.c +++ /dev/null @@ -1,2037 +0,0 @@ -/* Bit operations. - * - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "ctype.h" - -#ifdef HAVE_AVX2 -/* Define __MM_MALLOC_H to prevent importing the memory aligned - * allocation functions, which we don't use. */ -#define __MM_MALLOC_H -#include -#endif - -#ifdef HAVE_AVX512 -/* Define __MM_MALLOC_H to prevent importing the memory aligned - * allocation functions, which we don't use. */ -#define __MM_MALLOC_H -#include -#endif - -#ifdef HAVE_AARCH64_NEON -#include -#endif - -#ifdef HAVE_AVX2 -#define BITOP_USE_AVX2 (__builtin_cpu_supports("avx2")) -#else -#define BITOP_USE_AVX2 0 -#endif - -/* AArch64 NEON support is determined at compile time via HAVE_AARCH64_NEON */ -#ifdef HAVE_AVX512 -#define BITOP_USE_AVX512 (__builtin_cpu_supports("avx512f") && __builtin_cpu_supports("avx512vpopcntdq")) -#else -#define BITOP_USE_AVX512 0 -#endif - - -/* ----------------------------------------------------------------------------- - * Helpers and low level bit functions. - * -------------------------------------------------------------------------- */ - - /* Shared lookup table for bit counting - maps each byte value to its popcount */ -static const uint8_t bitsinbyte[256] = { - #define B2(n) n, n+1, n+1, n+2 - #define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2) - #define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2) - B6(0), B6(1), B6(1), B6(2) - #undef B6 - #undef B4 - #undef B2 -}; - -/* Count number of bits set in the binary array pointed by 's' and long - * 'count' bytes. The implementation of this function is required to - * work with an input string length up to 512 MB or more (server.proto_max_bulk_len) */ -ATTRIBUTE_TARGET_POPCNT -long long redisPopcount(void *s, long count) { - long long bits = 0; - unsigned char *p = s; - uint32_t *p4; -#if defined(HAVE_POPCNT) - int use_popcnt = __builtin_cpu_supports("popcnt"); /* Check if CPU supports POPCNT instruction. */ -#else - int use_popcnt = 0; /* Assume CPU does not support POPCNT if - * __builtin_cpu_supports() is not available. */ -#endif - /* Count initial bytes not aligned to 64-bit when using the POPCNT instruction, - * otherwise align to 32-bit. */ - int align = use_popcnt ? 7 : 3; - while ((unsigned long)p & align && count) { - bits += bitsinbyte[*p++]; - count--; - } - - if (likely(use_popcnt)) { - /* Use separate counters to make the CPU think there are no - * dependencies between these popcnt operations. */ - uint64_t cnt[4]; - memset(cnt, 0, sizeof(cnt)); - - /* Count bits 32 bytes at a time by using popcnt. - * Unroll the loop to avoid the overhead of a single popcnt per iteration, - * allowing the CPU to extract more instruction-level parallelism. - * Reference: https://danluu.com/assembly-intrinsics/ */ - while (count >= 32) { - cnt[0] += __builtin_popcountll(*(uint64_t*)(p)); - cnt[1] += __builtin_popcountll(*(uint64_t*)(p + 8)); - cnt[2] += __builtin_popcountll(*(uint64_t*)(p + 16)); - cnt[3] += __builtin_popcountll(*(uint64_t*)(p + 24)); - count -= 32; - p += 32; - /* Prefetch with 2K stride is just enough to overlap L3 miss latency effectively - * without causing pressure on lower memory hierarchy or polluting L1/L2 */ - redis_prefetch_read(p + 2048); - } - bits += cnt[0] + cnt[1] + cnt[2] + cnt[3]; - goto remain; - } - - /* Count bits 28 bytes at a time */ - p4 = (uint32_t*)p; - while(count>=28) { - uint32_t aux1, aux2, aux3, aux4, aux5, aux6, aux7; - - aux1 = *p4++; - aux2 = *p4++; - aux3 = *p4++; - aux4 = *p4++; - aux5 = *p4++; - aux6 = *p4++; - aux7 = *p4++; - count -= 28; - - aux1 = aux1 - ((aux1 >> 1) & 0x55555555); - aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333); - aux2 = aux2 - ((aux2 >> 1) & 0x55555555); - aux2 = (aux2 & 0x33333333) + ((aux2 >> 2) & 0x33333333); - aux3 = aux3 - ((aux3 >> 1) & 0x55555555); - aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333); - aux4 = aux4 - ((aux4 >> 1) & 0x55555555); - aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333); - aux5 = aux5 - ((aux5 >> 1) & 0x55555555); - aux5 = (aux5 & 0x33333333) + ((aux5 >> 2) & 0x33333333); - aux6 = aux6 - ((aux6 >> 1) & 0x55555555); - aux6 = (aux6 & 0x33333333) + ((aux6 >> 2) & 0x33333333); - aux7 = aux7 - ((aux7 >> 1) & 0x55555555); - aux7 = (aux7 & 0x33333333) + ((aux7 >> 2) & 0x33333333); - bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) + - ((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) + - ((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) + - ((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) + - ((aux5 + (aux5 >> 4)) & 0x0F0F0F0F) + - ((aux6 + (aux6 >> 4)) & 0x0F0F0F0F) + - ((aux7 + (aux7 >> 4)) & 0x0F0F0F0F))* 0x01010101) >> 24; - } - p = (unsigned char*)p4; - -remain: - /* Count the remaining bytes. */ - while(count--) bits += bitsinbyte[*p++]; - return bits; -} - -#ifdef HAVE_AARCH64_NEON -/* AArch64 optimized popcount implementation. - * Processes the input bitmap using four NEON vector accumulators in parallel - * to improve instruction-level parallelism and reduce the frequency of - * scalar reductions. Each accumulator holds 16-bit partial sums that are - * combined only once per large block (128 bytes), minimizing data movement. - * - * Benchmark results show this approach outperforms 2-lane implementations - * and matches or exceeds 8-lane versions in throughput, while avoiding - * register pressure and keeping the backend pipeline fully utilized. - * - * This function is now memory bound on large bitmaps, as confirmed by perf - * profiling, with backend stalls dominated by L1/L2 data cache refills. - */ -long long redisPopCountAarch64(void *s, long count) { - long long bits = 0; - const uint8_t *p = (const uint8_t*)s; - - /* Align */ - while (((uintptr_t)p & 15) && count) { - bits += bitsinbyte[*p++]; - count--; - } - - /* Four vector accumulators of u16 (pairwise-accumulated byte counts). */ - uint16x8_t acc0 = vdupq_n_u16(0); - uint16x8_t acc1 = vdupq_n_u16(0); - uint16x8_t acc2 = vdupq_n_u16(0); - uint16x8_t acc3 = vdupq_n_u16(0); - - /* Process 128B per loop to amortize reductions. */ - while (count >= 128) { - uint8x16_t d0 = vld1q_u8(p + 0); - uint8x16_t d1 = vld1q_u8(p + 16); - uint8x16_t d2 = vld1q_u8(p + 32); - uint8x16_t d3 = vld1q_u8(p + 48); - uint8x16_t d4 = vld1q_u8(p + 64); - uint8x16_t d5 = vld1q_u8(p + 80); - uint8x16_t d6 = vld1q_u8(p + 96); - uint8x16_t d7 = vld1q_u8(p +112); - - /* Per-byte popcount */ - uint8x16_t c0 = vcntq_u8(d0); - uint8x16_t c1 = vcntq_u8(d1); - uint8x16_t c2 = vcntq_u8(d2); - uint8x16_t c3 = vcntq_u8(d3); - uint8x16_t c4 = vcntq_u8(d4); - uint8x16_t c5 = vcntq_u8(d5); - uint8x16_t c6 = vcntq_u8(d6); - uint8x16_t c7 = vcntq_u8(d7); - - /* Pairwise widen-add with accumulation: u8 -> u16, stay in vectors */ - acc0 = vpadalq_u8(acc0, c0); - acc1 = vpadalq_u8(acc1, c1); - acc2 = vpadalq_u8(acc2, c2); - acc3 = vpadalq_u8(acc3, c3); - - acc0 = vpadalq_u8(acc0, c4); - acc1 = vpadalq_u8(acc1, c5); - acc2 = vpadalq_u8(acc2, c6); - acc3 = vpadalq_u8(acc3, c7); - - p += 128; - count -= 128; - } - - /* Reduce vector accumulators to scalar once. */ - uint32x4_t s0 = vpaddlq_u16(acc0); - uint32x4_t s1 = vpaddlq_u16(acc1); - uint32x4_t s2 = vpaddlq_u16(acc2); - uint32x4_t s3 = vpaddlq_u16(acc3); - uint32x4_t s01 = vaddq_u32(s0, s1); - uint32x4_t s23 = vaddq_u32(s2, s3); - uint32x4_t st = vaddq_u32(s01, s23); - uint64x2_t s64 = vpaddlq_u32(st); - bits += (long long)(vgetq_lane_u64(s64, 0) + vgetq_lane_u64(s64, 1)); - - /* Remaining 64B blocks (keep vector domain) */ - while (count >= 64) { - uint8x16_t d0 = vld1q_u8(p + 0); - uint8x16_t d1 = vld1q_u8(p + 16); - uint8x16_t d2 = vld1q_u8(p + 32); - uint8x16_t d3 = vld1q_u8(p + 48); - - uint8x16_t c0 = vcntq_u8(d0); - uint8x16_t c1 = vcntq_u8(d1); - uint8x16_t c2 = vcntq_u8(d2); - uint8x16_t c3 = vcntq_u8(d3); - - uint64x2_t t0 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(c0))); - uint64x2_t t1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(c1))); - uint64x2_t t2 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(c2))); - uint64x2_t t3 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(c3))); - - uint64x2_t s = vaddq_u64(vaddq_u64(t0, t1), vaddq_u64(t2, t3)); - bits += (long long)(vgetq_lane_u64(s, 0) + vgetq_lane_u64(s, 1)); - - p += 64; - count -= 64; - } - - /* 16B chunks */ - while (count >= 16) { - uint8x16_t d = vld1q_u8(p); - uint64x2_t s = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vcntq_u8(d)))); - bits += (long long)(vgetq_lane_u64(s, 0) + vgetq_lane_u64(s, 1)); - p += 16; - count -= 16; - } - - /* Tail */ - while (count--) bits += bitsinbyte[*p++]; - - return bits; -} -#endif - -#ifdef HAVE_AVX512 -/* AVX512 optimized version of redisPopcount using VPOPCNTDQ instruction. - * This function requires AVX512F and AVX512VPOPCNTDQ support. */ -ATTRIBUTE_TARGET_AVX512_POPCOUNT -long long redisPopCountAvx512(void *s, long count) { - long long bits = 0; - unsigned char *p = s; - - /* Align to 64-byte boundary for optimal AVX512 performance */ - while ((unsigned long)p & 63 && count) { - bits += bitsinbyte[*p++]; - count--; - } - - /* Process 64 bytes at a time using AVX512 */ - while (count >= 64) { - __m512i data = _mm512_loadu_si512((__m512i*)p); - __m512i popcnt = _mm512_popcnt_epi64(data); - - /* Sum all 8 64-bit popcount results */ - bits += _mm512_reduce_add_epi64(popcnt); - - p += 64; - count -= 64; - - /* Prefetch next cache line */ - redis_prefetch_read(p + 2048); - } - - /* Handle remaining bytes with scalar popcount */ - while (count >= 8) { - bits += __builtin_popcountll(*(uint64_t*)p); - p += 8; - count -= 8; - } - - /* Handle final bytes */ - while (count--) { - bits += bitsinbyte[*p++]; - } - - return bits; -} -#endif - -#ifdef HAVE_AVX2 -/* AVX2 optimized version of redisPopcount. - * This function requires AVX2 and POPCNT support. */ -ATTRIBUTE_TARGET_AVX2_POPCOUNT -long long redisPopCountAvx2(void *s, long count) { - long long bits = 0; - unsigned char *p = s; - - /* Align to 8-byte boundary for 64-bit operations */ - while ((unsigned long)p & 7 && count) { - bits += bitsinbyte[*p++]; - count--; - } - - /* Use separate counters to avoid dependencies, similar to regular redisPopcount */ - uint64_t cnt[4]; - memset(cnt, 0, sizeof(cnt)); - - /* Process 32 bytes at a time using POPCNT on 64-bit chunks */ - while (count >= 32) { - cnt[0] += __builtin_popcountll(*(uint64_t*)(p)); - cnt[1] += __builtin_popcountll(*(uint64_t*)(p + 8)); - cnt[2] += __builtin_popcountll(*(uint64_t*)(p + 16)); - cnt[3] += __builtin_popcountll(*(uint64_t*)(p + 24)); - - p += 32; - count -= 32; - - /* Prefetch next cache line */ - redis_prefetch_read(p + 2048); - } - - bits += cnt[0] + cnt[1] + cnt[2] + cnt[3]; - - /* Handle remaining bytes with scalar popcount */ - while (count >= 8) { - bits += __builtin_popcountll(*(uint64_t*)p); - p += 8; - count -= 8; - } - - /* Handle final bytes */ - while (count--) { - bits += bitsinbyte[*p++]; - } - - return bits; -} -#endif - -/* Automatically select the best available popcount implementation */ -static inline long long redisPopcountAuto(const unsigned char *p, long count) { -#ifdef HAVE_AVX512 - if (BITOP_USE_AVX512) { - return redisPopCountAvx512((void*)p, count); - } -#endif -#ifdef HAVE_AVX2 - if (BITOP_USE_AVX2) { - return redisPopCountAvx2((void*)p, count); - } -#endif -#ifdef HAVE_AARCH64_NEON - return redisPopCountAarch64((void*)p, count); -#else - return redisPopcount((void*)p, count); -#endif -} - -/* Return the position of the first bit set to one (if 'bit' is 1) or - * zero (if 'bit' is 0) in the bitmap starting at 's' and long 'count' bytes. - * - * The function is guaranteed to return a value >= 0 if 'bit' is 0 since if - * no zero bit is found, it returns count*8 assuming the string is zero - * padded on the right. However if 'bit' is 1 it is possible that there is - * not a single set bit in the bitmap. In this special case -1 is returned. */ -long long redisBitpos(void *s, unsigned long count, int bit) { - unsigned long *l; - unsigned char *c; - unsigned long skipval, word = 0, one; - long long pos = 0; /* Position of bit, to return to the caller. */ - unsigned long j; - int found; - - /* Process whole words first, seeking for first word that is not - * all ones or all zeros respectively if we are looking for zeros - * or ones. This is much faster with large strings having contiguous - * blocks of 1 or 0 bits compared to the vanilla bit per bit processing. - * - * Note that if we start from an address that is not aligned - * to sizeof(unsigned long) we consume it byte by byte until it is - * aligned. */ - - /* Skip initial bits not aligned to sizeof(unsigned long) byte by byte. */ - skipval = bit ? 0 : UCHAR_MAX; - c = (unsigned char*) s; - found = 0; - while((unsigned long)c & (sizeof(*l)-1) && count) { - if (*c != skipval) { - found = 1; - break; - } - c++; - count--; - pos += 8; - } - - /* Skip bits with full word step. */ - l = (unsigned long*) c; - if (!found) { - skipval = bit ? 0 : ULONG_MAX; - while (count >= sizeof(*l)) { - if (*l != skipval) break; - l++; - count -= sizeof(*l); - pos += sizeof(*l)*8; - } - } - - /* Load bytes into "word" considering the first byte as the most significant - * (we basically consider it as written in big endian, since we consider the - * string as a set of bits from left to right, with the first bit at position - * zero. - * - * Note that the loading is designed to work even when the bytes left - * (count) are less than a full word. We pad it with zero on the right. */ - c = (unsigned char*)l; - for (j = 0; j < sizeof(*l); j++) { - word <<= 8; - if (count) { - word |= *c; - c++; - count--; - } - } - - /* Special case: - * If bits in the string are all zero and we are looking for one, - * return -1 to signal that there is not a single "1" in the whole - * string. This can't happen when we are looking for "0" as we assume - * that the right of the string is zero padded. */ - if (bit == 1 && word == 0) return -1; - - /* Last word left, scan bit by bit. The first thing we need is to - * have a single "1" set in the most significant position in an - * unsigned long. We don't know the size of the long so we use a - * simple trick. */ - one = ULONG_MAX; /* All bits set to 1.*/ - one >>= 1; /* All bits set to 1 but the MSB. */ - one = ~one; /* All bits set to 0 but the MSB. */ - - while(one) { - if (((one & word) != 0) == bit) return pos; - pos++; - one >>= 1; - } - - /* If we reached this point, there is a bug in the algorithm, since - * the case of no match is handled as a special case before. */ - serverPanic("End of redisBitpos() reached."); - return 0; /* Just to avoid warnings. */ -} - -/* The following set.*Bitfield and get.*Bitfield functions implement setting - * and getting arbitrary size (up to 64 bits) signed and unsigned integers - * at arbitrary positions into a bitmap. - * - * The representation considers the bitmap as having the bit number 0 to be - * the most significant bit of the first byte, and so forth, so for example - * setting a 5 bits unsigned integer to value 23 at offset 7 into a bitmap - * previously set to all zeroes, will produce the following representation: - * - * +--------+--------+ - * |00000001|01110000| - * +--------+--------+ - * - * When offsets and integer sizes are aligned to bytes boundaries, this is the - * same as big endian, however when such alignment does not exist, its important - * to also understand how the bits inside a byte are ordered. - * - * Note that this format follows the same convention as SETBIT and related - * commands. - */ - -void setUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, uint64_t value) { - uint64_t byte, bit, byteval, bitval, j; - - for (j = 0; j < bits; j++) { - bitval = (value & ((uint64_t)1<<(bits-1-j))) != 0; - byte = offset >> 3; - bit = 7 - (offset & 0x7); - byteval = p[byte]; - byteval &= ~(1 << bit); - byteval |= bitval << bit; - p[byte] = byteval & 0xff; - offset++; - } -} - -void setSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits, int64_t value) { - uint64_t uv = value; /* Casting will add UINT64_MAX + 1 if v is negative. */ - setUnsignedBitfield(p,offset,bits,uv); -} - -uint64_t getUnsignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) { - uint64_t byte, bit, byteval, bitval, j, value = 0; - - for (j = 0; j < bits; j++) { - byte = offset >> 3; - bit = 7 - (offset & 0x7); - byteval = p[byte]; - bitval = (byteval >> bit) & 1; - value = (value<<1) | bitval; - offset++; - } - return value; -} - -int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) { - int64_t value; - union {uint64_t u; int64_t i;} conv; - - /* Converting from unsigned to signed is undefined when the value does - * not fit, however here we assume two's complement and the original value - * was obtained from signed -> unsigned conversion, so we'll find the - * most significant bit set if the original value was negative. - * - * Note that two's complement is mandatory for exact-width types - * according to the C99 standard. */ - conv.u = getUnsignedBitfield(p,offset,bits); - value = conv.i; - - /* If the top significant bit is 1, propagate it to all the - * higher bits for two's complement representation of signed - * integers. */ - if (bits < 64 && (value & ((uint64_t)1 << (bits-1)))) - value |= ((uint64_t)-1) << bits; - return value; -} - -/* The following two functions detect overflow of a value in the context - * of storing it as an unsigned or signed integer with the specified - * number of bits. The functions both take the value and a possible increment. - * If no overflow could happen and the value+increment fit inside the limits, - * then zero is returned, otherwise in case of overflow, 1 is returned, - * otherwise in case of underflow, -1 is returned. - * - * When non-zero is returned (overflow or underflow), if not NULL, *limit is - * set to the value the operation should result when an overflow happens, - * depending on the specified overflow semantics: - * - * For BFOVERFLOW_SAT if 1 is returned, *limit it is set maximum value that - * you can store in that integer. when -1 is returned, *limit is set to the - * minimum value that an integer of that size can represent. - * - * For BFOVERFLOW_WRAP *limit is set by performing the operation in order to - * "wrap" around towards zero for unsigned integers, or towards the most - * negative number that is possible to represent for signed integers. */ - -#define BFOVERFLOW_WRAP 0 -#define BFOVERFLOW_SAT 1 -#define BFOVERFLOW_FAIL 2 /* Used by the BITFIELD command implementation. */ - -int checkUnsignedBitfieldOverflow(uint64_t value, int64_t incr, uint64_t bits, int owtype, uint64_t *limit) { - uint64_t max = (bits == 64) ? UINT64_MAX : (((uint64_t)1< max || (incr > 0 && incr > maxincr)) { - if (limit) { - if (owtype == BFOVERFLOW_WRAP) { - goto handle_wrap; - } else if (owtype == BFOVERFLOW_SAT) { - *limit = max; - } - } - return 1; - } else if (incr < 0 && incr < minincr) { - if (limit) { - if (owtype == BFOVERFLOW_WRAP) { - goto handle_wrap; - } else if (owtype == BFOVERFLOW_SAT) { - *limit = 0; - } - } - return -1; - } - return 0; - -handle_wrap: - { - uint64_t mask = ((uint64_t)-1) << bits; - uint64_t res = value+incr; - - res &= ~mask; - *limit = res; - } - return 1; -} - -int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int owtype, int64_t *limit) { - int64_t max = (bits == 64) ? INT64_MAX : (((int64_t)1<<(bits-1))-1); - int64_t min = (-max)-1; - - /* Note that maxincr and minincr could overflow, but we use the values - * only after checking 'value' range, so when we use it no overflow - * happens. 'uint64_t' cast is there just to prevent undefined behavior on - * overflow */ - int64_t maxincr = (uint64_t)max-value; - int64_t minincr = min-value; - - if (value > max || (bits != 64 && incr > maxincr) || (value >= 0 && incr > 0 && incr > maxincr)) - { - if (limit) { - if (owtype == BFOVERFLOW_WRAP) { - goto handle_wrap; - } else if (owtype == BFOVERFLOW_SAT) { - *limit = max; - } - } - return 1; - } else if (value < min || (bits != 64 && incr < minincr) || (value < 0 && incr < 0 && incr < minincr)) { - if (limit) { - if (owtype == BFOVERFLOW_WRAP) { - goto handle_wrap; - } else if (owtype == BFOVERFLOW_SAT) { - *limit = min; - } - } - return -1; - } - return 0; - -handle_wrap: - { - uint64_t msb = (uint64_t)1 << (bits-1); - uint64_t a = value, b = incr, c; - c = a+b; /* Perform addition as unsigned so that's defined. */ - - /* If the sign bit is set, propagate to all the higher order - * bits, to cap the negative value. If it's clear, mask to - * the positive integer limit. */ - if (bits < 64) { - uint64_t mask = ((uint64_t)-1) << bits; - if (c & msb) { - c |= mask; - } else { - c &= ~mask; - } - } - *limit = c; - } - return 1; -} - -/* Debugging function. Just show bits in the specified bitmap. Not used - * but here for not having to rewrite it when debugging is needed. */ -void printBits(unsigned char *p, unsigned long count) { - unsigned long j, i, byte; - - for (j = 0; j < count; j++) { - byte = p[j]; - for (i = 0x80; i > 0; i /= 2) - printf("%c", (byte & i) ? '1' : '0'); - printf("|"); - } - printf("\n"); -} - -/* ----------------------------------------------------------------------------- - * Bits related string commands: GETBIT, SETBIT, BITCOUNT, BITOP. - * -------------------------------------------------------------------------- */ - -#define BITOP_AND 0 -#define BITOP_OR 1 -#define BITOP_XOR 2 -#define BITOP_NOT 3 -#define BITOP_DIFF 4 /* DIFF(X, A1, A2, ..., An) = X & !(A1 | A2 | ... | An) */ -#define BITOP_DIFF1 5 /* DIFF1(X, A1, A2, ..., An) = !X & (A1 | A2 | ... | An) */ -#define BITOP_ANDOR 6 /* ANDOR(X, A1, A2, ..., An) = X & (A1 | A2 | ... | An) */ - -/* ONE(A1, A2, ..., An) = X. - * If X[i] is the i-th bit of X then: - * X[i] == 1 if and only if there is m such that: - * Am[i] == 1 and Al[i] == 0 for all l != m. */ -#define BITOP_ONE 7 - -#define BITFIELDOP_GET 0 -#define BITFIELDOP_SET 1 -#define BITFIELDOP_INCRBY 2 - -/* This helper function used by GETBIT / SETBIT parses the bit offset argument - * making sure an error is returned if it is negative or if it overflows - * Redis 512 MB limit for the string value or more (server.proto_max_bulk_len). - * - * If the 'hash' argument is true, and 'bits is positive, then the command - * will also parse bit offsets prefixed by "#". In such a case the offset - * is multiplied by 'bits'. This is useful for the BITFIELD command. */ -int getBitOffsetFromArgument(client *c, robj *o, uint64_t *offset, int hash, int bits) { - long long loffset; - char *err = "bit offset is not an integer or out of range"; - char *p = o->ptr; - size_t plen = sdslen(p); - int usehash = 0; - - /* Handle # form. */ - if (p[0] == '#' && hash && bits > 0) usehash = 1; - - if (string2ll(p+usehash,plen-usehash,&loffset) == 0) { - addReplyError(c,err); - return C_ERR; - } - - /* Adjust the offset by 'bits' for # form. */ - if (usehash) loffset *= bits; - - /* Limit offset to server.proto_max_bulk_len (512MB in bytes by default) */ - if (loffset < 0 || (!mustObeyClient(c) && (loffset >> 3) >= server.proto_max_bulk_len)) - { - addReplyError(c,err); - return C_ERR; - } - - *offset = loffset; - return C_OK; -} - -/* This helper function for BITFIELD parses a bitfield type in the form - * where sign is 'u' or 'i' for unsigned and signed, and - * the bits is a value between 1 and 64. However 64 bits unsigned integers - * are reported as an error because of current limitations of Redis protocol - * to return unsigned integer values greater than INT64_MAX. - * - * On error C_ERR is returned and an error is sent to the client. */ -int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) { - char *p = o->ptr; - char *err = "Invalid bitfield type. Use something like i16 u8. Note that u64 is not supported but i64 is."; - long long llbits; - - if (p[0] == 'i') { - *sign = 1; - } else if (p[0] == 'u') { - *sign = 0; - } else { - addReplyError(c,err); - return C_ERR; - } - - if ((string2ll(p+1,strlen(p+1),&llbits)) == 0 || - llbits < 1 || - (*sign == 1 && llbits > 64) || - (*sign == 0 && llbits > 63)) - { - addReplyError(c,err); - return C_ERR; - } - *bits = llbits; - return C_OK; -} - -/* This is a helper function for commands implementations that need to write - * bits to a string object. The command creates or pad with zeroes the string - * so that the 'maxbit' bit can be addressed. The object is finally - * returned. Otherwise if the key holds a wrong type NULL is returned and - * an error is sent to the client. - * - * (Must provide all the arguments to the function) - */ -static kvobj *lookupStringForBitCommand(client *c, uint64_t maxbit, - size_t *strOldSize, size_t *strGrowSize) -{ - dictEntryLink link; - size_t byte = maxbit >> 3; - size_t oldAllocSize = 0; - kvobj *o = lookupKeyWriteWithLink(c->db,c->argv[1],&link); - if (checkType(c,o,OBJ_STRING)) return NULL; - - if (o == NULL) { - o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1)); - dbAddByLink(c->db,c->argv[1],&o,&link); - *strGrowSize = byte + 1; - *strOldSize = 0; - } else { - o = dbUnshareStringValue(c->db,c->argv[1],o); - *strOldSize = sdslen(o->ptr); - if (server.memory_tracking_per_slot) - oldAllocSize = stringObjectAllocSize(o); - o->ptr = sdsgrowzero(o->ptr,byte+1); - if (server.memory_tracking_per_slot) - updateSlotAllocSize(c->db, getKeySlot(c->argv[1]->ptr), oldAllocSize, stringObjectAllocSize(o)); - *strGrowSize = sdslen(o->ptr) - *strOldSize; - } - return o; -} - -/* Return a pointer to the string object content, and stores its length - * in 'len'. The user is required to pass (likely stack allocated) buffer - * 'llbuf' of at least LONG_STR_SIZE bytes. Such a buffer is used in the case - * the object is integer encoded in order to provide the representation - * without using heap allocation. - * - * The function returns the pointer to the object array of bytes representing - * the string it contains, that may be a pointer to 'llbuf' or to the - * internal object representation. As a side effect 'len' is filled with - * the length of such buffer. - * - * If the source object is NULL the function is guaranteed to return NULL - * and set 'len' to 0. */ -unsigned char *getObjectReadOnlyString(robj *o, long *len, char *llbuf) { - serverAssert(!o || o->type == OBJ_STRING); - unsigned char *p = NULL; - - /* Set the 'p' pointer to the string, that can be just a stack allocated - * array if our string was integer encoded. */ - if (o && o->encoding == OBJ_ENCODING_INT) { - p = (unsigned char*) llbuf; - if (len) *len = ll2string(llbuf,LONG_STR_SIZE,(long)o->ptr); - } else if (o) { - p = (unsigned char*) o->ptr; - if (len) *len = sdslen(o->ptr); - } else { - if (len) *len = 0; - } - return p; -} - -/* SETBIT key offset bitvalue */ -void setbitCommand(client *c) { - char *err = "bit is not an integer or out of range"; - uint64_t bitoffset; - ssize_t byte, bit; - int byteval, bitval; - long on; - - if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK) - return; - - if (getLongFromObjectOrReply(c,c->argv[3],&on,err) != C_OK) - return; - - /* Bits can only be set or cleared... */ - if (on & ~1) { - addReplyError(c,err); - return; - } - - size_t strOldSize, strGrowSize; - kvobj *o = lookupStringForBitCommand(c, bitoffset, &strOldSize, &strGrowSize); - if (o == NULL) return; - - /* Get current values */ - byte = bitoffset >> 3; - byteval = ((uint8_t*)o->ptr)[byte]; - bit = 7 - (bitoffset & 0x7); - bitval = byteval & (1 << bit); - - /* Either it is newly created, changed length, or the bit changes before and after. - * Note that the bitval here is actually a decimal number. - * So we need to use `!!` to convert it to 0 or 1 for comparison. */ - if (strGrowSize || (!!bitval != on)) { - /* Update byte with new bit value. */ - byteval &= ~(1 << bit); - byteval |= ((on & 0x1) << bit); - ((uint8_t*)o->ptr)[byte] = byteval; - keyModified(c,c->db,c->argv[1],o,1); - notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); - server.dirty++; - - /* If this is not a new key (old size not 0) and size changed, then - * update the keysizes histogram. Otherwise, the histogram already - * updated in lookupStringForBitCommand() by calling dbAdd(). */ - if ((strOldSize > 0) && (strGrowSize != 0)) - updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_STRING, - strOldSize, strOldSize + strGrowSize); - } - - /* Return original value. */ - addReply(c, bitval ? shared.cone : shared.czero); -} - -/* GETBIT key offset */ -void getbitCommand(client *c) { - char llbuf[32]; - uint64_t bitoffset; - size_t byte, bit; - size_t bitval = 0; - - if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK) - return; - - kvobj *kv = lookupKeyReadOrReply(c, c->argv[1], shared.czero); - if (kv == NULL || checkType(c,kv,OBJ_STRING)) return; - - byte = bitoffset >> 3; - bit = 7 - (bitoffset & 0x7); - if (sdsEncodedObject(kv)) { - if (byte < sdslen(kv->ptr)) - bitval = ((uint8_t*)kv->ptr)[byte] & (1 << bit); - } else { - if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)kv->ptr)) - bitval = llbuf[byte] & (1 << bit); - } - - addReply(c, bitval ? shared.cone : shared.czero); -} - -#ifdef HAVE_AVX2 -/* Compute the given bitop operation using AVX2 intrinsics. - * Return how many bytes were successfully processed, as AVX2 operates on - * 256-bit registers so if `minlen` is not a multiple of 32 some of the bytes - * will be skipped. They will be taken care for in the unoptimized loop in the - * main bitopCommand function. */ -ATTRIBUTE_TARGET_AVX2_POPCOUNT -unsigned long bitopCommandAVX(unsigned char **keys, unsigned char *res, - unsigned long op, unsigned long numkeys, - unsigned long minlen) -{ - const unsigned long step = sizeof(__m256i); - - unsigned long i; - unsigned long processed = 0; - unsigned char *res_start = res; - unsigned char *fst_key = keys[0]; - - if (minlen < step) { - return 0; - } - - /* Unlike other operations that do the same with all source keys - * DIFF, DIFF1 and ANDOR all compute the disjunction of all the source keys - * but the first one. We first store that disjunction in `lres` and later - * compute the final operation using the first source key. */ - if (op != BITOP_DIFF && op != BITOP_DIFF1 && op != BITOP_ANDOR) { - memcpy(res, keys[0], minlen); - } - - const __m256i max256 = _mm256_set1_epi64x(-1); - const __m256i zero256 = _mm256_set1_epi64x(0); - - switch (op) { - case BITOP_AND: - while (minlen >= step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - - for (i = 1; i < numkeys; i++) { - __m256i lkey = _mm256_lddqu_si256((__m256i*)(keys[i]+processed)); - lres = _mm256_and_si256(lres, lkey); - } - _mm256_storeu_si256((__m256i*)res, lres); - res += step; - processed += step; - minlen -= step; - } - break; - case BITOP_DIFF: - case BITOP_DIFF1: - case BITOP_ANDOR: - case BITOP_OR: - while (minlen >= step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - - for (i = 1; i < numkeys; i++) { - __m256i lkey = _mm256_lddqu_si256((__m256i*)(keys[i]+processed)); - lres = _mm256_or_si256(lres, lkey); - } - _mm256_storeu_si256((__m256i*)res, lres); - res += step; - processed += step; - minlen -= step; - } - break; - case BITOP_XOR: - while (minlen >= step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - - for (i = 1; i < numkeys; i++) { - __m256i lkey = _mm256_lddqu_si256((__m256i*)(keys[i]+processed)); - lres = _mm256_xor_si256(lres, lkey); - } - _mm256_storeu_si256((__m256i*)res, lres); - res += step; - processed += step; - minlen -= step; - } - break; - case BITOP_NOT: - while (minlen >= step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - lres = _mm256_xor_si256(lres, max256); - _mm256_storeu_si256((__m256i*)res, lres); - res += step; - processed += step; - minlen -= step; - } - break; - case BITOP_ONE: - while (minlen >= step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - __m256i common_bits = zero256; - - for (i = 1; i < numkeys; i++) { - __m256i lkey = _mm256_lddqu_si256((__m256i*)(keys[i]+processed)); - __m256i common = _mm256_and_si256(lres, lkey); - common_bits = _mm256_or_si256(common_bits, common); - - lres = _mm256_xor_si256(lres, lkey); - } - lres = _mm256_andnot_si256(common_bits, lres); - _mm256_storeu_si256((__m256i*)res, lres); - res += step; - processed += step; - minlen -= step; - } - break; - default: - break; - } - - res = res_start; - switch (op) { - case BITOP_DIFF: - for (i = 0; i < processed; i += step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - __m256i fkey = _mm256_lddqu_si256((__m256i*)fst_key); - - lres = _mm256_andnot_si256(lres, fkey); - _mm256_storeu_si256((__m256i*)res, lres); - - res += step; - fst_key += step; - } - break; - case BITOP_DIFF1: - for (i = 0; i < processed; i += step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - __m256i fkey = _mm256_lddqu_si256((__m256i*)fst_key); - - lres = _mm256_andnot_si256(fkey, lres); - _mm256_storeu_si256((__m256i*)res, lres); - - res += step; - fst_key += step; - } - break; - case BITOP_ANDOR: - for (i = 0; i < processed; i += step) { - __m256i lres = _mm256_lddqu_si256((__m256i*)res); - __m256i fkey = _mm256_lddqu_si256((__m256i*)fst_key); - - lres = _mm256_and_si256(fkey, lres); - _mm256_storeu_si256((__m256i*)res, lres); - - res += step; - fst_key += step; - } - break; - default: - break; - } - - return processed; -} -#endif /* HAVE_AVX2 */ - -/* BITOP op_name target_key src_key1 src_key2 src_key3 ... src_keyN */ -REDIS_NO_SANITIZE("alignment") -void bitopCommand(client *c) { - char *opname = c->argv[1]->ptr; - robj *targetkey = c->argv[2]; - unsigned long op, j, numkeys; - robj **objects; /* Array of source objects. */ - unsigned char **src; /* Array of source strings pointers. */ - unsigned long *len, maxlen = 0; /* Array of length of src strings, - and max len. */ - unsigned long minlen = 0; /* Min len among the input keys. */ - unsigned char *res = NULL; /* Resulting string. */ - - /* Parse the operation name. */ - if ((opname[0] == 'a' || opname[0] == 'A') && !strcasecmp(opname,"and")) - op = BITOP_AND; - else if((opname[0] == 'o' || opname[0] == 'O') && !strcasecmp(opname,"or")) - op = BITOP_OR; - else if((opname[0] == 'x' || opname[0] == 'X') && !strcasecmp(opname,"xor")) - op = BITOP_XOR; - else if((opname[0] == 'n' || opname[0] == 'N') && !strcasecmp(opname,"not")) - op = BITOP_NOT; - else if ((opname[0] == 'd' || opname[0] == 'D') && !strcasecmp(opname,"diff")) - op = BITOP_DIFF; - else if ((opname[0] == 'd' || opname[0] == 'D') && !strcasecmp(opname,"diff1")) - op = BITOP_DIFF1; - else if ((opname[0] == 'a' || opname[0] == 'A') && !strcasecmp(opname,"andor")) - op = BITOP_ANDOR; - else if ((opname[0] == 'o' || opname[0] == 'O') && !strcasecmp(opname,"one")) - op = BITOP_ONE; - else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - - /* Sanity check: NOT accepts only a single key argument. */ - if (op == BITOP_NOT && c->argc != 4) { - addReplyError(c,"BITOP NOT must be called with a single source key."); - return; - } - - if ((op == BITOP_DIFF || op == BITOP_DIFF1 || op == BITOP_ANDOR) && c->argc < 5) { - sds opname_upper = sdsnew(opname); - sdstoupper(opname_upper); - addReplyErrorFormat(c,"BITOP %s must be called with at least two source keys.", opname_upper); - sdsfree(opname_upper); - return; - } - - /* Lookup keys, and store pointers to the string objects into an array. */ - numkeys = c->argc - 3; - src = zmalloc(sizeof(unsigned char*) * numkeys); - len = zmalloc(sizeof(long) * numkeys); - objects = zmalloc(sizeof(robj*) * numkeys); - for (j = 0; j < numkeys; j++) { - kvobj *kv = lookupKeyRead(c->db, c->argv[j + 3]); - /* Handle non-existing keys as empty strings. */ - if (kv == NULL) { - objects[j] = NULL; - src[j] = NULL; - len[j] = 0; - minlen = 0; - continue; - } - /* Return an error if one of the keys is not a string. */ - if (checkType(c, kv, OBJ_STRING)) { - unsigned long i; - for (i = 0; i < j; i++) { - if (objects[i]) - decrRefCount(objects[i]); - } - zfree(src); - zfree(len); - zfree(objects); - return; - } - objects[j] = getDecodedObject(kv); - src[j] = objects[j]->ptr; - len[j] = sdslen(objects[j]->ptr); - if (len[j] > maxlen) maxlen = len[j]; - if (j == 0 || len[j] < minlen) minlen = len[j]; - } - - /* Compute the bit operation, if at least one string is not empty. */ - if (maxlen) { - res = (unsigned char*) sdsnewlen(NULL,maxlen); - unsigned char output, byte, disjunction, common_bits; - unsigned long i; - int useAVX2 = 0; - - /* Number of bytes processed from each source key */ - j = 0; - -#if defined(HAVE_AVX2) - if (BITOP_USE_AVX2) { - j = bitopCommandAVX(src, res, op, numkeys, minlen); - - serverAssert(minlen >= j); - minlen -= j; - - useAVX2 = 1; - } -#endif - -#if !defined(USE_ALIGNED_ACCESS) - /* We don't have AVX2 but we still have fast path: - * as far as we have data for all the input bitmaps we - * can take a fast path that performs much better than the - * vanilla algorithm. On ARM we skip the fast path since it will - * result in GCC compiling the code using multiple-words load/store - * operations that are not supported even in ARM >= v6. */ - if (minlen >= sizeof(unsigned long)*4) { - /* We can't have entered the AVX2 path since minlen >= sizeof(unsigned long)*4 - * AVX2 path operates on steps of sizeof(__m256i) which for 64-bit - * machines (the only ones supporting AVX2) is equal to - * sizeof(unsigned long)*4. That means after the AVX2 - * path minlen will necessarily be < sizeof(unsigned long)*4. */ - serverAssert(!useAVX2); - - unsigned long **lp = (unsigned long**)src; - unsigned long *lres = (unsigned long*) res; - - /* Index over the unsigned long version of the source keys */ - size_t k = 0; - - /* Unlike other operations that do the same with all source keys - * DIFF, DIFF1 and ANDOR all compute the disjunction of all the - * source keys but the first one. We first store that disjunction - * in `lres` and later compute the final operation using the first - * source key. */ - if (op != BITOP_DIFF && op != BITOP_DIFF1 && op != BITOP_ANDOR) - memcpy(lres,src[0],minlen); - - /* Different branches per different operations for speed (sorry). */ - if (op == BITOP_AND) { - while(minlen >= sizeof(unsigned long)*4) { - for (i = 1; i < numkeys; i++) { - lres[0] &= lp[i][k+0]; - lres[1] &= lp[i][k+1]; - lres[2] &= lp[i][k+2]; - lres[3] &= lp[i][k+3]; - } - k+=4; - lres+=4; - j += sizeof(unsigned long)*4; - minlen -= sizeof(unsigned long)*4; - } - } else if (op == BITOP_OR) { - while(minlen >= sizeof(unsigned long)*4) { - for (i = 1; i < numkeys; i++) { - lres[0] |= lp[i][k+0]; - lres[1] |= lp[i][k+1]; - lres[2] |= lp[i][k+2]; - lres[3] |= lp[i][k+3]; - } - k+=4; - lres+=4; - j += sizeof(unsigned long)*4; - minlen -= sizeof(unsigned long)*4; - } - } else if (op == BITOP_XOR) { - while(minlen >= sizeof(unsigned long)*4) { - for (i = 1; i < numkeys; i++) { - lres[0] ^= lp[i][k+0]; - lres[1] ^= lp[i][k+1]; - lres[2] ^= lp[i][k+2]; - lres[3] ^= lp[i][k+3]; - } - k+=4; - lres+=4; - j += sizeof(unsigned long)*4; - minlen -= sizeof(unsigned long)*4; - } - } else if (op == BITOP_NOT) { - while(minlen >= sizeof(unsigned long)*4) { - lres[0] = ~lres[0]; - lres[1] = ~lres[1]; - lres[2] = ~lres[2]; - lres[3] = ~lres[3]; - lres+=4; - j += sizeof(unsigned long)*4; - minlen -= sizeof(unsigned long)*4; - } - } else if (op == BITOP_DIFF || op == BITOP_DIFF1 || op == BITOP_ANDOR) { - size_t processed = 0; - while(minlen >= sizeof(unsigned long)*4) { - for (i = 1; i < numkeys; i++) { - lres[0] |= lp[i][k+0]; - lres[1] |= lp[i][k+1]; - lres[2] |= lp[i][k+2]; - lres[3] |= lp[i][k+3]; - } - k+=4; - lres+=4; - j += sizeof(unsigned long)*4; - minlen -= sizeof(unsigned long)*4; - processed += sizeof(unsigned long)*4; - } - - lres = (unsigned long*) res; - unsigned long *first_key = (unsigned long*)src[0]; - switch (op) { - case BITOP_DIFF: - for (i = 0; i < processed; i += sizeof(unsigned long)*4) { - lres[0] = (first_key[0] & ~lres[0]); - lres[1] = (first_key[1] & ~lres[1]); - lres[2] = (first_key[2] & ~lres[2]); - lres[3] = (first_key[3] & ~lres[3]); - lres+=4; - first_key += 4; - } - break; - case BITOP_DIFF1: - for (i = 0; i < processed; i += sizeof(unsigned long)*4) { - lres[0] = (~first_key[0] & lres[0]); - lres[1] = (~first_key[1] & lres[1]); - lres[2] = (~first_key[2] & lres[2]); - lres[3] = (~first_key[3] & lres[3]); - lres+=4; - first_key += 4; - } - break; - case BITOP_ANDOR: - for (i = 0; i < processed; i += sizeof(unsigned long)*4) { - lres[0] = (first_key[0] & lres[0]); - lres[1] = (first_key[1] & lres[1]); - lres[2] = (first_key[2] & lres[2]); - lres[3] = (first_key[3] & lres[3]); - lres+=4; - first_key += 4; - } - break; - } - } else if (op == BITOP_ONE) { - unsigned long lcommon_bits[4]; - - while(minlen >= sizeof(unsigned long)*4) { - memset(lcommon_bits, 0, sizeof(lcommon_bits)); - - for (i = 1; i < numkeys; i++) { - lcommon_bits[0] |= (lres[0] & lp[i][k+0]); - lcommon_bits[1] |= (lres[1] & lp[i][k+1]); - lcommon_bits[2] |= (lres[2] & lp[i][k+2]); - lcommon_bits[3] |= (lres[3] & lp[i][k+3]); - - lres[0] ^= lp[i][k+0]; - lres[1] ^= lp[i][k+1]; - lres[2] ^= lp[i][k+2]; - lres[3] ^= lp[i][k+3]; - } - - lres[0] &= ~lcommon_bits[0]; - lres[1] &= ~lcommon_bits[1]; - lres[2] &= ~lcommon_bits[2]; - lres[3] &= ~lcommon_bits[3]; - - k+=4; - lres+=4; - j += sizeof(unsigned long)*4; - minlen -= sizeof(unsigned long)*4; - } - } - } -#endif /* !defined(USE_ALIGNED_ACCESS) */ - - /* j is set to the next byte to process by the previous loop. */ - for (; j < maxlen; j++) { - output = (len[0] <= j) ? 0 : src[0][j]; - if (op == BITOP_NOT) output = ~output; - disjunction = 0; - common_bits = 0; - - for (i = 1; i < numkeys; i++) { - int skip = 0; - byte = (len[i] <= j) ? 0 : src[i][j]; - switch(op) { - case BITOP_AND: - output &= byte; - skip = (output == 0); - break; - case BITOP_OR: - output |= byte; - skip = (output == 0xff); - break; - case BITOP_XOR: output ^= byte; break; - - /* For DIFF, DIFF1 and ANDOR we compute the disjunction of all - * key arguments except the first one. After that we do their - * respective bit op on said first arg and that disjunction. - * */ - case BITOP_DIFF: - case BITOP_DIFF1: - case BITOP_ANDOR: - disjunction |= byte; - skip = (disjunction == 0xff); - break; - - /* BITOP ONE dest key_1 [key_2...] - * If dest[i] is the i-th bit of dest then: - * dest[i] == 1 if and only if there is j such that key_j[i] == 1 - * and key_n[i] == 0 for all n != j. - * - * In order to compute that on each step we track which bits - * were seen in more than one key and store that in a helper - * variable. Then the operation is just XOR but on each step we - * nullify the bits that are set in the helper. - * Logically, this operation is the same as nullifying the - * helper bits only once at the end, but performance-wise it had - * no significant benefit and makes the code only more unclear. - * - * e.g: - * 0001 0111 # key1 - * 0010 0110 # key2 - * - * 0011 0001 # intermediate1 - * 0000 0110 # helper - * 0011 0001 # intermediate1 & ~helper - * - * 0100 1101 # key3 - * - * 0111 1100 # intermediate2 - * 0000 0111 # helper - * 0111 1000 # intermediate2 & ~helper - * --------- - * 0111 1000 # result - * */ - case BITOP_ONE: - common_bits |= (output & byte); - output ^= byte; - output &= ~common_bits; - skip = (common_bits == 0xff); - break; - default: - break; - } - - if (skip) { - break; - } - } - - switch(op) { - case BITOP_DIFF: - res[j] = (output & ~disjunction); - break; - case BITOP_DIFF1: - res[j] = (~output & disjunction); - break; - case BITOP_ANDOR: - res[j] = (output & disjunction); - break; - default: - res[j] = output; - break; - } - } - } - for (j = 0; j < numkeys; j++) { - if (objects[j]) - decrRefCount(objects[j]); - } - zfree(src); - zfree(len); - zfree(objects); - - /* Store the computed value into the target key */ - if (maxlen) { - robj *o = createObject(OBJ_STRING, res); - setKey(c, c->db, targetkey, &o, 0); - notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id); - server.dirty++; - } else if (dbDelete(c->db,targetkey)) { - keyModified(c,c->db,targetkey,NULL,1); - notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id); - server.dirty++; - } - addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */ -} - -/* BITCOUNT key [start end [BIT|BYTE]] */ -void bitcountCommand(client *c) { - kvobj *o; - long long start, end; - long strlen; - unsigned char *p; - char llbuf[LONG_STR_SIZE]; - int isbit = 0; - unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0; - - /* Parse start/end range if any. */ - if (c->argc == 4 || c->argc == 5) { - if (getLongLongFromObjectOrReply(c,c->argv[2],&start,NULL) != C_OK) - return; - if (getLongLongFromObjectOrReply(c,c->argv[3],&end,NULL) != C_OK) - return; - if (c->argc == 5) { - if (!strcasecmp(c->argv[4]->ptr,"bit")) isbit = 1; - else if (!strcasecmp(c->argv[4]->ptr,"byte")) isbit = 0; - else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - } - /* Lookup, check for type. */ - o = lookupKeyRead(c->db, c->argv[1]); - if (checkType(c, o, OBJ_STRING)) return; - p = getObjectReadOnlyString(o,&strlen,llbuf); - long long totlen = strlen; - - /* Make sure we will not overflow */ - serverAssert(totlen <= LLONG_MAX >> 3); - - /* Convert negative indexes */ - if (start < 0 && end < 0 && start > end) { - addReply(c,shared.czero); - return; - } - if (isbit) totlen <<= 3; - if (start < 0) start = totlen+start; - if (end < 0) end = totlen+end; - if (start < 0) start = 0; - if (end < 0) end = 0; - if (end >= totlen) end = totlen-1; - if (isbit && start <= end) { - /* Before converting bit offset to byte offset, create negative masks - * for the edges. */ - first_byte_neg_mask = ~((1<<(8-(start&7)))-1) & 0xFF; - last_byte_neg_mask = (1<<(7-(end&7)))-1; - start >>= 3; - end >>= 3; - } - } else if (c->argc == 2) { - /* Lookup, check for type. */ - o = lookupKeyRead(c->db, c->argv[1]); - if (checkType(c, o, OBJ_STRING)) return; - p = getObjectReadOnlyString(o,&strlen,llbuf); - /* The whole string. */ - start = 0; - end = strlen-1; - } else { - /* Syntax error. */ - addReplyErrorObject(c,shared.syntaxerr); - return; - } - - /* Return 0 for non existing keys. */ - if (o == NULL) { - addReply(c, shared.czero); - return; - } - - /* Precondition: end >= 0 && end < strlen, so the only condition where - * zero can be returned is: start > end. */ - if (start > end) { - addReply(c,shared.czero); - } else { - long bytes = (long)(end-start+1); - long long count; - - /* Use the best available popcount implementation */ - count = redisPopcountAuto(p+start, bytes); - - if (first_byte_neg_mask != 0 || last_byte_neg_mask != 0) { - unsigned char firstlast[2] = {0, 0}; - /* We may count bits of first byte and last byte which are out of - * range. So we need to subtract them. Here we use a trick. We set - * bits in the range to zero. So these bit will not be excluded. */ - if (first_byte_neg_mask != 0) firstlast[0] = p[start] & first_byte_neg_mask; - if (last_byte_neg_mask != 0) firstlast[1] = p[end] & last_byte_neg_mask; - - /* Use the same popcount implementation for consistency */ - count -= redisPopcountAuto(firstlast, 2); - } - addReplyLongLong(c,count); - } -} - -/* BITPOS key bit [start [end [BIT|BYTE]]] */ -void bitposCommand(client *c) { - kvobj *o; - long long start, end; - long bit, strlen; - unsigned char *p; - char llbuf[LONG_STR_SIZE]; - int isbit = 0, end_given = 0; - unsigned char first_byte_neg_mask = 0, last_byte_neg_mask = 0; - - /* Parse the bit argument to understand what we are looking for, set - * or clear bits. */ - if (getLongFromObjectOrReply(c,c->argv[2],&bit,NULL) != C_OK) - return; - if (bit != 0 && bit != 1) { - addReplyError(c, "The bit argument must be 1 or 0."); - return; - } - - /* Parse start/end range if any. */ - if (c->argc == 4 || c->argc == 5 || c->argc == 6) { - if (getLongLongFromObjectOrReply(c,c->argv[3],&start,NULL) != C_OK) - return; - if (c->argc == 6) { - if (!strcasecmp(c->argv[5]->ptr,"bit")) isbit = 1; - else if (!strcasecmp(c->argv[5]->ptr,"byte")) isbit = 0; - else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - } - if (c->argc >= 5) { - if (getLongLongFromObjectOrReply(c,c->argv[4],&end,NULL) != C_OK) - return; - end_given = 1; - } - - /* Lookup, check for type. */ - o = lookupKeyRead(c->db, c->argv[1]); - if (checkType(c, o, OBJ_STRING)) return; - p = getObjectReadOnlyString(o, &strlen, llbuf); - - /* Make sure we will not overflow */ - long long totlen = strlen; - serverAssert(totlen <= LLONG_MAX >> 3); - - if (c->argc < 5) { - if (isbit) end = (totlen<<3) + 7; - else end = totlen-1; - } - - if (isbit) totlen <<= 3; - /* Convert negative indexes */ - if (start < 0) start = totlen+start; - if (end < 0) end = totlen+end; - if (start < 0) start = 0; - if (end < 0) end = 0; - if (end >= totlen) end = totlen-1; - if (isbit && start <= end) { - /* Before converting bit offset to byte offset, create negative masks - * for the edges. */ - first_byte_neg_mask = ~((1<<(8-(start&7)))-1) & 0xFF; - last_byte_neg_mask = (1<<(7-(end&7)))-1; - start >>= 3; - end >>= 3; - } - } else if (c->argc == 3) { - /* Lookup, check for type. */ - o = lookupKeyRead(c->db, c->argv[1]); - if (checkType(c,o,OBJ_STRING)) return; - p = getObjectReadOnlyString(o,&strlen,llbuf); - - /* The whole string. */ - start = 0; - end = strlen-1; - } else { - /* Syntax error. */ - addReplyErrorObject(c,shared.syntaxerr); - return; - } - - /* If the key does not exist, from our point of view it is an infinite - * array of 0 bits. If the user is looking for the first clear bit return 0, - * If the user is looking for the first set bit, return -1. */ - if (o == NULL) { - addReplyLongLong(c, bit ? -1 : 0); - return; - } - - /* For empty ranges (start > end) we return -1 as an empty range does - * not contain a 0 nor a 1. */ - if (start > end) { - addReplyLongLong(c, -1); - } else { - long bytes = end-start+1; - long long pos; - unsigned char tmpchar; - if (first_byte_neg_mask) { - if (bit) tmpchar = p[start] & ~first_byte_neg_mask; - else tmpchar = p[start] | first_byte_neg_mask; - /* Special case, there is only one byte */ - if (last_byte_neg_mask && bytes == 1) { - if (bit) tmpchar = tmpchar & ~last_byte_neg_mask; - else tmpchar = tmpchar | last_byte_neg_mask; - } - pos = redisBitpos(&tmpchar,1,bit); - /* If there are no more bytes or we get valid pos, we can exit early */ - if (bytes == 1 || (pos != -1 && pos != 8)) goto result; - start++; - bytes--; - } - /* If the last byte has not bits in the range, we should exclude it */ - long curbytes = bytes - (last_byte_neg_mask ? 1 : 0); - if (curbytes > 0) { - pos = redisBitpos(p+start,curbytes,bit); - /* If there is no more bytes or we get valid pos, we can exit early */ - if (bytes == curbytes || (pos != -1 && pos != (long long)curbytes<<3)) goto result; - start += curbytes; - bytes -= curbytes; - } - if (bit) tmpchar = p[end] & ~last_byte_neg_mask; - else tmpchar = p[end] | last_byte_neg_mask; - pos = redisBitpos(&tmpchar,1,bit); - - result: - /* If we are looking for clear bits, and the user specified an exact - * range with start-end, we can't consider the right of the range as - * zero padded (as we do when no explicit end is given). - * - * So if redisBitpos() returns the first bit outside the range, - * we return -1 to the caller, to mean, in the specified range there - * is not a single "0" bit. */ - if (end_given && bit == 0 && pos == (long long)bytes<<3) { - addReplyLongLong(c,-1); - return; - } - if (pos != -1) pos += (long long)start<<3; /* Adjust for the bytes we skipped. */ - addReplyLongLong(c,pos); - } -} - -/* BITFIELD key subcommand-1 arg ... subcommand-2 arg ... subcommand-N ... - * - * Supported subcommands: - * - * GET - * SET - * INCRBY - * OVERFLOW [WRAP|SAT|FAIL] - */ - -#define BITFIELD_FLAG_NONE 0 -#define BITFIELD_FLAG_READONLY (1<<0) - -struct bitfieldOp { - uint64_t offset; /* Bitfield offset. */ - int64_t i64; /* Increment amount (INCRBY) or SET value */ - int opcode; /* Operation id. */ - int owtype; /* Overflow type to use. */ - int bits; /* Integer bitfield bits width. */ - int sign; /* True if signed, otherwise unsigned op. */ -}; - -/* This implements both the BITFIELD command and the BITFIELD_RO command - * when flags is set to BITFIELD_FLAG_READONLY: in this case only the - * GET subcommand is allowed, other subcommands will return an error. */ -void bitfieldGeneric(client *c, int flags) { - kvobj *o; - uint64_t bitoffset; - int j, numops = 0, changes = 0; - size_t strOldSize, strGrowSize = 0; - struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */ - int owtype = BFOVERFLOW_WRAP; /* Overflow type. */ - int readonly = 1; - uint64_t highest_write_offset = 0; - - for (j = 2; j < c->argc; j++) { - int remargs = c->argc-j-1; /* Remaining args other than current. */ - char *subcmd = c->argv[j]->ptr; /* Current command name. */ - int opcode; /* Current operation code. */ - long long i64 = 0; /* Signed SET value. */ - int sign = 0; /* Signed or unsigned type? */ - int bits = 0; /* Bitfield width in bits. */ - - if (!strcasecmp(subcmd,"get") && remargs >= 2) - opcode = BITFIELDOP_GET; - else if (!strcasecmp(subcmd,"set") && remargs >= 3) - opcode = BITFIELDOP_SET; - else if (!strcasecmp(subcmd,"incrby") && remargs >= 3) - opcode = BITFIELDOP_INCRBY; - else if (!strcasecmp(subcmd,"overflow") && remargs >= 1) { - char *owtypename = c->argv[j+1]->ptr; - j++; - if (!strcasecmp(owtypename,"wrap")) - owtype = BFOVERFLOW_WRAP; - else if (!strcasecmp(owtypename,"sat")) - owtype = BFOVERFLOW_SAT; - else if (!strcasecmp(owtypename,"fail")) - owtype = BFOVERFLOW_FAIL; - else { - addReplyError(c,"Invalid OVERFLOW type specified"); - zfree(ops); - return; - } - continue; - } else { - addReplyErrorObject(c,shared.syntaxerr); - zfree(ops); - return; - } - - /* Get the type and offset arguments, common to all the ops. */ - if (getBitfieldTypeFromArgument(c,c->argv[j+1],&sign,&bits) != C_OK) { - zfree(ops); - return; - } - - if (getBitOffsetFromArgument(c,c->argv[j+2],&bitoffset,1,bits) != C_OK){ - zfree(ops); - return; - } - - if (opcode != BITFIELDOP_GET) { - readonly = 0; - if (highest_write_offset < bitoffset + bits - 1) - highest_write_offset = bitoffset + bits - 1; - /* INCRBY and SET require another argument. */ - if (getLongLongFromObjectOrReply(c,c->argv[j+3],&i64,NULL) != C_OK){ - zfree(ops); - return; - } - } - - /* Populate the array of operations we'll process. */ - ops = zrealloc(ops,sizeof(*ops)*(numops+1)); - ops[numops].offset = bitoffset; - ops[numops].i64 = i64; - ops[numops].opcode = opcode; - ops[numops].owtype = owtype; - ops[numops].bits = bits; - ops[numops].sign = sign; - numops++; - - j += 3 - (opcode == BITFIELDOP_GET); - } - - if (readonly) { - /* Lookup for read is ok if key doesn't exit, but errors - * if it's not a string. */ - o = lookupKeyRead(c->db,c->argv[1]); - if (o != NULL && checkType(c,o,OBJ_STRING)) { - zfree(ops); - return; - } - } else { - if (flags & BITFIELD_FLAG_READONLY) { - zfree(ops); - addReplyError(c, "BITFIELD_RO only supports the GET subcommand"); - return; - } - - /* Lookup by making room up to the farthest bit reached by - * this operation. */ - if ((o = lookupStringForBitCommand(c, - highest_write_offset,&strOldSize,&strGrowSize)) == NULL) { - zfree(ops); - return; - } - } - - addReplyArrayLen(c,numops); - - /* Actually process the operations. */ - for (j = 0; j < numops; j++) { - struct bitfieldOp *thisop = ops+j; - - /* Execute the operation. */ - if (thisop->opcode == BITFIELDOP_SET || - thisop->opcode == BITFIELDOP_INCRBY) - { - /* SET and INCRBY: We handle both with the same code path - * for simplicity. SET return value is the previous value so - * we need fetch & store as well. */ - - /* We need two different but very similar code paths for signed - * and unsigned operations, since the set of functions to get/set - * the integers and the used variables types are different. */ - if (thisop->sign) { - int64_t oldval, newval, wrapped, retval; - int overflow; - - oldval = getSignedBitfield(o->ptr,thisop->offset, - thisop->bits); - - if (thisop->opcode == BITFIELDOP_INCRBY) { - overflow = checkSignedBitfieldOverflow(oldval, - thisop->i64,thisop->bits,thisop->owtype,&wrapped); - newval = overflow ? wrapped : oldval + thisop->i64; - retval = newval; - } else { - newval = thisop->i64; - overflow = checkSignedBitfieldOverflow(newval, - 0,thisop->bits,thisop->owtype,&wrapped); - if (overflow) newval = wrapped; - retval = oldval; - } - - /* On overflow of type is "FAIL", don't write and return - * NULL to signal the condition. */ - if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) { - addReplyLongLong(c,retval); - setSignedBitfield(o->ptr,thisop->offset, - thisop->bits,newval); - - if (strGrowSize || (oldval != newval)) - changes++; - } else { - addReplyNull(c); - } - } else { - /* Initialization of 'wrapped' is required to avoid - * false-positive warning "-Wmaybe-uninitialized" */ - uint64_t oldval, newval, retval, wrapped = 0; - int overflow; - - oldval = getUnsignedBitfield(o->ptr,thisop->offset, - thisop->bits); - - if (thisop->opcode == BITFIELDOP_INCRBY) { - newval = oldval + thisop->i64; - overflow = checkUnsignedBitfieldOverflow(oldval, - thisop->i64,thisop->bits,thisop->owtype,&wrapped); - if (overflow) newval = wrapped; - retval = newval; - } else { - newval = thisop->i64; - overflow = checkUnsignedBitfieldOverflow(newval, - 0,thisop->bits,thisop->owtype,&wrapped); - if (overflow) newval = wrapped; - retval = oldval; - } - /* On overflow of type is "FAIL", don't write and return - * NULL to signal the condition. */ - if (!(overflow && thisop->owtype == BFOVERFLOW_FAIL)) { - addReplyLongLong(c,retval); - setUnsignedBitfield(o->ptr,thisop->offset, - thisop->bits,newval); - - if (strGrowSize || (oldval != newval)) - changes++; - } else { - addReplyNull(c); - } - } - } else { - /* GET */ - unsigned char buf[9]; - long strlen = 0; - unsigned char *src = NULL; - char llbuf[LONG_STR_SIZE]; - - if (o != NULL) - src = getObjectReadOnlyString(o,&strlen,llbuf); - - /* For GET we use a trick: before executing the operation - * copy up to 9 bytes to a local buffer, so that we can easily - * execute up to 64 bit operations that are at actual string - * object boundaries. */ - memset(buf,0,9); - int i; - uint64_t byte = thisop->offset >> 3; - for (i = 0; i < 9; i++) { - if (src == NULL || i+byte >= (uint64_t)strlen) break; - buf[i] = src[i+byte]; - } - - /* Now operate on the copied buffer which is guaranteed - * to be zero-padded. */ - if (thisop->sign) { - int64_t val = getSignedBitfield(buf,thisop->offset-(byte*8), - thisop->bits); - addReplyLongLong(c,val); - } else { - uint64_t val = getUnsignedBitfield(buf,thisop->offset-(byte*8), - thisop->bits); - addReplyLongLong(c,val); - } - } - } - - if (changes) { - - /* If this is not a new key (old size not 0) and size changed, then - * update the keysizes histogram. Otherwise, the histogram already - * updated in lookupStringForBitCommand() by calling dbAdd(). */ - if ((strOldSize > 0) && (strGrowSize != 0)) - updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_STRING, - strOldSize, strOldSize + strGrowSize); - - keyModified(c,c->db,c->argv[1],o,1); - notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); - server.dirty += changes; - } - zfree(ops); -} - -void bitfieldCommand(client *c) { - bitfieldGeneric(c, BITFIELD_FLAG_NONE); -} - -void bitfieldroCommand(client *c) { - bitfieldGeneric(c, BITFIELD_FLAG_READONLY); -} - -#ifdef REDIS_TEST -/* Test function to verify popcount implementations */ -int bitopsTest(int argc, char **argv, int flags) { - UNUSED(argc); - UNUSED(argv); - UNUSED(flags); - - /* Test data with known popcount values */ - unsigned char test_data[] = {0xFF, 0x00, 0xAA, 0x55, 0xF0, 0x0F, 0x33, 0xCC}; - int expected_bits = 8 + 0 + 4 + 4 + 4 + 4 + 4 + 4; /* = 32 bits */ - - long long result_regular = redisPopcount(test_data, sizeof(test_data)); - - printf("Regular popcount: %lld (expected: %d)\n", result_regular, expected_bits); - - if (result_regular != expected_bits) { - printf("FAIL: Regular popcount mismatch\n"); - return 1; - } - -#ifdef HAVE_AVX2 - if (BITOP_USE_AVX2) { - long long result_avx2 = redisPopCountAvx2(test_data, sizeof(test_data)); - printf("AVX2 popcount: %lld (expected: %d)\n", result_avx2, expected_bits); - - if (result_avx2 != expected_bits) { - printf("FAIL: AVX2 popcount mismatch\n"); - return 1; - } - } else { - printf("AVX2 not supported on this CPU\n"); - } -#else - printf("AVX2 not compiled in\n"); -#endif - -#ifdef HAVE_AVX512 - if (BITOP_USE_AVX512) { - long long result_avx512 = redisPopCountAvx512(test_data, sizeof(test_data)); - printf("AVX512 popcount: %lld (expected: %d)\n", result_avx512, expected_bits); - - if (result_avx512 != expected_bits) { - printf("FAIL: AVX512 popcount mismatch\n"); - return 1; - } - } else { - printf("AVX512 not supported on this CPU\n"); - } -#else - printf("AVX512 not compiled in\n"); -#endif - -#ifdef HAVE_AARCH64_NEON - { - long long result_aarch64 = redisPopCountAarch64(test_data, sizeof(test_data)); - printf("AArch64 NEON popcount: %lld (expected: %d)\n", result_aarch64, expected_bits); - - if (result_aarch64 != expected_bits) { - printf("FAIL: AArch64 NEON popcount mismatch\n"); - return 1; - } - } -#else - printf("AArch64 NEON not available\n"); -#endif - printf("All popcount tests passed!\n"); - return 0; -} -#endif diff --git a/examples/redis-unstable/src/blocked.c b/examples/redis-unstable/src/blocked.c deleted file mode 100644 index 4f518c9..0000000 --- a/examples/redis-unstable/src/blocked.c +++ /dev/null @@ -1,787 +0,0 @@ -/* blocked.c - generic support for blocking operations like BLPOP & WAIT. - * - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Copyright (c) 2024-present, Valkey contributors. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - * - * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. - * - * --------------------------------------------------------------------------- - * - * API: - * - * blockClient() set the CLIENT_BLOCKED flag in the client, and set the - * specified block type 'btype' filed to one of BLOCKED_* macros. - * - * unblockClient() unblocks the client doing the following: - * 1) It calls the btype-specific function to cleanup the state. - * 2) It unblocks the client by unsetting the CLIENT_BLOCKED flag. - * 3) It puts the client into a list of just unblocked clients that are - * processed ASAP in the beforeSleep() event loop callback, so that - * if there is some query buffer to process, we do it. This is also - * required because otherwise there is no 'readable' event fired, we - * already read the pending commands. We also set the CLIENT_UNBLOCKED - * flag to remember the client is in the unblocked_clients list. - * - * processUnblockedClients() is called inside the beforeSleep() function - * to process the query buffer from unblocked clients and remove the clients - * from the blocked_clients queue. - * - * replyToBlockedClientTimedOut() is called by the cron function when - * a client blocked reaches the specified timeout (if the timeout is set - * to 0, no timeout is processed). - * It usually just needs to send a reply to the client. - * - * When implementing a new type of blocking operation, the implementation - * should modify unblockClient() and replyToBlockedClientTimedOut() in order - * to handle the btype-specific behavior of this two functions. - * If the blocking operation waits for certain keys to change state, the - * clusterRedirectBlockedClientIfNeeded() function should also be updated. - */ - -#include "server.h" -#include "slowlog.h" -#include "latency.h" -#include "monotonic.h" -#include "cluster_slot_stats.h" - -/* forward declarations */ -static void unblockClientWaitingData(client *c); -static void handleClientsBlockedOnKey(readyList *rl); -static void unblockClientOnKey(client *c, robj *key); -static void moduleUnblockClientOnKey(client *c, robj *key); -static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key); - -void initClientBlockingState(client *c) { - c->bstate.btype = BLOCKED_NONE; - c->bstate.timeout = 0; - c->bstate.keys = dictCreate(&objectKeyHeapPointerValueDictType); - c->bstate.numreplicas = 0; - c->bstate.reploffset = 0; - c->bstate.unblock_on_nokey = 0; - c->bstate.async_rm_call_handle = NULL; -} - -/* Block a client for the specific operation type. Once the CLIENT_BLOCKED - * flag is set client query buffer is not longer processed, but accumulated, - * and will be processed when the client is unblocked. */ -void blockClient(client *c, int btype) { - /* Master client should never be blocked unless pause or module */ - serverAssert(!(c->flags & CLIENT_MASTER && - btype != BLOCKED_MODULE && - btype != BLOCKED_LAZYFREE && - btype != BLOCKED_POSTPONE && - btype != BLOCKED_POSTPONE_TRIM)); - - c->flags |= CLIENT_BLOCKED; - c->bstate.btype = btype; - if (!(c->flags & CLIENT_MODULE)) server.blocked_clients++; /* We count blocked client stats on regular clients and not on module clients */ - server.blocked_clients_by_type[btype]++; - addClientToTimeoutTable(c); -} - -/* Usually when a client is unblocked due to being blocked while processing some command - * he will attempt to reprocess the command which will update the statistics. - * However in case the client was timed out or in case of module blocked client is being unblocked - * the command will not be reprocessed and we need to make stats update. - * This function will make updates to the commandstats, slowlog and monitors.*/ -void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_errors){ - const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us; - clusterSlotStatsAddCpuDuration(c, total_cmd_duration); - c->lastcmd->microseconds += total_cmd_duration; - c->lastcmd->calls++; - c->commands_processed++; - server.stat_numcommands++; - if (had_errors) - c->lastcmd->failed_calls++; - if (server.latency_tracking_enabled) - updateCommandLatencyHistogram(&(c->lastcmd->latency_histogram), total_cmd_duration*1000); - /* Log the command into the Slow log if needed. */ - slowlogPushCurrentCommand(c, c->lastcmd, total_cmd_duration); - c->duration = 0; - /* Log the reply duration event. */ - latencyAddSampleIfNeeded("command-unblocking",reply_us/1000); -} - -/* This function is called in the beforeSleep() function of the event loop - * in order to process the pending input buffer of clients that were - * unblocked after a blocking operation. */ -void processUnblockedClients(void) { - listNode *ln; - client *c; - - while (listLength(server.unblocked_clients)) { - ln = listFirst(server.unblocked_clients); - serverAssert(ln != NULL); - c = ln->value; - listDelNode(server.unblocked_clients,ln); - c->flags &= ~CLIENT_UNBLOCKED; - - /* Reset the client for a new query, unless the client has pending command to process. */ - if (!(c->flags & CLIENT_PENDING_COMMAND)) { - freeClientOriginalArgv(c); - /* Clients that are not blocked on keys are not reprocessed so we must - * call reqresAppendResponse here (for clients blocked on key, - * unblockClientOnKey is called, which eventually calls processCommand, - * which calls reqresAppendResponse) */ - prepareForNextCommand(c, 0); - } - - if (c->flags & CLIENT_MODULE) { - if (!(c->flags & CLIENT_BLOCKED)) { - moduleCallCommandUnblockedHandler(c); - } - continue; - } - - /* Process remaining data in the input buffer, unless the client - * is blocked again. Actually processInputBuffer() checks that the - * client is not blocked before to proceed, but things may change and - * the code is conceptually more correct this way. */ - if (!(c->flags & CLIENT_BLOCKED)) { - /* If we have a queued command, execute it now. */ - if (processPendingCommandAndInputBuffer(c) == C_ERR) { - c = NULL; - } - } - beforeNextClient(c); - } -} - -/* This function will schedule the client for reprocessing at a safe time. - * - * This is useful when a client was blocked for some reason (blocking operation, - * CLIENT PAUSE, or whatever), because it may end with some accumulated query - * buffer that needs to be processed ASAP: - * - * 1. When a client is blocked, its readable handler is still active. - * 2. However in this case it only gets data into the query buffer, but the - * query is not parsed or executed once there is enough to proceed as - * usually (because the client is blocked... so we can't execute commands). - * 3. When the client is unblocked, without this function, the client would - * have to write some query in order for the readable handler to finally - * call processQueryBuffer*() on it. - * 4. With this function instead we can put the client in a queue that will - * process it for queries ready to be executed at a safe time. - */ -void queueClientForReprocessing(client *c) { - /* The client may already be into the unblocked list because of a previous - * blocking operation, don't add back it into the list multiple times. */ - if (!(c->flags & CLIENT_UNBLOCKED)) { - c->flags |= CLIENT_UNBLOCKED; - listAddNodeTail(server.unblocked_clients,c); - } -} - -/* Unblock a client calling the right function depending on the kind - * of operation the client is blocking for. */ -void unblockClient(client *c, int queue_for_reprocessing) { - if (c->bstate.btype == BLOCKED_LIST || - c->bstate.btype == BLOCKED_ZSET || - c->bstate.btype == BLOCKED_STREAM) { - unblockClientWaitingData(c); - } else if (c->bstate.btype == BLOCKED_WAIT || c->bstate.btype == BLOCKED_WAITAOF) { - unblockClientWaitingReplicas(c); - } else if (c->bstate.btype == BLOCKED_MODULE) { - if (moduleClientIsBlockedOnKeys(c)) unblockClientWaitingData(c); - unblockClientFromModule(c); - } else if (c->bstate.btype == BLOCKED_POSTPONE || c->bstate.btype == BLOCKED_POSTPONE_TRIM) { - listDelNode(server.postponed_clients,c->postponed_list_node); - c->postponed_list_node = NULL; - } else if (c->bstate.btype == BLOCKED_SHUTDOWN) { - /* No special cleanup. */ - } else if (c->bstate.btype == BLOCKED_LAZYFREE) { - /* No special cleanup. */ - } else { - serverPanic("Unknown btype in unblockClient()."); - } - - - /* Clear the flags, and put the client in the unblocked list so that - * we'll process new commands in its query buffer ASAP. */ - if (!(c->flags & CLIENT_MODULE)) server.blocked_clients--; /* We count blocked client stats on regular clients and not on module clients */ - server.blocked_clients_by_type[c->bstate.btype]--; - c->flags &= ~CLIENT_BLOCKED; - c->bstate.btype = BLOCKED_NONE; - c->bstate.unblock_on_nokey = 0; - removeClientFromTimeoutTable(c); - if (queue_for_reprocessing) queueClientForReprocessing(c); -} - -/* Check if the specified client can be safely timed out using - * unblockClientOnTimeout(). */ -int blockedClientMayTimeout(client *c) { - if (c->bstate.btype == BLOCKED_MODULE) { - return moduleBlockedClientMayTimeout(c); - } - - if (c->bstate.btype == BLOCKED_LIST || - c->bstate.btype == BLOCKED_ZSET || - c->bstate.btype == BLOCKED_STREAM || - c->bstate.btype == BLOCKED_WAIT || - c->bstate.btype == BLOCKED_WAITAOF) - { - return 1; - } - return 0; -} - -/* This function gets called when a blocked client timed out in order to - * send it a reply of some kind. After this function is called, - * unblockClient() will be called with the same client as argument. */ -void replyToBlockedClientTimedOut(client *c) { - if (c->bstate.btype == BLOCKED_LAZYFREE) { - addReply(c, shared.ok); /* No reason lazy-free to fail */ - } else if (c->bstate.btype == BLOCKED_LIST || - c->bstate.btype == BLOCKED_ZSET || - c->bstate.btype == BLOCKED_STREAM) { - addReplyNullArray(c); - updateStatsOnUnblock(c, 0, 0, 0); - } else if (c->bstate.btype == BLOCKED_WAIT) { - addReplyLongLong(c,replicationCountAcksByOffset(c->bstate.reploffset)); - } else if (c->bstate.btype == BLOCKED_WAITAOF) { - addReplyArrayLen(c,2); - addReplyLongLong(c,server.fsynced_reploff >= c->bstate.reploffset); - addReplyLongLong(c,replicationCountAOFAcksByOffset(c->bstate.reploffset)); - } else if (c->bstate.btype == BLOCKED_MODULE) { - moduleBlockedClientTimedOut(c); - } else { - serverPanic("Unknown btype in replyToBlockedClientTimedOut()."); - } -} - -/* If one or more clients are blocked on the SHUTDOWN command, this function - * sends them an error reply and unblocks them. */ -void replyToClientsBlockedOnShutdown(void) { - if (server.blocked_clients_by_type[BLOCKED_SHUTDOWN] == 0) return; - listNode *ln; - listIter li; - listRewind(server.clients, &li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - if (c->flags & CLIENT_BLOCKED && c->bstate.btype == BLOCKED_SHUTDOWN) { - c->duration = 0; - addReplyError(c, "Errors trying to SHUTDOWN. Check logs."); - unblockClient(c, 1); - } - } -} - -/* Mass-unblock clients because something changed in the instance that makes - * blocking no longer safe. For example clients blocked in list operations - * in an instance which turns from master to slave is unsafe, so this function - * is called when a master turns into a slave. - * - * The semantics is to send an -UNBLOCKED error to the client, disconnecting - * it at the same time. */ -void disconnectAllBlockedClients(void) { - listNode *ln; - listIter li; - - listRewind(server.clients,&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - - if (c->flags & CLIENT_BLOCKED) { - /* POSTPONEd clients are an exception, when they'll be unblocked, the - * command processing will start from scratch, and the command will - * be either executed or rejected. (unlike LIST blocked clients for - * which the command is already in progress in a way. */ - if (c->bstate.btype == BLOCKED_POSTPONE || c->bstate.btype == BLOCKED_POSTPONE_TRIM) - continue; - - if (c->bstate.btype == BLOCKED_LAZYFREE) { - addReply(c, shared.ok); /* No reason lazy-free to fail */ - updateStatsOnUnblock(c, 0, 0, 0); - c->flags &= ~CLIENT_PENDING_COMMAND; - unblockClient(c, 1); - } else { - - unblockClientOnError(c, - "-UNBLOCKED force unblock from blocking operation, " - "instance state changed (master -> replica?)"); - } - c->flags |= CLIENT_CLOSE_AFTER_REPLY; - } - } -} - -/* This function should be called by Redis every time a single command, - * a MULTI/EXEC block, or a Lua script, terminated its execution after - * being called by a client. It handles serving clients blocked in all scenarios - * where a specific key access requires to block until that key is available. - * - * All the keys with at least one client blocked that are signaled as ready - * are accumulated into the server.ready_keys list. This function will run - * the list and will serve clients accordingly. - * Note that the function will iterate again and again (for example as a result of serving BLMOVE - * we can have new blocking clients to serve because of the PUSH side of BLMOVE.) - * - * This function is normally "fair", that is, it will serve clients - * using a FIFO behavior. However this fairness is violated in certain - * edge cases, that is, when we have clients blocked at the same time - * in a sorted set and in a list, for the same key (a very odd thing to - * do client side, indeed!). Because mismatching clients (blocking for - * a different type compared to the current key type) are moved in the - * other side of the linked list. However as long as the key starts to - * be used only for a single type, like virtually any Redis application will - * do, the function is already fair. */ -void handleClientsBlockedOnKeys(void) { - - /* In case we are already in the process of unblocking clients we should - * not make a recursive call, in order to prevent breaking fairness. */ - static int in_handling_blocked_clients = 0; - if (in_handling_blocked_clients) - return; - in_handling_blocked_clients = 1; - - /* This function is called only when also_propagate is in its basic state - * (i.e. not from call(), module context, etc.) */ - serverAssert(server.also_propagate.numops == 0); - - /* If a command being unblocked causes another command to get unblocked, - * like a BLMOVE would do, then the new unblocked command will get processed - * right away rather than wait for later. */ - while(listLength(server.ready_keys) != 0) { - list *l; - - /* Point server.ready_keys to a fresh list and save the current one - * locally. This way as we run the old list we are free to call - * signalKeyAsReady() that may push new elements in server.ready_keys - * when handling clients blocked into BLMOVE. */ - l = server.ready_keys; - server.ready_keys = listCreate(); - - while(listLength(l) != 0) { - listNode *ln = listFirst(l); - readyList *rl = ln->value; - - /* First of all remove this key from db->ready_keys so that - * we can safely call signalKeyAsReady() against this key. */ - dictDelete(rl->db->ready_keys,rl->key); - - handleClientsBlockedOnKey(rl); - - /* Free this item. */ - decrRefCount(rl->key); - zfree(rl); - listDelNode(l,ln); - } - listRelease(l); /* We have the new list on place at this point. */ - } - in_handling_blocked_clients = 0; -} - -/* Set a client in blocking mode for the specified key, with the specified timeout. - * The 'type' argument is BLOCKED_LIST,BLOCKED_ZSET or BLOCKED_STREAM depending on the kind of operation we are - * waiting for an empty key in order to awake the client. The client is blocked - * for all the 'numkeys' keys as in the 'keys' argument. - * The client will unblocked as soon as one of the keys in 'keys' value was updated. - * the parameter unblock_on_nokey can be used to force client to be unblocked even in the case the key - * is updated to become unavailable, either by type change (override), deletion or swapdb */ -void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, int unblock_on_nokey) { - dictEntry *db_blocked_entry, *db_blocked_existing_entry, *client_blocked_entry; - list *l; - int j; - - if (!(c->flags & CLIENT_REEXECUTING_COMMAND)) { - /* If the client is re-processing the command, we do not set the timeout - * because we need to retain the client's original timeout. */ - c->bstate.timeout = timeout; - } - - for (j = 0; j < numkeys; j++) { - /* If the key already exists in the dictionary ignore it. */ - if (!(client_blocked_entry = dictAddRaw(c->bstate.keys,keys[j],NULL))) { - continue; - } - incrRefCount(keys[j]); - - /* And in the other "side", to map keys -> clients */ - db_blocked_entry = dictAddRaw(c->db->blocking_keys,keys[j], &db_blocked_existing_entry); - - /* In case key[j] did not have blocking clients yet, we need to create a new list */ - if (db_blocked_entry != NULL) { - l = listCreate(); - dictSetVal(c->db->blocking_keys, db_blocked_entry, l); - incrRefCount(keys[j]); - } else { - l = dictGetVal(db_blocked_existing_entry); - } - listAddNodeTail(l,c); - dictSetVal(c->bstate.keys,client_blocked_entry,listLast(l)); - - /* We need to add the key to blocking_keys_unblock_on_nokey, if the client - * wants to be awakened if key is deleted (like XREADGROUP) */ - if (unblock_on_nokey) { - db_blocked_entry = dictAddRaw(c->db->blocking_keys_unblock_on_nokey, keys[j], &db_blocked_existing_entry); - if (db_blocked_entry) { - incrRefCount(keys[j]); - dictSetUnsignedIntegerVal(db_blocked_entry, 1); - } else { - dictIncrUnsignedIntegerVal(db_blocked_existing_entry, 1); - } - } - } - c->bstate.unblock_on_nokey = unblock_on_nokey; - /* Currently we assume key blocking will require reprocessing the command. - * However in case of modules, they have a different way to handle the reprocessing - * which does not require setting the pending command flag */ - if (btype != BLOCKED_MODULE) - c->flags |= CLIENT_PENDING_COMMAND; - blockClient(c,btype); -} - -/* Helper function to unblock a client that's waiting in a blocking operation such as BLPOP. - * Internal function for unblockClient() */ -static void unblockClientWaitingData(client *c) { - dictEntry *de; - dictIterator di; - - if (dictSize(c->bstate.keys) == 0) - return; - - dictInitIterator(&di, c->bstate.keys); - /* The client may wait for multiple keys, so unblock it for every key. */ - while((de = dictNext(&di)) != NULL) { - releaseBlockedEntry(c, de, 0); - } - dictResetIterator(&di); - dictEmpty(c->bstate.keys, NULL); -} - -static blocking_type getBlockedTypeByType(int type) { - switch (type) { - case OBJ_LIST: return BLOCKED_LIST; - case OBJ_ZSET: return BLOCKED_ZSET; - case OBJ_MODULE: return BLOCKED_MODULE; - case OBJ_STREAM: return BLOCKED_STREAM; - default: return BLOCKED_NONE; - } -} - -/* If the specified key has clients blocked waiting for list pushes, this - * function will put the key reference into the server.ready_keys list. - * Note that db->ready_keys is a hash table that allows us to avoid putting - * the same key again and again in the list in case of multiple pushes - * made by a script or in the context of MULTI/EXEC. - * - * The list will be finally processed by handleClientsBlockedOnKeys() */ -static void signalKeyAsReadyLogic(redisDb *db, robj *key, int type, int deleted) { - readyList *rl; - - /* Quick returns. */ - int btype = getBlockedTypeByType(type); - if (btype == BLOCKED_NONE) { - /* The type can never block. */ - return; - } - if (!server.blocked_clients_by_type[btype] && - !server.blocked_clients_by_type[BLOCKED_MODULE]) { - /* No clients block on this type. Note: Blocked modules are represented - * by BLOCKED_MODULE, even if the intention is to wake up by normal - * types (list, zset, stream), so we need to check that there are no - * blocked modules before we do a quick return here. */ - return; - } - - if (deleted) { - /* Key deleted and no clients blocking for this key? No need to queue it. */ - if (dictFind(db->blocking_keys_unblock_on_nokey,key) == NULL) - return; - /* Note: if we made it here it means the key is also present in db->blocking_keys */ - } else { - /* No clients blocking for this key? No need to queue it. */ - if (dictFind(db->blocking_keys,key) == NULL) - return; - } - - dictEntry *de, *existing; - de = dictAddRaw(db->ready_keys, key, &existing); - if (de) { - /* We add the key in the db->ready_keys dictionary in order - * to avoid adding it multiple times into a list with a simple O(1) - * check. */ - incrRefCount(key); - } else { - /* Key was already signaled? No need to queue it again. */ - return; - } - - /* Ok, we need to queue this key into server.ready_keys. */ - rl = zmalloc(sizeof(*rl)); - rl->key = key; - rl->db = db; - incrRefCount(key); - listAddNodeTail(server.ready_keys,rl); -} - -/* Helper function to wrap the logic of removing a client blocked key entry - * In this case we would like to do the following: - * 1. unlink the client from the global DB locked client list - * 2. remove the entry from the global db blocking list in case the list is empty - * 3. in case the global list is empty, also remove the key from the global dict of keys - * which should trigger unblock on key deletion - * 4. remove key from the client blocking keys list - NOTE, since client can be blocked on lots of keys, - * but unblocked when only one of them is triggered, we would like to avoid deleting each key separately - * and instead clear the dictionary in one-shot. this is why the remove_key argument is provided - * to support this logic in unblockClientWaitingData - */ -static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key) { - list *l; - listNode *pos; - void *key; - dictEntry *unblock_on_nokey_entry; - - key = dictGetKey(de); - pos = dictGetVal(de); - /* Remove this client from the list of clients waiting for this key. */ - l = dictFetchValue(c->db->blocking_keys, key); - serverAssertWithInfo(c,key,l != NULL); - listUnlinkNode(l,pos); - /* If the list is empty we need to remove it to avoid wasting memory - * We will also remove the key (if exists) from the blocking_keys_unblock_on_nokey dict. - * However, in case the list is not empty, we will have to still perform reference accounting - * on the blocking_keys_unblock_on_nokey and delete the entry in case of zero reference. - * Why? because it is possible that some more clients are blocked on the same key but without - * require to be triggered on key deletion, we do not want these to be later triggered by the - * signalDeletedKeyAsReady. */ - if (listLength(l) == 0) { - dictDelete(c->db->blocking_keys, key); - dictDelete(c->db->blocking_keys_unblock_on_nokey,key); - } else if (c->bstate.unblock_on_nokey) { - unblock_on_nokey_entry = dictFind(c->db->blocking_keys_unblock_on_nokey,key); - /* it is not possible to have a client blocked on nokey with no matching entry */ - serverAssertWithInfo(c,key,unblock_on_nokey_entry != NULL); - if (!dictIncrUnsignedIntegerVal(unblock_on_nokey_entry, -1)) { - /* in case the count is zero, we can delete the entry */ - dictDelete(c->db->blocking_keys_unblock_on_nokey,key); - } - } - if (remove_key) - dictDelete(c->bstate.keys, key); -} - -void signalKeyAsReady(redisDb *db, robj *key, int type) { - signalKeyAsReadyLogic(db, key, type, 0); -} - -void signalDeletedKeyAsReady(redisDb *db, robj *key, int type) { - signalKeyAsReadyLogic(db, key, type, 1); -} - -/* Helper function for handleClientsBlockedOnKeys(). This function is called - * whenever a key is ready. we iterate over all the clients blocked on this key - * and try to re-execute the command (in case the key is still available). */ -static void handleClientsBlockedOnKey(readyList *rl) { - - /* We serve clients in the same order they blocked for - * this key, from the first blocked to the last. */ - dictEntry *de = dictFind(rl->db->blocking_keys,rl->key); - - if (de) { - list *clients = dictGetVal(de); - listNode *ln; - listIter li; - listRewind(clients,&li); - - /* Avoid processing more than the initial count so that we're not stuck - * in an endless loop in case the reprocessing of the command blocks again. */ - long count = listLength(clients); - while ((ln = listNext(&li)) && count--) { - client *receiver = listNodeValue(ln); - kvobj *o = lookupKeyReadWithFlags(rl->db, rl->key, LOOKUP_NOEFFECTS); - /* 1. In case new key was added/touched we need to verify it satisfy the - * blocked type, since we might process the wrong key type. - * 2. We want to serve clients blocked on module keys - * regardless of the object type: we don't know what the - * module is trying to accomplish right now. - * 3. In case of XREADGROUP call we will want to unblock on any change in object type - * or in case the key was deleted, since the group is no longer valid. */ - if ((o != NULL && (receiver->bstate.btype == getBlockedTypeByType(o->type))) || - (o != NULL && (receiver->bstate.btype == BLOCKED_MODULE)) || - (receiver->bstate.unblock_on_nokey)) - { - if (receiver->bstate.btype != BLOCKED_MODULE) - unblockClientOnKey(receiver, rl->key); - else - moduleUnblockClientOnKey(receiver, rl->key); - } - } - } -} - -/* block a client due to wait command */ -void blockForReplication(client *c, mstime_t timeout, long long offset, long numreplicas) { - c->bstate.timeout = timeout; - c->bstate.reploffset = offset; - c->bstate.numreplicas = numreplicas; - listAddNodeHead(server.clients_waiting_acks,c); - blockClient(c,BLOCKED_WAIT); -} - -/* block a client due to waitaof command */ -void blockForAofFsync(client *c, mstime_t timeout, long long offset, int numlocal, long numreplicas) { - c->bstate.timeout = timeout; - c->bstate.reploffset = offset; - c->bstate.numreplicas = numreplicas; - c->bstate.numlocal = numlocal; - listAddNodeHead(server.clients_waiting_acks,c); - blockClient(c,BLOCKED_WAITAOF); -} - -/* Postpone client from executing a command. For example the server might be busy - * requesting to avoid processing clients commands which will be processed later - * when the it is ready to accept them. */ -void blockPostponeClientWithType(client *c, int btype) { - serverAssert(btype == BLOCKED_POSTPONE || btype == BLOCKED_POSTPONE_TRIM); - c->bstate.timeout = 0; - blockClient(c, btype); - listAddNodeTail(server.postponed_clients, c); - c->postponed_list_node = listLast(server.postponed_clients); - /* Mark this client to execute its command */ - c->flags |= CLIENT_PENDING_COMMAND; -} - -/* Postpone client from executing a command. */ -void blockPostponeClient(client *c) { - blockPostponeClientWithType(c, BLOCKED_POSTPONE); -} - -/* Block client due to shutdown command */ -void blockClientShutdown(client *c) { - blockClient(c, BLOCKED_SHUTDOWN); -} - -/* Unblock a client once a specific key became available for it. - * This function will remove the client from the list of clients blocked on this key - * and also remove the key from the dictionary of keys this client is blocked on. - * in case the client has a command pending it will process it immediately. */ -static void unblockClientOnKey(client *c, robj *key) { - dictEntry *de; - - de = dictFind(c->bstate.keys, key); - releaseBlockedEntry(c, de, 1); - - /* Only in case of blocking API calls, we might be blocked on several keys. - however we should force unblock the entire blocking keys */ - serverAssert(c->bstate.btype == BLOCKED_STREAM || - c->bstate.btype == BLOCKED_LIST || - c->bstate.btype == BLOCKED_ZSET); - - /* We need to unblock the client before calling processCommandAndResetClient - * because it checks the CLIENT_BLOCKED flag */ - unblockClient(c, 0); - /* In case this client was blocked on keys during command - * we need to re process the command again */ - if (c->flags & CLIENT_PENDING_COMMAND) { - c->flags &= ~CLIENT_PENDING_COMMAND; - c->flags |= CLIENT_REEXECUTING_COMMAND; - /* We want the command processing and the unblock handler (see RM_Call 'K' option) - * to run atomically, this is why we must enter the execution unit here before - * running the command, and exit the execution unit after calling the unblock handler (if exists). - * Notice that we also must set the current client so it will be available - * when we will try to send the client side caching notification (done on 'afterCommand'). */ - client *old_client = server.current_client; - server.current_client = c; - enterExecutionUnit(1, 0); - processCommandAndResetClient(c); - if (!(c->flags & CLIENT_BLOCKED)) { - if (c->flags & CLIENT_MODULE) { - moduleCallCommandUnblockedHandler(c); - } else { - queueClientForReprocessing(c); - } - } - exitExecutionUnit(); - afterCommand(c); - /* Clear the CLIENT_REEXECUTING_COMMAND flag after the proc is executed. */ - c->flags &= ~CLIENT_REEXECUTING_COMMAND; - server.current_client = old_client; - } -} - -/* Unblock a client blocked on the specific key from module context. - * This function will try to serve the module call, and in case it succeeds, - * it will add the client to the list of module unblocked clients which will - * be processed in moduleHandleBlockedClients. */ -static void moduleUnblockClientOnKey(client *c, robj *key) { - long long prev_error_replies = server.stat_total_error_replies; - client *old_client = server.current_client; - server.current_client = c; - monotime replyTimer; - elapsedStart(&replyTimer); - - if (moduleTryServeClientBlockedOnKey(c, key)) { - updateStatsOnUnblock(c, 0, elapsedUs(replyTimer), server.stat_total_error_replies != prev_error_replies); - moduleUnblockClient(c); - } - /* We need to call afterCommand even if the client was not unblocked - * in order to propagate any changes that could have been done inside - * moduleTryServeClientBlockedOnKey */ - afterCommand(c); - server.current_client = old_client; -} - -/* Unblock a client which is currently Blocked on and provided a timeout. - * The implementation will first reply to the blocked client with null response - * or, in case of module blocked client the timeout callback will be used. - * In this case since we might have a command pending - * we want to remove the pending flag to indicate we already responded to the - * command with timeout reply. */ -void unblockClientOnTimeout(client *c) { - /* The client has been unlocked (in the moduleUnblocked list), return ASAP. */ - if (c->bstate.btype == BLOCKED_MODULE && isModuleClientUnblocked(c)) return; - - replyToBlockedClientTimedOut(c); - if (c->flags & CLIENT_PENDING_COMMAND) - c->flags &= ~CLIENT_PENDING_COMMAND; - unblockClient(c, 1); -} - -/* Unblock a client which is currently Blocked with error. - * If err_str is provided it will be used to reply to the blocked client */ -void unblockClientOnError(client *c, const char *err_str) { - if (err_str) - addReplyError(c, err_str); - updateStatsOnUnblock(c, 0, 0, 1); - if (c->flags & CLIENT_PENDING_COMMAND) - c->flags &= ~CLIENT_PENDING_COMMAND; - unblockClient(c, 1); -} - -void blockedBeforeSleep(void) { - /* Handle precise timeouts of blocked clients. */ - handleBlockedClientsTimeout(); - - /* Handle for expired pending entries. */ - handleClaimableStreamEntries(); - - /* Unblock all the clients blocked for synchronous replication - * in WAIT or WAITAOF. */ - if (listLength(server.clients_waiting_acks)) - processClientsWaitingReplicas(); - - /* Try to process blocked clients every once in while. - * - * Example: A module calls RM_SignalKeyAsReady from within a timer callback - * (So we don't visit processCommand() at all). - * - * This may unblock clients, so must be done before processUnblockedClients */ - handleClientsBlockedOnKeys(); - - /* Check if there are clients unblocked by modules that implement - * blocking commands. */ - if (moduleCount()) - moduleHandleBlockedClients(); - - /* Try to process pending commands for clients that were just unblocked. */ - if (listLength(server.unblocked_clients)) - processUnblockedClients(); -} diff --git a/examples/redis-unstable/src/call_reply.c b/examples/redis-unstable/src/call_reply.c deleted file mode 100644 index 2a4f710..0000000 --- a/examples/redis-unstable/src/call_reply.c +++ /dev/null @@ -1,540 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "call_reply.h" - -#define REPLY_FLAG_ROOT (1<<0) -#define REPLY_FLAG_PARSED (1<<1) -#define REPLY_FLAG_RESP3 (1<<2) - -/* -------------------------------------------------------- - * An opaque struct used to parse a RESP protocol reply and - * represent it. Used when parsing replies such as in RM_Call - * or Lua scripts. - * -------------------------------------------------------- */ -struct CallReply { - void *private_data; - sds original_proto; /* Available only for root reply. */ - const char *proto; - size_t proto_len; - int type; /* REPLY_... */ - int flags; /* REPLY_FLAG... */ - size_t len; /* Length of a string, or the number elements in an array. */ - union { - const char *str; /* String pointer for string and error replies. This - * does not need to be freed, always points inside - * a reply->proto buffer of the reply object or, in - * case of array elements, of parent reply objects. */ - struct { - const char *str; - const char *format; - } verbatim_str; /* Reply value for verbatim string */ - long long ll; /* Reply value for integer reply. */ - double d; /* Reply value for double reply. */ - struct CallReply *array; /* Array of sub-reply elements. used for set, array, map, and attribute */ - } val; - list *deferred_error_list; /* list of errors in sds form or NULL */ - struct CallReply *attribute; /* attribute reply, NULL if not exists */ -}; - -static void callReplySetSharedData(CallReply *rep, int type, const char *proto, size_t proto_len, int extra_flags) { - rep->type = type; - rep->proto = proto; - rep->proto_len = proto_len; - rep->flags |= extra_flags; -} - -static void callReplyNull(void *ctx, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_NULL, proto, proto_len, REPLY_FLAG_RESP3); -} - -static void callReplyNullBulkString(void *ctx, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_NULL, proto, proto_len, 0); -} - -static void callReplyNullArray(void *ctx, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_NULL, proto, proto_len, 0); -} - -static void callReplyBulkString(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_STRING, proto, proto_len, 0); - rep->len = len; - rep->val.str = str; -} - -static void callReplyError(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_ERROR, proto, proto_len, 0); - rep->len = len; - rep->val.str = str; -} - -static void callReplySimpleStr(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_STRING, proto, proto_len, 0); - rep->len = len; - rep->val.str = str; -} - -static void callReplyLong(void *ctx, long long val, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_INTEGER, proto, proto_len, 0); - rep->val.ll = val; -} - -static void callReplyDouble(void *ctx, double val, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_DOUBLE, proto, proto_len, REPLY_FLAG_RESP3); - rep->val.d = val; -} - -static void callReplyVerbatimString(void *ctx, const char *format, const char *str, size_t len, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_VERBATIM_STRING, proto, proto_len, REPLY_FLAG_RESP3); - rep->len = len; - rep->val.verbatim_str.str = str; - rep->val.verbatim_str.format = format; -} - -static void callReplyBigNumber(void *ctx, const char *str, size_t len, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_BIG_NUMBER, proto, proto_len, REPLY_FLAG_RESP3); - rep->len = len; - rep->val.str = str; -} - -static void callReplyBool(void *ctx, int val, const char *proto, size_t proto_len) { - CallReply *rep = ctx; - callReplySetSharedData(rep, REDISMODULE_REPLY_BOOL, proto, proto_len, REPLY_FLAG_RESP3); - rep->val.ll = val; -} - -static void callReplyParseCollection(ReplyParser *parser, CallReply *rep, size_t len, const char *proto, size_t elements_per_entry) { - rep->len = len; - rep->val.array = zcalloc(elements_per_entry * len * sizeof(CallReply)); - for (size_t i = 0; i < len * elements_per_entry; i += elements_per_entry) { - for (size_t j = 0 ; j < elements_per_entry ; ++j) { - rep->val.array[i + j].private_data = rep->private_data; - parseReply(parser, rep->val.array + i + j); - rep->val.array[i + j].flags |= REPLY_FLAG_PARSED; - if (rep->val.array[i + j].flags & REPLY_FLAG_RESP3) { - /* If one of the sub-replies is RESP3, then the current reply is also RESP3. */ - rep->flags |= REPLY_FLAG_RESP3; - } - } - } - rep->proto = proto; - rep->proto_len = parser->curr_location - proto; -} - -static void callReplyAttribute(ReplyParser *parser, void *ctx, size_t len, const char *proto) { - CallReply *rep = ctx; - rep->attribute = zcalloc(sizeof(CallReply)); - - /* Continue parsing the attribute reply */ - rep->attribute->len = len; - rep->attribute->type = REDISMODULE_REPLY_ATTRIBUTE; - callReplyParseCollection(parser, rep->attribute, len, proto, 2); - rep->attribute->flags |= REPLY_FLAG_PARSED | REPLY_FLAG_RESP3; - rep->attribute->private_data = rep->private_data; - - /* Continue parsing the reply */ - parseReply(parser, rep); - - /* In this case we need to fix the proto address and len, it should start from the attribute */ - rep->proto = proto; - rep->proto_len = parser->curr_location - proto; - rep->flags |= REPLY_FLAG_RESP3; -} - -static void callReplyArray(ReplyParser *parser, void *ctx, size_t len, const char *proto) { - CallReply *rep = ctx; - rep->type = REDISMODULE_REPLY_ARRAY; - callReplyParseCollection(parser, rep, len, proto, 1); -} - -static void callReplySet(ReplyParser *parser, void *ctx, size_t len, const char *proto) { - CallReply *rep = ctx; - rep->type = REDISMODULE_REPLY_SET; - callReplyParseCollection(parser, rep, len, proto, 1); - rep->flags |= REPLY_FLAG_RESP3; -} - -static void callReplyMap(ReplyParser *parser, void *ctx, size_t len, const char *proto) { - CallReply *rep = ctx; - rep->type = REDISMODULE_REPLY_MAP; - callReplyParseCollection(parser, rep, len, proto, 2); - rep->flags |= REPLY_FLAG_RESP3; -} - -static void callReplyParseError(void *ctx) { - CallReply *rep = ctx; - rep->type = REDISMODULE_REPLY_UNKNOWN; -} - -/* Recursively free the current call reply and its sub-replies. */ -static void freeCallReplyInternal(CallReply *rep) { - if (rep->type == REDISMODULE_REPLY_ARRAY || rep->type == REDISMODULE_REPLY_SET) { - for (size_t i = 0 ; i < rep->len ; ++i) { - freeCallReplyInternal(rep->val.array + i); - } - zfree(rep->val.array); - } - - if (rep->type == REDISMODULE_REPLY_MAP || rep->type == REDISMODULE_REPLY_ATTRIBUTE) { - for (size_t i = 0 ; i < rep->len ; ++i) { - freeCallReplyInternal(rep->val.array + i * 2); - freeCallReplyInternal(rep->val.array + i * 2 + 1); - } - zfree(rep->val.array); - } - - if (rep->attribute) { - freeCallReplyInternal(rep->attribute); - zfree(rep->attribute); - } -} - -/* Free the given call reply and its children (in case of nested reply) recursively. - * If private data was set when the CallReply was created it will not be freed, as it's - * the caller's responsibility to free it before calling freeCallReply(). */ -void freeCallReply(CallReply *rep) { - if (!(rep->flags & REPLY_FLAG_ROOT)) { - return; - } - if (rep->flags & REPLY_FLAG_PARSED) { - if (rep->type == REDISMODULE_REPLY_PROMISE) { - zfree(rep); - return; - } - freeCallReplyInternal(rep); - } - sdsfree(rep->original_proto); - if (rep->deferred_error_list) - listRelease(rep->deferred_error_list); - zfree(rep); -} - -CallReply *callReplyCreatePromise(void *private_data) { - CallReply *res = zmalloc(sizeof(*res)); - res->type = REDISMODULE_REPLY_PROMISE; - /* Mark the reply as parsed so there will be not attempt to parse - * it when calling reply API such as freeCallReply. - * Also mark the reply as root so freeCallReply will not ignore it. */ - res->flags |= REPLY_FLAG_PARSED | REPLY_FLAG_ROOT; - res->private_data = private_data; - return res; -} - -static const ReplyParserCallbacks DefaultParserCallbacks = { - .null_callback = callReplyNull, - .bulk_string_callback = callReplyBulkString, - .null_bulk_string_callback = callReplyNullBulkString, - .null_array_callback = callReplyNullArray, - .error_callback = callReplyError, - .simple_str_callback = callReplySimpleStr, - .long_callback = callReplyLong, - .array_callback = callReplyArray, - .set_callback = callReplySet, - .map_callback = callReplyMap, - .double_callback = callReplyDouble, - .bool_callback = callReplyBool, - .big_number_callback = callReplyBigNumber, - .verbatim_string_callback = callReplyVerbatimString, - .attribute_callback = callReplyAttribute, - .error = callReplyParseError, -}; - -/* Parse the buffer located in rep->original_proto and update the CallReply - * structure to represent its contents. */ -static void callReplyParse(CallReply *rep) { - if (rep->flags & REPLY_FLAG_PARSED) { - return; - } - - ReplyParser parser = {.curr_location = rep->proto, .callbacks = DefaultParserCallbacks}; - - parseReply(&parser, rep); - rep->flags |= REPLY_FLAG_PARSED; -} - -/* Return the call reply type (REDISMODULE_REPLY_...). */ -int callReplyType(CallReply *rep) { - if (!rep) return REDISMODULE_REPLY_UNKNOWN; - callReplyParse(rep); - return rep->type; -} - -/* Return reply string as buffer and len. Applicable to: - * - REDISMODULE_REPLY_STRING - * - REDISMODULE_REPLY_ERROR - * - * The return value is borrowed from CallReply, so it must not be freed - * explicitly or used after CallReply itself is freed. - * - * The returned value is not NULL terminated and its length is returned by - * reference through len, which must not be NULL. - */ -const char *callReplyGetString(CallReply *rep, size_t *len) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_STRING && - rep->type != REDISMODULE_REPLY_ERROR) return NULL; - if (len) *len = rep->len; - return rep->val.str; -} - -/* Return a long long reply value. Applicable to: - * - REDISMODULE_REPLY_INTEGER - */ -long long callReplyGetLongLong(CallReply *rep) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_INTEGER) return LLONG_MIN; - return rep->val.ll; -} - -/* Return a double reply value. Applicable to: - * - REDISMODULE_REPLY_DOUBLE - */ -double callReplyGetDouble(CallReply *rep) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_DOUBLE) return LLONG_MIN; - return rep->val.d; -} - -/* Return a reply Boolean value. Applicable to: - * - REDISMODULE_REPLY_BOOL - */ -int callReplyGetBool(CallReply *rep) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_BOOL) return INT_MIN; - return rep->val.ll; -} - -/* Return reply length. Applicable to: - * - REDISMODULE_REPLY_STRING - * - REDISMODULE_REPLY_ERROR - * - REDISMODULE_REPLY_ARRAY - * - REDISMODULE_REPLY_SET - * - REDISMODULE_REPLY_MAP - * - REDISMODULE_REPLY_ATTRIBUTE - */ -size_t callReplyGetLen(CallReply *rep) { - callReplyParse(rep); - switch(rep->type) { - case REDISMODULE_REPLY_STRING: - case REDISMODULE_REPLY_ERROR: - case REDISMODULE_REPLY_ARRAY: - case REDISMODULE_REPLY_SET: - case REDISMODULE_REPLY_MAP: - case REDISMODULE_REPLY_ATTRIBUTE: - return rep->len; - default: - return 0; - } -} - -static CallReply *callReplyGetCollectionElement(CallReply *rep, size_t idx, int elements_per_entry) { - if (idx >= rep->len * elements_per_entry) return NULL; // real len is rep->len * elements_per_entry - return rep->val.array+idx; -} - -/* Return a reply array element at a given index. Applicable to: - * - REDISMODULE_REPLY_ARRAY - * - * The return value is borrowed from CallReply, so it must not be freed - * explicitly or used after CallReply itself is freed. - */ -CallReply *callReplyGetArrayElement(CallReply *rep, size_t idx) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_ARRAY) return NULL; - return callReplyGetCollectionElement(rep, idx, 1); -} - -/* Return a reply set element at a given index. Applicable to: - * - REDISMODULE_REPLY_SET - * - * The return value is borrowed from CallReply, so it must not be freed - * explicitly or used after CallReply itself is freed. - */ -CallReply *callReplyGetSetElement(CallReply *rep, size_t idx) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_SET) return NULL; - return callReplyGetCollectionElement(rep, idx, 1); -} - -static int callReplyGetMapElementInternal(CallReply *rep, size_t idx, CallReply **key, CallReply **val, int type) { - callReplyParse(rep); - if (rep->type != type) return C_ERR; - if (idx >= rep->len) return C_ERR; - if (key) *key = callReplyGetCollectionElement(rep, idx * 2, 2); - if (val) *val = callReplyGetCollectionElement(rep, idx * 2 + 1, 2); - return C_OK; -} - -/* Retrieve a map reply key and value at a given index. Applicable to: - * - REDISMODULE_REPLY_MAP - * - * The key and value are returned by reference through key and val, - * which may also be NULL if not needed. - * - * Returns C_OK on success or C_ERR if reply type mismatches, or if idx is out - * of range. - * - * The returned values are borrowed from CallReply, so they must not be freed - * explicitly or used after CallReply itself is freed. - */ -int callReplyGetMapElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val) { - return callReplyGetMapElementInternal(rep, idx, key, val, REDISMODULE_REPLY_MAP); -} - -/* Return reply attribute, or NULL if it does not exist. Applicable to all replies. - * - * The returned values are borrowed from CallReply, so they must not be freed - * explicitly or used after CallReply itself is freed. - */ -CallReply *callReplyGetAttribute(CallReply *rep) { - return rep->attribute; -} - -/* Retrieve attribute reply key and value at a given index. Applicable to: - * - REDISMODULE_REPLY_ATTRIBUTE - * - * The key and value are returned by reference through key and val, - * which may also be NULL if not needed. - * - * Returns C_OK on success or C_ERR if reply type mismatches, or if idx is out - * of range. - * - * The returned values are borrowed from CallReply, so they must not be freed - * explicitly or used after CallReply itself is freed. - */ -int callReplyGetAttributeElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val) { - return callReplyGetMapElementInternal(rep, idx, key, val, REDISMODULE_REPLY_MAP); -} - -/* Return a big number reply value. Applicable to: - * - REDISMODULE_REPLY_BIG_NUMBER - * - * The returned values are borrowed from CallReply, so they must not be freed - * explicitly or used after CallReply itself is freed. - * - * The return value is guaranteed to be a big number, as described in the RESP3 - * protocol specifications. - * - * The returned value is not NULL terminated and its length is returned by - * reference through len, which must not be NULL. - */ -const char *callReplyGetBigNumber(CallReply *rep, size_t *len) { - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_BIG_NUMBER) return NULL; - *len = rep->len; - return rep->val.str; -} - -/* Return a verbatim string reply value. Applicable to: - * - REDISMODULE_REPLY_VERBATIM_STRING - * - * If format is non-NULL, the verbatim reply format is also returned by value. - * - * The optional output argument can be given to get a verbatim reply - * format, or can be set NULL if not needed. - * - * The return value is borrowed from CallReply, so it must not be freed - * explicitly or used after CallReply itself is freed. - * - * The returned value is not NULL terminated and its length is returned by - * reference through len, which must not be NULL. - */ -const char *callReplyGetVerbatim(CallReply *rep, size_t *len, const char **format){ - callReplyParse(rep); - if (rep->type != REDISMODULE_REPLY_VERBATIM_STRING) return NULL; - *len = rep->len; - if (format) *format = rep->val.verbatim_str.format; - return rep->val.verbatim_str.str; -} - -/* Return the current reply blob. - * - * The return value is borrowed from CallReply, so it must not be freed - * explicitly or used after CallReply itself is freed. - */ -const char *callReplyGetProto(CallReply *rep, size_t *proto_len) { - *proto_len = rep->proto_len; - return rep->proto; -} - -/* Return CallReply private data, as set by the caller on callReplyCreate(). - */ -void *callReplyGetPrivateData(CallReply *rep) { - return rep->private_data; -} - -/* Return true if the reply or one of it sub-replies is RESP3 formatted. */ -int callReplyIsResp3(CallReply *rep) { - return rep->flags & REPLY_FLAG_RESP3; -} - -/* Returns a list of errors in sds form, or NULL. */ -list *callReplyDeferredErrorList(CallReply *rep) { - return rep->deferred_error_list; -} - -/* Create a new CallReply struct from the reply blob. - * - * The function will own the reply blob, so it must not be used or freed by - * the caller after passing it to this function. - * - * The reply blob will be freed when the returned CallReply struct is later - * freed using freeCallReply(). - * - * The deferred_error_list is an optional list of errors that are present - * in the reply blob, if given, this function will take ownership on it. - * - * The private_data is optional and can later be accessed using - * callReplyGetPrivateData(). - * - * NOTE: The parser used for parsing the reply and producing CallReply is - * designed to handle valid replies created by Redis itself. IT IS NOT - * DESIGNED TO HANDLE USER INPUT and using it to parse invalid replies is - * unsafe. - */ -CallReply *callReplyCreate(sds reply, list *deferred_error_list, void *private_data) { - CallReply *res = zmalloc(sizeof(*res)); - res->flags = REPLY_FLAG_ROOT; - res->original_proto = reply; - res->proto = reply; - res->proto_len = sdslen(reply); - res->private_data = private_data; - res->attribute = NULL; - res->deferred_error_list = deferred_error_list; - return res; -} - -/* Create a new CallReply struct from the reply blob representing an error message. - * Automatically creating deferred_error_list and set a copy of the reply in it. - * Refer to callReplyCreate for detailed explanation. - * Reply string can come in one of two forms: - * 1. A protocol reply starting with "-CODE" and ending with "\r\n" - * 2. A plain string, in which case this function adds the protocol header and footer. */ -CallReply *callReplyCreateError(sds reply, void *private_data) { - sds err_buff = reply; - if (err_buff[0] != '-') { - err_buff = sdscatfmt(sdsempty(), "-ERR %S\r\n", reply); - sdsfree(reply); - } - list *deferred_error_list = listCreate(); - listSetFreeMethod(deferred_error_list, sdsfreegeneric); - listAddNodeTail(deferred_error_list, sdsnew(err_buff)); - return callReplyCreate(err_buff, deferred_error_list, private_data); -} diff --git a/examples/redis-unstable/src/call_reply.h b/examples/redis-unstable/src/call_reply.h deleted file mode 100644 index 4ae7f3c..0000000 --- a/examples/redis-unstable/src/call_reply.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#ifndef SRC_CALL_REPLY_H_ -#define SRC_CALL_REPLY_H_ - -#include "resp_parser.h" - -typedef struct CallReply CallReply; -typedef void (*RedisModuleOnUnblocked)(void *ctx, CallReply *reply, void *private_data); - -CallReply *callReplyCreate(sds reply, list *deferred_error_list, void *private_data); -CallReply *callReplyCreateError(sds reply, void *private_data); -int callReplyType(CallReply *rep); -const char *callReplyGetString(CallReply *rep, size_t *len); -long long callReplyGetLongLong(CallReply *rep); -double callReplyGetDouble(CallReply *rep); -int callReplyGetBool(CallReply *rep); -size_t callReplyGetLen(CallReply *rep); -CallReply *callReplyGetArrayElement(CallReply *rep, size_t idx); -CallReply *callReplyGetSetElement(CallReply *rep, size_t idx); -int callReplyGetMapElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val); -CallReply *callReplyGetAttribute(CallReply *rep); -int callReplyGetAttributeElement(CallReply *rep, size_t idx, CallReply **key, CallReply **val); -const char *callReplyGetBigNumber(CallReply *rep, size_t *len); -const char *callReplyGetVerbatim(CallReply *rep, size_t *len, const char **format); -const char *callReplyGetProto(CallReply *rep, size_t *len); -void *callReplyGetPrivateData(CallReply *rep); -int callReplyIsResp3(CallReply *rep); -list *callReplyDeferredErrorList(CallReply *rep); -void freeCallReply(CallReply *rep); -CallReply *callReplyCreatePromise(void *private_data); - -#endif /* SRC_CALL_REPLY_H_ */ diff --git a/examples/redis-unstable/src/childinfo.c b/examples/redis-unstable/src/childinfo.c deleted file mode 100644 index 95cbbbb..0000000 --- a/examples/redis-unstable/src/childinfo.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2016-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include -#include - -typedef struct { - size_t keys; - size_t cow; - monotime cow_updated; - double progress; - childInfoType information_type; /* Type of information */ -} child_info_data; - -/* Open a child-parent channel used in order to move information about the - * RDB / AOF saving process from the child to the parent (for instance - * the amount of copy on write memory used) */ -void openChildInfoPipe(void) { - if (anetPipe(server.child_info_pipe, O_NONBLOCK, 0) == -1) { - /* On error our two file descriptors should be still set to -1, - * but we call anyway closeChildInfoPipe() since can't hurt. */ - closeChildInfoPipe(); - } else { - server.child_info_nread = 0; - } -} - -/* Close the pipes opened with openChildInfoPipe(). */ -void closeChildInfoPipe(void) { - if (server.child_info_pipe[0] != -1 || - server.child_info_pipe[1] != -1) - { - close(server.child_info_pipe[0]); - close(server.child_info_pipe[1]); - server.child_info_pipe[0] = -1; - server.child_info_pipe[1] = -1; - server.child_info_nread = 0; - } -} - -/* Send save data to parent. */ -void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, char *pname) { - if (server.child_info_pipe[1] == -1) return; - - static monotime cow_updated = 0; - static uint64_t cow_update_cost = 0; - static size_t cow = 0; - static size_t peak_cow = 0; - static size_t update_count = 0; - static unsigned long long sum_cow = 0; - - child_info_data data = {0}; /* zero everything, including padding to satisfy valgrind */ - - /* When called to report current info, we need to throttle down CoW updates as they - * can be very expensive. To do that, we measure the time it takes to get a reading - * and schedule the next reading to happen not before time*CHILD_COW_COST_FACTOR - * passes. */ - - monotime now = getMonotonicUs(); - if (info_type != CHILD_INFO_TYPE_CURRENT_INFO || - !cow_updated || - now - cow_updated > cow_update_cost * CHILD_COW_DUTY_CYCLE) - { - cow = zmalloc_get_private_dirty(-1); - cow_updated = getMonotonicUs(); - cow_update_cost = cow_updated - now; - if (cow > peak_cow) peak_cow = cow; - sum_cow += cow; - update_count++; - - int cow_info = (info_type != CHILD_INFO_TYPE_CURRENT_INFO); - if (cow || cow_info) { - serverLog(cow_info ? LL_NOTICE : LL_VERBOSE, - "Fork CoW for %s: current %zu MB, peak %zu MB, average %llu MB", - pname, cow>>20, peak_cow>>20, (sum_cow/update_count)>>20); - } - } - - data.information_type = info_type; - data.keys = keys; - data.cow = cow; - data.cow_updated = cow_updated; - data.progress = progress; - - ssize_t wlen = sizeof(data); - - if (write(server.child_info_pipe[1], &data, wlen) != wlen) { - /* Failed writing to parent, it could have been killed, exit. */ - serverLog(LL_WARNING,"Child failed reporting info to parent, exiting. %s", strerror(errno)); - exitFromChild(1, 0); - } -} - -/* Update Child info. */ -void updateChildInfo(childInfoType information_type, size_t cow, monotime cow_updated, size_t keys, double progress) { - if (cow > server.stat_current_cow_peak) server.stat_current_cow_peak = cow; - - if (information_type == CHILD_INFO_TYPE_CURRENT_INFO) { - server.stat_current_cow_bytes = cow; - server.stat_current_cow_updated = cow_updated; - server.stat_current_save_keys_processed = keys; - if (progress != -1) server.stat_module_progress = progress; - } else if (information_type == CHILD_INFO_TYPE_AOF_COW_SIZE) { - server.stat_aof_cow_bytes = server.stat_current_cow_peak; - } else if (information_type == CHILD_INFO_TYPE_RDB_COW_SIZE) { - server.stat_rdb_cow_bytes = server.stat_current_cow_peak; - } else if (information_type == CHILD_INFO_TYPE_MODULE_COW_SIZE) { - server.stat_module_cow_bytes = server.stat_current_cow_peak; - } -} - -/* Read child info data from the pipe. - * if complete data read into the buffer, - * data is stored into *buffer, and returns 1. - * otherwise, the partial data is left in the buffer, waiting for the next read, and returns 0. */ -int readChildInfo(childInfoType *information_type, size_t *cow, monotime *cow_updated, size_t *keys, double* progress) { - /* We are using here a static buffer in combination with the server.child_info_nread to handle short reads */ - static child_info_data buffer; - ssize_t wlen = sizeof(buffer); - - /* Do not overlap */ - if (server.child_info_nread == wlen) server.child_info_nread = 0; - - int nread = read(server.child_info_pipe[0], (char *)&buffer + server.child_info_nread, wlen - server.child_info_nread); - if (nread > 0) { - server.child_info_nread += nread; - } - - /* We have complete child info */ - if (server.child_info_nread == wlen) { - *information_type = buffer.information_type; - *cow = buffer.cow; - *cow_updated = buffer.cow_updated; - *keys = buffer.keys; - *progress = buffer.progress; - return 1; - } else { - return 0; - } -} - -/* Receive info data from child. */ -void receiveChildInfo(void) { - if (server.child_info_pipe[0] == -1) return; - - size_t cow; - monotime cow_updated; - size_t keys; - double progress; - childInfoType information_type; - - /* Drain the pipe and update child info so that we get the final message. */ - while (readChildInfo(&information_type, &cow, &cow_updated, &keys, &progress)) { - updateChildInfo(information_type, cow, cow_updated, keys, progress); - } -} diff --git a/examples/redis-unstable/src/chk.c b/examples/redis-unstable/src/chk.c deleted file mode 100644 index f15cfb1..0000000 --- a/examples/redis-unstable/src/chk.c +++ /dev/null @@ -1,822 +0,0 @@ -/* Implementation of a topK structure using CuckooHeavyKeeper algorithm - * - * Implementation is based on the paper "Cuckoo Heavy Keeper and the balancing - * act of maintaining heavy hitters in stream processing" by Vinh Quang Ngo and - * Marina Papatriantafilou. Also, the accompanying C++ implementation was used - * as a reference point: https://github.com/vinhqngo5/Cuckoo_Heavy_Keeper - * Main changes are addition of a min-heap so we can keep names of the top K - * elements - idea comes from RedisBloom's TopK structure. - * - * Copyright (c) 2026-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "chk.h" -#include "redisassert.h" -#include "zmalloc.h" -#include "xxhash.h" - -#include -#include -#include - -/* Lobby to heavy item promotion threshold */ -#define LOBBY_PROMOTION_THRESHOLD 16 - -#ifndef static_assert -#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1] -#endif - -static_assert(LOBBY_PROMOTION_THRESHOLD < CHK_LUT_SIZE, - "Lobby promotion threshold should be less then the LUT size to " - "ensure constant operations during decayCounter!"); - -/* After a heavy item is demoted is starts recursively kicking out other heavy - * items in the case it should stay heavy (defined by isHeavyHitter). In - * principle this process could go over all the items in the chkTopK's tables - * so it's artificially limited by this constant. */ -#define MAX_KICKS 16 - -/* An item is defined as heavy hitter if its count is more or equal to x * N - * where x is a threshold constant (HEAVY_RATIO) and N is the total count the - * chkTopK structure has accumulated. See the paper for more info. */ -#define HEAVY_RATIO 0.008 - -/* A unique seed for the items when storing them in the heap so it's not related - * to the cuckoo's hashes. Also, we don't need the less-bit hash here as the - * heap does not take much memory so we avoid needless possible collisions. */ -#define HEAP_SEED 1919 - -typedef struct { - size_t idx[CHK_NUM_TABLES]; - fingerprint_t fp; -} fpAndIdx; - -#define min(a, b) ((a) < (b) ? (a) : (b)) - -/* Heap operations */ -static chkHeapBucket *chkCheckExistInHeap(chkTopK *topk, const char *item, int itemlen, uint64_t fp) { - for (int32_t i = topk->k - 1; i >= 0; --i) { - chkHeapBucket *bucket = topk->heap + i; - if (bucket->fp == fp && bucket->item && - sdslen(bucket->item) == (size_t)itemlen && - memcmp(bucket->item, item, itemlen) == 0) - { - return bucket; - } - } - return NULL; -} - -void chkHeapifyDown(chkHeapBucket *array, size_t len, size_t start) { - size_t child = start; - - if (len < 2 || (len - 2) / 2 < child) { - return; - } - child = 2 * child + 1; - if ((child + 1) < len && (array[child].count > array[child + 1].count)) { - ++child; - } - if (array[child].count > array[start].count) { - return; - } - - chkHeapBucket top = {0}; - top = array[start]; - do { - memcpy(&array[start], &array[child], sizeof(chkHeapBucket)); - start = child; - - if ((len - 2) / 2 < child) { - break; - } - child = 2 * child + 1; - - if ((child + 1) < len && (array[child].count > array[child + 1].count)) { - ++child; - } - } while (array[child].count < top.count); - memcpy(&array[start], &top, sizeof(chkHeapBucket)); -} - -/*----------------------------------------------------------------------------- - * chkTopK operations - *----------------------------------------------------------------------------*/ - -/* Create the chkTopK structure. Note, CHK paper recommends decay=1.08. - * numbuckets must be a power of 2. Recommended size for numbuckets is at least - * 7 or 8 times k. */ -chkTopK *chkTopKCreate(int k, int numbuckets, double decay) { - /* Number of buckets need to be a power of 2 for better performance - we - * have better cache locality of the tables and faster table indices - * calculations. */ - assert(k > 0 && (numbuckets & (numbuckets - 1)) == 0); - - size_t usable = 0; - chkTopK *topk = zcalloc_usable(sizeof(chkTopK), &usable); - topk->alloc_size += usable; - - for (int i = 0; i < CHK_NUM_TABLES; ++i) { - topk->tables[i] = zcalloc_usable(sizeof(chkBucket) * numbuckets, &usable); - topk->alloc_size += usable; - } - - topk->heap = zcalloc_usable(sizeof(chkHeapBucket) * k, &usable); - topk->alloc_size += usable; - - topk->decay = decay; - topk->inv_decay = 1. / decay; - topk->k = k; - topk->numbuckets = numbuckets; - - topk->lut_decay_exp[0] = 0; - topk->lut_min_decay[0] = 0; - topk->lut_decay_prob[0] = 0; - for (int i = 1; i < CHK_LUT_SIZE + 1; ++i) { - topk->lut_decay_exp[i] = topk->lut_decay_exp[i - 1] + pow(topk->decay, i - 1); - topk->lut_min_decay[i] = topk->lut_decay_exp[i] - topk->lut_decay_exp[i - 1]; - topk->lut_decay_prob[i] = pow(topk->inv_decay, i); - } - - return topk; -} - -/* Release chkTopK resources */ -void chkTopKRelease(chkTopK *topk) { - size_t usable; - for (int i = 0; i < CHK_NUM_TABLES; ++i) { - zfree_usable(topk->tables[i], &usable); - topk->alloc_size -= usable; - } - for (int i = 0; i < topk->k; ++i) { - if (topk->heap[i].item) { - topk->alloc_size -= sdsAllocSize(topk->heap[i].item); - sdsfree(topk->heap[i].item); - } - } - zfree_usable(topk->heap, &usable); - topk->alloc_size -= usable; - debugAssert(topk->alloc_size == zmalloc_usable_size(topk)); - - zfree(topk); -} - -static inline int generateAltIdx(fingerprint_t fp, int idx, int numbuckets) { - return (idx ^ (0x5bd1e995 * (size_t)fp)) & (numbuckets - 1); -} - -fpAndIdx generateItemFpAndIdxs(chkTopK *topk, char *item, int itemlen) { - uint64_t hash = XXH3_64bits_withSeed(item, itemlen, 0); - - fpAndIdx res; - res.fp = (hash & 0xFFFF); /* Only use 16 bits for fingerprint */ - - /* Note numbuckets are a power of 2 so we don't use modulo for index calc */ - res.idx[0] = (hash >> 32) & (topk->numbuckets - 1); - for (int i = 1; i < CHK_NUM_TABLES; ++i) { - res.idx[i] = generateAltIdx(res.fp, res.idx[i-1], topk->numbuckets); - } - - return res; -} - -typedef struct { - int table_idx; - int pos; -} checkEntryRes; - -/* Check if `item` is a heavy entry. If so we bump its count. If not - we make - * it a heavy entry immediately if there is an empty spot, thus skipping the - * lobby as an optimization. */ -checkEntryRes checkHeavyEntries(chkTopK *topk, fpAndIdx item, counter_t weight) { - int empty_table_idx = -1; - int empty_pos = -1; - - for (int i = 0; i < CHK_NUM_TABLES; ++i) { - int idx = item.idx[i]; - - chkBucket *bucket = &topk->tables[i][idx]; - for (int j = 0; j < CHK_HEAVY_ENTRIES_PER_BUCKET; ++j) { - chkHeavyEntry *e = &bucket->heavy_entries[j]; - if (e->count > 0) { - if (e->fp == item.fp) { - e->count += weight; - - checkEntryRes res = { i, j }; - return res; - } - } else if (empty_table_idx == -1) { - empty_table_idx = i; - empty_pos = j; - } - } - } - - if (empty_table_idx == -1) { - checkEntryRes res = { -1, -1 }; - return res; - } - - /* If there is an empty slot in the heavy entries just put the item there - * instead of going through the lobby first (optimization as per the paper) */ - int idx = item.idx[empty_table_idx]; - chkHeavyEntry *e = &topk->tables[empty_table_idx][idx].heavy_entries[empty_pos]; - e->fp = item.fp; - e->count = weight; - - checkEntryRes res = {empty_table_idx, empty_pos}; - return res; -} - -/* A heavy hitter is defined by the paper as an item with counter more or equal - * to phi * N, where phi is a constant and N is the total count the structure - * has recorded up to that point */ -int isHeavyHitter(chkTopK *topk, counter_t cnt) { - return cnt >= (topk->total * HEAVY_RATIO); -} - -/* After a lobby item is promoted it may be placed on a heavy item's spot. The - * latter is kicked out, but it may recursively kick out another heavy item. - * The process is limited by MAX_KICKS and also by the fact that during updates - * one of the kicked out items may have its counter decayed so much - it's not - * passing the heavy item threshold (see isHeavyHitter). */ -void kickout(chkTopK *topk, chkHeavyEntry entry, int idx, int table_idx) { - for (int i = 0; i < MAX_KICKS; ++i) { - /* Do not try to swap with any entries if we don't reach the heavy - * hitter threshold */ - if (!isHeavyHitter(topk, entry.count)) return; - - /* Find the heavy entry in the alt bucket in the other table with - * minimum count. If there is empty entry there just occupy it, else - * recursively kick the minimal one out. - * To find the alt bucket we need to compute the alt index from the - * fingerprint of the kicked-out entry. */ - table_idx = 1 - table_idx; - idx = generateAltIdx(entry.fp, idx, topk->numbuckets); - - chkBucket *bucket = &topk->tables[table_idx][idx]; - counter_t min = (counter_t)-1; - int min_pos = -1; - for (int j = 0; j < CHK_HEAVY_ENTRIES_PER_BUCKET; ++j) { - chkHeavyEntry *e = &bucket->heavy_entries[j]; - if (e->count == 0) { - *e = entry; - return; - } - if (e->count < min) { - min = e->count; - min_pos = j; - } - } - - chkHeavyEntry old_entry = bucket->heavy_entries[min_pos]; - bucket->heavy_entries[min_pos] = entry; - entry = old_entry; - } -} - -/* When a lobby entry's counter passes the promotion threshold we try to promote - * it with some probability. See the paper for more details. If promotion is - * successful the lobby entry may kick out a heavy one - see kickout() */ -int tryPromoteAndKickout(chkTopK *topk, fpAndIdx item, counter_t new_count, - int table_idx) -{ - int idx = item.idx[table_idx]; - chkBucket *bucket = &topk->tables[table_idx][idx]; - counter_t min = (counter_t)-1; /* counter_t is unsigned */ - int min_idx = -1; - - /* We search for heavy item bucket of the promoted lobby entry. We may have - * an empty space which we immediately occupy. Otherwise we choose the - * bucket with lowest counter */ - for (int i = 0; i < CHK_HEAVY_ENTRIES_PER_BUCKET; ++i) { - if (bucket->heavy_entries[i].count == 0) { - bucket->heavy_entries[i].fp = item.fp; - bucket->heavy_entries[i].count = new_count; - return i; - } - if (bucket->heavy_entries[i].count < min) { - min = bucket->heavy_entries[i].count; - min_idx = i; - } - } - - /* If the heavy entry that is going to be kicked out has a counter lower - * than the lobby's one we always kick it out */ - if (min > new_count) { - double prob = (new_count - LOBBY_PROMOTION_THRESHOLD) / - (double)(min - LOBBY_PROMOTION_THRESHOLD); - - if ((rand() / (double)RAND_MAX) >= prob) return -1; - } - - chkHeavyEntry to_kickout = bucket->heavy_entries[min_idx]; - /* Note, that here the promoted item keeps the old count as per the paper */ - bucket->heavy_entries[min_idx].fp = bucket->lobby_entry.fp; - - bucket->lobby_entry.count = 0; - bucket->lobby_entry.fp = 0; - - kickout(topk, to_kickout, idx, table_idx); - - return min_idx; -} - -/* Check if an item is a lobby entry */ -checkEntryRes checkLobbyEntries(chkTopK *topk, fpAndIdx item, counter_t weight) { - for (int i = 0; i < CHK_NUM_TABLES; ++i) { - int idx = item.idx[i]; - - chkBucket *bucket = &topk->tables[i][idx]; - chkLobbyEntry *e = &bucket->lobby_entry; - - /* No match or empty lobby entry */ - if (e->fp != item.fp || e->count == 0) continue; - - /* If we don't cross the threshold just update the counter */ - uint64_t new_count = (uint64_t)e->count + weight; - if (new_count < LOBBY_PROMOTION_THRESHOLD) { - e->count = (uint16_t)new_count; - - checkEntryRes res = { i, -1 }; - return res; - } - - /* Try to promote the entry to heavy entry if we crossed the threshold. - * Else just set the counter to the value of the threshold */ - int kickout_pos = tryPromoteAndKickout(topk, item, new_count, i); - if (kickout_pos != -1) { - checkEntryRes res = {i, kickout_pos}; - return res; - } - - e->count = LOBBY_PROMOTION_THRESHOLD; - checkEntryRes res = { i, -1 }; - return res; - } - - checkEntryRes res = { -1, -1 }; - return res; -} - -/* Probability to decay cnt with 1. - * Equal to pow(decay, -cnt) */ -static inline double getDecayProb(chkTopK *topk, counter_t cnt) { - if (cnt < CHK_LUT_SIZE) { - return topk->lut_decay_prob[cnt]; - } - - return pow(topk->lut_decay_prob[CHK_LUT_SIZE], - ((double)cnt / (CHK_LUT_SIZE))) * - topk->lut_decay_prob[cnt % (CHK_LUT_SIZE)]; -} - -/* Expected decay steps to decay cnt to 0. - * Equal to sum(pow(decay, i)) for i in [0; cnt] */ -static inline double getExpDecayCount(chkTopK *topk, lobby_counter_t cnt) { - return topk->lut_decay_exp[cnt]; -} - -/* Expected minimum decay steps to decay cnt with 1. Since probability is - * pow(decay, -cnt) it's equal to pow(decay, cnt) */ -static inline double getMinDecayCount(chkTopK *topk, counter_t cnt) { - if (cnt < CHK_LUT_SIZE) { - return topk->lut_min_decay[cnt]; - } - - return pow(topk->lut_min_decay[CHK_LUT_SIZE], - ((double)cnt / (CHK_LUT_SIZE))) * - topk->lut_min_decay[cnt % (CHK_LUT_SIZE)]; -} - -/* When there is a hash-collission between lobby entries we decay the existing - * lobby entry with the weight of the new one. Return the counter after decaying. */ -lobby_counter_t chkDecayCounter(chkTopK *topk, lobby_counter_t cnt, counter_t weight) { - if (weight == 0) return cnt; - - /* Unweighted update - just decay with probability pow(decay, -cnt) */ - if (weight == 1) { - double prob = getDecayProb(topk, (counter_t)cnt); - if ((rand() / (double)RAND_MAX) < prob) { - return cnt - 1; - } - return cnt; - } - - /* For weighted updates we simulate multiple unweighted ones */ - - /* Weight is smaller than the minimum amount of decay steps required to - * decay the counter with probability of 100% so again we roll the dice */ - double min_decay = getMinDecayCount(topk, cnt); - if (weight < (counter_t)min_decay) { - double prob = weight / min_decay; - if ((rand() / (double)RAND_MAX) < prob) { - return cnt - 1; - } - return cnt; - } - - /* Weight is more than the expected amount of decay steps to decay the - * counter to 0. */ - double exp_decays = getExpDecayCount(topk, cnt); - if (weight >= (counter_t)exp_decays) - return 0; - - /* Weight is large enough to decay the counter to cnt - X where 0 < X < cnt. - * We binary search for the largest value `C` such that: - * - * (expected decay ops for `C`) >= (expected decay ops for `cnt`) - `weight` - * i.e lut_decay_exp[C] + weight >= lut_decay_exp[cnt] - * - * Note that since cnt is a lobby counter it will necessarily be less or - * equal than LOBBY_PROMOTION_THRESHOLD, so although we binary search this - * is a O(1) operation */ - int left = 0; - int right = cnt; - while (left < right) { - int mid = left + (right - left) / 2; - - if (topk->lut_decay_exp[mid] + weight >= topk->lut_decay_exp[cnt]) { - right = mid; - } else { - left = mid + 1; - } - } - - return left; -} - -/* Update weighted item. If another one was expelled from the topK list - - * return it. Caller is responsible for releasing it */ -sds chkTopKUpdate(chkTopK *topk, char *item, int itemlen, counter_t weight) -{ - if (weight == 0) return NULL; - - topk->total += weight; - - /* Generate a fingerprint and indices for both cuckoo tables. */ - fpAndIdx itemFpIdx = generateItemFpAndIdxs(topk, item, itemlen); - - /* Check if the item is amongst the heavy entries. If so we just update its - * counter. */ - checkEntryRes res = checkHeavyEntries(topk, itemFpIdx, weight); - if (res.table_idx != -1) { - goto update_heap; - } - - /* If the item is not already heavy it may be in the lobby. If so we'll - * increase its counter and promote it to a heavy entry if it passes the - * threshold */ - res = checkLobbyEntries(topk, itemFpIdx, weight); - if (res.table_idx != -1) { - goto update_heap; - } - - /* Item is not tracked at all. Check for empty lobby entries - if there is - * any - place the item there. The weight may be higher than the promotional - * threshold in which case we'll try to promote it. */ - for (int i = 0; i < CHK_NUM_TABLES; ++i) { - int idx = itemFpIdx.idx[i]; - chkBucket *bucket = &topk->tables[i][idx]; - if (bucket->lobby_entry.count == 0) { - bucket->lobby_entry.fp = itemFpIdx.fp; - - res.table_idx = i; - res.pos = -1; - - if (weight < LOBBY_PROMOTION_THRESHOLD) { - bucket->lobby_entry.count = weight; - } else { - int kickout_pos = tryPromoteAndKickout(topk, itemFpIdx, weight, i); - if (kickout_pos != -1) { - res.pos = kickout_pos; - } else { - bucket->lobby_entry.count = LOBBY_PROMOTION_THRESHOLD; - } - } - - goto update_heap; - } - } - - /* If there are no empty lobby entries choose a table deterministically, - * decay its lobby counter and update */ - int table_idx = itemFpIdx.fp & 1; - int idx = itemFpIdx.idx[table_idx]; - - chkLobbyEntry *e = &topk->tables[table_idx][idx].lobby_entry; - - /* new_count is the count of `e` after decaying it with weight */ - lobby_counter_t new_count = chkDecayCounter(topk, e->count, weight); - - /* if the chosen lobby entry has decayed its counter to 0, it's replaced by - * the new entry. Note, in that case the new entry has it's weight - * decreased by the approximate amount of decay operations needed to decay - * the old entry. */ - if (new_count == 0) { - e->fp = itemFpIdx.fp; - counter_t exp_decay_cnt = getExpDecayCount(topk, e->count); - e->count = exp_decay_cnt >= weight ? - 1 : (lobby_counter_t)min(255, weight - exp_decay_cnt); - } else { - e->count = new_count; - } - - if (e->count >= LOBBY_PROMOTION_THRESHOLD) { - int kickout_pos = tryPromoteAndKickout(topk, itemFpIdx, e->count, table_idx); - if (kickout_pos != -1) { - res.table_idx = table_idx; - res.pos = kickout_pos; - } - } - - /* After a change in the structure has occurred we check if we also need to - * update the heap - i.e bump a new item in it, or reorder an old item if - * it's counter went up. */ -update_heap: - if (res.table_idx == -1 || res.pos == -1) - return NULL; - - table_idx = res.table_idx; - idx = itemFpIdx.idx[table_idx]; - - counter_t heap_min = topk->heap[0].count; - chkHeavyEntry *entry = &topk->tables[table_idx][idx].heavy_entries[res.pos]; - - if (entry->count < heap_min) - return NULL; - - /* Heap uses different hash than the cuckoo tables */ - uint64_t fp = XXH3_64bits_withSeed(item, itemlen, HEAP_SEED); - chkHeapBucket *itemHeapPtr = chkCheckExistInHeap(topk, item, itemlen, fp); - if (itemHeapPtr != NULL) { - itemHeapPtr->count = entry->count; - chkHeapifyDown(topk->heap, topk->k, itemHeapPtr - topk->heap); - } else { - /* We know the new entry has bigger count than the min-element so it's - * safe to expel it. */ - sds expelled = topk->heap[0].item; - if (expelled) topk->alloc_size -= sdsAllocSize(expelled); - - topk->heap[0].count = entry->count; - topk->heap[0].fp = fp; - topk->heap[0].item = sdsnewlen(item, itemlen); - topk->alloc_size += sdsAllocSize(topk->heap[0].item); - - chkHeapifyDown(topk->heap, topk->k, 0); - return expelled; - } - - return NULL; -} - -int cmpchkHeapBucket(const void *tmp1, const void *tmp2) { - const chkHeapBucket *res1 = tmp1; - const chkHeapBucket *res2 = tmp2; - return res1->count < res2->count ? 1 : res1->count > res2->count ? -1 : 0; -} - -/* Get an ordered by count list of topk->k elements inside the topk object. - * - * NOTE, the returned array is a copy of the internal heap stored by `topk`. The - * caller is responsible for releasing it after use. The elements of the array - * share their `item` pointers with the internal topk->heap buckets so one must - * not use it after `topk` is released. */ -chkHeapBucket *chkTopKList(chkTopK *topk) { - chkHeapBucket *list = zmalloc(sizeof(chkHeapBucket) * topk->k); - memcpy(list, topk->heap, sizeof(chkHeapBucket) * topk->k); - qsort(list, topk->k, sizeof(*list), cmpchkHeapBucket); - return list; -} - -size_t chkTopKGetMemoryUsage(chkTopK *topk) { - if (!topk) return 0; - - return topk->alloc_size; -} - -#ifdef REDIS_TEST - -#include -#include "testhelp.h" - -#define UNUSED(x) (void)(x) - -static int findItemInList(chkHeapBucket *list, int k, const char *item, int itemlen) { - for (int i = 0; i < k; i++) { - if (list[i].item != NULL && - sdslen(list[i].item) == (size_t)itemlen && - memcmp(list[i].item, item, itemlen) == 0) { - return i; - } - } - return -1; -} - -static int verifyListSorted(chkHeapBucket *list, int k) { - for (int i = 0; i < k - 1; i++) { - if (list[i].item == NULL) continue; - if (list[i + 1].item == NULL) continue; - if (list[i].count < list[i + 1].count) { - return 0; - } - } - return 1; -} - -static void chkTopKUpdateAndFreeExpelled(chkTopK *topk, const char *item, int itemlen, counter_t weight) { - sds expelled = chkTopKUpdate(topk, (char *)item, itemlen, weight); - if (expelled) sdsfree(expelled); -} - -static void testBasicTopK(void) { - int k = 5; - int numbuckets = 64; - double decay = 0.9; - - chkTopK *topk = chkTopKCreate(k, numbuckets, decay); - test_cond("Create topk structure", topk != NULL); - - if (topk == NULL) return; - - chkTopKUpdateAndFreeExpelled(topk, "item1", 5, 100); - chkTopKUpdateAndFreeExpelled(topk, "item2", 5, 200); - chkTopKUpdateAndFreeExpelled(topk, "item3", 5, 150); - chkTopKUpdateAndFreeExpelled(topk, "item4", 5, 50); - chkTopKUpdateAndFreeExpelled(topk, "item5", 5, 300); - chkTopKUpdateAndFreeExpelled(topk, "item6", 5, 75); - - chkHeapBucket *list = chkTopKList(topk); - test_cond("chkTopKList returns non-NULL", list != NULL); - - if (list == NULL) { - chkTopKRelease(topk); - return; - } - - test_cond("TopK list is sorted in descending order", verifyListSorted(list, k)); - - int idx1 = findItemInList(list, k, "item5", 5); - int idx2 = findItemInList(list, k, "item2", 5); - int idx3 = findItemInList(list, k, "item3", 5); - - test_cond("Heaviest items are in the list", idx1 != -1 && idx2 != -1 && idx3 != -1); - - test_cond("item5 has the highest count", idx1 == 0); - - zfree(list); - chkTopKRelease(topk); -} - -static void testHeavierElementsReplaceLighter(void) { - int k = 5; - int numbuckets = 64; - double decay = 0.9; - - chkTopK *topk = chkTopKCreate(k, numbuckets, decay); - test_cond("Create topk structure for replacement test", topk != NULL); - - if (topk == NULL) return; - - chkTopKUpdateAndFreeExpelled(topk, "light1", 6, 50); - chkTopKUpdateAndFreeExpelled(topk, "light2", 6, 60); - chkTopKUpdateAndFreeExpelled(topk, "light3", 6, 70); - chkTopKUpdateAndFreeExpelled(topk, "light4", 6, 80); - chkTopKUpdateAndFreeExpelled(topk, "light5", 6, 90); - - chkHeapBucket *list1 = chkTopKList(topk); - test_cond("Initial topk list is not NULL", list1 != NULL); - - if (list1 == NULL) { - chkTopKRelease(topk); - return; - } - - int light1_idx = findItemInList(list1, k, "light1", 6); - int light2_idx = findItemInList(list1, k, "light2", 6); - int light3_idx = findItemInList(list1, k, "light3", 6); - int light4_idx = findItemInList(list1, k, "light4", 6); - int light5_idx = findItemInList(list1, k, "light5", 6); - - test_cond("light1 is in initial topk list", light1_idx != -1); - test_cond("light2 is in initial topk list", light2_idx != -1); - test_cond("light3 is in initial topk list", light3_idx != -1); - test_cond("light4 is in initial topk list", light4_idx != -1); - test_cond("light5 is in initial topk list", light5_idx != -1); - - zfree(list1); - - chkTopKUpdateAndFreeExpelled(topk, "heavy1", 6, 500); - chkTopKUpdateAndFreeExpelled(topk, "heavy2", 6, 600); - - chkHeapBucket *list2 = chkTopKList(topk); - test_cond("Updated topk list is not NULL", list2 != NULL); - - if (list2 == NULL) { - chkTopKRelease(topk); - return; - } - - int heavy1_idx = findItemInList(list2, k, "heavy1", 6); - int heavy2_idx = findItemInList(list2, k, "heavy2", 6); - - test_cond("heavy1 is in updated topk list", heavy1_idx != -1); - test_cond("heavy2 is in updated topk list", heavy2_idx != -1); - - light1_idx = findItemInList(list2, k, "light1", 6); - light2_idx = findItemInList(list2, k, "light2", 6); - light3_idx = findItemInList(list2, k, "light3", 6); - light4_idx = findItemInList(list2, k, "light4", 6); - light5_idx = findItemInList(list2, k, "light5", 6); - - int light_items_remaining = (light1_idx != -1 ? 1 : 0) + - (light2_idx != -1 ? 1 : 0) + - (light3_idx != -1 ? 1 : 0) + - (light4_idx != -1 ? 1 : 0) + - (light5_idx != -1 ? 1 : 0); - - test_cond("Some lighter items remain in the list after adding heavier ones", - light_items_remaining > 0); - - zfree(list2); - chkTopKRelease(topk); -} - -static void testManySmallWeightUpdates(void) { - int k = 2; - int numbuckets = 64; - double decay = 0.9; - - chkTopK *topk = chkTopKCreate(k, numbuckets, decay); - test_cond("Create topk structure for small weight updates test", topk != NULL); - - if (topk == NULL) return; - - chkTopKUpdateAndFreeExpelled(topk, "item0", 5, 50); - chkTopKUpdateAndFreeExpelled(topk, "item1", 5, 100); - - chkHeapBucket *list1 = chkTopKList(topk); - test_cond("Topk list after adding item0 and item1 is not NULL", list1 != NULL); - - if (list1 == NULL) { - chkTopKRelease(topk); - return; - } - - int item0_idx1 = findItemInList(list1, k, "item0", 5); - int item1_idx1 = findItemInList(list1, k, "item1", 5); - - test_cond("item0 and item1 are in topk after initial updates", - item0_idx1 != -1 && item1_idx1 != -1); - - zfree(list1); - - for (int i = 0; i < 100; i++) { - chkTopKUpdateAndFreeExpelled(topk, "item2", 5, 1); - } - - chkHeapBucket *list2 = chkTopKList(topk); - test_cond("Topk list after many small updates is not NULL", list2 != NULL); - - if (list2 == NULL) { - chkTopKRelease(topk); - return; - } - - int item0_idx2 = findItemInList(list2, k, "item0", 5); - int item1_idx2 = findItemInList(list2, k, "item1", 5); - int item2_idx2 = findItemInList(list2, k, "item2", 5); - - test_cond("item1 and item2 are in topk, item0 is not", - item1_idx2 != -1 && item2_idx2 != -1 && item0_idx2 == -1); - - counter_t item1_count = 0; - counter_t item2_count = 0; - if (item1_idx2 != -1) item1_count = list2[item1_idx2].count; - if (item2_idx2 != -1) item2_count = list2[item2_idx2].count; - - test_cond("item1 and item2 have similar weights", item1_count > 0 && item2_count > 0 && - (item1_count > item2_count ? item1_count - item2_count : item2_count - item1_count) < 5); - - zfree(list2); - chkTopKRelease(topk); -} - -int chkTopKTest(int argc, char *argv[], int flags) { - UNUSED(argc); - UNUSED(argv); - UNUSED(flags); - - testBasicTopK(); - testHeavierElementsReplaceLighter(); - testManySmallWeightUpdates(); - - return 0; -} - -#endif /* REDIS_TEST */ diff --git a/examples/redis-unstable/src/chk.h b/examples/redis-unstable/src/chk.h deleted file mode 100644 index a974fd6..0000000 --- a/examples/redis-unstable/src/chk.h +++ /dev/null @@ -1,89 +0,0 @@ -/* Implementation of a topK structure using CuckooHeavyKeeper algorithm - * - * Implementation is based on the paper "Cuckoo Heavy Keeper and the balancing - * act of maintaining heavy hitters in stream processing" by Vinh Quang Ngo and - * Marina Papatriantafilou. Also, the accompanying C++ implementation was used - * as a reference point: https://github.com/vinhqngo5/Cuckoo_Heavy_Keeper - * Main changes are addition of a min-heap so we can keep names of the top K - * elements - idea comes from RedisBloom's TopK structure. - * - * Copyright (c) 2026-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#pragma once - -#include "sds.h" - -#include -#include - -#define CHK_LUT_SIZE 256 -#define CHK_HEAVY_ENTRIES_PER_BUCKET 2 -#define CHK_NUM_TABLES 2 - -typedef uint64_t counter_t; -typedef uint16_t fingerprint_t; -typedef uint8_t lobby_counter_t; - -typedef struct { - counter_t count; - fingerprint_t fp; -} chkHeavyEntry; - -typedef struct { - fingerprint_t fp; - lobby_counter_t count; -} chkLobbyEntry; - -typedef struct { - chkHeavyEntry heavy_entries[CHK_HEAVY_ENTRIES_PER_BUCKET]; - chkLobbyEntry lobby_entry; -} chkBucket; - -typedef struct { - counter_t count; - sds item; - uint64_t fp; /* Fingerprint used to identify the item. Internal use only */ -} chkHeapBucket; - -typedef struct chkTopK { - chkBucket *tables[CHK_NUM_TABLES]; /* Cuckoo tables */ - chkHeapBucket *heap; /* Min-heap for storing top-K item's names */ - - size_t alloc_size; /* Used for memory tracking only */ - - /* Expected number of operations to decay count i to 0 */ - double lut_decay_exp[CHK_LUT_SIZE + 1]; - - /* Minimum number of decay operations to decay count i with 1 */ - double lut_min_decay[CHK_LUT_SIZE + 1]; - - /* Probability of decaying i with 1. As per paper probability is decay^-i - * but we actually store (1/decay)^i for faster computation. */ - double lut_decay_prob[CHK_LUT_SIZE + 1]; - - double decay; /* Decay constant */ - double inv_decay; /* Cache 1/decay for faster computations */ - - counter_t total; /* Total recorded count for all updates */ - - int k; - int numbuckets; -} chkTopK; - -chkTopK *chkTopKCreate(int k, int numbuckets, double decay); -void chkTopKRelease(chkTopK *topk); -sds chkTopKUpdate(chkTopK *topk, char *item, int itemlen, counter_t weight); -chkHeapBucket *chkTopKList(chkTopK *topk); -size_t chkTopKGetMemoryUsage(chkTopK *topk); - -#ifdef REDIS_TEST - -int chkTopKTest(int argc, char *argv[], int flags); - -#endif /* REDIS_TEST */ diff --git a/examples/redis-unstable/src/cli_commands.c b/examples/redis-unstable/src/cli_commands.c deleted file mode 100644 index e56d48c..0000000 --- a/examples/redis-unstable/src/cli_commands.c +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include "cli_commands.h" - -/* Definitions to configure commands.c to generate the above structs. */ -#define MAKE_CMD(name,summary,complexity,since,doc_flags,replaced,deprecated,group,group_enum,history,num_history,tips,num_tips,function,arity,flags,acl,key_specs,key_specs_num,get_keys,numargs) name,summary,group,since,numargs -#define MAKE_ARG(name,type,key_spec_index,token,summary,since,flags,numsubargs,deprecated_since) name,type,token,since,flags,numsubargs -#define COMMAND_ARG cliCommandArg -#define COMMAND_STRUCT commandDocs -#define SKIP_CMD_HISTORY_TABLE -#define SKIP_CMD_TIPS_TABLE -#define SKIP_CMD_KEY_SPECS_TABLE - -#include "commands.def" diff --git a/examples/redis-unstable/src/cli_commands.h b/examples/redis-unstable/src/cli_commands.h deleted file mode 100644 index eb5a476..0000000 --- a/examples/redis-unstable/src/cli_commands.h +++ /dev/null @@ -1,46 +0,0 @@ -/* This file is used by redis-cli in place of server.h when including commands.c - * It contains alternative structs which omit the parts of the commands table - * that are not suitable for redis-cli, e.g. the command proc. */ - -#ifndef __REDIS_CLI_COMMANDS_H -#define __REDIS_CLI_COMMANDS_H - -#include -#include "commands.h" - -/* Syntax specifications for a command argument. */ -typedef struct cliCommandArg { - char *name; - redisCommandArgType type; - char *token; - char *since; - int flags; - int numsubargs; - struct cliCommandArg *subargs; - const char *display_text; - - /* - * For use at runtime. - * Fields used to keep track of input word matches for command-line hinting. - */ - int matched; /* How many input words have been matched by this argument? */ - int matched_token; /* Has the token been matched? */ - int matched_name; /* Has the name been matched? */ - int matched_all; /* Has the whole argument been consumed (no hint needed)? */ -} cliCommandArg; - -/* Command documentation info used for help output */ -struct commandDocs { - char *name; - char *summary; - char *group; - char *since; - int numargs; - cliCommandArg *args; /* An array of the command arguments. */ - struct commandDocs *subcommands; - char *params; /* A string describing the syntax of the command arguments. */ -}; - -extern struct commandDocs redisCommandTable[]; - -#endif diff --git a/examples/redis-unstable/src/cli_common.c b/examples/redis-unstable/src/cli_common.c deleted file mode 100644 index 0c269de..0000000 --- a/examples/redis-unstable/src/cli_common.c +++ /dev/null @@ -1,424 +0,0 @@ -/* CLI (command line interface) common methods - * - * Copyright (c) 2020-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "fmacros.h" -#include "cli_common.h" -#include "version.h" - -#include -#include -#include -#include -#include -#include /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */ -#include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ -#include -#include -#include -#ifdef USE_OPENSSL -#include -#include -#include -#endif - -#define UNUSED(V) ((void) V) - -char *redisGitSHA1(void); -char *redisGitDirty(void); - -/* Wrapper around redisSecureConnection to avoid hiredis_ssl dependencies if - * not building with TLS support. - */ -int cliSecureConnection(redisContext *c, cliSSLconfig config, const char **err) { -#ifdef USE_OPENSSL - static SSL_CTX *ssl_ctx = NULL; - - if (!ssl_ctx) { - ssl_ctx = SSL_CTX_new(SSLv23_client_method()); - if (!ssl_ctx) { - *err = "Failed to create SSL_CTX"; - goto error; - } - SSL_CTX_set_options(ssl_ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3); - SSL_CTX_set_verify(ssl_ctx, config.skip_cert_verify ? SSL_VERIFY_NONE : SSL_VERIFY_PEER, NULL); - - if (config.cacert || config.cacertdir) { - if (!SSL_CTX_load_verify_locations(ssl_ctx, config.cacert, config.cacertdir)) { - *err = "Invalid CA Certificate File/Directory"; - goto error; - } - } else { - if (!SSL_CTX_set_default_verify_paths(ssl_ctx)) { - *err = "Failed to use default CA paths"; - goto error; - } - } - - if (config.cert && !SSL_CTX_use_certificate_chain_file(ssl_ctx, config.cert)) { - *err = "Invalid client certificate"; - goto error; - } - - if (config.key && !SSL_CTX_use_PrivateKey_file(ssl_ctx, config.key, SSL_FILETYPE_PEM)) { - *err = "Invalid private key"; - goto error; - } - if (config.ciphers && !SSL_CTX_set_cipher_list(ssl_ctx, config.ciphers)) { - *err = "Error while configuring ciphers"; - goto error; - } -#ifdef TLS1_3_VERSION - if (config.ciphersuites && !SSL_CTX_set_ciphersuites(ssl_ctx, config.ciphersuites)) { - *err = "Error while setting cypher suites"; - goto error; - } -#endif - } - - SSL *ssl = SSL_new(ssl_ctx); - if (!ssl) { - *err = "Failed to create SSL object"; - return REDIS_ERR; - } - - if (config.sni && !SSL_set_tlsext_host_name(ssl, config.sni)) { - *err = "Failed to configure SNI"; - SSL_free(ssl); - return REDIS_ERR; - } - - return redisInitiateSSL(c, ssl); - -error: - SSL_CTX_free(ssl_ctx); - ssl_ctx = NULL; - return REDIS_ERR; -#else - (void) config; - (void) c; - (void) err; - return REDIS_OK; -#endif -} - -/* Wrapper around hiredis to allow arbitrary reads and writes. - * - * We piggybacks on top of hiredis to achieve transparent TLS support, - * and use its internal buffers so it can co-exist with commands - * previously/later issued on the connection. - * - * Interface is close to enough to read()/write() so things should mostly - * work transparently. - */ - -/* Write a raw buffer through a redisContext. If we already have something - * in the buffer (leftovers from hiredis operations) it will be written - * as well. - */ -ssize_t cliWriteConn(redisContext *c, const char *buf, size_t buf_len) -{ - int done = 0; - - /* Append data to buffer which is *usually* expected to be empty - * but we don't assume that, and write. - */ - c->obuf = sdscatlen(c->obuf, buf, buf_len); - if (redisBufferWrite(c, &done) == REDIS_ERR) { - if (!(c->flags & REDIS_BLOCK)) - errno = EAGAIN; - - /* On error, we assume nothing was written and we roll back the - * buffer to its original state. - */ - if (sdslen(c->obuf) > buf_len) - sdsrange(c->obuf, 0, -(buf_len+1)); - else - sdsclear(c->obuf); - - return -1; - } - - /* If we're done, free up everything. We may have written more than - * buf_len (if c->obuf was not initially empty) but we don't have to - * tell. - */ - if (done) { - sdsclear(c->obuf); - return buf_len; - } - - /* Write was successful but we have some leftovers which we should - * remove from the buffer. - * - * Do we still have data that was there prior to our buf? If so, - * restore buffer to it's original state and report no new data was - * written. - */ - if (sdslen(c->obuf) > buf_len) { - sdsrange(c->obuf, 0, -(buf_len+1)); - return 0; - } - - /* At this point we're sure no prior data is left. We flush the buffer - * and report how much we've written. - */ - size_t left = sdslen(c->obuf); - sdsclear(c->obuf); - return buf_len - left; -} - -/* Wrapper around OpenSSL (libssl and libcrypto) initialisation - */ -int cliSecureInit(void) -{ -#ifdef USE_OPENSSL - ERR_load_crypto_strings(); - SSL_load_error_strings(); - SSL_library_init(); -#endif - return REDIS_OK; -} - -/* Create an sds from stdin */ -sds readArgFromStdin(void) { - char buf[1024]; - sds arg = sdsempty(); - - while(1) { - int nread = read(fileno(stdin),buf,1024); - - if (nread == 0) break; - else if (nread == -1) { - perror("Reading from standard input"); - exit(1); - } - arg = sdscatlen(arg,buf,nread); - } - return arg; -} - -/* Create an sds array from argv, either as-is or by dequoting every - * element. When quoted is non-zero, may return a NULL to indicate an - * invalid quoted string. - * - * The caller should free the resulting array of sds strings with - * sdsfreesplitres(). - */ -sds *getSdsArrayFromArgv(int argc,char **argv, int quoted) { - sds *res = sds_malloc(sizeof(sds) * argc); - - for (int j = 0; j < argc; j++) { - if (quoted) { - sds unquoted = unquoteCString(argv[j]); - if (!unquoted) { - while (--j >= 0) sdsfree(res[j]); - sds_free(res); - return NULL; - } - res[j] = unquoted; - } else { - res[j] = sdsnew(argv[j]); - } - } - - return res; -} - -/* Unquote a null-terminated string and return it as a binary-safe sds. */ -sds unquoteCString(char *str) { - int count; - sds *unquoted = sdssplitargs(str, &count); - sds res = NULL; - - if (unquoted && count == 1) { - res = unquoted[0]; - unquoted[0] = NULL; - } - - if (unquoted) - sdsfreesplitres(unquoted, count); - - return res; -} - - -/* URL-style percent decoding. */ -#define isHexChar(c) (isdigit(c) || ((c) >= 'a' && (c) <= 'f')) -#define decodeHexChar(c) (isdigit(c) ? (c) - '0' : (c) - 'a' + 10) -#define decodeHex(h, l) ((decodeHexChar(h) << 4) + decodeHexChar(l)) - -static sds percentDecode(const char *pe, size_t len) { - const char *end = pe + len; - sds ret = sdsempty(); - const char *curr = pe; - - while (curr < end) { - if (*curr == '%') { - if ((end - curr) < 2) { - fprintf(stderr, "Incomplete URI encoding\n"); - exit(1); - } - - char h = tolower(*(++curr)); - char l = tolower(*(++curr)); - if (!isHexChar(h) || !isHexChar(l)) { - fprintf(stderr, "Illegal character in URI encoding\n"); - exit(1); - } - char c = decodeHex(h, l); - ret = sdscatlen(ret, &c, 1); - curr++; - } else { - ret = sdscatlen(ret, curr++, 1); - } - } - - return ret; -} - -/* Parse a URI and extract the server connection information. - * URI scheme is based on the provisional specification[1] excluding support - * for query parameters. Valid URIs are: - * scheme: "redis://" - * authority: [[ ":"] "@"] [ [":" ]] - * path: ["/" []] - * - * [1]: https://www.iana.org/assignments/uri-schemes/prov/redis */ -void parseRedisUri(const char *uri, const char* tool_name, cliConnInfo *connInfo, int *tls_flag) { -#ifdef USE_OPENSSL - UNUSED(tool_name); -#else - UNUSED(tls_flag); -#endif - - const char *scheme = "redis://"; - const char *tlsscheme = "rediss://"; - const char *curr = uri; - const char *end = uri + strlen(uri); - const char *userinfo, *username, *port, *host, *path; - - /* URI must start with a valid scheme. */ - if (!strncasecmp(tlsscheme, curr, strlen(tlsscheme))) { -#ifdef USE_OPENSSL - *tls_flag = 1; - curr += strlen(tlsscheme); -#else - fprintf(stderr,"rediss:// is only supported when %s is compiled with OpenSSL\n", tool_name); - exit(1); -#endif - } else if (!strncasecmp(scheme, curr, strlen(scheme))) { - curr += strlen(scheme); - } else { - fprintf(stderr,"Invalid URI scheme\n"); - exit(1); - } - if (curr == end) return; - - /* Extract user info. */ - if ((userinfo = strchr(curr,'@'))) { - if ((username = strchr(curr, ':')) && username < userinfo) { - connInfo->user = percentDecode(curr, username - curr); - curr = username + 1; - } - - connInfo->auth = percentDecode(curr, userinfo - curr); - curr = userinfo + 1; - } - if (curr == end) return; - - /* Extract host and port. */ - path = strchr(curr, '/'); - if (*curr != '/') { - host = path ? path - 1 : end; - if (*curr == '[') { - curr += 1; - if ((port = strchr(curr, ']'))) { - if (*(port+1) == ':') { - connInfo->hostport = atoi(port + 2); - } - host = port - 1; - } - } else { - if ((port = strchr(curr, ':'))) { - connInfo->hostport = atoi(port + 1); - host = port - 1; - } - } - sdsfree(connInfo->hostip); - connInfo->hostip = sdsnewlen(curr, host - curr + 1); - } - curr = path ? path + 1 : end; - if (curr == end) return; - - /* Extract database number. */ - connInfo->input_dbnum = atoi(curr); -} - -void freeCliConnInfo(cliConnInfo connInfo){ - if (connInfo.hostip) sdsfree(connInfo.hostip); - if (connInfo.auth) sdsfree(connInfo.auth); - if (connInfo.user) sdsfree(connInfo.user); -} - -/* - * Escape a Unicode string for JSON output (--json), following RFC 7159: - * https://datatracker.ietf.org/doc/html/rfc7159#section-7 -*/ -sds escapeJsonString(sds s, const char *p, size_t len) { - s = sdscatlen(s,"\"",1); - while(len--) { - switch(*p) { - case '\\': - case '"': - s = sdscatprintf(s,"\\%c",*p); - break; - case '\n': s = sdscatlen(s,"\\n",2); break; - case '\f': s = sdscatlen(s,"\\f",2); break; - case '\r': s = sdscatlen(s,"\\r",2); break; - case '\t': s = sdscatlen(s,"\\t",2); break; - case '\b': s = sdscatlen(s,"\\b",2); break; - default: - s = sdscatprintf(s,*(unsigned char *)p <= 0x1f ? "\\u%04x" : "%c",*p); - } - p++; - } - return sdscatlen(s,"\"",1); -} - -sds cliVersion(void) { - sds version = sdscatprintf(sdsempty(), "%s", REDIS_VERSION); - - /* Add git commit and working tree status when available. */ - if (strtoll(redisGitSHA1(),NULL,16)) { - version = sdscatprintf(version, " (git:%s", redisGitSHA1()); - if (strtoll(redisGitDirty(),NULL,10)) - version = sdscatprintf(version, "-dirty"); - version = sdscat(version, ")"); - } - return version; -} - -/* This is a wrapper to call redisConnect or redisConnectWithTimeout. */ -redisContext *redisConnectWrapper(const char *ip, int port, const struct timeval tv) { - if (tv.tv_sec == 0 && tv.tv_usec == 0) { - return redisConnect(ip, port); - } else { - return redisConnectWithTimeout(ip, port, tv); - } -} - -/* This is a wrapper to call redisConnectUnix or redisConnectUnixWithTimeout. */ -redisContext *redisConnectUnixWrapper(const char *path, const struct timeval tv) { - if (tv.tv_sec == 0 && tv.tv_usec == 0) { - return redisConnectUnix(path); - } else { - return redisConnectUnixWithTimeout(path, tv); - } -} diff --git a/examples/redis-unstable/src/cli_common.h b/examples/redis-unstable/src/cli_common.h deleted file mode 100644 index a5b8e44..0000000 --- a/examples/redis-unstable/src/cli_common.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef __CLICOMMON_H -#define __CLICOMMON_H - -#include -#include /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */ - -typedef struct cliSSLconfig { - /* Requested SNI, or NULL */ - char *sni; - /* CA Certificate file, or NULL */ - char *cacert; - /* Directory where trusted CA certificates are stored, or NULL */ - char *cacertdir; - /* Skip server certificate verification. */ - int skip_cert_verify; - /* Client certificate to authenticate with, or NULL */ - char *cert; - /* Private key file to authenticate with, or NULL */ - char *key; - /* Preferred cipher list, or NULL (applies only to <= TLSv1.2) */ - char* ciphers; - /* Preferred ciphersuites list, or NULL (applies only to TLSv1.3) */ - char* ciphersuites; -} cliSSLconfig; - - -/* server connection information object, used to describe an ip:port pair, db num user input, and user:pass. */ -typedef struct cliConnInfo { - char *hostip; - int hostport; - int input_dbnum; - char *auth; - char *user; -} cliConnInfo; - -int cliSecureConnection(redisContext *c, cliSSLconfig config, const char **err); - -ssize_t cliWriteConn(redisContext *c, const char *buf, size_t buf_len); - -int cliSecureInit(void); - -sds readArgFromStdin(void); - -sds *getSdsArrayFromArgv(int argc,char **argv, int quoted); - -sds unquoteCString(char *str); - -void parseRedisUri(const char *uri, const char* tool_name, cliConnInfo *connInfo, int *tls_flag); - -void freeCliConnInfo(cliConnInfo connInfo); - -sds escapeJsonString(sds s, const char *p, size_t len); - -sds cliVersion(void); - -redisContext *redisConnectWrapper(const char *ip, int port, const struct timeval tv); -redisContext *redisConnectUnixWrapper(const char *path, const struct timeval tv); - -#endif /* __CLICOMMON_H */ diff --git a/examples/redis-unstable/src/cluster.c b/examples/redis-unstable/src/cluster.c deleted file mode 100644 index d07c31c..0000000 --- a/examples/redis-unstable/src/cluster.c +++ /dev/null @@ -1,2263 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Copyright (c) 2024-present, Valkey contributors. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - * - * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. - */ - -/* - * cluster.c contains the common parts of a clustering - * implementation, the parts that are shared between - * any implementation of Redis clustering. - */ - -#include "server.h" -#include "cluster.h" -#include "cluster_asm.h" -#include "cluster_slot_stats.h" - -#include - -/* ----------------------------------------------------------------------------- - * Key space handling - * -------------------------------------------------------------------------- */ - -/* If it can be inferred that the given glob-style pattern, as implemented in - * stringmatchlen() in util.c, only can match keys belonging to a single slot, - * that slot is returned. Otherwise -1 is returned. */ -int patternHashSlot(char *pattern, int length) { - int s = -1; /* index of the first '{' */ - - for (int i = 0; i < length; i++) { - if (pattern[i] == '*' || pattern[i] == '?' || pattern[i] == '[') { - /* Wildcard or character class found. Keys can be in any slot. */ - return -1; - } else if (pattern[i] == '\\') { - /* Escaped character. Computing slot in this case is not - * implemented. We would need a temp buffer. */ - return -1; - } else if (s == -1 && pattern[i] == '{') { - /* Opening brace '{' found. */ - s = i; - } else if (s >= 0 && pattern[i] == '}' && i == s + 1) { - /* Empty tag '{}' found. The whole key is hashed. Ignore braces. */ - s = -2; - } else if (s >= 0 && pattern[i] == '}') { - /* Non-empty tag '{...}' found. Hash what's between braces. */ - return crc16(pattern + s + 1, i - s - 1) & 0x3FFF; - } - } - - /* The pattern matches a single key. Hash the whole pattern. */ - return crc16(pattern, length) & 0x3FFF; -} - -int getSlotOrReply(client *c, robj *o) { - long long slot; - - if (getLongLongFromObject(o,&slot) != C_OK || - slot < 0 || slot >= CLUSTER_SLOTS) - { - addReplyError(c,"Invalid or out of range slot"); - return -1; - } - return (int) slot; -} - -ConnectionType *connTypeOfCluster(void) { - if (server.tls_cluster) { - return connectionTypeTls(); - } - - return connectionTypeTcp(); -} - -/* ----------------------------------------------------------------------------- - * DUMP, RESTORE and MIGRATE commands - * -------------------------------------------------------------------------- */ - -/* Generates a DUMP-format representation of the object 'o', adding it to the - * io stream pointed by 'rio'. This function can't fail. */ -void createDumpPayload(rio *payload, robj *o, robj *key, int dbid, int skip_checksum) { - unsigned char buf[2]; - uint64_t crc = 0; - - /* Serialize the object in an RDB-like format. It consist of an object type - * byte followed by the serialized object. This is understood by RESTORE. */ - rioInitWithBuffer(payload,sdsempty()); - - /* Save key metadata if present without (handles TTL separately via command args) */ - if (getModuleMetaBits(o->metabits)) - serverAssert(rdbSaveKeyMetadata(payload, key, o, dbid) != -1); - serverAssert(rdbSaveObjectType(payload,o)); - serverAssert(rdbSaveObject(payload,o,key,dbid)); - - /* Write the footer, this is how it looks like: - * ----------------+---------------------+---------------+ - * ... RDB payload | 2 bytes RDB version | 8 bytes CRC64 | - * ----------------+---------------------+---------------+ - * RDB version and CRC are both in little endian. - */ - - /* RDB version */ - buf[0] = RDB_VERSION & 0xff; - buf[1] = (RDB_VERSION >> 8) & 0xff; - payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,buf,2); - - /* If crc checksum is disabled, crc is set to 0 and no checksum validation - * will be performed on RESTORE. */ - if (!skip_checksum) { - /* CRC64 */ - crc = crc64(0,(unsigned char*)payload->io.buffer.ptr, - sdslen(payload->io.buffer.ptr)); - memrev64ifbe(&crc); - } - payload->io.buffer.ptr = sdscatlen(payload->io.buffer.ptr,&crc,8); -} - -/* Verify that the RDB version of the dump payload matches the one of this Redis - * instance and that the checksum is ok. - * If the DUMP payload looks valid C_OK is returned, otherwise C_ERR - * is returned. If rdbver_ptr is not NULL, its populated with the value read - * from the input buffer. */ -int verifyDumpPayload(unsigned char *p, size_t len, uint16_t *rdbver_ptr) { - unsigned char *footer; - uint16_t rdbver; - uint64_t crc; - - /* At least 2 bytes of RDB version and 8 of CRC64 should be present. */ - if (len < 10) return C_ERR; - footer = p+(len-10); - - /* Set and verify RDB version. */ - rdbver = (footer[1] << 8) | footer[0]; - if (rdbver_ptr) { - *rdbver_ptr = rdbver; - } - if (rdbver > RDB_VERSION) return C_ERR; - - if (server.skip_checksum_validation) - return C_OK; - - uint64_t crc_payload; - memcpy(&crc_payload, footer+2, 8); - if (crc_payload == 0) /* No checksum. */ - return C_OK; - - /* Verify CRC64 */ - crc = crc64(0,p,len-8); - memrev64ifbe(&crc); - return crc == crc_payload ? C_OK : C_ERR; -} - -/* DUMP keyname - * DUMP is actually not used by Redis Cluster but it is the obvious - * complement of RESTORE and can be useful for different applications. */ -void dumpCommand(client *c) { - kvobj *o; - rio payload; - - /* Check if the key is here. */ - if ((o = lookupKeyRead(c->db,c->argv[1])) == NULL) { - addReplyNull(c); - return; - } - - /* Create the DUMP encoded representation. */ - createDumpPayload(&payload,o,c->argv[1],c->db->id,0); - - /* Transfer to the client */ - addReplyBulkSds(c,payload.io.buffer.ptr); - return; -} - -/* RESTORE key ttl serialized-value [REPLACE] [ABSTTL] [IDLETIME seconds] [FREQ frequency] */ -void restoreCommand(client *c) { - long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1; - rio payload; - int j, type, replace = 0, absttl = 0; - robj *obj; - - /* Parse additional options */ - for (j = 4; j < c->argc; j++) { - int additional = c->argc-j-1; - if (!strcasecmp(c->argv[j]->ptr,"replace")) { - replace = 1; - } else if (!strcasecmp(c->argv[j]->ptr,"absttl")) { - absttl = 1; - } else if (!strcasecmp(c->argv[j]->ptr,"idletime") && additional >= 1 && - lfu_freq == -1) - { - if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lru_idle,NULL) - != C_OK) return; - if (lru_idle < 0) { - addReplyError(c,"Invalid IDLETIME value, must be >= 0"); - return; - } - lru_clock = LRU_CLOCK(); - j++; /* Consume additional arg. */ - } else if (!strcasecmp(c->argv[j]->ptr,"freq") && additional >= 1 && - lru_idle == -1) - { - if (getLongLongFromObjectOrReply(c,c->argv[j+1],&lfu_freq,NULL) - != C_OK) return; - if (lfu_freq < 0 || lfu_freq > 255) { - addReplyError(c,"Invalid FREQ value, must be >= 0 and <= 255"); - return; - } - j++; /* Consume additional arg. */ - } else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - } - - /* Make sure this key does not already exist here... */ - robj *key = c->argv[1]; - kvobj *oldval = lookupKeyWrite(c->db,key); - int oldtype = oldval ? oldval->type : -1; - if (!replace && oldval) { - addReplyErrorObject(c,shared.busykeyerr); - return; - } - - /* Check if the TTL value makes sense */ - if (getLongLongFromObjectOrReply(c,c->argv[2],&ttl,NULL) != C_OK) { - return; - } else if (ttl < 0) { - addReplyError(c,"Invalid TTL value, must be >= 0"); - return; - } - - /* Verify RDB version and data checksum. */ - if (verifyDumpPayload(c->argv[3]->ptr,sdslen(c->argv[3]->ptr),NULL) == C_ERR) - { - addReplyError(c,"DUMP payload version or checksum are wrong"); - return; - } - - rioInitWithBuffer(&payload,c->argv[3]->ptr); - - /* Initialize metadata spec to collect metadata+expiry from payload. */ - KeyMetaSpec keymeta; - keyMetaSpecInit(&keymeta); - - /* Compute TTL early so we can add it to metadata spec in correct order */ - if (ttl) { - if (!absttl) ttl+=commandTimeSnapshot(); - keyMetaSpecAdd(&keymeta, KEY_META_ID_EXPIRE, ttl); - } - - /* With metadata, type = RDB_OPCODE_KEY_META. Layout: [,],, */ - type = rdbLoadType(&payload); - if (rdbResolveKeyType(&payload, &type, c->db->id, &keymeta) == -1) { - addReplyError(c,"Bad data format"); - return; - } - - /* Load the object */ - if ((obj = rdbLoadObject(type,&payload,key->ptr,c->db->id,NULL)) == NULL) - { - keyMetaSpecCleanup(&keymeta); - addReplyError(c,"Bad data format"); - return; - } - - /* Remove the old key if needed. */ - int deleted = 0; - if (replace) - deleted = dbDelete(c->db,key); - - if (ttl && checkAlreadyExpired(ttl)) { - if (deleted) { - robj *aux = server.lazyfree_lazy_server_del ? shared.unlink : shared.del; - rewriteClientCommandVector(c, 2, aux, key); - keyModified(c,c->db,key,NULL,1); - notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); - server.dirty++; - } - keyMetaSpecCleanup(&keymeta); - decrRefCount(obj); - addReply(c, shared.ok); - return; - } - - /* Create the key and set the TTL if any */ - kvobj *kv = dbAddInternal(c->db, key, &obj, NULL, &keymeta); - - /* If minExpiredField was set, then the object is hash with expiration - * on fields and need to register it in global HFE DS */ - if (kv->type == OBJ_HASH) { - uint64_t minExpiredField = hashTypeGetMinExpire(kv, 1); - if (minExpiredField != EB_EXPIRE_TIME_INVALID) - estoreAdd(c->db->subexpires, getKeySlot(key->ptr), kv, minExpiredField); - } - - if (ttl) { - if (!absttl) { - /* Propagate TTL as absolute timestamp */ - robj *ttl_obj = createStringObjectFromLongLong(ttl); - rewriteClientCommandArgument(c,2,ttl_obj); - decrRefCount(ttl_obj); - rewriteClientCommandArgument(c,c->argc,shared.absttl); - } - } - objectSetLRUOrLFU(kv, lfu_freq, lru_idle, lru_clock, 1000); - keyModified(c,c->db,key,NULL,1); - notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",key,c->db->id); - - /* If we deleted a key that means REPLACE parameter was passed and the - * destination key existed. */ - if (deleted) { - notifyKeyspaceEvent(NOTIFY_OVERWRITTEN, "overwritten", key, c->db->id); - if (oldtype != kv->type) { - notifyKeyspaceEvent(NOTIFY_TYPE_CHANGED, "type_changed", key, c->db->id); - } - } - addReply(c,shared.ok); - server.dirty++; -} -/* MIGRATE socket cache implementation. - * - * We take a map between host:ip and a TCP socket that we used to connect - * to this instance in recent time. - * This sockets are closed when the max number we cache is reached, and also - * in serverCron() when they are around for more than a few seconds. */ -#define MIGRATE_SOCKET_CACHE_ITEMS 64 /* max num of items in the cache. */ -#define MIGRATE_SOCKET_CACHE_TTL 10 /* close cached sockets after 10 sec. */ - -typedef struct migrateCachedSocket { - connection *conn; - long last_dbid; - time_t last_use_time; -} migrateCachedSocket; - -/* Return a migrateCachedSocket containing a TCP socket connected with the - * target instance, possibly returning a cached one. - * - * This function is responsible of sending errors to the client if a - * connection can't be established. In this case -1 is returned. - * Otherwise on success the socket is returned, and the caller should not - * attempt to free it after usage. - * - * If the caller detects an error while using the socket, migrateCloseSocket() - * should be called so that the connection will be created from scratch - * the next time. */ -migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long timeout) { - connection *conn; - sds name = sdsempty(); - migrateCachedSocket *cs; - - /* Check if we have an already cached socket for this ip:port pair. */ - name = sdscatlen(name,host->ptr,sdslen(host->ptr)); - name = sdscatlen(name,":",1); - name = sdscatlen(name,port->ptr,sdslen(port->ptr)); - cs = dictFetchValue(server.migrate_cached_sockets,name); - if (cs) { - sdsfree(name); - cs->last_use_time = server.unixtime; - return cs; - } - - /* No cached socket, create one. */ - if (dictSize(server.migrate_cached_sockets) == MIGRATE_SOCKET_CACHE_ITEMS) { - /* Too many items, drop one at random. */ - dictEntry *de = dictGetRandomKey(server.migrate_cached_sockets); - cs = dictGetVal(de); - connClose(cs->conn); - zfree(cs); - dictDelete(server.migrate_cached_sockets,dictGetKey(de)); - } - - /* Create the connection */ - conn = connCreate(server.el, connTypeOfCluster()); - if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout) - != C_OK) { - addReplyError(c,"-IOERR error or timeout connecting to the client"); - connClose(conn); - sdsfree(name); - return NULL; - } - connEnableTcpNoDelay(conn); - - /* Add to the cache and return it to the caller. */ - cs = zmalloc(sizeof(*cs)); - cs->conn = conn; - - cs->last_dbid = -1; - cs->last_use_time = server.unixtime; - dictAdd(server.migrate_cached_sockets,name,cs); - return cs; -} - -/* Free a migrate cached connection. */ -void migrateCloseSocket(robj *host, robj *port) { - sds name = sdsempty(); - migrateCachedSocket *cs; - - name = sdscatlen(name,host->ptr,sdslen(host->ptr)); - name = sdscatlen(name,":",1); - name = sdscatlen(name,port->ptr,sdslen(port->ptr)); - cs = dictFetchValue(server.migrate_cached_sockets,name); - if (!cs) { - sdsfree(name); - return; - } - - connClose(cs->conn); - zfree(cs); - dictDelete(server.migrate_cached_sockets,name); - sdsfree(name); -} - -void migrateCloseTimedoutSockets(void) { - dictIterator di; - dictEntry *de; - - dictInitSafeIterator(&di, server.migrate_cached_sockets); - while((de = dictNext(&di)) != NULL) { - migrateCachedSocket *cs = dictGetVal(de); - - if ((server.unixtime - cs->last_use_time) > MIGRATE_SOCKET_CACHE_TTL) { - connClose(cs->conn); - zfree(cs); - dictDelete(server.migrate_cached_sockets,dictGetKey(de)); - } - } - dictResetIterator(&di); -} - -/* MIGRATE host port key dbid timeout [COPY | REPLACE | AUTH password | - * AUTH2 username password] - * - * On in the multiple keys form: - * - * MIGRATE host port "" dbid timeout [COPY | REPLACE | AUTH password | - * AUTH2 username password] KEYS key1 key2 ... keyN */ -void migrateCommand(client *c) { - migrateCachedSocket *cs; - int copy = 0, replace = 0, j; - char *username = NULL; - char *password = NULL; - long timeout; - long dbid; - robj **kvArray = NULL; /* Objects to migrate. */ - robj **keyArray = NULL; /* Key names. */ - robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */ - rio cmd, payload; - int may_retry = 1; - int write_error = 0; - int argv_rewritten = 0; - - /* To support the KEYS option we need the following additional state. */ - int first_key = 3; /* Argument index of the first key. */ - int num_keys = 1; /* By default only migrate the 'key' argument. */ - - /* Parse additional options */ - for (j = 6; j < c->argc; j++) { - int moreargs = (c->argc-1) - j; - if (!strcasecmp(c->argv[j]->ptr,"copy")) { - copy = 1; - } else if (!strcasecmp(c->argv[j]->ptr,"replace")) { - replace = 1; - } else if (!strcasecmp(c->argv[j]->ptr,"auth")) { - if (!moreargs) { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - j++; - password = c->argv[j]->ptr; - redactClientCommandArgument(c,j); - } else if (!strcasecmp(c->argv[j]->ptr,"auth2")) { - if (moreargs < 2) { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - username = c->argv[++j]->ptr; - redactClientCommandArgument(c,j); - password = c->argv[++j]->ptr; - redactClientCommandArgument(c,j); - } else if (!strcasecmp(c->argv[j]->ptr,"keys")) { - if (sdslen(c->argv[3]->ptr) != 0) { - addReplyError(c, - "When using MIGRATE KEYS option, the key argument" - " must be set to the empty string"); - return; - } - first_key = j+1; - num_keys = c->argc - j - 1; - break; /* All the remaining args are keys. */ - } else { - addReplyErrorObject(c,shared.syntaxerr); - return; - } - } - - /* Sanity check */ - if (getLongFromObjectOrReply(c,c->argv[5],&timeout,NULL) != C_OK || - getLongFromObjectOrReply(c,c->argv[4],&dbid,NULL) != C_OK) - { - return; - } - if (timeout <= 0) timeout = 1000; - - /* Check if the keys are here. If at least one key is to migrate, do it - * otherwise if all the keys are missing reply with "NOKEY" to signal - * the caller there was nothing to migrate. We don't return an error in - * this case, since often this is due to a normal condition like the key - * expiring in the meantime. */ - kvArray = zrealloc(kvArray,sizeof(kvobj*)*num_keys); - keyArray = zrealloc(keyArray,sizeof(robj*)*num_keys); - int num_exists = 0; - - for (j = 0; j < num_keys; j++) { - if ((kvArray[num_exists] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) { - keyArray[num_exists] = c->argv[first_key+j]; - num_exists++; - } - } - num_keys = num_exists; - if (num_keys == 0) { - zfree(kvArray); zfree(keyArray); - addReplySds(c,sdsnew("+NOKEY\r\n")); - return; - } - - try_again: - write_error = 0; - - /* Connect */ - cs = migrateGetSocket(c,c->argv[1],c->argv[2],timeout); - if (cs == NULL) { - zfree(kvArray); zfree(keyArray); - return; /* error sent to the client by migrateGetSocket() */ - } - - rioInitWithBuffer(&cmd,sdsempty()); - - /* Authentication */ - if (password) { - int arity = username ? 3 : 2; - serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',arity)); - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"AUTH",4)); - if (username) { - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,username, - sdslen(username))); - } - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,password, - sdslen(password))); - } - - /* Send the SELECT command if the current DB is not already selected. */ - int select = cs->last_dbid != dbid; /* Should we emit SELECT? */ - if (select) { - serverAssertWithInfo(c,NULL,rioWriteBulkCount(&cmd,'*',2)); - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"SELECT",6)); - serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,dbid)); - } - - int non_expired = 0; /* Number of keys that we'll find non expired. - Note that serializing large keys may take some time - so certain keys that were found non expired by the - lookupKey() function, may be expired later. */ - - /* Create RESTORE payload and generate the protocol to call the command. */ - for (j = 0; j < num_keys; j++) { - long long ttl = 0; - long long expireat = kvobjGetExpire(kvArray[j]); - - if (expireat != -1) { - ttl = expireat-commandTimeSnapshot(); - if (ttl < 0) { - continue; - } - if (ttl < 1) ttl = 1; - } - - /* Relocate valid (non expired) keys and values into the array in successive - * positions to remove holes created by the keys that were present - * in the first lookup but are now expired after the second lookup. */ - kvArray[non_expired] = kvArray[j]; - keyArray[non_expired++] = keyArray[j]; - - serverAssertWithInfo(c,NULL, - rioWriteBulkCount(&cmd,'*',replace ? 5 : 4)); - - if (server.cluster_enabled) - serverAssertWithInfo(c,NULL, - rioWriteBulkString(&cmd,"RESTORE-ASKING",14)); - else - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7)); - serverAssertWithInfo(c,NULL,sdsEncodedObject(keyArray[j])); - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,keyArray[j]->ptr, - sdslen(keyArray[j]->ptr))); - serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl)); - - /* Emit the payload argument, that is the serialized object using - * the DUMP format. */ - createDumpPayload(&payload,kvArray[j],keyArray[j],dbid,0); - serverAssertWithInfo(c,NULL, - rioWriteBulkString(&cmd,payload.io.buffer.ptr, - sdslen(payload.io.buffer.ptr))); - sdsfree(payload.io.buffer.ptr); - - /* Add the REPLACE option to the RESTORE command if it was specified - * as a MIGRATE option. */ - if (replace) - serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"REPLACE",7)); - } - - /* Fix the actual number of keys we are migrating. */ - num_keys = non_expired; - - /* Transfer the query to the other node in 64K chunks. */ - errno = 0; - { - sds buf = cmd.io.buffer.ptr; - size_t pos = 0, towrite; - int nwritten = 0; - - while ((towrite = sdslen(buf)-pos) > 0) { - towrite = (towrite > (64*1024) ? (64*1024) : towrite); - nwritten = connSyncWrite(cs->conn,buf+pos,towrite,timeout); - if (nwritten != (signed)towrite) { - write_error = 1; - goto socket_err; - } - pos += nwritten; - } - } - - char buf0[1024]; /* Auth reply. */ - char buf1[1024]; /* Select reply. */ - char buf2[1024]; /* Restore reply. */ - - /* Read the AUTH reply if needed. */ - if (password && connSyncReadLine(cs->conn, buf0, sizeof(buf0), timeout) <= 0) - goto socket_err; - - /* Read the SELECT reply if needed. */ - if (select && connSyncReadLine(cs->conn, buf1, sizeof(buf1), timeout) <= 0) - goto socket_err; - - /* Read the RESTORE replies. */ - int error_from_target = 0; - int socket_error = 0; - int del_idx = 1; /* Index of the key argument for the replicated DEL op. */ - - /* Allocate the new argument vector that will replace the current command, - * to propagate the MIGRATE as a DEL command (if no COPY option was given). - * We allocate num_keys+1 because the additional argument is for "DEL" - * command name itself. */ - if (!copy) newargv = zmalloc(sizeof(robj*)*(num_keys+1)); - - for (j = 0; j < num_keys; j++) { - if (connSyncReadLine(cs->conn, buf2, sizeof(buf2), timeout) <= 0) { - socket_error = 1; - break; - } - if ((password && buf0[0] == '-') || - (select && buf1[0] == '-') || - buf2[0] == '-') - { - /* On error assume that last_dbid is no longer valid. */ - if (!error_from_target) { - cs->last_dbid = -1; - char *errbuf; - if (password && buf0[0] == '-') errbuf = buf0; - else if (select && buf1[0] == '-') errbuf = buf1; - else errbuf = buf2; - - error_from_target = 1; - addReplyErrorFormat(c,"Target instance replied with error: %s", - errbuf+1); - } - } else { - if (!copy) { - /* No COPY option: remove the local key, signal the change. */ - dbDelete(c->db,keyArray[j]); - keyModified(c,c->db,keyArray[j],NULL,1); - notifyKeyspaceEvent(NOTIFY_GENERIC,"del",keyArray[j],c->db->id); - server.dirty++; - - /* Populate the argument vector to replace the old one. */ - newargv[del_idx++] = keyArray[j]; - incrRefCount(keyArray[j]); - } - } - } - - /* On socket error, if we want to retry, do it now before rewriting the - * command vector. We only retry if we are sure nothing was processed - * and we failed to read the first reply (j == 0 test). */ - if (!error_from_target && socket_error && j == 0 && may_retry && - errno != ETIMEDOUT) - { - goto socket_err; /* A retry is guaranteed because of tested conditions.*/ - } - - /* On socket errors, close the migration socket now that we still have - * the original host/port in the ARGV. Later the original command may be - * rewritten to DEL and will be too later. */ - if (socket_error) migrateCloseSocket(c->argv[1],c->argv[2]); - - if (!copy) { - /* Translate MIGRATE as DEL for replication/AOF. Note that we do - * this only for the keys for which we received an acknowledgement - * from the receiving Redis server, by using the del_idx index. */ - if (del_idx > 1) { - newargv[0] = createStringObject("DEL",3); - /* Note that the following call takes ownership of newargv. */ - replaceClientCommandVector(c,del_idx,newargv); - argv_rewritten = 1; - } else { - /* No key transfer acknowledged, no need to rewrite as DEL. */ - zfree(newargv); - } - newargv = NULL; /* Make it safe to call zfree() on it in the future. */ - } - - /* If we are here and a socket error happened, we don't want to retry. - * Just signal the problem to the client, but only do it if we did not - * already queue a different error reported by the destination server. */ - if (!error_from_target && socket_error) { - may_retry = 0; - goto socket_err; - } - - if (!error_from_target) { - /* Success! Update the last_dbid in migrateCachedSocket, so that we can - * avoid SELECT the next time if the target DB is the same. Reply +OK. - * - * Note: If we reached this point, even if socket_error is true - * still the SELECT command succeeded (otherwise the code jumps to - * socket_err label. */ - cs->last_dbid = dbid; - addReply(c,shared.ok); - } else { - /* On error we already sent it in the for loop above, and set - * the currently selected socket to -1 to force SELECT the next time. */ - } - - sdsfree(cmd.io.buffer.ptr); - zfree(kvArray); zfree(keyArray); zfree(newargv); - return; - -/* On socket errors we try to close the cached socket and try again. - * It is very common for the cached socket to get closed, if just reopening - * it works it's a shame to notify the error to the caller. */ - socket_err: - /* Cleanup we want to perform in both the retry and no retry case. - * Note: Closing the migrate socket will also force SELECT next time. */ - sdsfree(cmd.io.buffer.ptr); - - /* If the command was rewritten as DEL and there was a socket error, - * we already closed the socket earlier. While migrateCloseSocket() - * is idempotent, the host/port arguments are now gone, so don't do it - * again. */ - if (!argv_rewritten) migrateCloseSocket(c->argv[1],c->argv[2]); - zfree(newargv); - newargv = NULL; /* This will get reallocated on retry. */ - - /* Retry only if it's not a timeout and we never attempted a retry - * (or the code jumping here did not set may_retry to zero). */ - if (errno != ETIMEDOUT && may_retry) { - may_retry = 0; - goto try_again; - } - - /* Cleanup we want to do if no retry is attempted. */ - zfree(kvArray); zfree(keyArray); - addReplyErrorSds(c, sdscatprintf(sdsempty(), - "-IOERR error or timeout %s to target instance", - write_error ? "writing" : "reading")); - return; -} - -/* Cluster node sanity check. Returns C_OK if the node id - * is valid an C_ERR otherwise. */ -int verifyClusterNodeId(const char *name, int length) { - if (length != CLUSTER_NAMELEN) return C_ERR; - for (int i = 0; i < length; i++) { - if (name[i] >= 'a' && name[i] <= 'z') continue; - if (name[i] >= '0' && name[i] <= '9') continue; - return C_ERR; - } - return C_OK; -} - -int isValidAuxChar(int c) { - return isalnum(c) || (strchr("!#$%&()*+:;<>?@[]^{|}~", c) == NULL); -} - -int isValidAuxString(char *s, unsigned int length) { - for (unsigned i = 0; i < length; i++) { - if (!isValidAuxChar(s[i])) return 0; - } - return 1; -} - -void clusterCommandMyId(client *c) { - char *name = clusterNodeGetName(getMyClusterNode()); - if (name) { - addReplyBulkCBuffer(c,name, CLUSTER_NAMELEN); - } else { - addReplyError(c, "No ID yet"); - } -} - -char* getMyClusterId(void) { - return clusterNodeGetName(getMyClusterNode()); -} - -void clusterCommandMyShardId(client *c) { - char *sid = clusterNodeGetShardId(getMyClusterNode()); - if (sid) { - addReplyBulkCBuffer(c,sid, CLUSTER_NAMELEN); - } else { - addReplyError(c, "No shard ID yet"); - } -} - -/* When a cluster command is called, we need to decide whether to return TLS info or - * non-TLS info by the client's connection type. However if the command is called by - * a Lua script or RM_call, there is no connection in the fake client, so we use - * server.current_client here to get the real client if available. And if it is not - * available (modules may call commands without a real client), we return the default - * info, which is determined by server.tls_cluster. */ -static int shouldReturnTlsInfo(void) { - if (server.current_client && server.current_client->conn) { - return connIsTLS(server.current_client->conn); - } else { - return server.tls_cluster; - } -} - -unsigned int countKeysInSlot(unsigned int slot) { - return kvstoreDictSize(server.db->keys, slot); -} - -/* Add detailed information of a node to the output buffer of the given client. */ -void addNodeDetailsToShardReply(client *c, clusterNode *node) { - - int reply_count = 0; - char *hostname; - void *node_replylen = addReplyDeferredLen(c); - - addReplyBulkCString(c, "id"); - addReplyBulkCBuffer(c, clusterNodeGetName(node), CLUSTER_NAMELEN); - reply_count++; - - if (clusterNodeTcpPort(node)) { - addReplyBulkCString(c, "port"); - addReplyLongLong(c, clusterNodeTcpPort(node)); - reply_count++; - } - - if (clusterNodeTlsPort(node)) { - addReplyBulkCString(c, "tls-port"); - addReplyLongLong(c, clusterNodeTlsPort(node)); - reply_count++; - } - - addReplyBulkCString(c, "ip"); - addReplyBulkCString(c, clusterNodeIp(node)); - reply_count++; - - addReplyBulkCString(c, "endpoint"); - addReplyBulkCString(c, clusterNodePreferredEndpoint(node)); - reply_count++; - - hostname = clusterNodeHostname(node); - if (hostname != NULL && *hostname != '\0') { - addReplyBulkCString(c, "hostname"); - addReplyBulkCString(c, hostname); - reply_count++; - } - - long long node_offset; - if (clusterNodeIsMyself(node)) { - node_offset = clusterNodeIsSlave(node) ? replicationGetSlaveOffset() : server.master_repl_offset; - } else { - node_offset = clusterNodeReplOffset(node); - } - - addReplyBulkCString(c, "role"); - addReplyBulkCString(c, clusterNodeIsSlave(node) ? "replica" : "master"); - reply_count++; - - addReplyBulkCString(c, "replication-offset"); - addReplyLongLong(c, node_offset); - reply_count++; - - addReplyBulkCString(c, "health"); - const char *health_msg = NULL; - if (clusterNodeIsFailing(node)) { - health_msg = "fail"; - } else if (clusterNodeIsSlave(node) && node_offset == 0) { - health_msg = "loading"; - } else { - health_msg = "online"; - } - addReplyBulkCString(c, health_msg); - reply_count++; - - setDeferredMapLen(c, node_replylen, reply_count); -} - -static clusterNode *clusterGetMasterFromShard(void *shard_handle) { - clusterNode *n = NULL; - void *node_it = clusterShardHandleGetNodeIterator(shard_handle); - while((n = clusterShardNodeIteratorNext(node_it)) != NULL) { - if (!clusterNodeIsFailing(n)) { - break; - } - } - clusterShardNodeIteratorFree(node_it); - if (!n) return NULL; - return clusterNodeGetMaster(n); -} - -/* Add the shard reply of a single shard based off the given primary node. */ -void addShardReplyForClusterShards(client *c, void *shard_handle) { - serverAssert(clusterGetShardNodeCount(shard_handle) > 0); - addReplyMapLen(c, 2); - addReplyBulkCString(c, "slots"); - - /* Use slot_info_pairs from the primary only */ - clusterNode *master_node = clusterGetMasterFromShard(shard_handle); - - if (master_node && clusterNodeHasSlotInfo(master_node)) { - serverAssert((clusterNodeSlotInfoCount(master_node) % 2) == 0); - addReplyArrayLen(c, clusterNodeSlotInfoCount(master_node)); - for (int i = 0; i < clusterNodeSlotInfoCount(master_node); i++) - addReplyLongLong(c, (unsigned long)clusterNodeSlotInfoEntry(master_node, i)); - } else { - /* If no slot info pair is provided, the node owns no slots */ - addReplyArrayLen(c, 0); - } - - addReplyBulkCString(c, "nodes"); - addReplyArrayLen(c, clusterGetShardNodeCount(shard_handle)); - void *node_it = clusterShardHandleGetNodeIterator(shard_handle); - for (clusterNode *n = clusterShardNodeIteratorNext(node_it); n != NULL; n = clusterShardNodeIteratorNext(node_it)) { - addNodeDetailsToShardReply(c, n); - clusterFreeNodesSlotsInfo(n); - } - clusterShardNodeIteratorFree(node_it); -} - -/* Add to the output buffer of the given client, an array of slot (start, end) - * pair owned by the shard, also the primary and set of replica(s) along with - * information about each node. */ -void clusterCommandShards(client *c) { - addReplyArrayLen(c, clusterGetShardCount()); - /* This call will add slot_info_pairs to all nodes */ - clusterGenNodesSlotsInfo(0); - dictIterator *shard_it = clusterGetShardIterator(); - for(void *shard_handle = clusterNextShardHandle(shard_it); shard_handle != NULL; shard_handle = clusterNextShardHandle(shard_it)) { - addShardReplyForClusterShards(c, shard_handle); - } - clusterFreeShardIterator(shard_it); -} - -void clusterCommandHelp(client *c) { - const char *help[] = { - "COUNTKEYSINSLOT ", - " Return the number of keys in .", - "GETKEYSINSLOT ", - " Return key names stored by current node in a slot.", - "INFO", - " Return information about the cluster.", - "KEYSLOT ", - " Return the hash slot for .", - "MYID", - " Return the node id.", - "MYSHARDID", - " Return the node's shard id.", - "NODES", - " Return cluster configuration seen by node. Output format:", - " ...", - "REPLICAS ", - " Return replicas.", - "SLOTS", - " Return information about slots range mappings. Each range is made of:", - " start, end, master and replicas IP addresses, ports and ids", - "SLOT-STATS", - " Return an array of slot usage statistics for slots assigned to the current node.", - "SHARDS", - " Return information about slot range mappings and the nodes associated with them.", - NULL - }; - - addExtendedReplyHelp(c, help, clusterCommandExtendedHelp()); -} - -void clusterCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; - } - - if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { - clusterCommandHelp(c); - } else if (!strcasecmp(c->argv[1]->ptr,"nodes") && c->argc == 2) { - /* CLUSTER NODES */ - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ - sds nodes = clusterGenNodesDescription(c, 0, shouldReturnTlsInfo()); - addReplyVerbatim(c,nodes,sdslen(nodes),"txt"); - sdsfree(nodes); - } else if (!strcasecmp(c->argv[1]->ptr,"myid") && c->argc == 2) { - /* CLUSTER MYID */ - clusterCommandMyId(c); - } else if (!strcasecmp(c->argv[1]->ptr,"myshardid") && c->argc == 2) { - /* CLUSTER MYSHARDID */ - clusterCommandMyShardId(c); - } else if (!strcasecmp(c->argv[1]->ptr,"slots") && c->argc == 2) { - /* CLUSTER SLOTS */ - clusterCommandSlots(c); - } else if (!strcasecmp(c->argv[1]->ptr,"shards") && c->argc == 2) { - /* CLUSTER SHARDS */ - clusterCommandShards(c); - } else if (!strcasecmp(c->argv[1]->ptr,"info") && c->argc == 2) { - /* CLUSTER INFO */ - - sds info = genClusterInfoString(); - - /* Produce the reply protocol. */ - addReplyVerbatim(c,info,sdslen(info),"txt"); - sdsfree(info); - } else if (!strcasecmp(c->argv[1]->ptr,"keyslot") && c->argc == 3) { - /* CLUSTER KEYSLOT */ - sds key = c->argv[2]->ptr; - - addReplyLongLong(c,keyHashSlot(key,sdslen(key))); - } else if (!strcasecmp(c->argv[1]->ptr,"countkeysinslot") && c->argc == 3) { - /* CLUSTER COUNTKEYSINSLOT */ - long long slot; - - if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) - return; - if (slot < 0 || slot >= CLUSTER_SLOTS) { - addReplyError(c,"Invalid slot"); - return; - } - - if (!clusterCanAccessKeysInSlot(slot)) { - addReplyLongLong(c, 0); - return; - } - addReplyLongLong(c,countKeysInSlot(slot)); - } else if (!strcasecmp(c->argv[1]->ptr,"getkeysinslot") && c->argc == 4) { - /* CLUSTER GETKEYSINSLOT */ - long long maxkeys, slot; - - if (getLongLongFromObjectOrReply(c,c->argv[2],&slot,NULL) != C_OK) - return; - if (getLongLongFromObjectOrReply(c,c->argv[3],&maxkeys,NULL) - != C_OK) - return; - if (slot < 0 || slot >= CLUSTER_SLOTS || maxkeys < 0) { - addReplyError(c,"Invalid slot or number of keys"); - return; - } - - if (!clusterCanAccessKeysInSlot(slot)) { - addReplyArrayLen(c, 0); - return; - } - - unsigned int keys_in_slot = countKeysInSlot(slot); - unsigned int numkeys = maxkeys > keys_in_slot ? keys_in_slot : maxkeys; - addReplyArrayLen(c,numkeys); - kvstoreDictIterator kvs_di; - dictEntry *de = NULL; - kvstoreInitDictIterator(&kvs_di, server.db->keys, slot); - for (unsigned int i = 0; i < numkeys; i++) { - de = kvstoreDictIteratorNext(&kvs_di); - serverAssert(de != NULL); - sds sdskey = kvobjGetKey(dictGetKV(de)); - addReplyBulkCBuffer(c, sdskey, sdslen(sdskey)); - } - kvstoreResetDictIterator(&kvs_di); - } else if ((!strcasecmp(c->argv[1]->ptr,"slaves") || - !strcasecmp(c->argv[1]->ptr,"replicas")) && c->argc == 3) { - /* CLUSTER SLAVES */ - /* CLUSTER REPLICAS */ - clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr)); - int j; - - /* Lookup the specified node in our table. */ - if (!n) { - addReplyErrorFormat(c,"Unknown node %s", (char*)c->argv[2]->ptr); - return; - } - - if (clusterNodeIsSlave(n)) { - addReplyError(c,"The specified node is not a master"); - return; - } - - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ - addReplyArrayLen(c, clusterNodeNumSlaves(n)); - for (j = 0; j < clusterNodeNumSlaves(n); j++) { - sds ni = clusterGenNodeDescription(c, clusterNodeGetSlave(n, j), shouldReturnTlsInfo()); - addReplyBulkCString(c,ni); - sdsfree(ni); - } - } else if (!strcasecmp(c->argv[1]->ptr, "migration")) { - clusterMigrationCommand(c); - } else if (!strcasecmp(c->argv[1]->ptr,"syncslots") && c->argc >= 3) { - clusterSyncSlotsCommand(c); - } else if(!clusterCommandSpecial(c)) { - addReplySubcommandSyntaxError(c); - return; - } -} - -/* Extract slot number from keys in a keys_result structure and return to caller. - * Returns: - * - The slot number if all keys belong to the same slot - * - INVALID_CLUSTER_SLOT if there are no keys or cluster is disabled - * - CLUSTER_CROSSSLOT if keys belong to different slots (cross-slot error) */ -int extractSlotFromKeysResult(robj **argv, getKeysResult *keys_result) { - if (keys_result->numkeys == 0 || !server.cluster_enabled) - return INVALID_CLUSTER_SLOT; - - int first_slot = INVALID_CLUSTER_SLOT; - for (int j = 0; j < keys_result->numkeys; j++) { - robj *this_key = argv[keys_result->keys[j].pos]; - int this_slot = (int)keyHashSlot((char*)this_key->ptr, sdslen(this_key->ptr)); - - if (first_slot == INVALID_CLUSTER_SLOT) - first_slot = this_slot; - else if (first_slot != this_slot) { - return CLUSTER_CROSSSLOT; - } - } - return first_slot; -} - -/* Return the pointer to the cluster node that is able to serve the command. - * For the function to succeed the command should only target either: - * - * 1) A single key (even multiple times like RPOPLPUSH mylist mylist). - * 2) Multiple keys in the same hash slot, while the slot is stable (no - * resharding in progress). - * - * On success the function returns the node that is able to serve the request. - * If the node is not 'myself' a redirection must be performed. The kind of - * redirection is specified setting the integer passed by reference - * 'error_code', which will be set to CLUSTER_REDIR_ASK or - * CLUSTER_REDIR_MOVED. - * - * When the node is 'myself' 'error_code' is set to CLUSTER_REDIR_NONE. - * - * If the command fails NULL is returned, and the reason of the failure is - * provided via 'error_code', which will be set to: - * - * CLUSTER_REDIR_CROSS_SLOT if the request contains multiple keys that - * don't belong to the same hash slot. - * - * CLUSTER_REDIR_UNSTABLE if the request contains multiple keys - * belonging to the same slot, but the slot is not stable (in migration or - * importing state, likely because a resharding is in progress). - * - * CLUSTER_REDIR_DOWN_UNBOUND if the request addresses a slot which is - * not bound to any node. In this case the cluster global state should be - * already "down" but it is fragile to rely on the update of the global state, - * so we also handle it here. - * - * CLUSTER_REDIR_DOWN_STATE and CLUSTER_REDIR_DOWN_RO_STATE if the cluster is - * down but the user attempts to execute a command that addresses one or more keys. */ -clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, - getKeysResult *keys_result, uint8_t read_error, uint64_t cmd_flags, int *error_code) -{ - clusterNode *myself = getMyClusterNode(); - clusterNode *n = NULL; - robj *firstkey = NULL; - int multiple_keys = 0; - multiState *ms, _ms; - pendingCommand mc; - pendingCommand *mcp = &mc; - int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0, - existing_keys = 0; - int pubsubshard_included = 0; /* Flag to indicate if a pubsub shard cmd is included. */ - - /* Allow any key to be set if a module disabled cluster redirections. */ - if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION) - return myself; - - /* Set error code optimistically for the base case. */ - if (error_code) *error_code = CLUSTER_REDIR_NONE; - - /* Modules can turn off Redis Cluster redirection: this is useful - * when writing a module that implements a completely different - * distributed system. */ - - /* We handle all the cases as if they were EXEC commands, so we have - * a common code path for everything */ - if (cmd->proc == execCommand) { - /* If CLIENT_MULTI flag is not set EXEC is just going to return an - * error. */ - if (!(c->flags & CLIENT_MULTI)) return myself; - ms = &c->mstate; - } else { - /* In order to have a single codepath create a fake Multi State - * structure if the client is not in MULTI/EXEC state, this way - * we have a single codepath below. */ - ms = &_ms; - _ms.commands = &mcp; - _ms.count = 1; - - /* Properly initialize the fake pendingCommand */ - initPendingCommand(&mc); - mc.argv = argv; - mc.argc = argc; - mc.cmd = cmd; - mc.slot = hashslot ? *hashslot : INVALID_CLUSTER_SLOT; - mc.read_error = read_error; - if (keys_result) { - mc.keys_result = *keys_result; - mc.flags |= PENDING_CMD_KEYS_RESULT_VALID; - } - } - - /* Check that all the keys are in the same hash slot, and obtain this - * slot and the node associated. */ - for (i = 0; i < ms->count; i++) { - struct redisCommand *mcmd; - robj **margv; - int margc, j; - keyReference *keyindex; - - pendingCommand *pcmd = ms->commands[i]; - - mcmd = pcmd->cmd; - margc = pcmd->argc; - margv = pcmd->argv; - - /* Only valid for sharded pubsub as regular pubsub can operate on any node and bypasses this layer. */ - if (!pubsubshard_included && - doesCommandHaveChannelsWithFlags(mcmd, CMD_CHANNEL_PUBLISH | CMD_CHANNEL_SUBSCRIBE)) - { - pubsubshard_included = 1; - } - - /* If we have a cached keys result from preprocessCommand(), use it. - * Otherwise, extract keys result. */ - int use_cache_keys_result = pcmd->flags & PENDING_CMD_KEYS_RESULT_VALID; - getKeysResult result = GETKEYS_RESULT_INIT; - if (use_cache_keys_result) - result = pcmd->keys_result; - else - getKeysFromCommand(mcmd,margv,margc,&result); - keyindex = result.keys; - - for (j = 0; j < result.numkeys; j++) { - /* The command has keys and was checked for cross-slot between its keys in preprocessCommand() */ - if (pcmd->read_error == CLIENT_READ_CROSS_SLOT) { - /* Error: multiple keys from different slots. */ - if (error_code) - *error_code = CLUSTER_REDIR_CROSS_SLOT; - return NULL; - } - - robj *thiskey = margv[keyindex[j].pos]; - int thisslot = pcmd->slot; - if (thisslot == INVALID_CLUSTER_SLOT) - thisslot = keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr)); - - if (firstkey == NULL) { - /* This is the first key we see. Check what is the slot - * and node. */ - firstkey = thiskey; - slot = thisslot; - n = getNodeBySlot(slot); - - /* Error: If a slot is not served, we are in "cluster down" - * state. However the state is yet to be updated, so this was - * not trapped earlier in processCommand(). Report the same - * error to the client. */ - if (n == NULL) { - if (!use_cache_keys_result) getKeysFreeResult(&result); - if (error_code) - *error_code = CLUSTER_REDIR_DOWN_UNBOUND; - return NULL; - } - - /* If we are migrating or importing this slot, we need to check - * if we have all the keys in the request (the only way we - * can safely serve the request, otherwise we return a TRYAGAIN - * error). To do so we set the importing/migrating state and - * increment a counter for every missing key. */ - if (n == myself && - getMigratingSlotDest(slot) != NULL) - { - migrating_slot = 1; - } else if (getImportingSlotSource(slot) != NULL) { - importing_slot = 1; - } - } else { - /* If it is not the first key/channel, make sure it is exactly - * the same key/channel as the first we saw. */ - if (slot != thisslot) { - /* Error: multiple keys from different slots. */ - if (!use_cache_keys_result) getKeysFreeResult(&result); - if (error_code) - *error_code = CLUSTER_REDIR_CROSS_SLOT; - return NULL; - } - if (importing_slot && !multiple_keys && !equalStringObjects(firstkey,thiskey)) { - /* Flag this request as one with multiple different - * keys/channels when the slot is in importing state. */ - multiple_keys = 1; - } - } - - /* Migrating / Importing slot? Count keys we don't have. - * If it is pubsubshard command, it isn't required to check - * the channel being present or not in the node during the - * slot migration, the channel will be served from the source - * node until the migration completes with CLUSTER SETSLOT - * NODE . */ - int flags = LOOKUP_NOTOUCH | LOOKUP_NOSTATS | LOOKUP_NONOTIFY | LOOKUP_NOEXPIRE; - if ((migrating_slot || importing_slot) && !pubsubshard_included) - { - if (lookupKeyReadWithFlags(&server.db[0], thiskey, flags) == NULL) missing_keys++; - else existing_keys++; - } - } - if (!use_cache_keys_result) getKeysFreeResult(&result); - } - - /* No key at all in command? then we can serve the request - * without redirections or errors in all the cases. */ - if (n == NULL) return myself; - - /* Cluster is globally down but we got keys? We only serve the request - * if it is a read command and when allow_reads_when_down is enabled. */ - if (!isClusterHealthy()) { - if (pubsubshard_included) { - if (!server.cluster_allow_pubsubshard_when_down) { - if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE; - return NULL; - } - } else if (!server.cluster_allow_reads_when_down) { - /* The cluster is configured to block commands when the - * cluster is down. */ - if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE; - return NULL; - } else if (cmd_flags & CMD_WRITE) { - /* The cluster is configured to allow read only commands */ - if (error_code) *error_code = CLUSTER_REDIR_DOWN_RO_STATE; - return NULL; - } else { - /* Fall through and allow the command to be executed: - * this happens when server.cluster_allow_reads_when_down is - * true and the command is not a write command */ - } - } - - /* Return the hashslot by reference. */ - if (hashslot) *hashslot = slot; - - /* MIGRATE always works in the context of the local node if the slot - * is open (migrating or importing state). We need to be able to freely - * move keys among instances in this case. */ - if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand) - return myself; - - /* If we don't have all the keys and we are migrating the slot, send - * an ASK redirection or TRYAGAIN. */ - if (migrating_slot && missing_keys) { - /* If we have keys but we don't have all keys, we return TRYAGAIN */ - if (existing_keys) { - if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE; - return NULL; - } else { - if (error_code) *error_code = CLUSTER_REDIR_ASK; - return getMigratingSlotDest(slot); - } - } - - /* If we are receiving the slot, and the client correctly flagged the - * request as "ASKING", we can serve the request. However if the request - * involves multiple keys and we don't have them all, the only option is - * to send a TRYAGAIN error. */ - if (importing_slot && - (c->flags & CLIENT_ASKING || cmd_flags & CMD_ASKING)) - { - if (multiple_keys && missing_keys) { - if (error_code) *error_code = CLUSTER_REDIR_UNSTABLE; - return NULL; - } else { - return myself; - } - } - - /* Handle the read-only client case reading from a slave: if this - * node is a slave and the request is about a hash slot our master - * is serving, we can reply without redirection. */ - int is_write_command = (cmd_flags & CMD_WRITE) || - (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE)); - if (((c->flags & CLIENT_READONLY) || pubsubshard_included) && - !is_write_command && - clusterNodeIsSlave(myself) && - clusterNodeGetSlaveof(myself) == n) - { - return myself; - } - - /* Base case: just return the right node. However, if this node is not - * myself, set error_code to MOVED since we need to issue a redirection. */ - if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED; - return n; -} - -/* Send the client the right redirection code, according to error_code - * that should be set to one of CLUSTER_REDIR_* macros. - * - * If CLUSTER_REDIR_ASK or CLUSTER_REDIR_MOVED error codes - * are used, then the node 'n' should not be NULL, but should be the - * node we want to mention in the redirection. Moreover hashslot should - * be set to the hash slot that caused the redirection. */ -void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code) { - if (error_code == CLUSTER_REDIR_CROSS_SLOT) { - addReplyError(c,"-CROSSSLOT Keys in request don't hash to the same slot"); - } else if (error_code == CLUSTER_REDIR_UNSTABLE) { - /* The request spawns multiple keys in the same slot, - * but the slot is not "stable" currently as there is - * a migration or import in progress. */ - addReplyError(c,"-TRYAGAIN Multiple keys request during rehashing of slot"); - } else if (error_code == CLUSTER_REDIR_DOWN_STATE) { - addReplyError(c,"-CLUSTERDOWN The cluster is down"); - } else if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { - addReplyError(c,"-CLUSTERDOWN The cluster is down and only accepts read commands"); - } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) { - addReplyError(c,"-CLUSTERDOWN Hash slot not served"); - } else if (error_code == CLUSTER_REDIR_MOVED || - error_code == CLUSTER_REDIR_ASK) - { - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ - int port = clusterNodeClientPort(n, shouldReturnTlsInfo()); - addReplyErrorSds(c,sdscatprintf(sdsempty(), - "-%s %d %s:%d", - (error_code == CLUSTER_REDIR_ASK) ? "ASK" : "MOVED", - hashslot, clusterNodePreferredEndpoint(n), port)); - } else { - serverPanic("getNodeByQuery() unknown error."); - } -} - -/* This function is called by the function processing clients incrementally - * to detect timeouts, in order to handle the following case: - * - * 1) A client blocks with BLPOP or similar blocking operation. - * 2) The master migrates the hash slot elsewhere or turns into a slave. - * 3) The client may remain blocked forever (or up to the max timeout time) - * waiting for a key change that will never happen. - * - * If the client is found to be blocked into a hash slot this node no - * longer handles, the client is sent a redirection error, and the function - * returns 1. Otherwise 0 is returned and no operation is performed. */ -int clusterRedirectBlockedClientIfNeeded(client *c) { - clusterNode *myself = getMyClusterNode(); - if (c->flags & CLIENT_BLOCKED && - (c->bstate.btype == BLOCKED_LIST || - c->bstate.btype == BLOCKED_ZSET || - c->bstate.btype == BLOCKED_STREAM || - c->bstate.btype == BLOCKED_MODULE)) - { - dictEntry *de; - dictIterator di; - - /* If the cluster is down, unblock the client with the right error. - * If the cluster is configured to allow reads on cluster down, we - * still want to emit this error since a write will be required - * to unblock them which may never come. */ - if (!isClusterHealthy()) { - clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE); - return 1; - } - - /* If the client is blocked on module, but not on a specific key, - * don't unblock it (except for the CLUSTER_FAIL case above). */ - if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) - return 0; - - /* All keys must belong to the same slot, so check first key only. */ - dictInitIterator(&di, c->bstate.keys); - if ((de = dictNext(&di)) != NULL) { - robj *key = dictGetKey(de); - int slot = keyHashSlot((char*)key->ptr, sdslen(key->ptr)); - clusterNode *node = getNodeBySlot(slot); - - /* if the client is read-only and attempting to access key that our - * replica can handle, allow it. */ - if ((c->flags & CLIENT_READONLY) && - !(c->lastcmd->flags & CMD_WRITE) && - clusterNodeIsSlave(myself) && clusterNodeGetSlaveof(myself) == node) - { - node = myself; - } - - /* We send an error and unblock the client if: - * 1) The slot is unassigned, emitting a cluster down error. - * 2) The slot is not handled by this node, nor being imported. */ - if (node != myself && getImportingSlotSource(slot) == NULL) - { - if (node == NULL) { - clusterRedirectClient(c,NULL,0, - CLUSTER_REDIR_DOWN_UNBOUND); - } else { - clusterRedirectClient(c,node,slot, - CLUSTER_REDIR_MOVED); - } - dictResetIterator(&di); - return 1; - } - } - dictResetIterator(&di); - } - return 0; -} - -/* Returns an indication if the replica node is fully available - * and should be listed in CLUSTER SLOTS response. - * Returns 1 for available nodes, 0 for nodes that have - * not finished their initial sync, in failed state, or are - * otherwise considered not available to serve read commands. */ -static int isReplicaAvailable(clusterNode *node) { - if (clusterNodeIsFailing(node)) { - return 0; - } - long long repl_offset = clusterNodeReplOffset(node); - if (clusterNodeIsMyself(node)) { - /* Nodes do not update their own information - * in the cluster node list. */ - repl_offset = replicationGetSlaveOffset(); - } - return (repl_offset != 0); -} - -void addNodeToNodeReply(client *c, clusterNode *node) { - char* hostname = clusterNodeHostname(node); - addReplyArrayLen(c, 4); - if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_IP) { - addReplyBulkCString(c, clusterNodeIp(node)); - } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_HOSTNAME) { - if (hostname != NULL && hostname[0] != '\0') { - addReplyBulkCString(c, hostname); - } else { - addReplyBulkCString(c, "?"); - } - } else if (server.cluster_preferred_endpoint_type == CLUSTER_ENDPOINT_TYPE_UNKNOWN_ENDPOINT) { - addReplyNull(c); - } else { - serverPanic("Unrecognized preferred endpoint type"); - } - - /* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */ - addReplyLongLong(c, clusterNodeClientPort(node, shouldReturnTlsInfo())); - addReplyBulkCBuffer(c, clusterNodeGetName(node), CLUSTER_NAMELEN); - - /* Add the additional endpoint information, this is all the known networking information - * that is not the preferred endpoint. Note the logic is evaluated twice so we can - * correctly report the number of additional network arguments without using a deferred - * map, an assertion is made at the end to check we set the right length. */ - int length = 0; - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { - length++; - } - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME - && hostname != NULL && hostname[0] != '\0') - { - length++; - } - addReplyMapLen(c, length); - - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_IP) { - addReplyBulkCString(c, "ip"); - addReplyBulkCString(c, clusterNodeIp(node)); - length--; - } - if (server.cluster_preferred_endpoint_type != CLUSTER_ENDPOINT_TYPE_HOSTNAME - && hostname != NULL && hostname[0] != '\0') - { - addReplyBulkCString(c, "hostname"); - addReplyBulkCString(c, hostname); - length--; - } - serverAssert(length == 0); -} - -void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, int end_slot) { - int i, nested_elements = 3; /* slots (2) + master addr (1) */ - for (i = 0; i < clusterNodeNumSlaves(node); i++) { - if (!isReplicaAvailable(clusterNodeGetSlave(node, i))) continue; - nested_elements++; - } - addReplyArrayLen(c, nested_elements); - addReplyLongLong(c, start_slot); - addReplyLongLong(c, end_slot); - addNodeToNodeReply(c, node); - - /* Remaining nodes in reply are replicas for slot range */ - for (i = 0; i < clusterNodeNumSlaves(node); i++) { - /* This loop is copy/pasted from clusterGenNodeDescription() - * with modifications for per-slot node aggregation. */ - if (!isReplicaAvailable(clusterNodeGetSlave(node, i))) continue; - addNodeToNodeReply(c, clusterNodeGetSlave(node, i)); - nested_elements--; - } - serverAssert(nested_elements == 3); /* Original 3 elements */ -} - -void clusterCommandSlots(client * c) { - /* Format: 1) 1) start slot - * 2) end slot - * 3) 1) master IP - * 2) master port - * 3) node ID - * 4) 1) replica IP - * 2) replica port - * 3) node ID - * ... continued until done - */ - clusterNode *n = NULL; - int num_masters = 0, start = -1; - void *slot_replylen = addReplyDeferredLen(c); - - for (int i = 0; i <= CLUSTER_SLOTS; i++) { - /* Find start node and slot id. */ - if (n == NULL) { - if (i == CLUSTER_SLOTS) break; - n = getNodeBySlot(i); - start = i; - continue; - } - - /* Add cluster slots info when occur different node with start - * or end of slot. */ - if (i == CLUSTER_SLOTS || n != getNodeBySlot(i)) { - addNodeReplyForClusterSlot(c, n, start, i-1); - num_masters++; - if (i == CLUSTER_SLOTS) break; - n = getNodeBySlot(i); - start = i; - } - } - setDeferredArrayLen(c, slot_replylen, num_masters); -} - -/* ----------------------------------------------------------------------------- - * Cluster functions related to serving / redirecting clients - * -------------------------------------------------------------------------- */ - -/* The ASKING command is required after a -ASK redirection. - * The client should issue ASKING before to actually send the command to - * the target instance. See the Redis Cluster specification for more - * information. */ -void askingCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; - } - c->flags |= CLIENT_ASKING; - addReply(c,shared.ok); -} - -/* The READONLY command is used by clients to enter the read-only mode. - * In this mode slaves will not redirect clients as long as clients access - * with read-only commands to keys that are served by the slave's master. */ -void readonlyCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; - } - c->flags |= CLIENT_READONLY; - addReply(c,shared.ok); -} - -/* Remove all the keys in the specified hash slot. - * The number of removed items is returned. */ -unsigned int clusterDelKeysInSlot(unsigned int hashslot, int by_command) { - unsigned int j = 0; - - if (!kvstoreDictSize(server.db->keys, (int) hashslot)) - return 0; - - kvstoreDictIterator kvs_di; - dictEntry *de = NULL; - kvstoreInitDictSafeIterator(&kvs_di, server.db->keys, (int) hashslot); - while((de = kvstoreDictIteratorNext(&kvs_di)) != NULL) { - enterExecutionUnit(1, 0); - sds sdskey = kvobjGetKey(dictGetKV(de)); - robj *key = createStringObject(sdskey, sdslen(sdskey)); - dbDelete(&server.db[0], key); - - keyModified(NULL, &server.db[0], key, NULL, 1); - if (by_command) { - /* Keys are deleted by a command (trimslots), we need to notify the - * keyspace event. Though, we don't need to propagate the DEL - * command, as the command (trimslots) will be propagated. */ - notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id); - } else { - /* Propagate the DEL command */ - propagateDeletion(&server.db[0], key, server.lazyfree_lazy_server_del); - /* The keys are not actually logically deleted from the database, - * just moved to another node. The modules needs to know that these - * keys are no longer available locally, so just send the keyspace - * notification to the modules, but not to clients. */ - moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id); - } - exitExecutionUnit(); - postExecutionUnitOperations(); - decrRefCount(key); - j++; - server.dirty++; - } - kvstoreResetDictIterator(&kvs_di); - return j; -} - -/* Delete the keys in the slot ranges. Returns the number of deleted items */ -unsigned int clusterDelKeysInSlotRangeArray(slotRangeArray *slots, int by_command) { - unsigned int j = 0; - for (int i = 0; i < slots->num_ranges; i++) { - for (int slot = slots->ranges[i].start; slot <= slots->ranges[i].end; slot++) { - j += clusterDelKeysInSlot(slot, by_command); - } - } - return j; -} - -int clusterIsMySlot(int slot) { - return getMyClusterNode() == getNodeBySlot(slot); -} - -void replySlotsFlushAndFree(client *c, slotRangeArray *slots) { - addReplyArrayLen(c, slots->num_ranges); - for (int i = 0 ; i < slots->num_ranges ; i++) { - addReplyArrayLen(c, 2); - addReplyLongLong(c, slots->ranges[i].start); - addReplyLongLong(c, slots->ranges[i].end); - } - slotRangeArrayFree(slots); -} - -/* Normalizes (sorts and merges adjacent ranges), checks that slot ranges are - * well-formed and non-overlapping. */ -int slotRangeArrayNormalizeAndValidate(slotRangeArray *slots, sds *err) { - unsigned char used_slots[CLUSTER_SLOTS] = {0}; - - if (slots->num_ranges <= 0 || slots->num_ranges >= CLUSTER_SLOTS) { - *err = sdscatprintf(sdsempty(), "invalid number of slot ranges: %d", slots->num_ranges); - return C_ERR; - } - - /* Sort and merge adjacent slot ranges. */ - slotRangeArraySortAndMerge(slots); - - for (int i = 0; i < slots->num_ranges; i++) { - if (slots->ranges[i].start >= CLUSTER_SLOTS || - slots->ranges[i].end >= CLUSTER_SLOTS) - { - *err = sdscatprintf(sdsempty(), "slot range is out of range: %d-%d", - slots->ranges[i].start, slots->ranges[i].end); - return C_ERR; - } - - if (slots->ranges[i].start > slots->ranges[i].end) { - *err = sdscatprintf(sdsempty(), "start slot number %d is greater than end slot number %d", - slots->ranges[i].start, slots->ranges[i].end); - return C_ERR; - } - - for (int j = slots->ranges[i].start; j <= slots->ranges[i].end; j++) { - if (used_slots[j]) { - *err = sdscatprintf(sdsempty(), "Slot %d specified multiple times", j); - return C_ERR; - } - used_slots[j]++; - } - } - return C_OK; -} - -/* Create a slot range array with the specified number of ranges. */ -slotRangeArray *slotRangeArrayCreate(int num_ranges) { - slotRangeArray *slots = zcalloc(sizeof(slotRangeArray) + num_ranges * sizeof(slotRange)); - slots->num_ranges = num_ranges; - return slots; -} - -/* Duplicate the slot range array. */ -slotRangeArray *slotRangeArrayDup(slotRangeArray *slots) { - slotRangeArray *dup = slotRangeArrayCreate(slots->num_ranges); - memcpy(dup->ranges, slots->ranges, sizeof(slotRange) * slots->num_ranges); - return dup; -} - -/* Set the slot range at the specified index. */ -void slotRangeArraySet(slotRangeArray *slots, int idx, int start, int end) { - slots->ranges[idx].start = start; - slots->ranges[idx].end = end; -} - -/* Create a slot range string in the format of: "1000-2000 3000-4000 ..." */ -sds slotRangeArrayToString(slotRangeArray *slots) { - sds s = sdsempty(); - if (slots == NULL || slots->num_ranges == 0) return s; - - for (int i = 0; i < slots->num_ranges; i++) { - slotRange *sr = &slots->ranges[i]; - s = sdscatprintf(s, "%d-%d ", sr->start, sr->end); - } - sdssetlen(s, sdslen(s) - 1); - s[sdslen(s)] = '\0'; - - return s; -} - -/* Parse a slot range string in the format "1000-2000 3000-4000 ..." into a slotRangeArray. - * Returns a new slotRangeArray on success, NULL on failure. */ -slotRangeArray *slotRangeArrayFromString(sds data) { - int num_ranges; - long long start, end; - slotRangeArray *slots = NULL; - if (!data || sdslen(data) == 0) return NULL; - - sds *parts = sdssplitlen(data, sdslen(data), " ", 1, &num_ranges); - if (num_ranges <= 0) goto err; - - slots = slotRangeArrayCreate(num_ranges); - - /* Parse each slot range */ - for (int i = 0; i < num_ranges; i++) { - char *dash = strchr(parts[i], '-'); - if (!dash) goto err; - - if (string2ll(parts[i], dash - parts[i], &start) == 0 || - string2ll(dash + 1, sdslen(parts[i]) - (dash - parts[i]) - 1, &end) == 0) - goto err; - slotRangeArraySet(slots, i, start, end); - } - - /* Validate all ranges */ - sds err_msg = NULL; - if (slotRangeArrayNormalizeAndValidate(slots, &err_msg) != C_OK) { - if (err_msg) sdsfree(err_msg); - goto err; - } - sdsfreesplitres(parts, num_ranges); - return slots; - -err: - if (slots) slotRangeArrayFree(slots); - sdsfreesplitres(parts, num_ranges); - return NULL; -} - -static int compareSlotRange(const void *a, const void *b) { - const slotRange *sa = a; - const slotRange *sb = b; - if (sa->start < sb->start) return -1; - if (sa->start > sb->start) return 1; - return 0; -} - -/* Sort slot ranges by start slot and merge adjacent ranges. - * Adjacent means: prev.end + 1 == next.start. - * e.g. 1000-2000 2001-3000 0-100 => 0-100 1000-3000 - * - * Note: Overlapping ranges are not merged.*/ -void slotRangeArraySortAndMerge(slotRangeArray *slots) { - if (!slots || slots->num_ranges <= 1) return; - - qsort(slots->ranges, slots->num_ranges, sizeof(slotRange), compareSlotRange); - - int idx = 0; - for (int i = 1; i < slots->num_ranges; i++) { - if (slots->ranges[idx].end + 1 == slots->ranges[i].start) - slots->ranges[idx].end = slots->ranges[i].end; - else - slots->ranges[++idx] = slots->ranges[i]; - } - slots->num_ranges = idx + 1; -} - -/* Compare two slot range arrays, return 1 if equal, 0 otherwise */ -int slotRangeArrayIsEqual(slotRangeArray *slots1, slotRangeArray *slots2) { - slotRangeArraySortAndMerge(slots1); - slotRangeArraySortAndMerge(slots2); - - if (slots1->num_ranges != slots2->num_ranges) return 0; - - for (int i = 0; i < slots1->num_ranges; i++) { - if (slots1->ranges[i].start != slots2->ranges[i].start || - slots1->ranges[i].end != slots2->ranges[i].end) { - return 0; - } - } - return 1; -} - -/* Add a slot to the slot range array. - * Usage: - * slotRangeArray *slots = NULL - * slots = slotRangeArrayAppend(slots, 1000); - * slots = slotRangeArrayAppend(slots, 1001); - * slots = slotRangeArrayAppend(slots, 1003); - * slots = slotRangeArrayAppend(slots, 1004); - * slots = slotRangeArrayAppend(slots, 1005); - * - * Result: 1000-1001, 1003-1005 - * Note: `slot` must be greater than the previous slot. - * */ -slotRangeArray *slotRangeArrayAppend(slotRangeArray *slots, int slot) { - if (slots == NULL) { - slots = slotRangeArrayCreate(4); - slots->ranges[0].start = slot; - slots->ranges[0].end = slot; - slots->num_ranges = 1; - return slots; - } - - serverAssert(slots->num_ranges >= 0 && slots->num_ranges <= CLUSTER_SLOTS); - serverAssert(slot > slots->ranges[slots->num_ranges - 1].end); - - /* Check if we can extend the last range */ - slotRange *last = &slots->ranges[slots->num_ranges - 1]; - if (slot == last->end + 1) { - last->end = slot; - return slots; - } - - /* Calculate current capacity and reallocate if needed */ - int cap = (int) ((zmalloc_size(slots) - sizeof(slotRangeArray)) / sizeof(slotRange)); - if (slots->num_ranges >= cap) - slots = zrealloc(slots, sizeof(slotRangeArray) + sizeof(slotRange) * cap * 2); - - /* Add new single-slot range */ - slots->ranges[slots->num_ranges].start = slot; - slots->ranges[slots->num_ranges].end = slot; - slots->num_ranges++; - - return slots; -} - -/* Returns 1 if the slot range array contains the given slot, 0 otherwise. */ -int slotRangeArrayContains(slotRangeArray *slots, unsigned int slot) { - for (int i = 0; i < slots->num_ranges; i++) - if (slots->ranges[i].start <= slot && slots->ranges[i].end >= slot) - return 1; - return 0; -} - -/* Free the slot range array. */ -void slotRangeArrayFree(slotRangeArray *slots) { - zfree(slots); -} - -/* Generic version of slotRangeArrayFree(). */ -void slotRangeArrayFreeGeneric(void *slots) { - slotRangeArrayFree(slots); -} - -/* Slot range array iterator */ -slotRangeArrayIter *slotRangeArrayGetIterator(slotRangeArray *slots) { - slotRangeArrayIter *it = zmalloc(sizeof(*it)); - it->slots = slots; - it->range_index = 0; - it->cur_slot = slots->num_ranges > 0 ? slots->ranges[0].start : -1; - return it; -} - -/* Returns the next slot in the array, or -1 if there are no more slots. */ -int slotRangeArrayNext(slotRangeArrayIter *it) { - if (it->range_index >= it->slots->num_ranges) return -1; - - if (it->cur_slot < it->slots->ranges[it->range_index].end) { - it->cur_slot++; - } else { - it->range_index++; - if (it->range_index < it->slots->num_ranges) - it->cur_slot = it->slots->ranges[it->range_index].start; - else - it->cur_slot = -1; /* finished */ - } - return it->cur_slot; -} - -int slotRangeArrayGetCurrentSlot(slotRangeArrayIter *it) { - return it->cur_slot; -} - -void slotRangeArrayIteratorFree(slotRangeArrayIter *it) { - zfree(it); -} - -/* Parse slot range pairs from argv starting at `pos`. - * `argc` is the argument count, `pos` is the first slot argument index. - * Returns a slotRangeArray or NULL on error. */ -slotRangeArray *parseSlotRangesOrReply(client *c, int argc, int pos) { - int start, end, count; - slotRangeArray *slots; - - /* Ensure there is at least one (start,end) slot range pairs. */ - if (argc < 0 || pos < 0 || pos >= argc || (argc - pos) < 2 || ((argc - pos) % 2) != 0) { - addReplyErrorArity(c); - return NULL; - } - - count = (argc - pos) / 2; - slots = slotRangeArrayCreate(count); - slots->num_ranges = 0; - - for (int j = pos; j < argc; j += 2) { - if ((start = getSlotOrReply(c, c->argv[j])) == -1 || - (end = getSlotOrReply(c, c->argv[j + 1])) == -1) - { - slotRangeArrayFree(slots); - return NULL; - } - slotRangeArraySet(slots, slots->num_ranges, start, end); - slots->num_ranges++; - } - - sds err = NULL; - if (slotRangeArrayNormalizeAndValidate(slots, &err) != C_OK) { - addReplyErrorSds(c, err); - slotRangeArrayFree(slots); - return NULL; - } - return slots; -} - -/* Return 1 if the keys in the slot can be accessed, 0 otherwise. */ -int clusterCanAccessKeysInSlot(int slot) { - /* If not in cluster mode, all keys are accessible */ - if (server.cluster_enabled == 0) return 1; - - /* If the slot is being imported under old slot migration approach, we should - * allow to list keys from the slot as previously. */ - if (getImportingSlotSource(slot)) return 1; - - /* If using atomic slot migration, check if the slot belongs to the current - * node or its master, return 1 if so. */ - clusterNode *myself = getMyClusterNode(); - if (clusterNodeIsSlave(myself)) { - clusterNode *master = clusterNodeGetMaster(myself); - if (master && clusterNodeCoversSlot(master, slot)) - return 1; - } else { - if (clusterNodeCoversSlot(myself, slot)) - return 1; - } - return 0; -} - -/* Return the slot ranges that belong to the current node or its master. */ -slotRangeArray *clusterGetLocalSlotRanges(void) { - slotRangeArray *slots = NULL; - - if (!server.cluster_enabled) { - slots = slotRangeArrayCreate(1); - slotRangeArraySet(slots, 0, 0, CLUSTER_SLOTS - 1); - return slots; - } - - clusterNode *master = clusterNodeGetMaster(getMyClusterNode()); - if (master) { - for (int i = 0; i < CLUSTER_SLOTS; i++) { - if (clusterNodeCoversSlot(master, i)) - slots = slotRangeArrayAppend(slots, i); - } - } - return slots ? slots : slotRangeArrayCreate(0); -} - -/* Partially flush destination DB in a cluster node, based on the slot range. - * - * Usage: SFLUSH [ ]* [SYNC|ASYNC] - * - * This is an initial implementation of SFLUSH (slots flush) which is limited to - * flushing a single shard as a whole, but in the future the same command may be - * used to partially flush a shard based on hash slots. Currently only if provided - * slots cover entirely the slots of a node, the node will be flushed and the - * return value will be pairs of slot ranges. Otherwise, a single empty set will - * be returned. If possible, SFLUSH SYNC will be run as blocking ASYNC as an - * optimization. - */ -void sflushCommand(client *c) { - int flags = EMPTYDB_NO_FLAGS, argc = c->argc; - - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; - } - - /* check if last argument is SYNC or ASYNC */ - if (!strcasecmp(c->argv[c->argc-1]->ptr,"sync")) { - flags = EMPTYDB_NO_FLAGS; - argc--; - } else if (!strcasecmp(c->argv[c->argc-1]->ptr,"async")) { - flags = EMPTYDB_ASYNC; - argc--; - } else if (server.lazyfree_lazy_user_flush) { - flags = EMPTYDB_ASYNC; - } - - /* parse the slot range */ - if (argc % 2 == 0) { - addReplyErrorArity(c); - return; - } - - /* Parse slot ranges from the command arguments. */ - slotRangeArray *slots = parseSlotRangesOrReply(c, argc, 1); - if (!slots) return; - - /* Iterate and find the slot ranges that belong to this node. Save them in - * a new slotRangeArray. It is allocated on heap since there is a chance - * that FLUSH SYNC will be running as blocking ASYNC and only later reply - * with slot ranges */ - unsigned char slots_to_flush[CLUSTER_SLOTS] = {0}; /* Requested slots to flush */ - slotRangeArray *myslots = NULL; - for (int i = 0; i < slots->num_ranges; i++) { - for (int j = slots->ranges[i].start; j <= slots->ranges[i].end; j++) { - if (clusterIsMySlot(j)) { - myslots = slotRangeArrayAppend(myslots, j); - slots_to_flush[j] = 1; - } - } - } - - /* Verify that all slots of mynode got covered. See sflushCommand() comment. */ - int all_slots_covered = 1; - for (int i = 0; i < CLUSTER_SLOTS; i++) { - if (clusterIsMySlot(i) && !slots_to_flush[i]) { - all_slots_covered = 0; - break; - } - } - if (myslots == NULL || !all_slots_covered) { - addReplyArrayLen(c, 0); - slotRangeArrayFree(slots); - slotRangeArrayFree(myslots); - return; - } - slotRangeArrayFree(slots); - - /* Flush selected slots. If not flush as blocking async, then reply immediately */ - if (flushCommandCommon(c, FLUSH_TYPE_SLOTS, flags, myslots) == 0) - replySlotsFlushAndFree(c, myslots); -} - -/* The READWRITE command just clears the READONLY command state. */ -void readwriteCommand(client *c) { - if (server.cluster_enabled == 0) { - addReplyError(c,"This instance has cluster support disabled"); - return; - } - c->flags &= ~CLIENT_READONLY; - addReply(c,shared.ok); -} - -/* Resets transient cluster stats that we expose via INFO or other means that we want - * to reset via CONFIG RESETSTAT. The function is also used in order to - * initialize these fields in clusterInit() at server startup. */ -void resetClusterStats(void) { - if (!server.cluster_enabled) return; - - clusterSlotStatResetAll(); -} - -/* This function is called at server startup in order to initialize cluster data - * structures that are shared between the different cluster implementations. */ -void clusterCommonInit(void) { - resetClusterStats(); - asmInit(); -} - -/* This function is called after the node startup in order to check if there - * are any slots that we have keys for, but are not assigned to us. If so, - * we delete the keys. */ -void clusterDeleteKeysInUnownedSlots(void) { - if (clusterNodeIsSlave(getMyClusterNode())) return; - - /* Check that all the slots we have keys for are assigned to us. Otherwise, - * delete the keys. */ - for (int i = 0; i < CLUSTER_SLOTS; i++) { - /* Skip if: no keys in the slot, it's our slot, or we are importing it. */ - if (!countKeysInSlot(i) || - clusterIsMySlot(i) || - getImportingSlotSource(i)) - { - continue; - } - - serverLog(LL_NOTICE, "I have keys for slot %d, but the slot is " - "assigned to another node. " - "Deleting keys in the slot.", i); - /* With atomic slot migration, it is safe to drop keys from slots - * that are not owned. This will not result in data loss under the - * legacy slot migration approach either, since the importing state - * has already been persisted in node.conf. */ - clusterDelKeysInSlot(i, 0); - } -} - - -/* This function is called after the node startup in order to verify that data - * loaded from disk is in agreement with the cluster configuration: - * - * 1) If we find keys about hash slots we have no responsibility for, the - * following happens: - * A) If no other node is in charge according to the current cluster - * configuration, we add these slots to our node. - * B) If according to our config other nodes are already in charge for - * this slots, we set the slots as IMPORTING from our point of view - * in order to justify we have those slots, and in order to make - * redis-cli aware of the issue, so that it can try to fix it. - * 2) If we find data in a DB different than DB0 we return C_ERR to - * signal the caller it should quit the server with an error message - * or take other actions. - * - * The function always returns C_OK even if it will try to correct - * the error described in "1". However if data is found in DB different - * from DB0, C_ERR is returned. - * - * The function also uses the logging facility in order to warn the user - * about desynchronizations between the data we have in memory and the - * cluster configuration. */ -int verifyClusterConfigWithData(void) { - /* Return ASAP if a module disabled cluster redirections. In that case - * every master can store keys about every possible hash slot. */ - if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION) - return C_OK; - - /* If this node is a slave, don't perform the check at all as we - * completely depend on the replication stream. */ - if (clusterNodeIsSlave(getMyClusterNode())) return C_OK; - - /* Make sure we only have keys in DB0. */ - for (int i = 1; i < server.dbnum; i++) { - if (kvstoreSize(server.db[i].keys)) return C_ERR; - } - - /* Take over slots that we have keys for, but are assigned to no one. */ - clusterClaimUnassignedSlots(); - /* Delete keys in unowned slots */ - clusterDeleteKeysInUnownedSlots(); - return C_OK; -} diff --git a/examples/redis-unstable/src/cluster.h b/examples/redis-unstable/src/cluster.h deleted file mode 100644 index 7daf093..0000000 --- a/examples/redis-unstable/src/cluster.h +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Copyright (c) 2024-present, Valkey contributors. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - * - * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information. - */ - -#ifndef __CLUSTER_H -#define __CLUSTER_H - -/*----------------------------------------------------------------------------- - * Redis cluster exported API. - *----------------------------------------------------------------------------*/ - -#define CLUSTER_SLOT_MASK_BITS 14 /* Number of bits used for slot id. */ -#define CLUSTER_SLOTS (1< redis: initiates/advances/cancels ASM operations - * - clusterAsmOnEvent(...) redis -> impl: notifies state changes - * - * Generic steps for an alternative implementation: - * - On destination side, implementation calls clusterAsmProcess(ASM_EVENT_IMPORT_START) - * to start an import operation. - * - Redis calls clusterAsmOnEvent() when an ASM event occurs. - * - On the source side, Redis will call clusterAsmOnEvent(ASM_EVENT_HANDOFF_PREP) - * when slots are ready to be handed off and the write pause is needed. - * - Implementation stops the traffic to the slots and calls clusterAsmProcess(ASM_EVENT_HANDOFF) - * - On the destination side, Redis calls clusterAsmOnEvent(ASM_EVENT_TAKEOVER) - * when destination node is ready to take over the slot, waiting for ownership change. - * - Cluster implementation updates the config and calls clusterAsmProcess(ASM_EVENT_DONE) - * to notify Redis that the slots ownership has changed. - * - * Sequence diagram for import: - * - Note: shows only the events that cluster implementation needs to react. - * - * ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ - * │ Destination │ │ Destination │ │ Source │ │ Source │ - * │ Cluster impl │ │ Master │ │ Master │ │ Cluster impl │ - * └───────┬───────┘ └───────┬───────┘ └───────┬───────┘ └───────┬───────┘ - * │ │ │ │ - * │ ASM_EVENT_IMPORT_START │ │ │ - * ├─────────────────────────────►│ │ │ - * │ │ CLUSTER SYNCSLOTS │ │ - * │ ├────────────────────────►│ │ - * │ │ │ │ - * │ │ SNAPSHOT(restore cmds) │ │ - * │ │◄────────────────────────┤ │ - * │ │ Repl stream │ │ - * │ │◄────────────────────────┤ │ - * │ │ │ ASM_EVENT_HANDOFF_PREP │ - * │ │ ├────────────────────────────►│ - * │ │ │ ASM_EVENT_HANDOFF │ - * │ │ │◄────────────────────────────┤ - * │ │ Drain repl stream │ │ - * │ │◄────────────────────────┤ │ - * │ ASM_EVENT_TAKEOVER │ │ │ - * │◄─────────────────────────────┤ │ │ - * │ │ │ │ - * │ ASM_EVENT_DONE │ │ │ - * ├─────────────────────────────►│ │ ASM_EVENT_DONE │ - * │ │ │◄────────────────────────────┤ - * │ │ │ │ - */ - -#define ASM_EVENT_IMPORT_START 1 /* Start a new import operation (destination side) */ -#define ASM_EVENT_CANCEL 2 /* Cancel an ongoing import/migrate operation (source and destination side) */ -#define ASM_EVENT_HANDOFF_PREP 3 /* Slot is ready to be handed off to the destination shard (source side) */ -#define ASM_EVENT_HANDOFF 4 /* Notify that the slot can be handed off (source side) */ -#define ASM_EVENT_TAKEOVER 5 /* Ready to take over the slot, waiting for config change (destination side) */ -#define ASM_EVENT_DONE 6 /* Notify that import/migrate is completed, config is updated (source and destination side) */ - -#define ASM_EVENT_IMPORT_PREP 7 /* Import is about to start, the implementation may reject by returning C_ERR */ -#define ASM_EVENT_IMPORT_STARTED 8 /* Import started */ -#define ASM_EVENT_IMPORT_FAILED 9 /* Import failed */ -#define ASM_EVENT_IMPORT_COMPLETED 10 /* Import completed (config updated) */ -#define ASM_EVENT_MIGRATE_PREP 11 /* Migrate is about to start, the implementation may reject by returning C_ERR */ -#define ASM_EVENT_MIGRATE_STARTED 12 /* Migrate started */ -#define ASM_EVENT_MIGRATE_FAILED 13 /* Migrate failed */ -#define ASM_EVENT_MIGRATE_COMPLETED 14 /* Migrate completed (config updated) */ - - -/* Called by cluster implementation to request an ASM operation. (cluster impl --> redis) - * Valid values for 'event': - * ASM_EVENT_IMPORT_START - * ASM_EVENT_CANCEL - * ASM_EVENT_HANDOFF - * ASM_EVENT_DONE - * - * For ASM_EVENT_IMPORT_START, 'task_id' should be a unique string. - * For other events (ASM_EVENT_CANCEL, ASM_EVENT_HANDOFF, ASM_EVENT_DONE), - * 'task_id' should match the ID from the corresponding import operation. - * Usage: - * char *task_id = malloc(CLUSTER_NAMELEN + 1); - * getRandomHexChars(task_id, CLUSTER_NAMELEN); - * task_id[CLUSTER_NAMELEN] = '\0'; - * - * slotRangeArray *slots = slotRangeArrayCreate(1); - * slotRangeArraySet(slots, 0, 0, 1000); - * - * const char *err = NULL; - * int ret = clusterAsmProcess(task_id, ASM_EVENT_IMPORT_START, slots, &err); - * zfree(task_id); - * slotRangeArrayFree(slots); - * - * if (ret != C_OK) { - * perror(err); - * return; - * } - * - * For ASM_EVENT_CANCEL, if `task_id` is NULL, all tasks will be cancelled. - * If `arg` parameter is provided, it should be a pointer to an int. It will be - * set to the number of tasks cancelled. - * - * Return value: - * - Returns C_OK on success, C_ERR on failure and 'err' will be set to the - * error message. - * - * Memory management: - * - There is no ownership transfer of 'task_id', 'err' or `slotRangeArray`. - * - `task_id` and `slotRangeArray` should be allocated and be freed by the - * caller. Redis internally will make a copy of these. - * - `err` is allocated by Redis and should NOT be freed by the caller. - **/ -int clusterAsmProcess(const char *task_id, int event, void *arg, char **err); - -/* Called when an ASM event occurs to notify the cluster implementation. (redis --> cluster impl) - * - * `arg` will point to a `slotRangeArray` for the following events: - * ASM_EVENT_IMPORT_PREP - * ASM_EVENT_IMPORT_STARTED - * ASM_EVENT_MIGRATE_PREP - * ASM_EVENT_MIGRATE_STARTED - * ASM_EVENT_HANDOFF_PREP - * - * Memory management: - * - Redis owns the `task_id` and `slotRangeArray`. - * - * Returns C_OK on success. - * - * If the cluster implementation returns C_ERR for ASM_EVENT_IMPORT_PREP or - * ASM_EVENT_MIGRATE_PREP, operation will not start. - **/ -int clusterAsmOnEvent(const char *task_id, int event, void *arg); - -#endif /* __CLUSTER_H */ diff --git a/examples/redis-unstable/src/cluster_asm.c b/examples/redis-unstable/src/cluster_asm.c deleted file mode 100644 index a090453..0000000 --- a/examples/redis-unstable/src/cluster_asm.c +++ /dev/null @@ -1,3602 +0,0 @@ -/* cluster_asm.c -- Atomic slot migration implementation for cluster - * - * Copyright (c) 2025-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "cluster.h" -#include "functions.h" -#include "cluster_asm.h" -#include "cluster_slot_stats.h" - -#define ASM_IMPORT (1 << 1) -#define ASM_MIGRATE (1 << 2) - -#define ASM_DEBUG_TRIM_DEFAULT 0 -#define ASM_DEBUG_TRIM_NONE 1 -#define ASM_DEBUG_TRIM_BG 2 -#define ASM_DEBUG_TRIM_ACTIVE 3 - -#define ASM_AOF_MIN_ITEMS_PER_KEY 512 /* Minimum number of items per key to use AOF format encoding */ - -typedef struct asmTask { - sds id; /* Task ID */ - int operation; /* Either ASM_IMPORT or ASM_MIGRATE */ - slotRangeArray *slots; /* List of slot ranges for this migration task */ - int state; /* Current state of the task */ - int dest_state; /* Destination node's main state (approximate) */ - char source[CLUSTER_NAMELEN]; /* Source node name */ - char dest[CLUSTER_NAMELEN]; /* Destination node name */ - clusterNode *source_node; /* Source node */ - connection *main_channel_conn; /* Main channel connection */ - connection *rdb_channel_conn; /* RDB channel connection */ - int rdb_channel_state; /* State of the RDB channel */ - unsigned long long dest_offset; /* Destination offset */ - unsigned long long source_offset; /* Source offset */ - int cross_slot_during_propagating; /* If cross-slot commands are encountered during propagating */ - int stream_eof_during_streaming; /* If STREAM-EOF is received during streaming buffer */ - replDataBuf sync_buffer; /* Buffer for the stream */ - client *main_channel_client; /* Client for the main channel on the source side */ - client *rdb_channel_client; /* Client for the RDB channel on the source side */ - long long retry_count; /* Number of retries for this task */ - mstime_t create_time; /* Task creation time */ - mstime_t start_time; /* Task start time */ - mstime_t end_time; /* Task end time */ - mstime_t paused_time; /* The time when the slot writes were paused */ - mstime_t dest_slots_snapshot_time; /* The time when the destination starts applying the slot snapshot */ - mstime_t dest_accum_applied_time; /* The time when the destination finishes applying the accumulated buffer */ - sds error; /* Error message for this task */ - redisOpArray *pre_snapshot_module_cmds; /* Module commands to be propagated at the beginning of slot migration */ -} asmTask; - -struct asmManager { - list *tasks; /* List of asmTask to be processed */ - list *archived_tasks; /* List of archived asmTask */ - list *pending_trim_jobs; /* List of pending trim jobs (due to write pause) */ - list *active_trim_jobs; /* List of active trim jobs */ - slotRangeArrayIter *active_trim_it; /* Iterator of the current active trim job */ - size_t sync_buffer_peak; /* Peak size of sync buffer */ - asmTask *master_task; /* The task that is currently active on the master */ - - /* Fail point injection for debugging */ - int debug_fail_channel; /* Channel where the task will fail */ - int debug_fail_state; /* State where the task will fail */ - int debug_trim_method; /* Method to trim the buffer */ - int debug_active_trim_delay; /* Sleep before trimming each key */ - - /* Active trim stats */ - unsigned long long active_trim_started; /* Number of times active trim was started */ - unsigned long long active_trim_completed; /* Number of times active trim was completed */ - unsigned long long active_trim_cancelled; /* Number of times active trim was cancelled */ - unsigned long long active_trim_current_job_keys; /* Total number of keys to trim in the current job */ - unsigned long long active_trim_current_job_trimmed; /* Number of keys trimmed in the current job */ -}; - -enum asmState { - /* Common state */ - ASM_NONE = 0, - ASM_CONNECTING, - ASM_AUTH_REPLY, - ASM_CANCELED, - ASM_FAILED, - ASM_COMPLETED, - - /* Import state */ - ASM_SEND_HANDSHAKE, - ASM_HANDSHAKE_REPLY, - ASM_SEND_SYNCSLOTS, - ASM_SYNCSLOTS_REPLY, - ASM_INIT_RDBCHANNEL, - ASM_ACCUMULATE_BUF, - ASM_READY_TO_STREAM, - ASM_STREAMING_BUF, - ASM_WAIT_STREAM_EOF, - ASM_TAKEOVER, - - /* Migrate state */ - ASM_WAIT_RDBCHANNEL, - ASM_WAIT_BGSAVE_START, - ASM_SEND_BULK_AND_STREAM, - ASM_SEND_STREAM, - ASM_HANDOFF_PREP, - ASM_HANDOFF, - ASM_STREAM_EOF, - - /* RDB channel state */ - ASM_RDBCHANNEL_REQUEST, - ASM_RDBCHANNEL_REPLY, - ASM_RDBCHANNEL_TRANSFER, -}; - -enum asmChannel { - ASM_IMPORT_MAIN_CHANNEL = 1, /* Main channel for the import task */ - ASM_IMPORT_RDB_CHANNEL, /* RDB channel for the import task */ - ASM_MIGRATE_MAIN_CHANNEL, /* Main channel for the migrate task */ - ASM_MIGRATE_RDB_CHANNEL /* RDB channel for the migrate task */ -}; - -/* Global ASM manager */ -struct asmManager *asmManager = NULL; - -/* replication.c */ -char *sendCommand(connection *conn, ...); -char *sendCommandArgv(connection *conn, int argc, char **argv, size_t *argv_lens); -char *receiveSynchronousResponse(connection *conn); -ConnectionType *connTypeOfReplication(void); -int startBgsaveForReplication(int mincapa, int req); -void createReplicationBacklogIfNeeded(void); -/* cluster.c */ -void createDumpPayload(rio *payload, robj *o, robj *key, int dbid, int skip_checksum); -/* cluster_asm.c */ -static void asmStartImportTask(asmTask *task); -static void asmTaskCancel(asmTask *task, const char *reason); -static void asmSyncBufferReadFromConn(connection *conn); -static void propagateTrimSlots(slotRangeArray *slots); -void asmTrimJobSchedule(slotRangeArray *slots); -void asmTrimJobProcessPending(void); -void asmCancelPendingTrimJobs(void); -void asmTriggerActiveTrim(slotRangeArray *slots); -void asmActiveTrimEnd(void); -int asmIsAnyTrimJobOverlaps(slotRangeArray *slots); -void asmTrimSlotsIfNotOwned(slotRangeArray *slots); -void asmNotifyStateChange(asmTask *task, int event); - -void asmInit(void) { - asmManager = zcalloc(sizeof(*asmManager)); - asmManager->tasks = listCreate(); - asmManager->archived_tasks = listCreate(); - asmManager->pending_trim_jobs = listCreate(); - asmManager->sync_buffer_peak = 0; - asmManager->master_task = NULL; - asmManager->debug_fail_channel = -1; - asmManager->debug_fail_state = -1; - asmManager->debug_trim_method = ASM_DEBUG_TRIM_DEFAULT; - asmManager->debug_active_trim_delay = 0; - asmManager->active_trim_jobs = listCreate(); - asmManager->active_trim_started = 0; - asmManager->active_trim_completed = 0; - asmManager->active_trim_cancelled = 0; - listSetFreeMethod(asmManager->active_trim_jobs, slotRangeArrayFreeGeneric); -} - -char *asmTaskStateToString(int state) { - switch (state) { - case ASM_NONE: return "none"; - case ASM_CONNECTING: return "connecting"; - case ASM_AUTH_REPLY: return "auth-reply"; - case ASM_CANCELED: return "canceled"; - case ASM_FAILED: return "failed"; - case ASM_COMPLETED: return "completed"; - - /* Import state */ - case ASM_SEND_HANDSHAKE: return "send-handshake"; - case ASM_HANDSHAKE_REPLY: return "handshake-reply"; - case ASM_SEND_SYNCSLOTS: return "send-syncslots"; - case ASM_SYNCSLOTS_REPLY: return "syncslots-reply"; - case ASM_INIT_RDBCHANNEL: return "init-rdbchannel"; - case ASM_ACCUMULATE_BUF: return "accumulate-buffer"; - case ASM_READY_TO_STREAM: return "ready-to-stream"; - case ASM_STREAMING_BUF: return "streaming-buffer"; - case ASM_WAIT_STREAM_EOF: return "wait-stream-eof"; - case ASM_TAKEOVER: return "takeover"; - - /* Migrate state */ - case ASM_WAIT_RDBCHANNEL: return "wait-rdbchannel"; - case ASM_WAIT_BGSAVE_START: return "wait-bgsave-start"; - case ASM_SEND_BULK_AND_STREAM: return "send-bulk-and-stream"; - case ASM_SEND_STREAM: return "send-stream"; - case ASM_HANDOFF_PREP: return "handoff-prep"; - case ASM_HANDOFF: return "handoff"; - case ASM_STREAM_EOF: return "stream-eof"; - - /* RDB channel state */ - case ASM_RDBCHANNEL_REQUEST: return "rdbchannel-request"; - case ASM_RDBCHANNEL_REPLY: return "rdbchannel-reply"; - case ASM_RDBCHANNEL_TRANSFER: return "rdbchannel-transfer"; - - default: return "unknown"; - } - serverAssert(0); /* Unreachable */ -} - -const char *asmChannelToString(int channel) { - switch (channel) { - case ASM_IMPORT_MAIN_CHANNEL: return "import-main-channel"; - case ASM_IMPORT_RDB_CHANNEL: return "import-rdb-channel"; - case ASM_MIGRATE_MAIN_CHANNEL: return "migrate-main-channel"; - case ASM_MIGRATE_RDB_CHANNEL: return "migrate-rdb-channel"; - default: return "unknown"; - } -} - -int asmDebugSetFailPoint(char *channel, char *state) { - if (!asmManager) { - serverLog(LL_WARNING, "ASM manager is not initialized"); - return C_ERR; - } - asmManager->debug_fail_channel = -1; - asmManager->debug_fail_state = -1; - if (!channel && !state) return C_ERR; - if (sdslen(channel) == 0 && sdslen(state) == 0) { - serverLog(LL_WARNING, "ASM fail point is cleared"); - return C_OK; - } - - for (int i = ASM_IMPORT_MAIN_CHANNEL; i <= ASM_MIGRATE_RDB_CHANNEL; i++) { - if (!strcasecmp(channel, asmChannelToString(i))) { - asmManager->debug_fail_channel = i; - break; - } - } - if (asmManager->debug_fail_channel == -1) return C_ERR; - - for (int i = ASM_NONE; i <= ASM_RDBCHANNEL_TRANSFER; i++) { - if (!strcasecmp(state, asmTaskStateToString(i))) { - asmManager->debug_fail_state = i; - break; - } - } - if (asmManager->debug_fail_state == -1) return C_ERR; - - serverLog(LL_NOTICE, "ASM fail point set: channel=%s, state=%s", channel, state); - return C_OK; -} - -int asmDebugSetTrimMethod(const char *method, int active_trim_delay) { - if (!asmManager) { - serverLog(LL_WARNING, "ASM manager is not initialized"); - return C_ERR; - } - int prev = asmManager->debug_trim_method; - if (!strcasecmp(method, "default")) asmManager->debug_trim_method = ASM_DEBUG_TRIM_DEFAULT; - else if (!strcasecmp(method, "none")) asmManager->debug_trim_method = ASM_DEBUG_TRIM_NONE; - else if (!strcasecmp(method, "bg")) asmManager->debug_trim_method = ASM_DEBUG_TRIM_BG; - else if (!strcasecmp(method, "active")) asmManager->debug_trim_method = ASM_DEBUG_TRIM_ACTIVE; - else return C_ERR; - - /* If we are switching from none to default, delete all the keys in the - * slots we don't own */ - if (prev == ASM_DEBUG_TRIM_NONE && asmManager->debug_trim_method != ASM_DEBUG_TRIM_NONE) { - for (int i = 0; i < CLUSTER_SLOTS; i++) - if (!clusterIsMySlot(i)) - clusterDelKeysInSlot(i, 0); - } - asmManager->debug_active_trim_delay = active_trim_delay; - serverLog(LL_NOTICE, "ASM trim method was set=%s, active_trim_delay=%d", method, active_trim_delay); - return C_OK; -} - -int asmDebugIsFailPointActive(int channel, int state) { - if (!asmManager) return 0; /* ASM manager not initialized */ - if (asmManager->debug_fail_channel == channel && asmManager->debug_fail_state == state) { - serverLog(LL_NOTICE, "ASM fail point active: channel=%s, state=%s", - asmChannelToString(channel), asmTaskStateToString(state)); - return 1; - } - return 0; -} - -sds asmCatInfoString(sds info) { - int active_tasks = 0; - - listIter li; - listNode *ln; - listRewind(asmManager->tasks, &li); - while ((ln = listNext(&li)) != NULL) { - asmTask *task = listNodeValue(ln); - if (task->operation == ASM_IMPORT || - (task->operation == ASM_MIGRATE && task->state != ASM_FAILED)) - { - active_tasks++; - } - } - - return sdscatprintf(info ? info : sdsempty(), - "cluster_slot_migration_active_tasks:%d\r\n" - "cluster_slot_migration_active_trim_running:%lu\r\n" - "cluster_slot_migration_active_trim_current_job_keys:%llu\r\n" - "cluster_slot_migration_active_trim_current_job_trimmed:%llu\r\n" - "cluster_slot_migration_stats_active_trim_started:%llu\r\n" - "cluster_slot_migration_stats_active_trim_completed:%llu\r\n" - "cluster_slot_migration_stats_active_trim_cancelled:%llu\r\n", - active_tasks, - listLength(asmManager->active_trim_jobs), - asmManager->active_trim_current_job_keys, - asmManager->active_trim_current_job_trimmed, - asmManager->active_trim_started, - asmManager->active_trim_completed, - asmManager->active_trim_cancelled); -} - -void asmTaskReset(asmTask *task) { - task->state = ASM_NONE; - task->dest_state = ASM_NONE; - task->rdb_channel_state = ASM_NONE; - task->main_channel_conn = NULL; - task->rdb_channel_conn = NULL; - task->dest_offset = 0; - task->source_offset = 0; - task->stream_eof_during_streaming = 0; - task->cross_slot_during_propagating = 0; - replDataBufInit(&task->sync_buffer); - task->main_channel_client = NULL; - task->rdb_channel_client = NULL; - task->paused_time = 0; - task->dest_slots_snapshot_time = 0; - task->dest_accum_applied_time = 0; - task->pre_snapshot_module_cmds = NULL; -} - -asmTask *asmTaskCreate(const char *task_id) { - asmTask *task = zcalloc(sizeof(*task)); - task->error = sdsempty(); - asmTaskReset(task); - task->slots = NULL; - task->source_node = NULL; - task->retry_count = 0; - task->create_time = server.mstime; - task->start_time = -1; - task->end_time = -1; - if (task_id) { - task->id = sdsnew(task_id); - } else { - task->id = sdsnewlen(NULL, CLUSTER_NAMELEN); - getRandomHexChars(task->id, CLUSTER_NAMELEN); - } - - return task; -} - -void asmTaskFree(asmTask *task) { - replDataBufClear(&task->sync_buffer); - sdsfree(task->id); - slotRangeArrayFree(task->slots); - sdsfree(task->error); - zfree(task); -} - -/* Convert the task state to the corresponding event. */ -int asmTaskStateToEvent(asmTask *task) { - if (task->operation == ASM_IMPORT) { - if (task->state == ASM_COMPLETED) return ASM_EVENT_IMPORT_COMPLETED; - else if (task->state == ASM_FAILED) return ASM_EVENT_IMPORT_FAILED; - else return ASM_EVENT_IMPORT_STARTED; - } else { - if (task->state == ASM_COMPLETED) return ASM_EVENT_MIGRATE_COMPLETED; - else if (task->state == ASM_FAILED) return ASM_EVENT_MIGRATE_FAILED; - else return ASM_EVENT_MIGRATE_STARTED; - } -} - -/* Serialize ASM task information into a string for transmission to replicas. - * Format: "task_id:source_node:dest_node:operation:state:slot_ranges" - * Where slot_ranges is in the format "1000-2000 3000-4000 ..." */ -sds asmTaskSerialize(asmTask *task) { - sds serialized = sdsempty(); - - /* Add task ID */ - serialized = sdscatprintf(serialized, "%s:", task->id); - - /* Add source node ID (40 chars) */ - serialized = sdscatlen(serialized, task->source, CLUSTER_NAMELEN); - serialized = sdscat(serialized, ":"); - - /* Add destination node ID (40 chars) */ - serialized = sdscatlen(serialized, task->dest, CLUSTER_NAMELEN); - serialized = sdscat(serialized, ":"); - - /* Add operation type */ - serialized = sdscatprintf(serialized, "%s:", task->operation == ASM_IMPORT ? - "import" : "migrate"); - - /* Add current state */ - serialized = sdscatprintf(serialized, "%s:", asmTaskStateToString(task->state)); - - /* Add slot ranges sds */ - sds slots_str = slotRangeArrayToString(task->slots); - serialized = sdscatprintf(serialized, "%s", slots_str); - sdsfree(slots_str); - - return serialized; -} - -/* Deserialize ASM task information from a string and create a complete asmTask. - * Format: "task_id:source_node:dest_node:operation:state:slot_ranges" - * Returns a new asmTask on success, NULL on failure. */ -asmTask *asmTaskDeserialize(sds data) { - int count, idx = 0; - asmTask *task = NULL; - if (!data || sdslen(data) == 0) return NULL; - - sds *parts = sdssplitlen(data, sdslen(data), ":", 1, &count); - if (count < 6) goto err; - - /* Parse task ID */ - if (sdslen(parts[idx]) == 0) goto err; - task = asmTaskCreate(parts[idx]); - if (!task) goto err; - idx++; - - /* Parse source node ID */ - if (sdslen(parts[idx]) != CLUSTER_NAMELEN) goto err; - memcpy(task->source, parts[idx], CLUSTER_NAMELEN); - idx++; - - /* Parse destination node ID */ - if (sdslen(parts[idx]) != CLUSTER_NAMELEN) goto err; - memcpy(task->dest, parts[idx], CLUSTER_NAMELEN); - idx++; - - /* Parse operation type */ - if (!strcasecmp(parts[idx], "import")) { - task->operation = ASM_IMPORT; - } else if (!strcasecmp(parts[idx], "migrate")) { - task->operation = ASM_MIGRATE; - } else { - goto err; - } - idx++; - - /* Parse state */ - task->state = ASM_NONE; /* Default state */ - for (int state = ASM_NONE; state <= ASM_RDBCHANNEL_TRANSFER; state++) { - if (!strcasecmp(parts[idx], asmTaskStateToString(state))) { - task->state = state; - break; - } - } - idx++; - - /* Parse slot ranges */ - task->slots = slotRangeArrayFromString(parts[idx]); - if (!task->slots) goto err; - idx++; - - /* Ignore any extra fields for future compatibility */ - - sdsfreesplitres(parts, count); - return task; - -err: - if (task) asmTaskFree(task); - sdsfreesplitres(parts, count); - return NULL; -} - -/* Notify replicas about ASM task information to maintain consistency during - * slot migration. This function sends a CLUSTER SYNCSLOTS CONF ASM-TASK command - * to all connected replicas with the serialized task information. */ -void asmNotifyReplicasStateChange(struct asmTask *task) { - if (!server.cluster_enabled || !clusterNodeIsMaster(getMyClusterNode())) return; - - /* Create command arguments for CLUSTER SYNCSLOTS CONF ASM-TASK */ - robj *argv[5]; - argv[0] = createStringObject("CLUSTER", 7); - argv[1] = createStringObject("SYNCSLOTS", 9); - argv[2] = createStringObject("CONF", 4); - argv[3] = createStringObject("ASM-TASK", 8); - argv[4] = createObject(OBJ_STRING, asmTaskSerialize(task)); - - /* Send the command to all replicas */ - replicationFeedSlaves(server.slaves, -1, argv, 5); - - /* Clean up command objects */ - for (int i = 0; i < 5; i++) { - decrRefCount(argv[i]); - } -} - -/* Dump the active import ASM task information. */ -sds asmDumpActiveImportTask(void) { - if (!server.cluster_enabled) return NULL; - - /* For replica, dump the master active task. */ - if (clusterNodeIsSlave(getMyClusterNode()) && - asmManager->master_task && - asmManager->master_task->state != ASM_FAILED && - asmManager->master_task->state != ASM_COMPLETED) - { - return asmTaskSerialize(asmManager->master_task); - } - - /* For master, dump the first active task. */ - if (!asmManager || listLength(asmManager->tasks) == 0) return NULL; - asmTask *task = listNodeValue(listFirst(asmManager->tasks)); - if (task->state == ASM_NONE || task->state == ASM_FAILED || - task->state == ASM_COMPLETED) return NULL; - - return asmTaskSerialize(task); -} - -size_t asmGetPeakSyncBufferSize(void) { - if (!asmManager) return 0; - /* Compute peak sync buffer usage. The current task's peak may not - * reflect in asmManager->sync_buffer_peak immediately. */ - size_t peak = asmManager->sync_buffer_peak; - asmTask *task = listFirst(asmManager->tasks) ? - listNodeValue(listFirst(asmManager->tasks)) : NULL; - if (task && task->operation == ASM_IMPORT) - peak = max(task->sync_buffer.peak, asmManager->sync_buffer_peak); - - return peak; -} - -size_t asmGetImportInputBufferSize(void) { - if (!asmManager || listLength(asmManager->tasks) == 0) return 0; - - asmTask *task = listNodeValue(listFirst(asmManager->tasks)); - if (task->operation == ASM_IMPORT) - return task->sync_buffer.mem_used; - - return 0; -} - -size_t asmGetMigrateOutputBufferSize(void) { - if (!asmManager || listLength(asmManager->tasks) == 0) return 0; - - asmTask *task = listNodeValue(listFirst(asmManager->tasks)); - if (task->operation == ASM_MIGRATE && task->main_channel_client) - return getClientOutputBufferMemoryUsage(task->main_channel_client); - - return 0; -} - -/* Returns the ASM task with the given ID, or NULL if no such task exists. */ -static asmTask *asmLookupTaskAt(list *tasks, const char *id) { - listIter li; - listNode *ln; - - listRewind(tasks, &li); - while ((ln = listNext(&li)) != NULL) { - asmTask *task = listNodeValue(ln); - if (!strcmp(task->id, id)) return task; - } - return NULL; -} - -/* Returns the ASM task with the given ID, or NULL if no such task exists. */ -asmTask *asmLookupTaskById(const char *id) { - return asmLookupTaskAt(asmManager->tasks, id); -} - -/* Returns the ASM task that is identical to the given slot range array, or NULL - * if no such task exists. */ -asmTask *asmLookupTaskBySlotRangeArray(slotRangeArray *slots) { - listIter li; - listNode *ln; - - listRewind(asmManager->tasks, &li); - while ((ln = listNext(&li)) != NULL) { - asmTask *task = listNodeValue(ln); - if (slotRangeArrayIsEqual(task->slots, slots)) - return task; - } - return NULL; -} - -/* Returns the slot range array for the given task ID */ -slotRangeArray *asmTaskGetSlotRanges(const char *task_id) { - asmTask *task = NULL; - if (!task_id || (task = asmLookupTaskById(task_id)) == NULL) return NULL; - - return task->slots; -} - -/* Returns 1 if the slot range array overlaps with the given slot range. */ -static int slotRangeArrayOverlaps(slotRangeArray *slots, slotRange *req) { - for (int i = 0; i < slots->num_ranges; i++) { - slotRange *sr = &slots->ranges[i]; - if (sr->start <= req->end && sr->end >= req->start) - return 1; - } - return 0; -} - -/* Returns 1 if the two slot range arrays overlap, 0 otherwise. */ -static int slotRangeArraysOverlap(slotRangeArray *slots1, slotRangeArray *slots2) { - for (int i = 0; i < slots1->num_ranges; i++) { - slotRange *sr1 = &slots1->ranges[i]; - if (slotRangeArrayOverlaps(slots2, sr1)) return 1; - } - return 0; -} - -/* Returns the ASM task that overlaps with the given slot range, or NULL if - * no such task exists. */ -static asmTask *lookupAsmTaskBySlotRange(slotRange *req) { - listIter li; - listNode *ln; - - listRewind(asmManager->tasks, &li); - while ((ln = listNext(&li)) != NULL) { - asmTask *task = listNodeValue(ln); - if (slotRangeArrayOverlaps(task->slots, req)) - return task; - } - return NULL; -} - -/* Validates the given slot ranges for a migration task: - * - Ensures the current node is a master. - * - Verifies all slots are in a STABLE state. - * - Confirms all slots belong to a single source node. - * - Confirms no ongoing import task that overlaps with the slot ranges. - * - * Returns the source node if validation succeeds. - * Otherwise, returns NULL and sets 'err' variable. */ -static clusterNode *validateImportSlotRanges(slotRangeArray *slots, sds *err, asmTask *current) { - clusterNode *source = NULL; - - *err = NULL; - - /* Ensure this is a master node */ - if (!clusterNodeIsMaster(getMyClusterNode())) { - *err = sdsnew("slot migration not allowed on replica."); - goto out; - } - - /* Ensure no manual migration is in progress. */ - for (int i = 0; i < CLUSTER_SLOTS; i++) { - if (getImportingSlotSource(i) != NULL || - getMigratingSlotDest(i) != NULL) - { - *err = sdsnew("all slot states must be STABLE to start a slot migration task."); - goto out; - } - } - - for (int i = 0; i < slots->num_ranges; i++) { - slotRange *sr = &slots->ranges[i]; - - /* Ensure no import task overlaps with this slot range. - * Skip check current task that is running for this slot range. */ - asmTask *task = lookupAsmTaskBySlotRange(sr); - if (task && task != current && task->operation == ASM_IMPORT) { - *err = sdscatprintf(sdsempty(), - "overlapping import exists for slot range: %d-%d", - sr->start, sr->end); - goto out; - } - - /* Validate if we can start migration task for this slot range. */ - for (int j = sr->start; j <= sr->end; j++) { - clusterNode *node = getNodeBySlot(j); - if (node == NULL) { - *err = sdscatprintf(sdsempty(), "slot has no owner: %d", j); - goto out; - } - - if (!source) { - source = node; - } else if (source != node) { - *err = sdsnew("slots belong to different source nodes"); - goto out; - } - } - } - -out: - return *err ? NULL : source; -} - -/* Returns 1 if a task with the specified operation is in progress, 0 otherwise. */ -static int asmTaskInProgress(int operation) { - listIter li; - listNode *ln; - - if (!asmManager || listLength(asmManager->tasks) == 0) return 0; - - listRewind(asmManager->tasks, &li); - while ((ln = listNext(&li)) != NULL) { - asmTask *task = listNodeValue(ln); - if (task->operation == operation) return 1; - } - return 0; -} - -/* Returns 1 if a migrate task is in progress, 0 otherwise. */ -int asmMigrateInProgress(void) { - return asmTaskInProgress(ASM_MIGRATE); -} - -/* Returns 1 if an import task is in progress, 0 otherwise. */ -int asmImportInProgress(void) { - return asmTaskInProgress(ASM_IMPORT); -} - -/* Returns 1 if the task is in a state where it can receive replication stream -* for the slot range, 0 otherwise. */ -inline static int asmCanFeedMigrationClient(asmTask *task) { - return task->operation == ASM_MIGRATE && - !task->cross_slot_during_propagating && - (task->state == ASM_SEND_BULK_AND_STREAM || - task->state == ASM_SEND_STREAM || - task->state == ASM_HANDOFF_PREP); -} - -/* Feed the migration client with the replication stream for the slot range. */ -void asmFeedMigrationClient(robj **argv, int argc) { - asmTask *task = NULL; - - if (server.cluster_enabled == 0 || listLength(asmManager->tasks) == 0) - return; - - /* Check if there is a migrate task that can receive replication stream. */ - task = listNodeValue(listFirst(asmManager->tasks)); - if (!asmCanFeedMigrationClient(task)) return; - - /* Ensure all arguments are converted to string encoding if necessary, - * since getSlotFromCommand expects them to be string-encoded. - * Generally the arguments are string-encoded, but we may rewrite - * the command arguments to integer encoding. */ - for (int i = 0; i < argc; i++) { - if (!sdsEncodedObject(argv[i])) { - serverAssert(argv[i]->encoding == OBJ_ENCODING_INT); - robj *old = argv[i]; - argv[i] = createStringObjectFromLongLongWithSds((long)old->ptr); - decrRefCount(old); - } - } - - /* Check if the command belongs to the slot range. */ - struct redisCommand *cmd = lookupCommand(argv, argc); - serverAssert(cmd); - - int slot = getSlotFromCommand(cmd, argv, argc); - - /* If the command does not have keys, skip it now. - * SELECT is not propagated, since we only support a single db in cluster mode. - * MULTI/EXEC is not needed, since transaction semantics are unnecessary - * before the slot handoff. - * FUNCTION subcommands should be executed on all nodes, so here we skip it, - * and even propagating them may cause an error when executing. - * - * NOTICE: if some keyless commands should be propagated to the destination, - * we should identify them here and send. */ - if (slot == INVALID_CLUSTER_SLOT) return; - - /* Generally we reject cross-slot commands before executing, but module may - * replicate this kind of command, so we check again. To guarantee data - * consistency, we cancel the task if we encounter a cross-slot command. */ - if (slot == CLUSTER_CROSSSLOT) { - /* We cannot cancel the task directly here, since it may lead to a recursive - * call: asmTaskCancel() --> moduleFireServerEvent() --> moduleFreeContext() - * --> postExecutionUnitOperations() --> propagateNow(). Even worse, this - * could result in propagating pending commands to the replication stream twice. - * To avoid this, we simply set a flag here, cancel the task in beforeSleep. */ - task->cross_slot_during_propagating = 1; - return; - } - - /* Check if the slot belongs to the task's slot range. */ - slotRange sr = {slot, slot}; - if (!slotRangeArrayOverlaps(task->slots, &sr)) return; - - if (unlikely(asmDebugIsFailPointActive(ASM_MIGRATE_MAIN_CHANNEL, task->state))) - freeClientAsync(task->main_channel_client); - - /* Feed main channel with the command. */ - client *c = task->main_channel_client; - size_t prev_bytes = getNormalClientPendingReplyBytes(c); - - addReplyArrayLen(c, argc); - for (int i = 0; i < argc; i++) - addReplyBulk(c, argv[i]); - - /* Update the task's source offset to reflect the bytes sent. */ - task->source_offset += (getNormalClientPendingReplyBytes(c) - prev_bytes); -} - -asmTask *asmCreateImportTask(const char *task_id, slotRangeArray *slots, sds *err) { - clusterNode *source; - - *err = NULL; - /* Validate that the slot ranges are valid and that migration can be - * initiated for them. */ - source = validateImportSlotRanges(slots, err, NULL); - if (!source) - goto err; - - if (source == getMyClusterNode()) { - *err = sdsnew("this node is already the owner of the slot range"); - goto err; - } - - /* Only support a single task at a time now. */ - if (listLength(asmManager->tasks) != 0) { - asmTask *current = listNodeValue(listFirst(asmManager->tasks)); - if (current->state == ASM_FAILED) { - /* We can create a new import task only if the current one is failed, - * cancel the failed task to create a new one. */ - asmTaskCancel(current, "new import requested"); - } else { - *err = sdsnew("another ASM task is already in progress"); - goto err; - } - } - /* There should be no task in progress. */ - serverAssert(listLength(asmManager->tasks) == 0); - - /* Create a slot migration task */ - asmTask *task = asmTaskCreate(task_id); - task->slots = slots; - task->state = ASM_NONE; - task->operation = ASM_IMPORT; - task->source_node = source; - memcpy(task->source, clusterNodeGetName(source), CLUSTER_NAMELEN); - memcpy(task->dest, getMyClusterId(), CLUSTER_NAMELEN); - - listAddNodeTail(asmManager->tasks, task); - sds slots_str = slotRangeArrayToString(slots); - serverLog(LL_NOTICE, "Import task %s created: src=%.40s, dest=%.40s, slots=%s", - task->id, task->source, task->dest, slots_str); - sdsfree(slots_str); - - return task; - -err: - slotRangeArrayFree(slots); - return NULL; -} - -/* CLUSTER MIGRATION IMPORT - * - * Sent by operator to the destination node to start the migration. */ -static void clusterMigrationCommandImport(client *c) { - /* Validate slot range arg count */ - int remaining = c->argc - 3; - if (remaining == 0 || remaining % 2 != 0) { - addReplyErrorArity(c); - return; - } - - slotRangeArray *slots = parseSlotRangesOrReply(c, c->argc, 3); - if (!slots) return; - - sds err = NULL; - asmTask *task = asmCreateImportTask(NULL, slots, &err); - if (!task) { - addReplyErrorSds(c, err); - return; - } - - addReplyBulkCString(c, task->id); -} - -/* CLUSTER MIGRATION CANCEL [ID | ALL] - * - Reply: Number of cancelled tasks - * - * Cancels import tasks that overlap with the specified slot ranges. - * Multiple tasks may be cancelled. */ -static void clusterMigrationCommandCancel(client *c) { - sds task_id = NULL; - int num_cancelled = 0; - - /* Validate slot range arg count */ - if (c->argc != 4 && c->argc != 5) { - addReplyErrorArity(c); - return; - } - - if (!strcasecmp(c->argv[3]->ptr, "id")) { - if (c->argc != 5) { - addReplyErrorArity(c); - return; - } - task_id = c->argv[4]->ptr; - } else if (!strcasecmp(c->argv[3]->ptr, "all")) { - if (c->argc != 4) { - addReplyErrorArity(c); - return; - } - } else { - addReplyError(c, "unknown argument"); - return; - } - - num_cancelled = clusterAsmCancel(task_id, "user request"); - addReplyLongLong(c, num_cancelled); -} - -/* Reply with the status of the task. */ -static void replyTaskStatus(client *c, asmTask *task) { - mstime_t p = 0; - - addReplyMapLen(c, 12); - addReplyBulkCString(c, "id"); - addReplyBulkCString(c, task->id); - addReplyBulkCString(c, "slots"); - addReplyBulkSds(c, slotRangeArrayToString(task->slots)); - addReplyBulkCString(c, "source"); - addReplyBulkCBuffer(c, task->source, CLUSTER_NAMELEN); - addReplyBulkCString(c, "dest"); - addReplyBulkCBuffer(c, task->dest, CLUSTER_NAMELEN); - addReplyBulkCString(c, "operation"); - addReplyBulkCString(c, task->operation == ASM_IMPORT ? "import" : "migrate"); - addReplyBulkCString(c, "state"); - addReplyBulkCString(c, asmTaskStateToString(task->state)); - addReplyBulkCString(c, "last_error"); - addReplyBulkCBuffer(c, task->error, sdslen(task->error)); - addReplyBulkCString(c, "retries"); - addReplyLongLong(c, task->retry_count); - addReplyBulkCString(c, "create_time"); - addReplyLongLong(c, task->create_time); - addReplyBulkCString(c, "start_time"); - addReplyLongLong(c, task->start_time); - addReplyBulkCString(c, "end_time"); - addReplyLongLong(c, task->end_time); - - if (task->operation == ASM_MIGRATE && task->state == ASM_COMPLETED) - p = task->end_time - task->paused_time; - addReplyBulkCString(c, "write_pause_ms"); - addReplyLongLong(c, p); -} - -/* CLUSTER MIGRATION STATUS [ID | ALL] - * - Reply: Array of atomic slot migration tasks */ -static void clusterMigrationCommandStatus(client *c) { - listIter li; - listNode *ln; - - if (c->argc != 4 && c->argc != 5) { - addReplyErrorArity(c); - return; - } - - if (!strcasecmp(c->argv[3]->ptr, "id")) { - if (c->argc != 5) { - addReplyErrorArity(c); - return; - } - sds id = c->argv[4]->ptr; - asmTask *task = asmLookupTaskAt(asmManager->tasks, id); - if (!task) task = asmLookupTaskAt(asmManager->archived_tasks, id); - if (!task) { - addReplyArrayLen(c, 0); - return; - } - - addReplyArrayLen(c, 1); - replyTaskStatus(c, task); - } else if (!strcasecmp(c->argv[3]->ptr, "all")) { - if (c->argc != 4) { - addReplyErrorArity(c); - return; - } - addReplyArrayLen(c, listLength(asmManager->tasks) + - listLength(asmManager->archived_tasks)); - listRewind(asmManager->tasks, &li); - while ((ln = listNext(&li)) != NULL) - replyTaskStatus(c, listNodeValue(ln)); - - listRewind(asmManager->archived_tasks, &li); - while ((ln = listNext(&li)) != NULL) - replyTaskStatus(c, listNodeValue(ln)); - } else { - addReplyError(c, "unknown argument"); - return; - } -} - -/* CLUSTER MIGRATION - * | - * STATUS [ID | ALL] | - * CANCEL [ID | ALL]> -*/ -void clusterMigrationCommand(client *c) { - if (c->argc < 4) { - addReplyErrorArity(c); - return; - } - - if (strcasecmp(c->argv[2]->ptr, "import") == 0) { - clusterMigrationCommandImport(c); - } else if (strcasecmp(c->argv[2]->ptr, "status") == 0) { - clusterMigrationCommandStatus(c); - } else if (strcasecmp(c->argv[2]->ptr, "cancel") == 0) { - clusterMigrationCommandCancel(c); - } else { - addReplyError(c, "unknown argument"); - } -} - -/* Return the number of keys in the specified slot ranges. */ -unsigned long long asmCountKeysInSlots(slotRangeArray *slots) { - if (!slots) return 0; - - unsigned long long key_count = 0; - for (int i = 0; i < slots->num_ranges; i++) { - for (int j = slots->ranges[i].start; j <= slots->ranges[i].end; j++) { - key_count += kvstoreDictSize(server.db[0].keys, j); - } - } - return key_count; -} - -/* Log a human-readable message for ASM task lifecycle events. */ -void asmLogTaskEvent(asmTask *task, int event) { - sds str = slotRangeArrayToString(task->slots); - - switch (event) { - case ASM_EVENT_IMPORT_STARTED: - serverLog(LL_NOTICE, "Import task %s started for slots: %s", task->id, str); - break; - case ASM_EVENT_IMPORT_FAILED: - serverLog(LL_NOTICE, "Import task %s failed for slots: %s", task->id, str); - break; - case ASM_EVENT_TAKEOVER: - serverLog(LL_NOTICE, "Import task %s is ready to takeover slots: %s", task->id, str); - break; - case ASM_EVENT_IMPORT_COMPLETED: - serverLog(LL_NOTICE, "Import task %s completed for slots: %s (imported %llu keys)", - task->id, str, asmCountKeysInSlots(task->slots)); - break; - case ASM_EVENT_MIGRATE_STARTED: - serverLog(LL_NOTICE, "Migrate task %s started for slots: %s (keys at start: %llu)", - task->id, str, asmCountKeysInSlots(task->slots)); - break; - case ASM_EVENT_MIGRATE_FAILED: - serverLog(LL_NOTICE, "Migrate task %s failed for slots: %s", task->id, str); - break; - case ASM_EVENT_HANDOFF_PREP: - serverLog(LL_NOTICE, "Migrate task %s preparing to handoff for slots: %s", task->id, str); - break; - case ASM_EVENT_MIGRATE_COMPLETED: - serverLog(LL_NOTICE, "Migrate task %s completed for slots: %s (migrated %llu keys)", - task->id, str, asmCountKeysInSlots(task->slots)); - break; - default: - break; - } - - sdsfree(str); -} - -/* Notify the state change to the module and the cluster implementation. */ -void asmNotifyStateChange(asmTask *task, int event) { - RedisModuleClusterSlotMigrationInfo info = { - .version = REDISMODULE_CLUSTER_SLOT_MIGRATION_INFO_VERSION, - .task_id = task->id, - .slots = (RedisModuleSlotRangeArray *) task->slots - }; - memcpy(info.source_node_id, task->source, CLUSTER_NAMELEN); - memcpy(info.destination_node_id, task->dest, CLUSTER_NAMELEN); - - int module_event = -1; - if (event == ASM_EVENT_IMPORT_STARTED) module_event = REDISMODULE_SUBEVENT_CLUSTER_SLOT_MIGRATION_IMPORT_STARTED; - else if (event == ASM_EVENT_IMPORT_COMPLETED) module_event = REDISMODULE_SUBEVENT_CLUSTER_SLOT_MIGRATION_IMPORT_COMPLETED; - else if (event == ASM_EVENT_IMPORT_FAILED) module_event = REDISMODULE_SUBEVENT_CLUSTER_SLOT_MIGRATION_IMPORT_FAILED; - else if (event == ASM_EVENT_MIGRATE_STARTED) module_event = REDISMODULE_SUBEVENT_CLUSTER_SLOT_MIGRATION_MIGRATE_STARTED; - else if (event == ASM_EVENT_MIGRATE_COMPLETED) module_event = REDISMODULE_SUBEVENT_CLUSTER_SLOT_MIGRATION_MIGRATE_COMPLETED; - else if (event == ASM_EVENT_MIGRATE_FAILED) module_event = REDISMODULE_SUBEVENT_CLUSTER_SLOT_MIGRATION_MIGRATE_FAILED; - serverAssert(module_event != -1); - - moduleFireServerEvent(REDISMODULE_EVENT_CLUSTER_SLOT_MIGRATION, module_event, &info); - serverLog(LL_DEBUG, "Fire cluster asm module event, task %s: state=%s", - task->id, asmTaskStateToString(task->state)); - - if (clusterNodeIsMaster(getMyClusterNode())) { - /* Notify the cluster impl only if it is a real active import task. */ - if (task != asmManager->master_task) { - asmLogTaskEvent(task, event); - clusterAsmOnEvent(task->id, event, task->slots); - } - asmNotifyReplicasStateChange(task); /* Propagate state change to replicas */ - } -} - -void asmImportSetFailed(asmTask *task) { - serverAssert(task->operation == ASM_IMPORT); - if (task->state == ASM_FAILED) return; - - /* If we are in the RDB channel transfer state, we need to - * close the client that was created for the RDB channel. */ - if (task->rdb_channel_conn && task->rdb_channel_state == ASM_RDBCHANNEL_TRANSFER) { - client *c = connGetPrivateData(task->rdb_channel_conn); - serverAssert(c->task == task); - task->rdb_channel_conn = NULL; - c->task = NULL; - c->flags &= ~CLIENT_MASTER; - freeClientAsync(c); - } - - /* If in the wait stream EOF or streaming buffer state, we need to close the - * client that was created for the main channel. */ - if (task->main_channel_conn && - (task->state == ASM_STREAMING_BUF || task->state == ASM_WAIT_STREAM_EOF)) - { - client *c = connGetPrivateData(task->main_channel_conn); - serverAssert(c->task == task); - task->main_channel_conn = NULL; - c->task = NULL; - c->flags &= ~CLIENT_MASTER; - freeClientAsync(c); - } - - /* Close the connections */ - if (task->rdb_channel_conn) connClose(task->rdb_channel_conn); - if (task->main_channel_conn) connClose(task->main_channel_conn); - task->rdb_channel_conn = NULL; - task->main_channel_conn = NULL; - - /* Clear the replication data buffer */ - asmManager->sync_buffer_peak = max(asmManager->sync_buffer_peak, task->sync_buffer.peak); - replDataBufClear(&task->sync_buffer); - - /* Mark the task as failed and notify the cluster */ - task->state = ASM_FAILED; - asmNotifyStateChange(task, ASM_EVENT_IMPORT_FAILED); - /* This node may become replica, only master can setup new slot trimming jobs. */ - if (clusterNodeIsMaster(getMyClusterNode())) - asmTrimJobSchedule(task->slots); -} - -void asmMigrateSetFailed(asmTask *task) { - serverAssert(task->operation == ASM_MIGRATE); - if (task->state == ASM_FAILED) return; - - /* Close the RDB and main channel clients*/ - if (task->rdb_channel_client) { - task->rdb_channel_client->task = NULL; - freeClientAsync(task->rdb_channel_client); - task->rdb_channel_client = NULL; - } - if (task->main_channel_client) { - task->main_channel_client->task = NULL; - freeClientAsync(task->main_channel_client); - task->main_channel_client = NULL; - } - - /* Actually it is not necessary to clear the sync buffer here, - * to make asmTaskReset work properly after migrate task failed */ - replDataBufClear(&task->sync_buffer); - - /* Mark the task as failed and notify the cluster */ - task->state = ASM_FAILED; - asmNotifyStateChange(task, ASM_EVENT_MIGRATE_FAILED); -} - -void asmTaskSetFailed(asmTask *task, const char *fmt, ...) { - va_list ap; - sds error = sdsempty(); - - /* Set the error message */ - va_start(ap, fmt); - error = sdscatvprintf(error, fmt, ap); - va_end(ap); - error = sdscatprintf(error, " (state: %s, rdb_channel_state: %s)", - asmTaskStateToString(task->state), - asmTaskStateToString(task->rdb_channel_state)); - sdsfree(task->error); - task->error = error; - - /* Log the error */ - sds slots_str = slotRangeArrayToString(task->slots); - serverLog(LL_WARNING, "%s task %s failed: slots=%s, err=%s", - task->operation == ASM_IMPORT ? "Import" : "Migrate", - task->id, slots_str, task->error); - sdsfree(slots_str); - - if (task->operation == ASM_IMPORT) - asmImportSetFailed(task); - else - asmMigrateSetFailed(task); -} - -/* The task is completed or canceled. Update stats and move it to - * the archived list. */ -void asmTaskFinalize(asmTask *task) { - listNode *ln = listFirst(asmManager->tasks); - serverAssert(ln->value == task); - - task->source_node = NULL; /* Should never access it */ - task->end_time = server.mstime; - - if (task->operation == ASM_IMPORT) { - asmManager->sync_buffer_peak = max(asmManager->sync_buffer_peak, - task->sync_buffer.peak); - replDataBufClear(&task->sync_buffer); /* Not used, so save memory */ - } - - /* Move the task to the archived list */ - listUnlinkNode(asmManager->tasks, ln); - listLinkNodeHead(asmManager->archived_tasks, ln); -} - -static void asmTaskCancel(asmTask *task, const char *reason) { - if (task->state == ASM_CANCELED) return; - - asmTaskSetFailed(task, "Cancelled due to %s", reason); - task->state = ASM_CANCELED; - asmTaskFinalize(task); -} - -void asmImportTakeover(asmTask *task) { - serverAssert(task->state == ASM_WAIT_STREAM_EOF || - task->state == ASM_STREAMING_BUF); - - /* Free the main channel connection since it is no longer needed. */ - serverAssert(task->main_channel_conn != NULL); - client *c = connGetPrivateData(task->main_channel_conn); - c->task = NULL; - c->flags &= ~CLIENT_MASTER; - freeClientAsync(c); - task->main_channel_conn = NULL; - - task->state = ASM_TAKEOVER; - asmLogTaskEvent(task, ASM_EVENT_TAKEOVER); - clusterAsmOnEvent(task->id, ASM_EVENT_TAKEOVER, task->slots); -} - -void asmCallbackOnFreeClient(client *c) { - asmTask *task = c->task; - if (!task) return; - - /* If the RDB channel connection is closed, mark the task as failed. */ - if (c->conn && task->rdb_channel_conn == c->conn) { - /* We create the client only when transferring data on the RDB channel */ - serverAssert(task->rdb_channel_state == ASM_RDBCHANNEL_TRANSFER); - task->rdb_channel_conn = NULL; /* Will be freed by freeClient */ - c->flags &= ~CLIENT_MASTER; - asmTaskSetFailed(task, "RDB channel - Connection is closed"); - return; - } - - if (c->conn && task->main_channel_conn == c->conn) { - /* After or in the process of streaming buffer to DB, a client will be - * created based on the main channel connection. */ - serverAssert(task->state == ASM_STREAMING_BUF || - task->state == ASM_WAIT_STREAM_EOF); - task->main_channel_conn = NULL; /* Will be freed by freeClient */ - c->flags &= ~CLIENT_MASTER; - asmTaskSetFailed(task, "Main channel - Connection is closed"); - return; - } - - if (c == task->rdb_channel_client) { - /* TODO: Detect whether the bgsave is completed successfully and - * update the state properly. */ - task->rdb_channel_state = ASM_COMPLETED; - /* We may not have detected whether the child process has exited yet, - * so we can't determine whether the client has completed the slots - * snapshot transfer. If the RDB channel is interrupted unexpectedly, - * the destination side will also close the main channel. - * So here we just reset the RDB channel client of task. */ - task->rdb_channel_client = NULL; - return; - } - - /* If the main channel client is closed, we need to mark the task as failed - * and clean up the RDB channel client if it exists. */ - if (c == task->main_channel_client) { - task->main_channel_client = NULL; - /* The rdb channel client will be cleaned up */ - asmTaskSetFailed(task, "Main and RDB channel clients are disconnected."); - return; - } -} - -/* Sends an AUTH command to the source node using the internal secret. - * Returns an error string if the command fails, or NULL on success. */ -char *asmSendInternalAuth(connection *conn) { - size_t len = 0; - const char *internal_secret = clusterGetSecret(&len); - serverAssert(internal_secret != NULL); - - sds secret = sdsnewlen(internal_secret, len); - char *err = sendCommand(conn, "AUTH", "internal connection", secret, NULL); - sdsfree(secret); - return err; -} - -/* Handles the RDB channel sync with the source node. - * This function is called when the RDB channel is established - * and ready to sync with the source node. */ -void asmRdbChannelSyncWithSource(connection *conn) { - asmTask *task = connGetPrivateData(conn); - char *err = NULL; - sds task_error_msg = NULL; - - /* Check for errors in the socket: after a non blocking connect() we - * may find that the socket is in error state. */ - if (connGetState(conn) != CONN_STATE_CONNECTED) - goto error; - - /* Check if the task is in a fail point state */ - if (unlikely(asmDebugIsFailPointActive(ASM_IMPORT_RDB_CHANNEL, task->rdb_channel_state))) { - char buf[1]; - /* Simulate a failure by shutting down the connection. On some operating - * systems (e.g. Linux), the socket's receive buffer is not flushed - * immediately, so we issue a dummy read to drain any pending data and - * surface the error condition. - * using shutdown() instead of connShutdown() because connTLSShutdown() - * will free the connection directly, which is not what we want. */ - shutdown(conn->fd, SHUT_RDWR); - connRead(conn, buf, 1); - } - - if (task->rdb_channel_state == ASM_CONNECTING) { - connSetReadHandler(conn, asmRdbChannelSyncWithSource); - connSetWriteHandler(conn, NULL); - - /* Send AUTH command to source node using internal auth */ - err = asmSendInternalAuth(conn); - if (err) goto write_error; - task->rdb_channel_state = ASM_AUTH_REPLY; - return; - } - - if (task->rdb_channel_state == ASM_AUTH_REPLY) { - err = receiveSynchronousResponse(conn); - /* The source node did not reply */ - if (err == NULL) goto no_response_error; - - /* Check `+OK` reply */ - if (!strcmp(err, "+OK")) { - sdsfree(err); - err = NULL; - task->rdb_channel_state = ASM_RDBCHANNEL_REQUEST; - serverLog(LL_NOTICE, "Source node replied to AUTH command, syncslots rdb channel operation can continue..."); - } else { - task_error_msg = sdscatprintf(sdsempty(), - "Error reply to AUTH from source: %s", err); - sdsfree(err); - goto error; - } - } - - if (task->rdb_channel_state == ASM_RDBCHANNEL_REQUEST) { - err = sendCommand(conn, "CLUSTER", "SYNCSLOTS", "RDBCHANNEL", task->id, NULL); - if (err) goto write_error; - task->rdb_channel_state = ASM_RDBCHANNEL_REPLY; - return; - } - - if (task->rdb_channel_state == ASM_RDBCHANNEL_REPLY) { - err = receiveSynchronousResponse(conn); - /* The source node did not reply */ - if (err == NULL) goto no_response_error; - - /* Ignore ‘\n' sent from the source node to keep the connection alive. */ - if (sdslen(err) == 0) { - serverLog(LL_DEBUG, "Received an empty line in RDBCHANNEL reply, slots snapshot delivery will start later"); - sdsfree(err); - return; - } - - /* Check `+SLOTSSNAPSHOT` reply */ - if (!strncmp(err, "+SLOTSSNAPSHOT", strlen("+SLOTSSNAPSHOT"))) { - sdsfree(err); - err = NULL; - task->state = ASM_ACCUMULATE_BUF; - /* The main channel buffers pending commands. */ - connSetReadHandler(task->main_channel_conn, asmSyncBufferReadFromConn); - - task->rdb_channel_state = ASM_RDBCHANNEL_TRANSFER; - client *c = createClient(conn); - c->flags |= (CLIENT_MASTER | CLIENT_INTERNAL | CLIENT_ASM_IMPORTING); - c->querybuf = sdsempty(); - c->authenticated = 1; - c->user = NULL; - c->task = task; - serverLog(LL_NOTICE, - "Source node replied to SLOTSSNAPSHOT, syncing slots snapshot can continue..."); - } else { - task_error_msg = sdscatprintf(sdsempty(), - "Error reply to CLUSTER SYNCSLOTS RDBCHANNEL from the source: %s", err); - sdsfree(err); - goto error; - } - return; - } - return; - -no_response_error: - task_error_msg = sdsnew("Source node did not respond to command during RDBCHANNELSYNCSLOTS handshake"); - /* Fall through to regular error handling */ - -error: - asmTaskSetFailed(task, "RDB channel - Failed to sync with the source node: %s", - task_error_msg ? task_error_msg : connGetLastError(conn)); - sdsfree(task_error_msg); - return; - -write_error: /* Handle sendCommand() errors. */ - task_error_msg = sdscatprintf(sdsempty(), "Failed to send command to the source node: %s", err); - sdsfree(err); - goto error; -} - -char *asmSendSlotRangesSync(connection *conn, asmTask *task) { - /* Prepare CLUSTER SYNCSLOTS SYNC command */ - serverAssert(task->slots->num_ranges <= CLUSTER_SLOTS); - int argc = task->slots->num_ranges * 2 + 4; - char **args = zcalloc(sizeof(char*) * argc); - size_t *lens = zcalloc(sizeof(size_t) * argc); - - args[0] = "CLUSTER"; - args[1] = "SYNCSLOTS"; - args[2] = "SYNC"; - args[3] = task->id; - lens[0] = strlen("CLUSTER"); - lens[1] = strlen("SYNCSLOTS"); - lens[2] = strlen("SYNC"); - lens[3] = sdslen(task->id); - - int i = 4; - for (int j = 0; j < task->slots->num_ranges; j++) { - slotRange *sr = &task->slots->ranges[j]; - args[i] = sdscatprintf(sdsempty(), "%d", sr->start); - lens[i] = sdslen(args[i]); - args[i+1] = sdscatprintf(sdsempty(), "%d", sr->end); - lens[i+1] = sdslen(args[i+1]); - i += 2; - } - serverAssert(i == argc); - - /* Send command to source node */ - char *err = sendCommandArgv(conn, argc, args, lens); - - /* Free allocated memory */ - for (int j = 4; j < argc; j++) { - sdsfree(args[j]); - } - zfree(args); - zfree(lens); - - return err; -} - -void asmSyncWithSource(connection *conn) { - asmTask *task = connGetPrivateData(conn); - char *err = NULL; - - /* Some task errors are not network issues, we record them explicitly. */ - sds task_error_msg = NULL; - - /* Check for errors in the socket: after a non blocking connect() we - * may find that the socket is in error state. */ - if (connGetState(conn) != CONN_STATE_CONNECTED) - goto error; - - /* Check if the fail point is active for this channel and state */ - if (unlikely(asmDebugIsFailPointActive(ASM_IMPORT_MAIN_CHANNEL, task->state))) { - char buf[1]; - shutdown(conn->fd, SHUT_RDWR); - connRead(conn, buf, 1); - } - - if (task->state == ASM_CONNECTING) { - connSetReadHandler(conn, asmSyncWithSource); - connSetWriteHandler(conn, NULL); - /* Send AUTH command to source node using internal auth */ - err = asmSendInternalAuth(conn); - if (err) goto write_error; - task->state = ASM_AUTH_REPLY; - return; - } - - if (task->state == ASM_AUTH_REPLY) { - err = receiveSynchronousResponse(conn); - /* The source node did not reply */ - if (err == NULL) goto no_response_error; - - /* Check `+OK` reply */ - if (!strcmp(err, "+OK")) { - sdsfree(err); - err = NULL; - task->state = ASM_SEND_HANDSHAKE; - serverLog(LL_NOTICE, "Source node replied to AUTH command, syncslots can continue..."); - } else { - task_error_msg = sdscatprintf(sdsempty(), - "Error reply to AUTH from the source: %s", err); - sdsfree(err); - goto error; - } - } - - if (task->state == ASM_SEND_HANDSHAKE) { - sds node_id = sdsnewlen(clusterNodeGetName(getMyClusterNode()), CLUSTER_NAMELEN); - err = sendCommand(conn, "CLUSTER", "SYNCSLOTS", "CONF", "NODE-ID", node_id, NULL); - sdsfree(node_id); - if (err) goto write_error; - task->state = ASM_HANDSHAKE_REPLY; - return; - } - - if (task->state == ASM_HANDSHAKE_REPLY) { - err = receiveSynchronousResponse(conn); - /* The source node did not reply */ - if (err == NULL) goto no_response_error; - - /* Check `+OK` reply */ - if (!strcmp(err, "+OK")) { - sdsfree(err); - err = NULL; - task->state = ASM_SEND_SYNCSLOTS; - serverLog(LL_NOTICE, "Source node replied to SYNCSLOTS CONF command, syncslots can continue..."); - } else { - task_error_msg = sdscatprintf(sdsempty(), - "Error reply to CLUSTER SYNCSLOTS CONF from the source: %s", err); - sdsfree(err); - goto error; - } - } - - if (task->state == ASM_SEND_SYNCSLOTS) { - err = asmSendSlotRangesSync(conn, task); - if (err) goto write_error; - - task->state = ASM_SYNCSLOTS_REPLY; - return; - } - - if (task->state == ASM_SYNCSLOTS_REPLY) { - err = receiveSynchronousResponse(conn); - /* The source node did not reply */ - if (err == NULL) goto no_response_error; - - /* Check `+RDBCHANNELSYNCSLOTS` reply */ - if (!strncmp(err, "+RDBCHANNELSYNCSLOTS", strlen("+RDBCHANNELSYNCSLOTS"))) { - sdsfree(err); - err = NULL; - task->state = ASM_INIT_RDBCHANNEL; - serverLog(LL_NOTICE, - "Source node replied to SYNCSLOTS SYNC, syncslots can continue..."); - } else if (!strncmp(err, "-NOTREADY", strlen("-NOTREADY"))) { - /* The source-side cluster is temporarily not ready to start a - * migration and replied -NOTREADY. We could fail this attempt and - * let the import task start another attempt later but that could - * trigger unnecessary cleanup in the cluster implementation. - * Instead, we'll retry sending SYNCSLOTS later in asmCron(). */ - sdsfree(err); - task->state = ASM_SEND_SYNCSLOTS; - serverLog(LL_NOTICE, - "Source node replied to SYNCSLOTS SYNC with -NOTREADY, will retry later..."); - return; - } else { - task_error_msg = sdscatprintf(sdsempty(), - "Error reply to CLUSTER SYNCSLOTS SYNC from the source: %s", err); - sdsfree(err); - goto error; - } - } - - if (task->state == ASM_INIT_RDBCHANNEL) { - /* Create RDB channel connection */ - char *ip = clusterNodeIp(task->source_node); - int port = server.tls_replication ? clusterNodeTlsPort(task->source_node) : - clusterNodeTcpPort(task->source_node); - task->rdb_channel_conn = connCreate(server.el, connTypeOfReplication()); - if (connConnect(task->rdb_channel_conn, ip, port, - server.bind_source_addr, asmRdbChannelSyncWithSource) == C_ERR) - { - serverLog(LL_WARNING, "Unable to connect to the source node: %s", - connGetLastError(task->rdb_channel_conn)); - goto error; - } - task->rdb_channel_state = ASM_CONNECTING; - connSetPrivateData(task->rdb_channel_conn, task); - serverLog(LL_NOTICE, - "RDB channel connection to source node %.40s established, waiting for AUTH reply...", - task->source); - - /* Main channel waits for the new event */ - connSetReadHandler(conn, NULL); - return; - } - return; - -no_response_error: - serverLog(LL_WARNING, "Source node did not respond to command during SYNCSLOTS handshake"); - /* Fall through to regular error handling */ - -error: - asmTaskSetFailed(task, "Main channel - Failed to sync with source node: %s", - task_error_msg ? task_error_msg : connGetLastError(conn)); - sdsfree(task_error_msg); - return; - -write_error: /* Handle sendCommand() errors. */ - serverLog(LL_WARNING, "Failed to send command to source node: %s", err); - sdsfree(err); - goto error; -} - -int asmImportSendACK(asmTask *task) { - serverAssert(task->operation == ASM_IMPORT && task->state == ASM_WAIT_STREAM_EOF); - serverLog(LL_DEBUG, "Destination node applied offset is %lld", task->dest_offset); - - char offset[64]; - ull2string(offset, sizeof(offset), task->dest_offset); - - char *err = sendCommand(task->main_channel_conn, "CLUSTER", "SYNCSLOTS", "ACK", - asmTaskStateToString(task->state), offset, NULL); - if (err) { - asmTaskSetFailed(task, "Main channel - Failed to send ACK: %s", err); - sdsfree(err); - return C_ERR; - } - return C_OK; -} - -/* Called when the RDB channel begins sending the snapshot. - * From this point on, the main channel also starts sending incremental streams. */ -void asmSlotSnapshotAndStreamStart(struct asmTask *task) { - if (task == NULL || task->state != ASM_WAIT_BGSAVE_START) return; - - if (unlikely(asmDebugIsFailPointActive(ASM_MIGRATE_RDB_CHANNEL, task->state))) { - shutdown(task->rdb_channel_client->conn->fd, SHUT_RDWR); - return; - } - task->main_channel_client->replstate = SLAVE_STATE_SEND_BULK_AND_STREAM; - - task->state = ASM_SEND_BULK_AND_STREAM; - task->rdb_channel_state = ASM_RDBCHANNEL_TRANSFER; - - /* From the source node's perspective, the destination node begins to accumulate - * the buffer while the RDB channel starts applying the slot snapshot data. */ - task->dest_state = ASM_ACCUMULATE_BUF; - task->dest_slots_snapshot_time = server.mstime; -} - -/* Called when the RDB channel has succeeded in sending the snapshot. */ -void asmSlotSnapshotSucceed(struct asmTask *task) { - if (task == NULL || task->state != ASM_SEND_BULK_AND_STREAM) return; - - /* The destination starts sending ACKs to keep the main channel alive after - * receiving the snapshot, so here we need to update the last interaction - * time to avoid false timeout. */ - task->main_channel_client->lastinteraction = server.unixtime; - - task->state = ASM_SEND_STREAM; - task->rdb_channel_state = ASM_COMPLETED; -} - -/* Called when the RDB channel fails to send the snapshot. */ -void asmSlotSnapshotFailed(struct asmTask *task) { - if (task == NULL || task->state != ASM_SEND_BULK_AND_STREAM) return; - - asmTaskSetFailed(task, "RDB channel - Failed to send slots snapshot"); -} - -/* CLUSTER SYNCSLOTS SNAPSHOT-EOF - * - * This command is sent by the source node to the destination node to indicate - * that the slots snapshot has ended. */ -void clusterSyncSlotsSnapshotEOF(client *c) { - /* This client is RDB channel connection. */ - asmTask *task = c->task; - if (!task || task->rdb_channel_state != ASM_RDBCHANNEL_TRANSFER || - c->conn != task->rdb_channel_conn) - { - /* Unexpected SNAPSHOT-EOF command */ - serverLog(LL_WARNING, "Unexpected CLUSTER SYNCSLOTS SNAPSHOT-EOF command: " - "rdb_channel_state=%s", - asmTaskStateToString(task ? task->rdb_channel_state : ASM_NONE)); - freeClientAsync(c); - return; - } - - /* RDB channel state: ASM_RDBCHANNEL_TRANSFER */ - if (unlikely(asmDebugIsFailPointActive(ASM_IMPORT_RDB_CHANNEL, task->rdb_channel_state))) { - freeClientAsync(c); /* Simulate a failure */ - return; - } - - /* Clear the RDB channel connection */ - task->rdb_channel_conn = NULL; - task->rdb_channel_state = ASM_COMPLETED; - serverLog(LL_NOTICE, "RDB channel snapshot transfer completed for the import task."); - - /* Free the RDB channel connection. */ - c->task = NULL; - c->flags &= ~CLIENT_MASTER; - freeClientAsync(c); - - /* Will start streaming the buffer to DB, don't start here since now - * we are in the context of executing command, otherwise, redis will - * generate a big MULTI-EXEC including all the commands in the buffer. - * just update the state here, and do it in beforeSleep(). */ - task->state = ASM_READY_TO_STREAM; - connSetReadHandler(task->main_channel_conn, NULL); -} - -/* CLUSTER SYNCSLOTS STREAM-EOF - * - * This command is sent by the source node to the destination node to indicate - * that the slot sync stream has ended and the slots can be handed off. */ -void clusterSyncSlotsStreamEOF(client *c) { - asmTask *task = c->task; - - if (!task || task->operation != ASM_IMPORT) { - serverLog(LL_WARNING, "Unexpected CLUSTER SYNCSLOTS STREAM-EOF command"); - freeClientAsync(c); - return; - } - - if (task->state == ASM_STREAMING_BUF) { - /* We are still streaming the buffer to DB, mark the EOF received, and we - * can take over after streaming is EOF. Since we may release the context - * in asmImportTakeover, this breaks the context for streaming buffer. */ - task->stream_eof_during_streaming = 1; - serverLog(LL_NOTICE, "CLUSTER SYNCSLOTS STREAM-EOF received during streaming buffer"); - return; - } - - if (task->state != ASM_WAIT_STREAM_EOF) { - serverLog(LL_WARNING, "Unexpected CLUSTER SYNCSLOTS STREAM-EOF state: %s", - asmTaskStateToString(task->state)); - freeClientAsync(c); - return; - } - serverLog(LL_NOTICE, "CLUSTER SYNCSLOTS STREAM-EOF received when waiting for STREAM-EOF"); - - /* STREAM-EOF received, the source is ready to handoff, takeover now. */ - asmImportTakeover(task); -} - -/* Start the import task. */ -static void asmStartImportTask(asmTask *task) { - if (task->operation != ASM_IMPORT || task->state != ASM_NONE) return; - sds slots_str = slotRangeArrayToString(task->slots); - - /* Sanity check: Clean up any keys that exist in slots not owned by this node. - * This handles cases where users previously migrated slots using legacy method - * but left behind orphaned keys, or maybe cluster missed cleaning up during - * previous operations, which could interfere with the ASM import process. */ - asmTrimSlotsIfNotOwned(task->slots); - - /* Check if there is any trim job in progress for the slot ranges. - * We can't start the import task since the trim job will modify the data.*/ - int trim_in_progress = asmIsAnyTrimJobOverlaps(task->slots); - - /* Notify the cluster implementation to prepare for the import task. */ - int impl_ret = clusterAsmOnEvent(task->id, ASM_EVENT_IMPORT_PREP, task->slots); - - /* We do not start the import task if trim is disabled by module. */ - int disabled_by_module = server.cluster_module_trim_disablers > 0; - - static int start_blocked_logged = 0; - /* Cannot start import task since pause action is performed. Otherwise, we - * will break the promise that no writes are performed during the pause. */ - if (isPausedActions(PAUSE_ACTION_CLIENT_ALL) || - isPausedActions(PAUSE_ACTION_CLIENT_WRITE) || - trim_in_progress || - impl_ret != C_OK || - disabled_by_module) - { - const char *reason = disabled_by_module ? "trim is disabled by module" : - impl_ret != C_OK ? "cluster is not ready" : - trim_in_progress ? "trim in progress for some of the slots" : - "server paused"; - if (start_blocked_logged == 0) { - serverLog(LL_WARNING, "Can not start import task %s for slots: %s due to %s", - task->id, slots_str, reason); - start_blocked_logged = 1; - } - sdsfree(slots_str); - return; - } - start_blocked_logged = 0; /* Reset the log flag */ - - /* Detect if the cluster topology is changed. We should cancel the task if - * we can not schedule it, and update the source node if needed. */ - sds err = NULL; - clusterNode *source = validateImportSlotRanges(task->slots, &err, task); - if (!source) { - asmTaskCancel(task, err); - sdsfree(slots_str); - sdsfree(err); - return; - } - /* Now I'm the owner of the slot range, cancel the import task. */ - if (source == getMyClusterNode()) { - asmTaskCancel(task, "slots owned by myself now"); - sdsfree(slots_str); - return; - } - /* Change the source node if needed. */ - if (source != task->source_node) { - task->source_node = source; - memcpy(task->source, clusterNodeGetName(source), CLUSTER_NAMELEN); - serverLog(LL_NOTICE, "Import task %s source node changed: slots=%s, " - "new_source=%.40s", task->id, slots_str, clusterNodeGetName(source)); - } - sdsfree(slots_str); - - task->state = ASM_CONNECTING; - task->start_time = server.mstime; - asmNotifyStateChange(task, ASM_EVENT_IMPORT_STARTED); - - task->main_channel_conn = connCreate(server.el, connTypeOfReplication()); - char *ip = clusterNodeIp(task->source_node); - int port = server.tls_replication ? clusterNodeTlsPort(task->source_node) : - clusterNodeTcpPort(task->source_node); - if (connConnect(task->main_channel_conn, ip, port, server.bind_source_addr, - asmSyncWithSource) == C_ERR) - { - asmTaskSetFailed(task, "Main channel - Failed to connect to source node: %s", - connGetLastError(task->main_channel_conn)); - return; - } - connSetPrivateData(task->main_channel_conn, task); -} - -void clusterSyncSlotsCommand(client *c) { - /* Only internal clients are allowed to execute this command to avoid - * potential attack, since some state changes are not well protected, - * external clients may damage the slot migration state. */ - if (!(c->flags & (CLIENT_INTERNAL | CLIENT_MASTER))) { - addReplyError(c, "CLUSTER SYNCSLOTS subcommands are only allowed for internal clients"); - c->flags |= CLIENT_CLOSE_AFTER_REPLY; - return; - } - - /* On replica, only allow master client to execute CONF subcommand. */ - if (!clusterNodeIsMaster(getMyClusterNode())) { - if (!(c->flags & CLIENT_MASTER)) { - /* Not master client, reject all subcommands and close the connection. */ - addReplyError(c, "CLUSTER SYNCSLOTS subcommands are only allowed for master"); - c->flags |= CLIENT_CLOSE_AFTER_REPLY; - return; - } else { - /* Only allow CONF subcommand on replica. */ - if (strcasecmp(c->argv[2]->ptr, "conf")) return; - } - } - - if (!strcasecmp(c->argv[2]->ptr, "sync") && c->argc >= 6) { - /* CLUSTER SYNCSLOTS SYNC [ ] */ - if (c->argc % 2 == 1) { - addReplyErrorArity(c); - return; - } - - slotRangeArray *slots = parseSlotRangesOrReply(c, c->argc, 4); - if (!slots) return; - - /* Validate that the slot ranges are valid and that migration can be - * initiated for them. */ - sds err = NULL; - clusterNode *source = validateImportSlotRanges(slots, &err, NULL); - if (!source) { - addReplyErrorSds(c, err); - slotRangeArrayFree(slots); - return; - } - - /* Check if the source node is the same as the current node. */ - if (source != getMyClusterNode()) { - addReplyError(c, "This node is not the owner of the slots"); - slotRangeArrayFree(slots); - return; - } - - /* Verify the destination node is known and is a master. */ - if (c->node_id) { - clusterNode *dest = clusterLookupNode(c->node_id, CLUSTER_NAMELEN); - if (dest == NULL || !clusterNodeIsMaster(dest)) { - addReplyErrorFormat(c, "Destination node %.40s is not a master", c->node_id); - slotRangeArrayFree(slots); - return; - } - } - - sds task_id = c->argv[3]->ptr; - /* Notify the cluster implementation to prepare for the migrate task. */ - if (clusterAsmOnEvent(task_id, ASM_EVENT_MIGRATE_PREP, slots) != C_OK || - asmDebugIsFailPointActive(ASM_MIGRATE_MAIN_CHANNEL, ASM_NONE)) - { - addReplyError(c, "-NOTREADY Cluster is not ready to migrate slots"); - slotRangeArrayFree(slots); - return; - } - - /* We do not start the migrate task if trim is disabled by module. */ - int disabled_by_module = server.cluster_module_trim_disablers > 0; - if (disabled_by_module) { - addReplyError(c, "Trim is disabled by module"); - slotRangeArrayFree(slots); - return; - } - - asmTask *task = listLength(asmManager->tasks) == 0 ? NULL : - listNodeValue(listFirst(asmManager->tasks)); - if (task && !strcmp(task->id, task_id) && - task->operation == ASM_MIGRATE && task->state == ASM_FAILED && - slotRangeArrayIsEqual(slots, task->slots) && - memcmp(task->dest, c->node_id, CLUSTER_NAMELEN) == 0) - { - /* Reuse the failed task */ - asmTaskReset(task); - slotRangeArrayFree(task->slots); /* Will be set again later */ - task->retry_count++; - } else if (task) { - if (task->state == ASM_FAILED) { - /* We can create a new migrate task only if the current one is - * failed, cancel the failed task to create a new one. */ - asmTaskCancel(task, "new migration requested"); - task = NULL; - } else { - addReplyError(c, "Another ASM task is already in progress"); - slotRangeArrayFree(slots); - return; - } - } - - /* Create the migrate slots task and add it to the list, - * otherwise reuse the existing one */ - if (task == NULL) { - task = asmTaskCreate(task_id); - task->start_time = server.mstime; /* Start immediately */ - serverAssert(listLength(asmManager->tasks) == 0); - listAddNodeTail(asmManager->tasks, task); - } - - task->slots = slots; - task->operation = ASM_MIGRATE; - memcpy(task->source, clusterNodeGetName(getMyClusterNode()), CLUSTER_NAMELEN); - if (c->node_id) memcpy(task->dest, c->node_id, CLUSTER_NAMELEN); - - task->main_channel_client = c; - c->task = task; - - /* We mark the main channel client as a replica, so this client is limited - * by the client output buffer settings for replicas. The replstate has - * no real significance, just to prevent it from going online. */ - c->flags |= (CLIENT_SLAVE | CLIENT_ASM_MIGRATING); - c->replstate = SLAVE_STATE_WAIT_RDB_CHANNEL; - if (server.repl_disable_tcp_nodelay) - connDisableTcpNoDelay(c->conn); /* Non-critical if it fails. */ - listAddNodeTail(server.slaves, c); - createReplicationBacklogIfNeeded(); - - /* Wait for RDB channel to be ready */ - task->state = ASM_WAIT_RDBCHANNEL; - - sds slots_str = slotRangeArrayToString(slots); - serverLog(LL_NOTICE, "Migrate task %s created: src=%.40s, dest=%.40s, slots=%s", - task->id, task->source, task->dest, slots_str); - sdsfree(slots_str); - - asmNotifyStateChange(task, ASM_EVENT_MIGRATE_STARTED); - - /* Keep the client in the main thread to avoid data races between the - * connWrite call below and the client's event handler in IO threads. */ - if (c->tid != IOTHREAD_MAIN_THREAD_ID) keepClientInMainThread(c); - - /* addReply*() is not suitable for clients in SLAVE_STATE_WAIT_RDB_CHANNEL state. */ - if (connWrite(c->conn, "+RDBCHANNELSYNCSLOTS\r\n", 22) != 22) - freeClientAsync(c); - } else if (!strcasecmp(c->argv[2]->ptr, "rdbchannel") && c->argc == 4) { - /* CLUSTER SYNCSLOTS RDBCHANNEL */ - sds task_id = c->argv[3]->ptr; - if (sdslen(task_id) != CLUSTER_NAMELEN) { - addReplyError(c, "Invalid task id"); - return; - } - - if (listLength(asmManager->tasks) == 0) { - addReplyError(c, "No slot migration task in progress"); - return; - } - - asmTask *task = listNodeValue(listFirst(asmManager->tasks)); - if (task->operation != ASM_MIGRATE || task->state != ASM_WAIT_RDBCHANNEL || - strcmp(task->id, task_id) != 0) - { - addReplyError(c, "Another migration task is already in progress"); - return; - } - - if (unlikely(asmDebugIsFailPointActive(ASM_MIGRATE_MAIN_CHANNEL, task->state))) { - /* Close the main channel client before rdb channel client connects */ - if (task->main_channel_client) - freeClient(task->main_channel_client); - } - - /* The main channel client must be present when setting RDB channel client */ - if (task->main_channel_client == NULL) { - /* Maybe the main channel connection is closed. */ - addReplyError(c, "Main channel connection is not established"); - return; - } - - /* Mark the client as a slave to generate slots snapshot */ - c->flags |= (CLIENT_SLAVE | CLIENT_REPL_RDB_CHANNEL | CLIENT_REPL_RDBONLY | CLIENT_ASM_MIGRATING); - c->slave_capa |= SLAVE_CAPA_EOF; - c->slave_req |= (SLAVE_REQ_SLOTS_SNAPSHOT | SLAVE_REQ_RDB_CHANNEL); - c->replstate = SLAVE_STATE_WAIT_BGSAVE_START; - c->repldbfd = -1; - if (server.repl_disable_tcp_nodelay) - connDisableTcpNoDelay(c->conn); /* Non-critical if it fails. */ - listAddNodeTail(server.slaves, c); - - /* Wait for bgsave to start for slots sync */ - task->state = ASM_WAIT_BGSAVE_START; - task->rdb_channel_state = ASM_WAIT_BGSAVE_START; - task->rdb_channel_client = c; - c->task = task; - - /* Keep the client in the main thread to avoid data races between the - * connWrite call in startBgsaveForReplication and the client's event - * handler in IO threads. */ - if (c->tid != IOTHREAD_MAIN_THREAD_ID) keepClientInMainThread(c); - - if (!hasActiveChildProcess()) { - startBgsaveForReplication(c->slave_capa, c->slave_req); - } else { - serverLog(LL_NOTICE, "BGSAVE for slots snapshot sync delayed"); - } - } else if (!strcasecmp(c->argv[2]->ptr, "snapshot-eof") && c->argc == 3) { - /* CLUSTER SYNCSLOTS SNAPSHOT-EOF */ - clusterSyncSlotsSnapshotEOF(c); - } else if (!strcasecmp(c->argv[2]->ptr, "stream-eof") && c->argc == 3) { - /* CLUSTER SYNCSLOTS STREAM-EOF */ - clusterSyncSlotsStreamEOF(c); - } else if (!strcasecmp(c->argv[2]->ptr, "ack") && c->argc == 5) { - /* CLUSTER SYNCSLOTS ACK */ - long long offset; - int dest_state; - - if (!strcasecmp(c->argv[3]->ptr, asmTaskStateToString(ASM_STREAMING_BUF))) { - dest_state = ASM_STREAMING_BUF; - } else if (!strcasecmp(c->argv[3]->ptr, asmTaskStateToString(ASM_WAIT_STREAM_EOF))) { - dest_state = ASM_WAIT_STREAM_EOF; - } else { - return; /* Not support now. */ - } - - if ((getLongLongFromObject(c->argv[4], &offset) != C_OK)) - return; - - if (c->task && c->task->operation == ASM_MIGRATE) { - /* Update the state and ACKed offset from destination. */ - asmTask *task = c->task; - task->dest_state = dest_state; - if (task->dest_offset > (unsigned long long) offset) { - serverLog(LL_WARNING, "CLUSTER SYNCSLOTS ACK received, dest state: %s, " - "but offset %lld is less than the current dest offset %lld", - asmTaskStateToString(dest_state), offset, task->dest_offset); - return; - } - task->dest_offset = offset; - serverLog(LL_DEBUG, "CLUSTER SYNCSLOTS ACK received, dest state: %s, " - "updated dest offset to %lld, source offset: %lld", - asmTaskStateToString(dest_state), task->dest_offset, task->source_offset); - - /* Record the time when the destination finishes applying the accumulated buffer */ - if (task->dest_state == ASM_WAIT_STREAM_EOF && task->dest_accum_applied_time == 0) - task->dest_accum_applied_time = server.mstime; - - /* Pause write if needed */ - if (task->state == ASM_SEND_BULK_AND_STREAM || task->state == ASM_SEND_STREAM) { - /* Pause writes on the main channel if the lag is less than the threshold. */ - if (task->dest_offset + server.asm_handoff_max_lag_bytes >= task->source_offset) { - if (unlikely(asmDebugIsFailPointActive(ASM_MIGRATE_MAIN_CHANNEL, ASM_HANDOFF_PREP))) - return; /* Do not enter handoff prep state for testing buffer drain timeout. */ - - serverLog(LL_NOTICE, "The applied offset lag %lld is less than the threshold %lld, " - "pausing writes for slot handoff", - task->source_offset - task->dest_offset, - server.asm_handoff_max_lag_bytes); - task->state = ASM_HANDOFF_PREP; - asmLogTaskEvent(task, ASM_EVENT_HANDOFF_PREP); - clusterAsmOnEvent(task->id, ASM_EVENT_HANDOFF_PREP, task->slots); - } - } - } - } else if (!strcasecmp(c->argv[2]->ptr, "fail") && c->argc == 4) { - /* CLUSTER SYNCSLOTS FAIL */ - return; /* This is a no-op, just to handle the command syntax. */ - } else if (!strcasecmp(c->argv[2]->ptr, "conf") && c->argc >= 5) { - /* CLUSTER SYNCSLOTS CONF