summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/src/server.c
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
commit5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch)
tree1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/src/server.c
parentc7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff)
downloadcrep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/src/server.c')
-rw-r--r--examples/redis-unstable/src/server.c7941
1 files changed, 7941 insertions, 0 deletions
diff --git a/examples/redis-unstable/src/server.c b/examples/redis-unstable/src/server.c
new file mode 100644
index 0000000..8edfb87
--- /dev/null
+++ b/examples/redis-unstable/src/server.c
@@ -0,0 +1,7941 @@
1/*
2 * Copyright (c) 2009-Present, Redis Ltd.
3 * All rights reserved.
4 *
5 * Copyright (c) 2024-present, Valkey contributors.
6 * All rights reserved.
7 *
8 * Licensed under your choice of (a) the Redis Source Available License 2.0
9 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
10 * GNU Affero General Public License v3 (AGPLv3).
11 *
12 * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
13 */
14
15#include "server.h"
16#include "monotonic.h"
17#include "cluster.h"
18#include "cluster_slot_stats.h"
19#include "slowlog.h"
20#include "bio.h"
21#include "latency.h"
22#include "atomicvar.h"
23#include "mt19937-64.h"
24#include "functions.h"
25#include "hdr_histogram.h"
26#include "syscheck.h"
27#include "threads_mngr.h"
28#include "fmtargs.h"
29#include "mstr.h"
30#include "ebuckets.h"
31#include "cluster_asm.h"
32#include "fwtree.h"
33#include "estore.h"
34#include "chk.h"
35
36#include <time.h>
37#include <signal.h>
38#include <sys/wait.h>
39#include <errno.h>
40#include <ctype.h>
41#include <stdarg.h>
42#include <arpa/inet.h>
43#include <sys/stat.h>
44#include <fcntl.h>
45#include <sys/file.h>
46#include <sys/time.h>
47#include <sys/resource.h>
48#include <sys/uio.h>
49#include <sys/un.h>
50#include <limits.h>
51#include <float.h>
52#include <math.h>
53#include <sys/utsname.h>
54#include <locale.h>
55#include <sys/socket.h>
56
57#ifdef __linux__
58#include <sys/mman.h>
59#endif
60
61#if defined(HAVE_SYSCTL_KIPC_SOMAXCONN) || defined(HAVE_SYSCTL_KERN_SOMAXCONN)
62#include <sys/sysctl.h>
63#endif
64
65#ifdef __GNUC__
66#define GNUC_VERSION_STR STRINGIFY(__GNUC__) "." STRINGIFY(__GNUC_MINOR__) "." STRINGIFY(__GNUC_PATCHLEVEL__)
67#else
68#define GNUC_VERSION_STR "0.0.0"
69#endif
70
71/* Our shared "common" objects */
72
73struct sharedObjectsStruct shared;
74
75/* Global vars that are actually used as constants. The following double
76 * values are used for double on-disk serialization, and are initialized
77 * at runtime to avoid strange compiler optimizations. */
78
79double R_Zero, R_PosInf, R_NegInf, R_Nan;
80
81/*================================= Globals ================================= */
82
83/* Global vars */
84struct redisServer server; /* Server global state */
85
86/*============================ Internal prototypes ========================== */
87
88static inline int isShutdownInitiated(void);
89static inline int isCommandReusable(struct redisCommand *cmd, robj *commandArg);
90int isReadyToShutdown(void);
91int finishShutdown(void);
92const char *replstateToString(int replstate);
93
94/*============================ Utility functions ============================ */
95
96/* Check if a given command can be reused without performing a lookup.
97 * A command is reusable if:
98 * - It is not NULL.
99 * - It does not have subcommands (subcommands_dict == NULL).
100 * This preserves simplicity on the check and accounts for the majority of the use cases.
101 * - Its full name matches the provided command argument. */
102static inline int isCommandReusable(struct redisCommand *cmd, robj *commandArg) {
103 return cmd != NULL &&
104 cmd->subcommands_dict == NULL &&
105 strcasecmp(cmd->fullname, commandArg->ptr) == 0;
106}
107
/* Evaluates to 1 when there is a current client and its id is CLIENT_ID_AOF,
 * i.e. we are in the context of loading an AOF; evaluates to 0 otherwise. */
#define isAOFLoadingContext() \
    (server.current_client != NULL && server.current_client->id == CLIENT_ID_AOF)
111
112/* We use a private localtime implementation which is fork-safe. The logging
113 * function of Redis may be called from other threads. */
114void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst);
115
116static inline int shouldShutdownAsap(void) {
117 int shutdown_asap;
118 atomicGet(server.shutdown_asap, shutdown_asap);
119 return shutdown_asap;
120}
121
122/* Low level logging. To use only for very big messages, otherwise
123 * serverLog() is to prefer. */
124void serverLogRaw(int level, const char *msg) {
125 const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
126 const char *c = ".-*#";
127 FILE *fp;
128 char buf[64];
129 int rawmode = (level & LL_RAW);
130 int log_to_stdout = server.logfile[0] == '\0';
131
132 level &= 0xff; /* clear flags */
133 if (level < server.verbosity) return;
134
135 fp = log_to_stdout ? stdout : fopen(server.logfile,"a");
136 if (!fp) return;
137
138 if (rawmode) {
139 fprintf(fp,"%s",msg);
140 } else {
141 int off;
142 struct timeval tv;
143 int role_char;
144 int daylight_active = 0;
145 pid_t pid = getpid();
146
147 gettimeofday(&tv,NULL);
148 struct tm tm;
149 atomicGet(server.daylight_active, daylight_active);
150 nolocks_localtime(&tm,tv.tv_sec,server.timezone,daylight_active);
151 off = strftime(buf,sizeof(buf),"%d %b %Y %H:%M:%S.",&tm);
152 snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
153 if (server.sentinel_mode) {
154 role_char = 'X'; /* Sentinel. */
155 } else if (pid != server.pid) {
156 role_char = 'C'; /* RDB / AOF writing child. */
157 } else {
158 role_char = (server.masterhost ? 'S':'M'); /* Slave or Master. */
159 }
160 fprintf(fp,"%d:%c %s %c %s\n",
161 (int)getpid(),role_char, buf,c[level],msg);
162 }
163 fflush(fp);
164
165 if (!log_to_stdout) fclose(fp);
166 if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
167}
168
169/* Like serverLogRaw() but with printf-alike support. This is the function that
170 * is used across the code. The raw version is only used in order to dump
171 * the INFO output on crash. */
172void _serverLog(int level, const char *fmt, ...) {
173 va_list ap;
174 char msg[LOG_MAX_LEN];
175
176 va_start(ap, fmt);
177 vsnprintf(msg, sizeof(msg), fmt, ap);
178 va_end(ap);
179
180 serverLogRaw(level,msg);
181}
182
183/* Low level logging from signal handler. Should be used with pre-formatted strings.
184 See serverLogFromHandler. */
185void serverLogRawFromHandler(int level, const char *msg) {
186 int fd;
187 int log_to_stdout = server.logfile[0] == '\0';
188 char buf[64];
189
190 if ((level&0xff) < server.verbosity || (log_to_stdout && server.daemonize))
191 return;
192 fd = log_to_stdout ? STDOUT_FILENO :
193 open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
194 if (fd == -1) return;
195 if (level & LL_RAW) {
196 if (write(fd,msg,strlen(msg)) == -1) goto err;
197 }
198 else {
199 ll2string(buf,sizeof(buf),getpid());
200 if (write(fd,buf,strlen(buf)) == -1) goto err;
201 if (write(fd,":signal-handler (",17) == -1) goto err;
202 ll2string(buf,sizeof(buf),time(NULL));
203 if (write(fd,buf,strlen(buf)) == -1) goto err;
204 if (write(fd,") ",2) == -1) goto err;
205 if (write(fd,msg,strlen(msg)) == -1) goto err;
206 if (write(fd,"\n",1) == -1) goto err;
207 }
208err:
209 if (!log_to_stdout) close(fd);
210}
211
212/* An async-signal-safe version of serverLog. if LL_RAW is not included in level flags,
213 * The message format is: <pid>:signal-handler (<time>) <msg> \n
214 * with LL_RAW flag only the msg is printed (with no new line at the end)
215 *
216 * We actually use this only for signals that are not fatal from the point
217 * of view of Redis. Signals that are going to kill the server anyway and
218 * where we need printf-alike features are served by serverLog(). */
219void serverLogFromHandler(int level, const char *fmt, ...) {
220 va_list ap;
221 char msg[LOG_MAX_LEN];
222
223 va_start(ap, fmt);
224 vsnprintf_async_signal_safe(msg, sizeof(msg), fmt, ap);
225 va_end(ap);
226
227 serverLogRawFromHandler(level, msg);
228}
229
/* Return the UNIX time in microseconds, as obtained from gettimeofday(). */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    /* Widen the seconds before scaling, then add the sub-second part. */
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
240
241/* Return the UNIX time in milliseconds */
242mstime_t mstime(void) {
243 return ustime()/1000;
244}
245
246/* Return the command time snapshot in milliseconds.
247 * The time the command started is the logical time it runs,
248 * and all the time readings during the execution time should
249 * reflect the same time.
250 * More details can be found in the comments below. */
251mstime_t commandTimeSnapshot(void) {
252 /* When we are in the middle of a command execution, we want to use a
253 * reference time that does not change: in that case we just use the
254 * cached time, that we update before each call in the call() function.
255 * This way we avoid that commands such as RPOPLPUSH or similar, that
256 * may re-open the same key multiple times, can invalidate an already
257 * open object in a next call, if the next call will see the key expired,
258 * while the first did not.
259 * This is specifically important in the context of scripts, where we
260 * pretend that time freezes. This way a key can expire only the first time
261 * it is accessed and not in the middle of the script execution, making
262 * propagation to slaves / AOF consistent. See issue #1525 for more info.
263 * Note that we cannot use the cached server.mstime because it can change
264 * in processEventsWhileBlocked etc. */
265 return server.cmd_time_snapshot;
266}
267
/* Terminate a child process (RDB dump or AOF rewrite).
 *
 * Children leave with _exit() instead of exit(), because the latter may
 * interact with the same file objects used by the parent process. The only
 * exception is a coverage build (COVERAGE_TEST), where the normal exit() is
 * used in order to obtain the right coverage information.
 *
 * When the exit is caused by a signal ('from_signal' is non-zero) the
 * function has to be async signal safe, so _exit() is used even in a
 * coverage build. */
void exitFromChild(int retcode, int from_signal) {
#ifdef COVERAGE_TEST
    if (!from_signal) exit(retcode);
    _exit(retcode);
#else
    UNUSED(from_signal);
    _exit(retcode);
#endif
}
287
288/*====================== Hash table type implementation ==================== */
289
290/* This is a hash table type that uses the SDS dynamic strings library as
291 * keys and redis objects as values (objects can hold SDS strings,
292 * lists, sets). */
293
294void dictVanillaFree(dict *d, void *val)
295{
296 UNUSED(d);
297 zfree(val);
298}
299
300void dictListDestructor(dict *d, void *val)
301{
302 UNUSED(d);
303 listRelease((list*)val);
304}
305
306void dictDictDestructor(dict *d, void *val)
307{
308 UNUSED(d);
309 dictRelease((dict*)val);
310}
311
312size_t dictSdsKeyLen(dict *d, const void *key) {
313 UNUSED(d);
314 return sdslen((sds)key);
315}
316
317static const void *kvGetKey(const void *kv) {
318 sds sdsKey = kvobjGetKey((kvobj *) kv);
319 return sdsKey;
320}
321
322int dictSdsCompareKV(dictCmpCache *cache, const void *sdsKey1, const void *sdsKey2)
323{
324 /* is first cmp call of a new lookup */
325 if (cache->useCache == 0) {
326 cache->useCache = 1;
327 cache->data[0].sz = sdslen((sds) sdsKey1);
328 }
329
330 size_t l1 = cache->data[0].sz;
331 size_t l2 = sdslen((sds)sdsKey2);
332 if (l1 != l2) return 0;
333 return memcmp(sdsKey1, sdsKey2, l1) == 0;
334}
335
336static void dictDestructorKV(dict *d, void *kv) {
337 UNUSED(d);
338 if (kv == NULL) return;
339 if (server.memory_tracking_per_slot) {
340 kvstoreDictMetadata *meta = (kvstoreDictMetadata *)dictMetadata(d);
341 size_t alloc_size = kvobjAllocSize(kv);
342 debugServerAssert(alloc_size <= meta->alloc_size);
343 meta->alloc_size -= alloc_size;
344 }
345 decrRefCount(kv);
346}
347
348int dictSdsKeyCompare(dictCmpCache *cache, const void *key1,
349 const void *key2)
350{
351 int l1,l2;
352 UNUSED(cache);
353
354 l1 = sdslen((sds)key1);
355 l2 = sdslen((sds)key2);
356 if (l1 != l2) return 0;
357 return memcmp(key1, key2, l1) == 0;
358}
359
360/* A case insensitive version used for the command lookup table and other
361 * places where case insensitive non binary-safe comparison is needed. */
362int dictSdsKeyCaseCompare(dictCmpCache *cache, const void *key1,
363 const void *key2)
364{
365 UNUSED(cache);
366 return strcasecmp(key1, key2) == 0;
367}
368
369void dictObjectDestructor(dict *d, void *val)
370{
371 UNUSED(d);
372 if (val == NULL) return; /* Lazy freeing will set value to NULL. */
373 decrRefCount(val);
374}
375
376void dictSdsDestructor(dict *d, void *val)
377{
378 UNUSED(d);
379 sdsfree(val);
380}
381
382void setSdsDestructor(dict *d, void *val) {
383 *htGetMetadataSize(d) -= sdsAllocSize(val);
384 sdsfree(val);
385}
386
387size_t setDictMetadataBytes(dict *d) {
388 UNUSED(d);
389 return sizeof(size_t);
390}
391
392void *dictSdsDup(dict *d, const void *key) {
393 UNUSED(d);
394 return sdsdup((const sds) key);
395}
396
397int dictObjKeyCompare(dictCmpCache *cache, const void *key1,
398 const void *key2)
399{
400 const robj *o1 = key1, *o2 = key2;
401 return dictSdsKeyCompare(cache, o1->ptr,o2->ptr);
402}
403
404uint64_t dictObjHash(const void *key) {
405 const robj *o = key;
406 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
407}
408
/* Hash a pointer key: what gets hashed are the sizeof(key) bytes of the
 * pointer value itself, not the memory it points to. */
uint64_t dictPtrHash(const void *key) {
    unsigned char *bytes = (unsigned char*)&key;

    return dictGenHashFunction(bytes,sizeof(key));
}
412
/* Hash an SDS key over its whole sdslen() with dictGenHashFunction(). */
uint64_t dictSdsHash(const void *key) {
    size_t len = sdslen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
416
/* Hash an SDS key over its whole sdslen() with dictGenCaseHashFunction(). */
uint64_t dictSdsCaseHash(const void *key) {
    size_t len = sdslen((char*)key);

    return dictGenCaseHashFunction((unsigned char*)key, len);
}
420
/* Dict hash function for null terminated strings: the strlen() bytes of the
 * key are hashed with dictGenHashFunction(). */
uint64_t dictCStrHash(const void *key) {
    size_t len = strlen((char*)key);

    return dictGenHashFunction((unsigned char*)key, len);
}
425
/* Dict hash function for null terminated strings, "case" variant: the
 * strlen() bytes of the key are hashed with dictGenCaseHashFunction(). */
uint64_t dictCStrCaseHash(const void *key) {
    size_t len = strlen((char*)key);

    return dictGenCaseHashFunction((unsigned char*)key, len);
}
430
431/* Dict hash function for client */
432uint64_t dictClientHash(const void *key) {
433 return ((client *)key)->id;
434}
435
436/* Dict compare function for client */
437int dictClientKeyCompare(dictCmpCache *cache, const void *key1, const void *key2) {
438 UNUSED(cache);
439 return ((client *)key1)->id == ((client *)key2)->id;
440}
441
442/* Dict compare function for null terminated string */
443int dictCStrKeyCompare(dictCmpCache *cache, const void *key1, const void *key2) {
444 int l1,l2;
445 UNUSED(cache);
446
447 l1 = strlen((char*)key1);
448 l2 = strlen((char*)key2);
449 if (l1 != l2) return 0;
450 return memcmp(key1, key2, l1) == 0;
451}
452
453/* Dict case insensitive compare function for null terminated string */
454int dictCStrKeyCaseCompare(dictCmpCache *cache, const void *key1, const void *key2) {
455 UNUSED(cache);
456 return strcasecmp(key1, key2) == 0;
457}
458
459int dictEncObjKeyCompare(dictCmpCache *cache, const void *key1, const void *key2)
460{
461 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
462 int cmp;
463
464 if (o1->encoding == OBJ_ENCODING_INT &&
465 o2->encoding == OBJ_ENCODING_INT)
466 return o1->ptr == o2->ptr;
467
468 /* Due to OBJ_STATIC_REFCOUNT, we avoid calling getDecodedObject() without
469 * good reasons, because it would incrRefCount() the object, which
470 * is invalid. So we check to make sure dictFind() works with static
471 * objects as well. */
472 if (o1->refcount != OBJ_STATIC_REFCOUNT) o1 = getDecodedObject(o1);
473 if (o2->refcount != OBJ_STATIC_REFCOUNT) o2 = getDecodedObject(o2);
474 cmp = dictSdsKeyCompare(cache,o1->ptr,o2->ptr);
475 if (o1->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o1);
476 if (o2->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o2);
477 return cmp;
478}
479
480uint64_t dictEncObjHash(const void *key) {
481 robj *o = (robj*) key;
482
483 if (sdsEncodedObject(o)) {
484 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
485 } else if (o->encoding == OBJ_ENCODING_INT) {
486 char buf[32];
487 int len;
488
489 len = ll2string(buf,32,(long)o->ptr);
490 return dictGenHashFunction((unsigned char*)buf, len);
491 } else {
492 serverPanic("Unknown string encoding");
493 }
494}
495
496static size_t kvstoreMetadataBytes(kvstore *kvs) {
497 UNUSED(kvs);
498 return sizeof(kvstoreMetadata);
499}
500
501static size_t kvstoreDictMetaBytes(dict *d) {
502 UNUSED(d);
503 return sizeof(kvstoreDictMetadata);
504}
505
506static int kvstoreCanFreeDict(kvstore *kvs, int didx) {
507 kvstoreDictMetadata *meta = kvstoreGetDictMeta(kvs, didx, 0);
508 debugServerAssert(meta->alloc_size == 0);
509 /* Free if not in cluster */
510 if (!server.cluster_enabled) return 1;
511
512 /* Don't free if we have stats for this slot and the relevant tracking is enabled. */
513 int has_cpu_stats = (server.cluster_slot_stats_enabled & CLUSTER_SLOT_STATS_CPU) && meta->cpu_usec;
514 int has_net_stats = (server.cluster_slot_stats_enabled & CLUSTER_SLOT_STATS_NET) &&
515 (meta->network_bytes_in || meta->network_bytes_out);
516 if ((has_cpu_stats || has_net_stats) && clusterIsMySlot(didx)) {
517 return 0;
518 }
519
520 /* Otherwise, we can free */
521 return 1;
522}
523
524static void kvstoreOnEmpty(kvstore *kvs) {
525 kvstoreMetadata *meta = kvstoreGetMetadata(kvs);
526 memset(&meta->keysizes_hist, 0, sizeof(meta->keysizes_hist));
527}
528
529static void kvstoreOnDictEmpty(kvstore *kvs, int didx) {
530 kvstoreDictMetadata *meta = kvstoreGetDictMeta(kvs, didx, 0);
531#ifdef DEBUG_ASSERTIONS
532 dictEmpty(kvstoreGetDict(kvs, didx), NULL);
533#endif
534 debugServerAssert(meta->alloc_size == 0);
535 memset(&meta->keysizes_hist, 0, sizeof(meta->keysizes_hist));
536}
537
538/* Return 1 if currently we allow dict to expand. Dict may allocate huge
539 * memory to contain hash buckets when dict expands, that may lead redis
540 * rejects user's requests or evicts some keys, we can stop dict to expand
541 * provisionally if used memory will be over maxmemory after dict expands,
542 * but to guarantee the performance of redis, we still allow dict to expand
543 * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
544int dictResizeAllowed(size_t moreMem, double usedRatio) {
545 /* for debug purposes: dict is not allowed to be resized. */
546 if (!server.dict_resizing) return 0;
547
548 if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
549 return !overMaxmemoryAfterAlloc(moreMem);
550 } else {
551 return 1;
552 }
553}
554
555/* Generic hash table type where keys are Redis Objects, Values
556 * dummy pointers. */
557dictType objectKeyPointerValueDictType = {
558 dictEncObjHash, /* hash function */
559 NULL, /* key dup */
560 NULL, /* val dup */
561 dictEncObjKeyCompare, /* key compare */
562 dictObjectDestructor, /* key destructor */
563 NULL, /* val destructor */
564 NULL /* allow to expand */
565};
566
567/* Like objectKeyPointerValueDictType(), but values can be destroyed, if
568 * not NULL, calling zfree(). */
569dictType objectKeyHeapPointerValueDictType = {
570 dictEncObjHash, /* hash function */
571 NULL, /* key dup */
572 NULL, /* val dup */
573 dictEncObjKeyCompare, /* key compare */
574 dictObjectDestructor, /* key destructor */
575 dictVanillaFree, /* val destructor */
576 NULL /* allow to expand */
577};
578
579/* Set dictionary type. Keys are SDS strings, values are not used. */
580dictType setDictType = {
581 dictSdsHash, /* hash function */
582 NULL, /* key dup */
583 NULL, /* val dup */
584 dictSdsKeyCompare, /* key compare */
585 setSdsDestructor, /* key destructor */
586 NULL, /* val destructor */
587 NULL, /* allow to expand */
588 .no_value = 1, /* no values in this dict */
589 .keys_are_odd = 1, /* an SDS string is always an odd pointer */
590 .dictMetadataBytes = setDictMetadataBytes,
591};
592
593/* Db->dict, keys are of type kvobj, unification of key and value */
594dictType dbDictType = {
595 dictSdsHash, /* hash function */
596 NULL, /* key dup */
597 NULL, /* val dup */
598 dictSdsCompareKV, /* lookup key compare */
599 dictDestructorKV, /* key destructor */
600 NULL, /* val destructor */
601 dictResizeAllowed, /* allow to resize */
602 .no_value = 1, /* keys and values are unified (kvobj) */
603 .keys_are_odd = 0, /* simple kvobj (robj) struct */
604 .keyFromStoredKey = kvGetKey, /* get key from stored-key */
605};
606
607/* Db->expires */
608dictType dbExpiresDictType = {
609 dictSdsHash, /* hash function */
610 NULL, /* key dup */
611 NULL, /* val dup */
612 dictSdsCompareKV, /* key compare */
613 NULL, /* key destructor */
614 NULL, /* val destructor */
615 dictResizeAllowed, /* allow to resize */
616 .no_value = 1, /* keys and values are unified (kvobj) */
617 .keys_are_odd = 0, /* simple kvobj (robj) struct */
618 .keyFromStoredKey = kvGetKey, /* get key from stored-key */
619};
620
621/* Command table. sds string -> command struct pointer. */
622dictType commandTableDictType = {
623 dictSdsCaseHash, /* hash function */
624 NULL, /* key dup */
625 NULL, /* val dup */
626 dictSdsKeyCaseCompare, /* key compare */
627 dictSdsDestructor, /* key destructor */
628 NULL, /* val destructor */
629 NULL, /* allow to expand */
630 .force_full_rehash = 1, /* force full rehashing */
631};
632
633/* Hash type hash table (note that small hashes are represented with listpacks) */
634dictType hashDictType = {
635 dictSdsHash, /* hash function */
636 NULL, /* key dup */
637 NULL, /* val dup */
638 dictSdsKeyCompare, /* key compare */
639 dictSdsDestructor, /* key destructor */
640 dictSdsDestructor, /* val destructor */
641 NULL, /* allow to expand */
642};
643
644/* Dict type without destructor */
645dictType sdsReplyDictType = {
646 dictSdsHash, /* hash function */
647 NULL, /* key dup */
648 NULL, /* val dup */
649 dictSdsKeyCompare, /* key compare */
650 NULL, /* key destructor */
651 NULL, /* val destructor */
652 NULL /* allow to expand */
653};
654
655/* Keylist hash table type has unencoded redis objects as keys and
656 * lists as values. It's used for blocking operations (BLPOP) and to
657 * map swapped keys to a list of clients waiting for this keys to be loaded. */
658dictType keylistDictType = {
659 dictObjHash, /* hash function */
660 NULL, /* key dup */
661 NULL, /* val dup */
662 dictObjKeyCompare, /* key compare */
663 dictObjectDestructor, /* key destructor */
664 dictListDestructor, /* val destructor */
665 NULL /* allow to expand */
666};
667
668/* KeyDict hash table type has unencoded redis objects as keys and
669 * dicts as values. It's used for PUBSUB command to track clients subscribing the channels. */
670dictType objToDictDictType = {
671 dictObjHash, /* hash function */
672 NULL, /* key dup */
673 NULL, /* val dup */
674 dictObjKeyCompare, /* key compare */
675 dictObjectDestructor, /* key destructor */
676 dictDictDestructor, /* val destructor */
677 NULL /* allow to expand */
678};
679
680/* Modules system dictionary type. Keys are module name,
681 * values are pointer to RedisModule struct. */
682dictType modulesDictType = {
683 dictSdsCaseHash, /* hash function */
684 NULL, /* key dup */
685 NULL, /* val dup */
686 dictSdsKeyCaseCompare, /* key compare */
687 dictSdsDestructor, /* key destructor */
688 NULL, /* val destructor */
689 NULL /* allow to expand */
690};
691
692/* Migrate cache dict type. */
693dictType migrateCacheDictType = {
694 dictSdsHash, /* hash function */
695 NULL, /* key dup */
696 NULL, /* val dup */
697 dictSdsKeyCompare, /* key compare */
698 dictSdsDestructor, /* key destructor */
699 NULL, /* val destructor */
700 NULL /* allow to expand */
701};
702
703/* Dict for case-insensitive search using null terminated C strings.
704 * The keys stored in dict are sds though. */
705dictType stringSetDictType = {
706 dictCStrCaseHash, /* hash function */
707 NULL, /* key dup */
708 NULL, /* val dup */
709 dictCStrKeyCaseCompare, /* key compare */
710 dictSdsDestructor, /* key destructor */
711 NULL, /* val destructor */
712 NULL /* allow to expand */
713};
714
715/* Dict for case-insensitive search using null terminated C strings.
716 * The key and value do not have a destructor. */
717dictType externalStringType = {
718 dictCStrCaseHash, /* hash function */
719 NULL, /* key dup */
720 NULL, /* val dup */
721 dictCStrKeyCaseCompare, /* key compare */
722 NULL, /* key destructor */
723 NULL, /* val destructor */
724 NULL /* allow to expand */
725};
726
727/* Dict for case-insensitive search using sds objects with a zmalloc
728 * allocated object as the value. */
729dictType sdsHashDictType = {
730 dictSdsCaseHash, /* hash function */
731 NULL, /* key dup */
732 NULL, /* val dup */
733 dictSdsKeyCaseCompare, /* key compare */
734 dictSdsDestructor, /* key destructor */
735 dictVanillaFree, /* val destructor */
736 NULL /* allow to expand */
737};
738
739/* Client Set dictionary type. Keys are client, values are not used. */
740dictType clientDictType = {
741 dictClientHash, /* hash function */
742 NULL, /* key dup */
743 NULL, /* val dup */
744 dictClientKeyCompare, /* key compare */
745 .no_value = 1, /* no values in this dict */
746 .keys_are_odd = 0 /* a client pointer is not an odd pointer */
747};
748
749kvstoreType kvstoreBaseType = {
750 NULL, /* kvstore metadata size */
751 NULL, /* dict metadata size */
752 NULL, /* can free dict */
753 NULL, /* on kvstore empty */
754 NULL, /* on dict empty */
755};
756
757kvstoreType kvstoreExType = {
758 kvstoreMetadataBytes, /* kvstore metadata size */
759 kvstoreDictMetaBytes, /* dict metadata size */
760 kvstoreCanFreeDict, /* can free dict */
761 kvstoreOnEmpty, /* on kvstore empty */
762 kvstoreOnDictEmpty, /* on dict empty */
763};
764
765/* This function is called once a background process of some kind terminates,
766 * as we want to avoid resizing the hash tables when there is a child in order
767 * to play well with copy-on-write (otherwise when a resize happens lots of
768 * memory pages are copied). The goal of this function is to update the ability
769 * for dict.c to resize or rehash the tables accordingly to the fact we have an
770 * active fork child running. */
771void updateDictResizePolicy(void) {
772 if (server.in_fork_child != CHILD_TYPE_NONE)
773 dictSetResizeEnabled(DICT_RESIZE_FORBID);
774 else if (hasActiveChildProcess())
775 dictSetResizeEnabled(DICT_RESIZE_AVOID);
776 else
777 dictSetResizeEnabled(DICT_RESIZE_ENABLE);
778}
779
780const char *strChildType(int type) {
781 switch(type) {
782 case CHILD_TYPE_RDB: return "RDB";
783 case CHILD_TYPE_AOF: return "AOF";
784 case CHILD_TYPE_LDB: return "LDB";
785 case CHILD_TYPE_MODULE: return "MODULE";
786 default: return "Unknown";
787 }
788}
789
790/* Return true if there are active children processes doing RDB saving,
791 * AOF rewriting, or some side process spawned by a loaded module. */
792int hasActiveChildProcess(void) {
793 return server.child_pid != -1;
794}
795
796void resetChildState(void) {
797 server.child_type = CHILD_TYPE_NONE;
798 server.child_pid = -1;
799 server.stat_current_cow_peak = 0;
800 server.stat_current_cow_bytes = 0;
801 server.stat_current_cow_updated = 0;
802 server.stat_current_save_keys_processed = 0;
803 server.stat_module_progress = 0;
804 server.stat_current_save_keys_total = 0;
805 updateDictResizePolicy();
806 closeChildInfoPipe();
807 moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
808 REDISMODULE_SUBEVENT_FORK_CHILD_DIED,
809 NULL);
810}
811
812/* Return if child type is mutually exclusive with other fork children */
813int isMutuallyExclusiveChildType(int type) {
814 return type == CHILD_TYPE_RDB || type == CHILD_TYPE_AOF || type == CHILD_TYPE_MODULE;
815}
816
817/* Returns true when we're inside a long command that yielded to the event loop. */
818int isInsideYieldingLongCommand(void) {
819 return scriptIsTimedout() || server.busy_module_yield_flags;
820}
821
822/* Return true if this instance has persistence completely turned off:
823 * both RDB and AOF are disabled. */
824int allPersistenceDisabled(void) {
825 return server.saveparamslen == 0 && server.aof_state == AOF_OFF;
826}
827
828/* ======================= Cron: called every 100 ms ======================== */
829
830/* Add a sample to the instantaneous metric. This function computes the quotient
831 * of the increment of value and base, which is useful to record operation count
832 * per second, or the average time consumption of an operation.
833 *
834 * current_value - The dividend
835 * current_base - The divisor
836 * */
837void trackInstantaneousMetric(int metric, long long current_value, long long current_base, long long factor) {
838 if (server.inst_metric[metric].last_sample_base > 0) {
839 long long base = current_base - server.inst_metric[metric].last_sample_base;
840 long long value = current_value - server.inst_metric[metric].last_sample_value;
841 long long avg = base > 0 ? (value * factor / base) : 0;
842 server.inst_metric[metric].samples[server.inst_metric[metric].idx] = avg;
843 server.inst_metric[metric].idx++;
844 server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
845 }
846 server.inst_metric[metric].last_sample_base = current_base;
847 server.inst_metric[metric].last_sample_value = current_value;
848}
849
850/* Return the mean of all the samples. */
851long long getInstantaneousMetric(int metric) {
852 int j;
853 long long sum = 0;
854
855 for (j = 0; j < STATS_METRIC_SAMPLES; j++)
856 sum += server.inst_metric[metric].samples[j];
857 return sum / STATS_METRIC_SAMPLES;
858}
859
860/* The client query buffer is an sds.c string that can end with a lot of
861 * free space not used, this function reclaims space if needed.
862 *
863 * The function always returns 0 as it never terminates the client. */
864int clientsCronResizeQueryBuffer(client *c) {
865 /* If the client query buffer is NULL, it is using the reusable query buffer and there is nothing to do. */
866 if (c->querybuf == NULL) return 0;
867 size_t querybuf_size = sdsalloc(c->querybuf);
868 time_t idletime = server.unixtime - c->lastinteraction;
869
870 /* Only resize the query buffer if the buffer is actually wasting at least a
871 * few kbytes */
872 if (sdsavail(c->querybuf) > 1024*4) {
873 /* There are two conditions to resize the query buffer: */
874 if (idletime > 2) {
875 /* 1) Query is idle for a long time. */
876 size_t remaining = sdslen(c->querybuf) - c->qb_pos;
877 if (!(c->flags & CLIENT_MASTER) && !remaining) {
878 /* If the client is not a master and no data is pending,
879 * The client can safely use the reusable query buffer in the next read - free the client's querybuf. */
880 sdsfree(c->querybuf);
881 /* By setting the querybuf to NULL, the client will use the reusable query buffer in the next read.
882 * We don't move the client to the reusable query buffer immediately, because if we allocated a private
883 * query buffer for the client, it's likely that the client will use it again soon. */
884 c->querybuf = NULL;
885 } else {
886 c->querybuf = sdsRemoveFreeSpace(c->querybuf, 1);
887 }
888 } else if (querybuf_size > PROTO_RESIZE_THRESHOLD && querybuf_size/2 > c->querybuf_peak) {
889 /* 2) Query buffer is too big for latest peak and is larger than
890 * resize threshold. Trim excess space but only up to a limit,
891 * not below the recent peak and current c->querybuf (which will
892 * be soon get used). If we're in the middle of a bulk then make
893 * sure not to resize to less than the bulk length. */
894 size_t resize = sdslen(c->querybuf);
895 if (resize < c->querybuf_peak) resize = c->querybuf_peak;
896 if (c->bulklen != -1 && resize < (size_t)c->bulklen + 2) resize = c->bulklen + 2;
897 c->querybuf = sdsResize(c->querybuf, resize, 1);
898 }
899 }
900
901 /* Reset the peak again to capture the peak memory usage in the next
902 * cycle. */
903 c->querybuf_peak = c->querybuf ? sdslen(c->querybuf) : 0;
904 /* We reset to either the current used, or currently processed bulk size,
905 * which ever is bigger. */
906 if (c->bulklen != -1 && (size_t)c->bulklen + 2 > c->querybuf_peak) c->querybuf_peak = c->bulklen + 2;
907 return 0;
908}
909
910/* The client output buffer can be adjusted to better fit the memory requirements.
911 *
912 * the logic is:
913 * in case the last observed peak size of the buffer equals the buffer size - we double the size
914 * in case the last observed peak size of the buffer is less than half the buffer size - we shrink by half.
915 * The buffer peak will be reset back to the buffer position every server.reply_buffer_peak_reset_time milliseconds
916 * The function always returns 0 as it never terminates the client. */
917int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) {
918
919 size_t new_buffer_size = 0;
920 char *oldbuf = NULL;
921 const size_t buffer_target_shrink_size = c->buf_usable_size/2;
922 const size_t buffer_target_expand_size = c->buf_usable_size*2;
923
924 /* in case the resizing is disabled return immediately */
925 if(!server.reply_buffer_resizing_enabled)
926 return 0;
927
928 /* Don't resize encoded buffers. When buf is encoded, we track the last
929 * partially written payloadHeader pointer, so we can't
930 * reallocate the buffer as it would invalidate this pointer. */
931 if (c->buf_encoded) return 0;
932
933 if (buffer_target_shrink_size >= PROTO_REPLY_MIN_BYTES &&
934 c->buf_peak < buffer_target_shrink_size )
935 {
936 new_buffer_size = max(PROTO_REPLY_MIN_BYTES,c->buf_peak+1);
937 server.stat_reply_buffer_shrinks++;
938 } else if (buffer_target_expand_size < PROTO_REPLY_CHUNK_BYTES*2 &&
939 c->buf_peak == c->buf_usable_size)
940 {
941 new_buffer_size = min(PROTO_REPLY_CHUNK_BYTES,buffer_target_expand_size);
942 server.stat_reply_buffer_expands++;
943 }
944
945 serverAssertWithInfo(c, NULL, (!new_buffer_size) || (new_buffer_size >= (size_t)c->bufpos));
946
947 /* reset the peak value each server.reply_buffer_peak_reset_time seconds. in case the client will be idle
948 * it will start to shrink.
949 */
950 if (server.reply_buffer_peak_reset_time >=0 &&
951 now_ms - c->buf_peak_last_reset_time >= server.reply_buffer_peak_reset_time)
952 {
953 c->buf_peak = c->bufpos;
954 c->buf_peak_last_reset_time = now_ms;
955 }
956
957 if (new_buffer_size) {
958 oldbuf = c->buf;
959 c->buf = zmalloc_usable(new_buffer_size, &c->buf_usable_size);
960 memcpy(c->buf,oldbuf,c->bufpos);
961 zfree(oldbuf);
962 }
963 return 0;
964}
965
/* Tracking of the clients that used the most memory in the last few
 * seconds, so the INFO output (clients section) can report it without an
 * O(N) scan of all the clients.
 *
 * Two arrays of CLIENTS_PEAK_MEM_USAGE_SLOTS entries hold, per slot, the
 * biggest client input and output buffers seen while that slot was current.
 * The current slot is selected with UNIXTIME % CLIENTS_PEAK_MEM_USAGE_SLOTS,
 * so each slot corresponds to one of the latest seconds.
 *
 * The recent peak is obtained by taking the maximum over all the slots. */
#define CLIENTS_PEAK_MEM_USAGE_SLOTS 8
size_t ClientsPeakMemInput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0};  /* Per-slot input peak. */
size_t ClientsPeakMemOutput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0}; /* Per-slot output peak. */
int CurrentPeakMemUsageSlot = 0;                                 /* Slot being updated. */
982
983int clientsCronTrackExpansiveClients(client *c) {
984 size_t qb_size = c->querybuf ? sdsZmallocSize(c->querybuf) : 0;
985 size_t argv_size = c->argv ? zmalloc_size(c->argv) : 0;
986 size_t in_usage = qb_size + c->all_argv_len_sum + argv_size;
987 size_t out_usage = getClientOutputBufferMemoryUsage(c);
988
989 /* Track the biggest values observed so far in this slot. */
990 if (in_usage > ClientsPeakMemInput[CurrentPeakMemUsageSlot])
991 ClientsPeakMemInput[CurrentPeakMemUsageSlot] = in_usage;
992 if (out_usage > ClientsPeakMemOutput[CurrentPeakMemUsageSlot])
993 ClientsPeakMemOutput[CurrentPeakMemUsageSlot] = out_usage;
994
995 return 0; /* This function never terminates the client. */
996}
997
998/* All normal clients are placed in one of the "mem usage buckets" according
999 * to how much memory they currently use. We use this function to find the
1000 * appropriate bucket based on a given memory usage value. The algorithm simply
1001 * does a log2(mem) to ge the bucket. This means, for examples, that if a
1002 * client's memory usage doubles it's moved up to the next bucket, if it's
1003 * halved we move it down a bucket.
1004 * For more details see CLIENT_MEM_USAGE_BUCKETS documentation in server.h. */
1005static inline clientMemUsageBucket *getMemUsageBucket(size_t mem) {
1006 int size_in_bits = 8*(int)sizeof(mem);
1007 int clz = mem > 0 ? __builtin_clzl(mem) : size_in_bits;
1008 int bucket_idx = size_in_bits - clz;
1009 if (bucket_idx > CLIENT_MEM_USAGE_BUCKET_MAX_LOG)
1010 bucket_idx = CLIENT_MEM_USAGE_BUCKET_MAX_LOG;
1011 else if (bucket_idx < CLIENT_MEM_USAGE_BUCKET_MIN_LOG)
1012 bucket_idx = CLIENT_MEM_USAGE_BUCKET_MIN_LOG;
1013 bucket_idx -= CLIENT_MEM_USAGE_BUCKET_MIN_LOG;
1014 return &server.client_mem_usage_buckets[bucket_idx];
1015}
1016
1017/*
1018 * This method updates the client memory usage and update the
1019 * server stats for client type.
1020 *
1021 * This method is called from the clientsCron to have updated
1022 * stats for non CLIENT_TYPE_NORMAL/PUBSUB clients to accurately
1023 * provide information around clients memory usage.
1024 *
1025 * It is also used in updateClientMemUsageAndBucket to have latest
1026 * client memory usage information to place it into appropriate client memory
1027 * usage bucket.
1028 */
1029void updateClientMemoryUsage(client *c) {
1030 serverAssert(c->conn);
1031 size_t mem = getClientMemoryUsage(c, NULL);
1032 int type = getClientType(c);
1033 /* Now that we have the memory used by the client, remove the old
1034 * value from the old category, and add it back. */
1035 server.stat_clients_type_memory[c->last_memory_type] -= c->last_memory_usage;
1036 server.stat_clients_type_memory[type] += mem;
1037 /* Remember what we added and where, to remove it next time. */
1038 c->last_memory_type = type;
1039 c->last_memory_usage = mem;
1040}
1041
1042int clientEvictionAllowed(client *c) {
1043 if (server.maxmemory_clients == 0 || c->flags & CLIENT_NO_EVICT || !c->conn) {
1044 return 0;
1045 }
1046 int type = getClientType(c);
1047 return (type == CLIENT_TYPE_NORMAL || type == CLIENT_TYPE_PUBSUB);
1048}
1049
1050
1051/* This function is used to cleanup the client's previously tracked memory usage.
1052 * This is called during incremental client memory usage tracking as well as
1053 * used to reset when client to bucket allocation is not required when
1054 * client eviction is disabled. */
1055void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
1056 if (c->mem_usage_bucket) {
1057 c->mem_usage_bucket->mem_usage_sum -= c->last_memory_usage;
1058 /* If this client can't be evicted then remove it from the mem usage
1059 * buckets */
1060 if (!allow_eviction) {
1061 listDelNode(c->mem_usage_bucket->clients, c->mem_usage_bucket_node);
1062 c->mem_usage_bucket = NULL;
1063 c->mem_usage_bucket_node = NULL;
1064 }
1065 }
1066}
1067
1068/* This is called only if explicit clients when something changed their buffers,
1069 * so we can track clients' memory and enforce clients' maxmemory in real time.
1070 *
1071 * This also adds the client to the correct memory usage bucket. Each bucket contains
1072 * all clients with roughly the same amount of memory. This way we group
1073 * together clients consuming about the same amount of memory and can quickly
1074 * free them in case we reach maxmemory-clients (client eviction).
1075 *
1076 * Note: This function filters clients of type no-evict, master or replica regardless
1077 * of whether the eviction is enabled or not, so the memory usage we get from these
1078 * types of clients via the INFO command may be out of date.
1079 *
1080 * returns 1 if client eviction for this client is allowed, 0 otherwise.
1081 */
1082int updateClientMemUsageAndBucket(client *c) {
1083 /* The unlikely case this function was called from a thread different
1084 * than the main one is a module call from a spawned thread. This is safe
1085 * since this call must have been made after calling
1086 * RedisModule_ThreadSafeContextLock i.e the module is holding the GIL. In
1087 * that special case we assert that at least the updated client's
1088 * running_tid is the main thread. The true main thread is allowed to call
1089 * this function on clients handled by IO-threads as it makes sure the
1090 * IO-threads are paused, f.e see cleintsCron() and evictClients(). */
1091 serverAssert((pthread_equal(pthread_self(), server.main_thread_id) ||
1092 c->running_tid == IOTHREAD_MAIN_THREAD_ID) && c->conn);
1093 int allow_eviction = clientEvictionAllowed(c);
1094 removeClientFromMemUsageBucket(c, allow_eviction);
1095
1096 if (!allow_eviction) {
1097 return 0;
1098 }
1099
1100 /* Update client memory usage. */
1101 updateClientMemoryUsage(c);
1102
1103 /* Update the client in the mem usage buckets */
1104 clientMemUsageBucket *bucket = getMemUsageBucket(c->last_memory_usage);
1105 bucket->mem_usage_sum += c->last_memory_usage;
1106 if (bucket != c->mem_usage_bucket) {
1107 if (c->mem_usage_bucket)
1108 listDelNode(c->mem_usage_bucket->clients,
1109 c->mem_usage_bucket_node);
1110 c->mem_usage_bucket = bucket;
1111 listAddNodeTail(bucket->clients, c);
1112 c->mem_usage_bucket_node = listLast(bucket->clients);
1113 }
1114 return 1;
1115}
1116
1117/* Return the max samples in the memory usage of clients tracked by
1118 * the function clientsCronTrackExpansiveClients(). */
1119void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
1120 size_t i = 0, o = 0;
1121 for (int j = 0; j < CLIENTS_PEAK_MEM_USAGE_SLOTS; j++) {
1122 if (ClientsPeakMemInput[j] > i) i = ClientsPeakMemInput[j];
1123 if (ClientsPeakMemOutput[j] > o) o = ClientsPeakMemOutput[j];
1124 }
1125 *in_usage = i;
1126 *out_usage = o;
1127}
1128
1129/* Run cron tasks for a single client. Return 1 if the client should
1130 * be terminated, 0 otherwise. */
1131int clientsCronRunClient(client *c) {
1132 mstime_t now = server.mstime;
1133 /* The following functions do different service checks on the client.
1134 * The protocol is that they return non-zero if the client was
1135 * terminated. */
1136 if (clientsCronHandleTimeout(c,now)) return 1;
1137 if (clientsCronResizeQueryBuffer(c)) return 1;
1138 if (clientsCronResizeOutputBuffer(c,now)) return 1;
1139
1140 if (clientsCronTrackExpansiveClients(c)) return 1;
1141
1142 /* Iterating all the clients in getMemoryOverheadData() is too slow and
1143 * in turn would make the INFO command too slow. So we perform this
1144 * computation incrementally and track the (not instantaneous but updated
1145 * to the second) total memory used by clients using clientsCron() in
1146 * a more incremental way (depending on server.hz).
1147 * If client eviction is enabled, update the bucket as well. */
1148 if (!updateClientMemUsageAndBucket(c))
1149 updateClientMemoryUsage(c);
1150
1151 if (closeClientOnOutputBufferLimitReached(c, 0)) return 1;
1152 return 0;
1153}
1154
1155/* Periodic maintenance for the pending command pool.
1156 * This function should be called from serverCron to manage pool size based on utilization patterns. */
1157void pendingCommandPoolCron(void) {
1158 /* Only shrink pool when IO threads are not active */
1159 if (server.io_threads_active) return;
1160
1161 /* Calculate utilization rate based on minimum pool size reached */
1162 if (server.cmd_pool.capacity > PENDING_COMMAND_POOL_SIZE) {
1163 /* If utilization is below threshold, shrink the pool */
1164 double utilization_ratio = 1.0 - (double)server.cmd_pool.min_size / server.cmd_pool.capacity;
1165 if (utilization_ratio < 0.5)
1166 shrinkPendingCommandPool();
1167 }
1168
1169 /* Reset tracking for next interval */
1170 server.cmd_pool.min_size = server.cmd_pool.size; /* Reset to current size */
1171}
1172
1173/* This function is called by serverCron() and is used in order to perform
1174 * operations on clients that are important to perform constantly. For instance
1175 * we use this function in order to disconnect clients after a timeout, including
1176 * clients blocked in some blocking command with a non-zero timeout.
1177 *
1178 * The function makes some effort to process all the clients every second, even
1179 * if this cannot be strictly guaranteed, since serverCron() may be called with
1180 * an actual frequency lower than server.hz in case of latency events like slow
1181 * commands.
1182 *
1183 * It is very important for this function, and the functions it calls, to be
1184 * very fast: sometimes Redis has tens of hundreds of connected clients, and the
1185 * default server.hz value is 10, so sometimes here we need to process thousands
1186 * of clients per second, turning this function into a source of latency.
1187 */
1188void clientsCron(void) {
1189 /* Try to process at least numclients/server.hz of clients
1190 * per call. Since normally (if there are no big latency events) this
1191 * function is called server.hz times per second, in the average case we
1192 * process all the clients in 1 second. */
1193 int numclients = listLength(server.clients);
1194 int iterations = numclients/server.hz;
1195
1196 /* Process at least a few clients while we are at it, even if we need
1197 * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
1198 * of processing each client once per second. */
1199 if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
1200 iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
1201 numclients : CLIENTS_CRON_MIN_ITERATIONS;
1202
1203
1204 CurrentPeakMemUsageSlot = server.unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS;
1205 /* Always zero the next sample, so that when we switch to that second, we'll
1206 * only register samples that are greater in that second without considering
1207 * the history of such slot.
1208 *
1209 * Note: our index may jump to any random position if serverCron() is not
1210 * called for some reason with the normal frequency, for instance because
1211 * some slow command is called taking multiple seconds to execute. In that
1212 * case our array may end containing data which is potentially older
1213 * than CLIENTS_PEAK_MEM_USAGE_SLOTS seconds: however this is not a problem
1214 * since here we want just to track if "recently" there were very expansive
1215 * clients from the POV of memory usage. */
1216 int zeroidx = (CurrentPeakMemUsageSlot+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS;
1217 ClientsPeakMemInput[zeroidx] = 0;
1218 ClientsPeakMemOutput[zeroidx] = 0;
1219
1220 while(listLength(server.clients) && iterations--) {
1221 client *c;
1222 listNode *head;
1223
1224 /* Take the current head, process, and then rotate the head to tail.
1225 * This way we can fairly iterate all clients step by step. */
1226 head = listFirst(server.clients);
1227 c = listNodeValue(head);
1228 listRotateHeadToTail(server.clients);
1229
1230 /* Clients handled by IO threads will be processed by IOThreadClientsCron. */
1231 if (c->tid != IOTHREAD_MAIN_THREAD_ID) continue;
1232
1233 clientsCronRunClient(c);
1234 }
1235}
1236
1237/* This function handles 'background' operations we are required to do
1238 * incrementally in Redis databases, such as active key expiring, resizing,
1239 * rehashing. */
1240void databasesCron(void) {
1241 /* Expire keys by random sampling. Not required for slaves
1242 * as master will synthesize DELs for us. */
1243 if (server.active_expire_enabled) {
1244 if (iAmMaster()) {
1245 activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
1246 } else {
1247 expireSlaveKeys();
1248 }
1249 }
1250
1251 /* Defrag keys gradually. */
1252 activeDefragCycle();
1253
1254 /* Handle active-trim */
1255 if (server.cluster_enabled)
1256 asmActiveTrimCycle();
1257
1258 /* Perform hash tables rehashing if needed, but only if there are no
1259 * other processes saving the DB on disk. Otherwise rehashing is bad
1260 * as will cause a lot of copy-on-write of memory pages. */
1261 if (!hasActiveChildProcess()) {
1262 /* We use global counters so if we stop the computation at a given
1263 * DB we'll be able to start from the successive in the next
1264 * cron loop iteration. */
1265 static unsigned int resize_db = 0;
1266 static unsigned int rehash_db = 0;
1267 int dbs_per_call = CRON_DBS_PER_CALL;
1268 int j;
1269
1270 /* Don't test more DBs than we have. */
1271 if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;
1272
1273 for (j = 0; j < dbs_per_call; j++) {
1274 redisDb *db = &server.db[resize_db % server.dbnum];
1275 kvstoreTryResizeDicts(db->keys, CRON_DICTS_PER_DB);
1276 kvstoreTryResizeDicts(db->expires, CRON_DICTS_PER_DB);
1277 resize_db++;
1278 }
1279
1280 /* Rehash */
1281 if (server.activerehashing) {
1282 uint64_t elapsed_us = 0;
1283 for (j = 0; j < dbs_per_call; j++) {
1284 redisDb *db = &server.db[rehash_db % server.dbnum];
1285 elapsed_us += kvstoreIncrementallyRehash(db->keys, INCREMENTAL_REHASHING_THRESHOLD_US - elapsed_us);
1286 if (elapsed_us >= INCREMENTAL_REHASHING_THRESHOLD_US)
1287 break;
1288 elapsed_us += kvstoreIncrementallyRehash(db->expires, INCREMENTAL_REHASHING_THRESHOLD_US - elapsed_us);
1289 if (elapsed_us >= INCREMENTAL_REHASHING_THRESHOLD_US)
1290 break;
1291 rehash_db++;
1292 }
1293 }
1294 }
1295}
1296
1297static inline void updateCachedTimeWithUs(int update_daylight_info, const long long ustime) {
1298 server.ustime = ustime;
1299 server.mstime = server.ustime / 1000;
1300 time_t unixtime = server.mstime / 1000;
1301 atomicSet(server.unixtime, unixtime);
1302
1303 /* To get information about daylight saving time, we need to call
1304 * localtime_r and cache the result. However calling localtime_r in this
1305 * context is safe since we will never fork() while here, in the main
1306 * thread. The logging function will call a thread safe version of
1307 * localtime that has no locks. */
1308 if (update_daylight_info) {
1309 struct tm tm;
1310 time_t ut = server.unixtime;
1311 localtime_r(&ut,&tm);
1312 atomicSet(server.daylight_active, tm.tm_isdst);
1313 }
1314}
1315
/* Refresh the cached time kept in the global state using the current time.
 *
 * A cached value is kept because accuracy is not needed and accessing a
 * global var is a lot faster than calling time(NULL).
 *
 * This function should be fast because it is called at every command
 * execution in call(), so the 'update_daylight_info' argument lets the
 * caller decide whether to refresh the daylight saving info too. Normally
 * that is done only when calling this function from serverCron(), not when
 * calling it from call(). */
void updateCachedTime(int update_daylight_info) {
    updateCachedTimeWithUs(update_daylight_info, ustime());
}
1330
1331/* Performing required operations in order to enter an execution unit.
1332 * In general, if we are already inside an execution unit then there is nothing to do,
1333 * otherwise we need to update cache times so the same cached time will be used all over
1334 * the execution unit.
1335 * update_cached_time - if 0, will not update the cached time even if required.
1336 * us - if not zero, use this time for cached time, otherwise get current time. */
1337void enterExecutionUnit(int update_cached_time, long long us) {
1338 if (server.execution_nesting++ == 0 && update_cached_time) {
1339 if (us == 0) {
1340 us = ustime();
1341 }
1342 updateCachedTimeWithUs(0, us);
1343 server.cmd_time_snapshot = server.mstime;
1344 }
1345}
1346
1347void exitExecutionUnit(void) {
1348 --server.execution_nesting;
1349}
1350
1351void checkChildrenDone(void) {
1352 int statloc = 0;
1353 pid_t pid;
1354
1355 if ((pid = waitpid(-1, &statloc, WNOHANG)) != 0) {
1356 int exitcode = WIFEXITED(statloc) ? WEXITSTATUS(statloc) : -1;
1357 int bysignal = 0;
1358
1359 if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
1360
1361 /* sigKillChildHandler catches the signal and calls exit(), but we
1362 * must make sure not to flag lastbgsave_status, etc incorrectly.
1363 * We could directly terminate the child process via SIGUSR1
1364 * without handling it */
1365 if (exitcode == SERVER_CHILD_NOERROR_RETVAL) {
1366 bysignal = SIGUSR1;
1367 exitcode = 1;
1368 }
1369
1370 if (pid == -1) {
1371 serverLog(LL_WARNING,"waitpid() returned an error: %s. "
1372 "child_type: %s, child_pid = %d",
1373 strerror(errno),
1374 strChildType(server.child_type),
1375 (int) server.child_pid);
1376 } else if (pid == server.child_pid) {
1377 if (server.child_type == CHILD_TYPE_RDB) {
1378 backgroundSaveDoneHandler(exitcode, bysignal);
1379 } else if (server.child_type == CHILD_TYPE_AOF) {
1380 backgroundRewriteDoneHandler(exitcode, bysignal);
1381 } else if (server.child_type == CHILD_TYPE_MODULE) {
1382 ModuleForkDoneHandler(exitcode, bysignal);
1383 } else {
1384 serverPanic("Unknown child type %d for child pid %d", server.child_type, server.child_pid);
1385 exit(1);
1386 }
1387 if (!bysignal && exitcode == 0) receiveChildInfo();
1388 resetChildState();
1389 } else {
1390 if (!ldbRemoveChild(pid)) {
1391 serverLog(LL_WARNING,
1392 "Warning, detected child with unmatched pid: %ld",
1393 (long) pid);
1394 }
1395 }
1396
1397 /* start any pending forks immediately. */
1398 replicationStartPendingFork();
1399 }
1400}
1401
1402/* Record the max memory used since the server was started. */
1403void updatePeakMemory(void) {
1404 size_t zmalloc_used = zmalloc_used_memory();
1405 if (zmalloc_used > server.stat_peak_memory) {
1406 server.stat_peak_memory = zmalloc_used;
1407 server.stat_peak_memory_time = server.unixtime;
1408 }
1409
1410 size_t zmalloc_peak = zmalloc_get_peak_memory();
1411 if (zmalloc_peak > server.stat_peak_memory) {
1412 server.stat_peak_memory = zmalloc_peak;
1413 server.stat_peak_memory_time = zmalloc_get_peak_memory_time();
1414 }
1415}
1416
1417/* Called from serverCron and cronUpdateMemoryStats to update cached memory metrics. */
1418void cronUpdateMemoryStats(void) {
1419 updatePeakMemory();
1420
1421 run_with_period(100) {
1422 /* Sample the RSS and other metrics here since this is a relatively slow call.
1423 * We must sample the zmalloc_used at the same time we take the rss, otherwise
1424 * the frag ratio calculate may be off (ratio of two samples at different times) */
1425 server.cron_malloc_stats.process_rss = zmalloc_get_rss();
1426 server.cron_malloc_stats.zmalloc_used = zmalloc_used_memory();
1427 /* Sampling the allocator info can be slow too.
1428 * The fragmentation ratio it'll show is potentially more accurate
1429 * it excludes other RSS pages such as: shared libraries, LUA and other non-zmalloc
1430 * allocations, and allocator reserved pages that can be pursed (all not actual frag) */
1431 zmalloc_get_allocator_info(1,
1432 &server.cron_malloc_stats.allocator_allocated,
1433 &server.cron_malloc_stats.allocator_active,
1434 &server.cron_malloc_stats.allocator_resident,
1435 NULL,
1436 &server.cron_malloc_stats.allocator_muzzy,
1437 &server.cron_malloc_stats.allocator_frag_smallbins_bytes);
1438 if (server.lua_arena != UINT_MAX) {
1439 zmalloc_get_allocator_info_by_arena(server.lua_arena,
1440 0,
1441 &server.cron_malloc_stats.lua_allocator_allocated,
1442 &server.cron_malloc_stats.lua_allocator_active,
1443 &server.cron_malloc_stats.lua_allocator_resident,
1444 &server.cron_malloc_stats.lua_allocator_frag_smallbins_bytes);
1445 }
1446 /* in case the allocator isn't providing these stats, fake them so that
1447 * fragmentation info still shows some (inaccurate metrics) */
1448 if (!server.cron_malloc_stats.allocator_resident)
1449 server.cron_malloc_stats.allocator_resident = server.cron_malloc_stats.process_rss;
1450 if (!server.cron_malloc_stats.allocator_active)
1451 server.cron_malloc_stats.allocator_active = server.cron_malloc_stats.allocator_resident;
1452 if (!server.cron_malloc_stats.allocator_allocated)
1453 server.cron_malloc_stats.allocator_allocated = server.cron_malloc_stats.zmalloc_used;
1454 }
1455}
1456
1457/* This is our timer interrupt, called server.hz times per second.
1458 * Here is where we do a number of things that need to be done asynchronously.
1459 * For instance:
1460 *
1461 * - Active expired keys collection (it is also performed in a lazy way on
1462 * lookup).
1463 * - Software watchdog.
1464 * - Update some statistic.
1465 * - Incremental rehashing of the DBs hash tables.
1466 * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
1467 * - Clients timeout of different kinds.
1468 * - Replication reconnection.
1469 * - Many more...
1470 *
1471 * Everything directly called here will be called server.hz times per second,
1472 * so in order to throttle execution of things we want to do less frequently
1473 * a macro is used: run_with_period(milliseconds) { .... }
1474 */
1475
1476int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
1477 int j;
1478 UNUSED(eventLoop);
1479 UNUSED(id);
1480 UNUSED(clientData);
1481
1482 /* Software watchdog: deliver the SIGALRM that will reach the signal
1483 * handler if we don't return here fast enough. */
1484 if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
1485
1486 server.hz = server.config_hz;
1487 /* Adapt the server.hz value to the number of configured clients. If we have
1488 * many clients, we want to call serverCron() with an higher frequency. */
1489 if (server.dynamic_hz) {
1490 while (listLength(server.clients) / server.hz >
1491 MAX_CLIENTS_PER_CLOCK_TICK)
1492 {
1493 server.hz *= 2;
1494 if (server.hz > CONFIG_MAX_HZ) {
1495 server.hz = CONFIG_MAX_HZ;
1496 break;
1497 }
1498 }
1499 }
1500
1501 /* for debug purposes: skip actual cron work if pause_cron is on */
1502 if (server.pause_cron) return 1000/server.hz;
1503
1504 monotime cron_start = getMonotonicUs();
1505
1506 run_with_period(100) {
1507 long long stat_net_input_bytes, stat_net_output_bytes;
1508 long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
1509 atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
1510 atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
1511 atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
1512 atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
1513 monotime current_time = getMonotonicUs();
1514 long long factor = 1000000; // us
1515 trackInstantaneousMetric(STATS_METRIC_COMMAND, server.stat_numcommands, current_time, factor);
1516 trackInstantaneousMetric(STATS_METRIC_NET_INPUT, stat_net_input_bytes + stat_net_repl_input_bytes,
1517 current_time, factor);
1518 trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, stat_net_output_bytes + stat_net_repl_output_bytes,
1519 current_time, factor);
1520 trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, stat_net_repl_input_bytes, current_time,
1521 factor);
1522 trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, stat_net_repl_output_bytes,
1523 current_time, factor);
1524 trackInstantaneousMetric(STATS_METRIC_EL_CYCLE, server.duration_stats[EL_DURATION_TYPE_EL].cnt,
1525 current_time, factor);
1526 trackInstantaneousMetric(STATS_METRIC_EL_DURATION, server.duration_stats[EL_DURATION_TYPE_EL].sum,
1527 server.duration_stats[EL_DURATION_TYPE_EL].cnt, 1);
1528 }
1529
1530 /* We have just LRU_BITS bits per object for LRU information.
1531 * So we use an (eventually wrapping) LRU clock.
1532 *
1533 * Note that even if the counter wraps it's not a big problem,
1534 * everything will still work but some object will appear younger
1535 * to Redis. However for this to happen a given object should never be
1536 * touched for all the time needed to the counter to wrap, which is
1537 * not likely.
1538 *
1539 * Note that you can change the resolution altering the
1540 * LRU_CLOCK_RESOLUTION define. */
1541 server.lruclock = getLRUClock();
1542
1543 cronUpdateMemoryStats();
1544
1545 /* We received a SIGTERM or SIGINT, shutting down here in a safe way, as it is
1546 * not ok doing so inside the signal handler. */
1547 if (shouldShutdownAsap() && !isShutdownInitiated()) {
1548 int shutdownFlags = SHUTDOWN_NOFLAGS;
1549 int last_sig_received;
1550 atomicGet(server.last_sig_received, last_sig_received);
1551 if (last_sig_received == SIGINT && server.shutdown_on_sigint)
1552 shutdownFlags = server.shutdown_on_sigint;
1553 else if (last_sig_received == SIGTERM && server.shutdown_on_sigterm)
1554 shutdownFlags = server.shutdown_on_sigterm;
1555
1556 if (prepareForShutdown(shutdownFlags) == C_OK) exit(0);
1557 } else if (isShutdownInitiated()) {
1558 if (server.mstime >= server.shutdown_mstime || isReadyToShutdown()) {
1559 if (finishShutdown() == C_OK) exit(0);
1560 /* Shutdown failed. Continue running. An error has been logged. */
1561 }
1562 }
1563
1564 /* Show some info about non-empty databases */
1565 if (server.verbosity <= LL_VERBOSE) {
1566 run_with_period(5000) {
1567 for (j = 0; j < server.dbnum; j++) {
1568 long long size, used, vkeys;
1569
1570 size = kvstoreBuckets(server.db[j].keys);
1571 used = kvstoreSize(server.db[j].keys);
1572 vkeys = kvstoreSize(server.db[j].expires);
1573 if (used || vkeys) {
1574 serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
1575 }
1576 }
1577 }
1578 }
1579
1580 /* Show information about connected clients */
1581 if (!server.sentinel_mode) {
1582 run_with_period(5000) {
1583 serverLog(LL_DEBUG,
1584 "%lu clients connected (%lu replicas), %zu bytes in use",
1585 listLength(server.clients)-listLength(server.slaves),
1586 replicationLogicalReplicaCount(),
1587 zmalloc_used_memory());
1588 }
1589 }
1590
1591 /* We need to do a few operations on clients asynchronously. */
1592 clientsCron();
1593
1594 /* Handle background operations on Redis databases. */
1595 databasesCron();
1596
1597 /* Start a scheduled AOF rewrite if this was requested by the user while
1598 * a BGSAVE was in progress. */
1599 if (!hasActiveChildProcess() &&
1600 server.aof_rewrite_scheduled &&
1601 !aofRewriteLimited())
1602 {
1603 rewriteAppendOnlyFileBackground();
1604 }
1605
1606 /* Check if a background saving or AOF rewrite in progress terminated. */
1607 if (hasActiveChildProcess() || ldbPendingChildren())
1608 {
1609 run_with_period(1000) receiveChildInfo();
1610 checkChildrenDone();
1611 } else {
1612 /* If there is not a background saving/rewrite in progress check if
1613 * we have to save/rewrite now. */
1614 for (j = 0; j < server.saveparamslen; j++) {
1615 struct saveparam *sp = server.saveparams+j;
1616
1617 /* Save if we reached the given amount of changes,
1618 * the given amount of seconds, and if the latest bgsave was
1619 * successful or if, in case of an error, at least
1620 * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
1621 if (server.dirty >= sp->changes &&
1622 server.unixtime-server.lastsave > sp->seconds &&
1623 (server.unixtime-server.lastbgsave_try >
1624 CONFIG_BGSAVE_RETRY_DELAY ||
1625 server.lastbgsave_status == C_OK))
1626 {
1627 serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
1628 sp->changes, (int)sp->seconds);
1629 rdbSaveInfo rsi, *rsiptr;
1630 rsiptr = rdbPopulateSaveInfo(&rsi);
1631 rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE);
1632 break;
1633 }
1634 }
1635
1636 /* Trigger an AOF rewrite if needed. */
1637 if (server.aof_state == AOF_ON &&
1638 !hasActiveChildProcess() &&
1639 server.aof_rewrite_perc &&
1640 server.aof_current_size > server.aof_rewrite_min_size)
1641 {
1642 long long base = server.aof_rewrite_base_size ?
1643 server.aof_rewrite_base_size : 1;
1644 long long growth = (server.aof_current_size*100/base) - 100;
1645 if (growth >= server.aof_rewrite_perc && !aofRewriteLimited()) {
1646 serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
1647 rewriteAppendOnlyFileBackground();
1648 }
1649 }
1650 }
1651 /* Just for the sake of defensive programming, to avoid forgetting to
1652 * call this function when needed. */
1653 updateDictResizePolicy();
1654
1655 /* AOF postponed flush: Try at every cron cycle if the slow fsync
1656 * completed. */
1657 if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
1658 server.aof_flush_postponed_start)
1659 {
1660 flushAppendOnlyFile(0);
1661 }
1662
1663 /* AOF write errors: in this case we have a buffer to flush as well and
1664 * clear the AOF error in case of success to make the DB writable again,
1665 * however to try every second is enough in case of 'hz' is set to
1666 * a higher frequency. */
1667 run_with_period(1000) {
1668 if ((server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) &&
1669 server.aof_last_write_status == C_ERR)
1670 {
1671 flushAppendOnlyFile(0);
1672 }
1673 }
1674
1675 /* Clear the paused actions state if needed. */
1676 updatePausedActions();
1677
1678 /* Replication cron function -- used to reconnect to master,
1679 * detect transfer failures, start background RDB transfers and so forth.
1680 *
1681 * If Redis is trying to failover then run the replication cron faster so
1682 * progress on the handshake happens more quickly. */
1683 if (server.failover_state != NO_FAILOVER) {
1684 run_with_period(100) replicationCron();
1685 } else {
1686 run_with_period(1000) replicationCron();
1687 }
1688
1689 /* Run the Redis Cluster cron. */
1690 run_with_period(100) {
1691 if (server.cluster_enabled) {
1692 clusterCron();
1693 asmCron();
1694 }
1695 }
1696
1697 /* Run the Sentinel timer if we are in sentinel mode. */
1698 if (server.sentinel_mode) sentinelTimer();
1699
1700 /* Cleanup expired MIGRATE cached sockets. */
1701 run_with_period(1000) {
1702 migrateCloseTimedoutSockets();
1703 }
1704
1705 /* Cleanup expired IDMP entries from tracked streams */
1706 run_with_period(1000) {
1707 handleExpiredIdmpEntries();
1708 }
1709
1710 /* Periodically shrink pending command reuse pool */
1711 run_with_period(2000) {
1712 pendingCommandPoolCron();
1713 }
1714
1715 /* Resize tracking keys table if needed. This is also done at every
1716 * command execution, but we want to be sure that if the last command
1717 * executed changes the value via CONFIG SET, the server will perform
1718 * the operation even if completely idle. */
1719 if (server.tracking_clients) trackingLimitUsedSlots();
1720
1721 /* Check if hotkey tracking duration has expired and auto-stop if needed */
1722 if (server.hotkeys && server.hotkeys->active && server.hotkeys->duration > 0) {
1723 mstime_t elapsed = (server.mstime - server.hotkeys->start);
1724 if (elapsed >= server.hotkeys->duration) {
1725 server.hotkeys->active = 0;
1726 server.hotkeys->duration = elapsed;
1727 }
1728 }
1729
1730 /* Start a scheduled BGSAVE if the corresponding flag is set. This is
1731 * useful when we are forced to postpone a BGSAVE because an AOF
1732 * rewrite is in progress.
1733 *
1734 * Note: this code must be after the replicationCron() call above so
1735 * make sure when refactoring this file to keep this order. This is useful
1736 * because we want to give priority to RDB savings for replication. */
1737 if (!hasActiveChildProcess() &&
1738 server.rdb_bgsave_scheduled &&
1739 (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
1740 server.lastbgsave_status == C_OK))
1741 {
1742 rdbSaveInfo rsi, *rsiptr;
1743 rsiptr = rdbPopulateSaveInfo(&rsi);
1744 if (rdbSaveBackground(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_NONE) == C_OK)
1745 server.rdb_bgsave_scheduled = 0;
1746 }
1747
1748 run_with_period(100) {
1749 if (moduleCount()) modulesCron();
1750 }
1751
1752 /* Fire the cron loop modules event. */
1753 RedisModuleCronLoopV1 ei = {REDISMODULE_CRON_LOOP_VERSION,server.hz};
1754 moduleFireServerEvent(REDISMODULE_EVENT_CRON_LOOP,
1755 0,
1756 &ei);
1757
1758 server.cronloops++;
1759
1760 server.el_cron_duration = getMonotonicUs() - cron_start;
1761
1762 return 1000/server.hz;
1763}
1764
1765
1766void blockingOperationStarts(void) {
1767 if(!server.blocking_op_nesting++){
1768 updateCachedTime(0);
1769 server.blocked_last_cron = server.mstime;
1770 }
1771}
1772
1773void blockingOperationEnds(void) {
1774 if(!(--server.blocking_op_nesting)){
1775 server.blocked_last_cron = 0;
1776 }
1777}
1778
1779/* This function fills in the role of serverCron during RDB or AOF loading, and
1780 * also during blocked scripts.
1781 * It attempts to do its duties at a similar rate as the configured server.hz,
1782 * and updates cronloops variable so that similarly to serverCron, the
1783 * run_with_period can be used. */
1784void whileBlockedCron(void) {
1785 /* Here we may want to perform some cron jobs (normally done server.hz times
1786 * per second). */
1787
1788 /* Since this function depends on a call to blockingOperationStarts, let's
1789 * make sure it was done. */
1790 serverAssert(server.blocked_last_cron);
1791
1792 /* In case we were called too soon, leave right away. This way one time
1793 * jobs after the loop below don't need an if. and we don't bother to start
1794 * latency monitor if this function is called too often. */
1795 if (server.blocked_last_cron >= server.mstime)
1796 return;
1797
1798 /* Increment server.cronloops so that run_with_period works. */
1799 long hz_ms = 1000 / server.hz;
1800 int cronloops = (server.mstime - server.blocked_last_cron + (hz_ms - 1)) / hz_ms; /* rounding up */
1801 server.blocked_last_cron += cronloops * hz_ms;
1802 server.cronloops += cronloops;
1803
1804 mstime_t latency;
1805 latencyStartMonitor(latency);
1806
1807 /* Only defragment during AOF loading. */
1808 if (isAOFLoadingContext()) defragWhileBlocked();
1809
1810 /* Update memory stats during loading (excluding blocked scripts) */
1811 if (server.loading) cronUpdateMemoryStats();
1812
1813 latencyEndMonitor(latency);
1814 latencyAddSampleIfNeeded("while-blocked-cron",latency);
1815
1816 /* We received a SIGTERM during loading, shutting down here in a safe way,
1817 * as it isn't ok doing so inside the signal handler. */
1818 if (shouldShutdownAsap() && server.loading) {
1819 if (prepareForShutdown(SHUTDOWN_NOSAVE) == C_OK) exit(0);
1820 serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1821 atomicSet(server.shutdown_asap, 0);
1822 atomicSet(server.last_sig_received, 0);
1823 }
1824}
1825
1826static void sendGetackToReplicas(void) {
1827 robj *argv[3];
1828 argv[0] = shared.replconf;
1829 argv[1] = shared.getack;
1830 argv[2] = shared.special_asterick; /* Not used argument. */
1831 replicationFeedSlaves(server.slaves, -1, argv, 3);
1832}
1833
1834extern int ProcessingEventsWhileBlocked;
1835
1836/* This function gets called every time Redis is entering the
1837 * main loop of the event driven library, that is, before to sleep
1838 * for ready file descriptors.
1839 *
1840 * Note: This function is (currently) called from two functions:
1841 * 1. aeMain - The main server loop
1842 * 2. processEventsWhileBlocked - Process clients during RDB/AOF load
1843 *
1844 * If it was called from processEventsWhileBlocked we don't want
1845 * to perform all actions (For example, we don't want to expire
1846 * keys), but we do need to perform some actions.
1847 *
1848 * The most important is freeClientsInAsyncFreeQueue but we also
1849 * call some other low-risk functions. */
1850void beforeSleep(struct aeEventLoop *eventLoop) {
1851 UNUSED(eventLoop);
1852
1853 updatePeakMemory();
1854
1855 /* Just call a subset of vital functions in case we are re-entering
1856 * the event loop from processEventsWhileBlocked(). Note that in this
1857 * case we keep track of the number of events we are processing, since
1858 * processEventsWhileBlocked() wants to stop ASAP if there are no longer
1859 * events to handle. */
1860 if (ProcessingEventsWhileBlocked) {
1861 uint64_t processed = 0;
1862 processed += connTypeProcessPendingData(server.el);
1863 if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
1864 flushAppendOnlyFile(0);
1865 processed += handleClientsWithPendingWrites();
1866 processed += freeClientsInAsyncFreeQueue();
1867
1868 /* Let the clients after the blocking call be processed. */
1869 processClientsOfAllIOThreads();
1870 /* New connections may have been established while blocked, clients from
1871 * IO thread may have replies to write, ensure they are promptly sent to
1872 * IO threads. */
1873 processed += sendPendingClientsToIOThreads();
1874
1875 server.events_processed_while_blocked += processed;
1876 return;
1877 }
1878
1879 /* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */
1880 connTypeProcessPendingData(server.el);
1881
1882 /* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
1883 int dont_sleep = connTypeHasPendingData(server.el);
1884
1885 /* Call the Redis Cluster before sleep function. Note that this function
1886 * may change the state of Redis Cluster (from ok to fail or vice versa),
1887 * so it's a good idea to call it before serving the unblocked clients
1888 * later in this function, must be done before blockedBeforeSleep. */
1889 if (server.cluster_enabled) {
1890 clusterBeforeSleep();
1891 asmBeforeSleep();
1892 }
1893
1894 /* Handle blocked clients.
1895 * must be done before flushAppendOnlyFile, in case of appendfsync=always,
1896 * since the unblocked clients may write data. */
1897 blockedBeforeSleep();
1898
1899 /* Record cron time in beforeSleep, which is the sum of active-expire, active-defrag and all other
1900 * tasks done by cron and beforeSleep, but excluding read, write and AOF, that are counted by other
1901 * sets of metrics. */
1902 monotime cron_start_time_before_aof = getMonotonicUs();
1903
1904 /* Run a fast expire cycle (the called function will return
1905 * ASAP if a fast cycle is not needed). */
1906 if (server.active_expire_enabled && iAmMaster())
1907 activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
1908
1909 if (moduleCount()) {
1910 moduleFireServerEvent(REDISMODULE_EVENT_EVENTLOOP,
1911 REDISMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP,
1912 NULL);
1913 }
1914
1915 /* Send all the slaves an ACK request if at least one client blocked
1916 * during the previous event loop iteration. Note that we do this after
1917 * processUnblockedClients(), so if there are multiple pipelined WAITs
1918 * and the just unblocked WAIT gets blocked again, we don't have to wait
1919 * a server cron cycle in absence of other event loop events. See #6623.
1920 *
1921 * We also don't send the ACKs while clients are paused, since it can
1922 * increment the replication backlog, they'll be sent after the pause
1923 * if we are still the master. */
1924 if (server.get_ack_from_slaves && !isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA)) {
1925 sendGetackToReplicas();
1926 server.get_ack_from_slaves = 0;
1927 }
1928
1929 /* We may have received updates from clients about their current offset. NOTE:
1930 * this can't be done where the ACK is received since failover will disconnect
1931 * our clients. */
1932 updateFailoverStatus();
1933
1934 /* Since we rely on current_client to send scheduled invalidation messages
1935 * we have to flush them after each command, so when we get here, the list
1936 * must be empty. */
1937 serverAssert(listLength(server.tracking_pending_keys) == 0);
1938 serverAssert(listLength(server.pending_push_messages) == 0);
1939
1940 /* Send the invalidation messages to clients participating to the
1941 * client side caching protocol in broadcasting (BCAST) mode. */
1942 trackingBroadcastInvalidationMessages();
1943
1944 /* Record time consumption of AOF writing. */
1945 monotime aof_start_time = getMonotonicUs();
1946 /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing below. */
1947 monotime duration_before_aof = aof_start_time - cron_start_time_before_aof;
1948 /* Record the fsync'd offset before flushAppendOnly */
1949 long long prev_fsynced_reploff = server.fsynced_reploff;
1950
1951 /* Write the AOF buffer on disk,
1952 * must be done before handleClientsWithPendingWrites and
1953 * sendPendingClientsToIOThreads, in case of appendfsync=always. */
1954 if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
1955 flushAppendOnlyFile(0);
1956
1957 /* Record time consumption of AOF writing. */
1958 durationAddSample(EL_DURATION_TYPE_AOF, getMonotonicUs() - aof_start_time);
1959
1960 /* Update the fsynced replica offset.
1961 * If an initial rewrite is in progress then not all data is guaranteed to have actually been
1962 * persisted to disk yet, so we cannot update the field. We will wait for the rewrite to complete. */
1963 if (server.aof_state == AOF_ON && server.fsynced_reploff != -1) {
1964 long long fsynced_reploff_pending;
1965 atomicGet(server.fsynced_reploff_pending, fsynced_reploff_pending);
1966 server.fsynced_reploff = fsynced_reploff_pending;
1967
1968 /* If we have blocked [WAIT]AOF clients, and fsynced_reploff changed, we want to try to
1969 * wake them up ASAP. */
1970 if (listLength(server.clients_waiting_acks) && prev_fsynced_reploff != server.fsynced_reploff)
1971 dont_sleep = 1;
1972 }
1973
1974 if (server.io_threads_num > 1) {
1975 /* Corresponding to IOThreadBeforeSleep, process the clients from IO threads
1976 * without notification. */
1977 if (processClientsOfAllIOThreads() > 0) {
1978 /* If there are clients that are processed, it means IO thread is busy to
1979 * trafer clients to main thread, so the main thread does not sleep. */
1980 dont_sleep = 1;
1981 }
1982 if (!dont_sleep) {
1983 atomicSetWithSync(server.running, 0); /* Not running if going to sleep. */
1984 /* Try to process the clients from IO threads again, since before setting running
1985 * to 0, some clients may be transferred without notification. */
1986 processClientsOfAllIOThreads();
1987 }
1988 }
1989
1990 /* Handle writes with pending output buffers. */
1991 handleClientsWithPendingWrites();
1992
1993 /* Check if IO thread replicas have any pending read or writes and send them
1994 * back to their threads if so. */
1995 putReplicasInPendingClientsToIOThreads();
1996
1997 /* Let io thread to handle its pending clients. */
1998 sendPendingClientsToIOThreads();
1999
2000 /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
2001 monotime cron_start_time_after_write = getMonotonicUs();
2002
2003 /* Close clients that need to be closed asynchronous */
2004 freeClientsInAsyncFreeQueue();
2005
2006 /* Incrementally trim replication backlog, 10 times the normal speed is
2007 * to free replication backlog as much as possible. */
2008 if (server.repl_backlog)
2009 incrementalTrimReplicationBacklog(10*REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
2010
2011 /* Disconnect some clients if they are consuming too much memory. */
2012 evictClients();
2013
2014 /* Record cron time in beforeSleep. */
2015 monotime duration_after_write = getMonotonicUs() - cron_start_time_after_write;
2016
2017 /* Record eventloop latency. */
2018 if (server.el_start > 0) {
2019 monotime el_duration = getMonotonicUs() - server.el_start;
2020 durationAddSample(EL_DURATION_TYPE_EL, el_duration);
2021 }
2022 server.el_cron_duration += duration_before_aof + duration_after_write;
2023 durationAddSample(EL_DURATION_TYPE_CRON, server.el_cron_duration);
2024 server.el_cron_duration = 0;
2025 /* Record max command count per cycle. */
2026 if (server.stat_numcommands > server.el_cmd_cnt_start) {
2027 long long el_command_cnt = server.stat_numcommands - server.el_cmd_cnt_start;
2028 if (el_command_cnt > server.el_cmd_cnt_max) {
2029 server.el_cmd_cnt_max = el_command_cnt;
2030 }
2031 }
2032
2033 /* Don't sleep at all before the next beforeSleep() if needed (e.g. a
2034 * connection has pending data) */
2035 aeSetDontWait(server.el, dont_sleep);
2036
2037 /* Before we are going to sleep, let the threads access the dataset by
2038 * releasing the GIL. Redis main thread will not touch anything at this
2039 * time. */
2040 if (moduleCount()) moduleReleaseGIL();
2041 /********************* WARNING ********************
2042 * Do NOT add anything below moduleReleaseGIL !!! *
2043 ***************************** ********************/
2044}
2045
2046/* This function is called immediately after the event loop multiplexing
2047 * API returned, and the control is going to soon return to Redis by invoking
2048 * the different events callbacks. */
2049void afterSleep(struct aeEventLoop *eventLoop) {
2050 UNUSED(eventLoop);
2051 /********************* WARNING ********************
2052 * Do NOT add anything above moduleAcquireGIL !!! *
2053 ***************************** ********************/
2054 if (!ProcessingEventsWhileBlocked) {
2055 /* Acquire the modules GIL so that their threads won't touch anything. */
2056 if (moduleCount()) {
2057 mstime_t latency;
2058 latencyStartMonitor(latency);
2059
2060 atomicSet(server.module_gil_acquring, 1);
2061 moduleAcquireGIL();
2062 atomicSet(server.module_gil_acquring, 0);
2063 moduleFireServerEvent(REDISMODULE_EVENT_EVENTLOOP,
2064 REDISMODULE_SUBEVENT_EVENTLOOP_AFTER_SLEEP,
2065 NULL);
2066 latencyEndMonitor(latency);
2067 latencyAddSampleIfNeeded("module-acquire-GIL",latency);
2068 }
2069 /* Set the eventloop start time. */
2070 server.el_start = getMonotonicUs();
2071 /* Set the eventloop command count at start. */
2072 server.el_cmd_cnt_start = server.stat_numcommands;
2073 }
2074
2075 /* Set running after waking up */
2076 if (server.io_threads_num > 1) atomicSetWithSync(server.running, 1);
2077
2078 /* Update the time cache. */
2079 updateCachedTime(1);
2080
2081 /* Update command time snapshot in case it'll be required without a command
2082 * e.g. somehow used by module timers. Don't update it while yielding to a
2083 * blocked command, call() will handle that and restore the original time. */
2084 if (!ProcessingEventsWhileBlocked) {
2085 server.cmd_time_snapshot = server.mstime;
2086 }
2087}
2088
2089/* =========================== Server initialization ======================== */
2090
2091void createSharedObjects(void) {
2092 int j;
2093
2094 /* Shared command responses */
2095 shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
2096 shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
2097 shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
2098 shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
2099 shared.emptyarray = createObject(OBJ_STRING,sdsnew("*0\r\n"));
2100 shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
2101 shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
2102 shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
2103 shared.space = createObject(OBJ_STRING,sdsnew(" "));
2104 shared.plus = createObject(OBJ_STRING,sdsnew("+"));
2105
2106 /* Shared command error responses */
2107 shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
2108 "-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
2109 shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
2110 shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
2111 "-ERR no such key\r\n"));
2112 shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
2113 "-ERR syntax error\r\n"));
2114 shared.sameobjecterr = createObject(OBJ_STRING,sdsnew(
2115 "-ERR source and destination objects are the same\r\n"));
2116 shared.outofrangeerr = createObject(OBJ_STRING,sdsnew(
2117 "-ERR index out of range\r\n"));
2118 shared.noscripterr = createObject(OBJ_STRING,sdsnew(
2119 "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
2120 shared.loadingerr = createObject(OBJ_STRING,sdsnew(
2121 "-LOADING Redis is loading the dataset in memory\r\n"));
2122 shared.slowevalerr = createObject(OBJ_STRING,sdsnew(
2123 "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
2124 shared.slowscripterr = createObject(OBJ_STRING,sdsnew(
2125 "-BUSY Redis is busy running a script. You can only call FUNCTION KILL or SHUTDOWN NOSAVE.\r\n"));
2126 shared.slowmoduleerr = createObject(OBJ_STRING,sdsnew(
2127 "-BUSY Redis is busy running a module command.\r\n"));
2128 shared.masterdownerr = createObject(OBJ_STRING,sdsnew(
2129 "-MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'.\r\n"));
2130 shared.bgsaveerr = createObject(OBJ_STRING,sdsnew(
2131 "-MISCONF Redis is configured to save RDB snapshots, but it's currently unable to persist to disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). Please check the Redis logs for details about the RDB error.\r\n"));
2132 shared.roslaveerr = createObject(OBJ_STRING,sdsnew(
2133 "-READONLY You can't write against a read only replica.\r\n"));
2134 shared.noautherr = createObject(OBJ_STRING,sdsnew(
2135 "-NOAUTH Authentication required.\r\n"));
2136 shared.oomerr = createObject(OBJ_STRING,sdsnew(
2137 "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
2138 shared.execaborterr = createObject(OBJ_STRING,sdsnew(
2139 "-EXECABORT Transaction discarded because of previous errors.\r\n"));
2140 shared.noreplicaserr = createObject(OBJ_STRING,sdsnew(
2141 "-NOREPLICAS Not enough good replicas to write.\r\n"));
2142 shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
2143 "-BUSYKEY Target key name already exists.\r\n"));
2144
2145 /* The shared NULL depends on the protocol version. */
2146 shared.null[0] = NULL;
2147 shared.null[1] = NULL;
2148 shared.null[2] = createObject(OBJ_STRING,sdsnew("$-1\r\n"));
2149 shared.null[3] = createObject(OBJ_STRING,sdsnew("_\r\n"));
2150
2151 shared.nullarray[0] = NULL;
2152 shared.nullarray[1] = NULL;
2153 shared.nullarray[2] = createObject(OBJ_STRING,sdsnew("*-1\r\n"));
2154 shared.nullarray[3] = createObject(OBJ_STRING,sdsnew("_\r\n"));
2155
2156 shared.emptymap[0] = NULL;
2157 shared.emptymap[1] = NULL;
2158 shared.emptymap[2] = createObject(OBJ_STRING,sdsnew("*0\r\n"));
2159 shared.emptymap[3] = createObject(OBJ_STRING,sdsnew("%0\r\n"));
2160
2161 shared.emptyset[0] = NULL;
2162 shared.emptyset[1] = NULL;
2163 shared.emptyset[2] = createObject(OBJ_STRING,sdsnew("*0\r\n"));
2164 shared.emptyset[3] = createObject(OBJ_STRING,sdsnew("~0\r\n"));
2165
2166 for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) {
2167 char dictid_str[64];
2168 int dictid_len;
2169
2170 dictid_len = ll2string(dictid_str,sizeof(dictid_str),j);
2171 shared.select[j] = createObject(OBJ_STRING,
2172 sdscatprintf(sdsempty(),
2173 "*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
2174 dictid_len, dictid_str));
2175 }
2176 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
2177 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
2178 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
2179 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
2180 shared.ssubscribebulk = createStringObject("$10\r\nssubscribe\r\n", 17);
2181 shared.sunsubscribebulk = createStringObject("$12\r\nsunsubscribe\r\n", 19);
2182 shared.smessagebulk = createStringObject("$8\r\nsmessage\r\n", 14);
2183 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
2184 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
2185
2186 /* Shared command names */
2187 shared.del = createStringObject("DEL",3);
2188 shared.unlink = createStringObject("UNLINK",6);
2189 shared.rpop = createStringObject("RPOP",4);
2190 shared.lpop = createStringObject("LPOP",4);
2191 shared.lpush = createStringObject("LPUSH",5);
2192 shared.rpoplpush = createStringObject("RPOPLPUSH",9);
2193 shared.lmove = createStringObject("LMOVE",5);
2194 shared.blmove = createStringObject("BLMOVE",6);
2195 shared.zpopmin = createStringObject("ZPOPMIN",7);
2196 shared.zpopmax = createStringObject("ZPOPMAX",7);
2197 shared.multi = createStringObject("MULTI",5);
2198 shared.exec = createStringObject("EXEC",4);
2199 shared.hset = createStringObject("HSET",4);
2200 shared.srem = createStringObject("SREM",4);
2201 shared.xgroup = createStringObject("XGROUP",6);
2202 shared.xclaim = createStringObject("XCLAIM",6);
2203 shared.script = createStringObject("SCRIPT",6);
2204 shared.replconf = createStringObject("REPLCONF",8);
2205 shared.pexpireat = createStringObject("PEXPIREAT",9);
2206 shared.pexpire = createStringObject("PEXPIRE",7);
2207 shared.persist = createStringObject("PERSIST",7);
2208 shared.set = createStringObject("SET",3);
2209 shared.eval = createStringObject("EVAL",4);
2210 shared.hpexpireat = createStringObject("HPEXPIREAT",10);
2211 shared.hpersist = createStringObject("HPERSIST",8);
2212 shared.hdel = createStringObject("HDEL",4);
2213 shared.hsetex = createStringObject("HSETEX",6);
2214
2215 /* Shared command argument */
2216 shared.left = createStringObject("left",4);
2217 shared.right = createStringObject("right",5);
2218 shared.pxat = createStringObject("PXAT", 4);
2219 shared.time = createStringObject("TIME",4);
2220 shared.retrycount = createStringObject("RETRYCOUNT",10);
2221 shared.force = createStringObject("FORCE",5);
2222 shared.justid = createStringObject("JUSTID",6);
2223 shared.entriesread = createStringObject("ENTRIESREAD",11);
2224 shared.lastid = createStringObject("LASTID",6);
2225 shared.default_username = createStringObject("default",7);
2226 shared.ping = createStringObject("ping",4);
2227 shared.setid = createStringObject("SETID",5);
2228 shared.keepttl = createStringObject("KEEPTTL",7);
2229 shared.absttl = createStringObject("ABSTTL",6);
2230 shared.load = createStringObject("LOAD",4);
2231 shared.createconsumer = createStringObject("CREATECONSUMER",14);
2232 shared.getack = createStringObject("GETACK",6);
2233 shared.special_asterick = createStringObject("*",1);
2234 shared.special_equals = createStringObject("=",1);
2235 shared.redacted = makeObjectShared(createStringObject("(redacted)",10));
2236 shared.fields = createStringObject("FIELDS",6);
2237
2238 for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
2239 shared.integers[j] =
2240 makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
2241 initObjectLRUOrLFU(shared.integers[j]);
2242 shared.integers[j]->encoding = OBJ_ENCODING_INT;
2243 }
2244 for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
2245 shared.mbulkhdr[j] = createObject(OBJ_STRING,
2246 sdscatprintf(sdsempty(),"*%d\r\n",j));
2247 shared.bulkhdr[j] = createObject(OBJ_STRING,
2248 sdscatprintf(sdsempty(),"$%d\r\n",j));
2249 shared.maphdr[j] = createObject(OBJ_STRING,
2250 sdscatprintf(sdsempty(),"%%%d\r\n",j));
2251 shared.sethdr[j] = createObject(OBJ_STRING,
2252 sdscatprintf(sdsempty(),"~%d\r\n",j));
2253 }
2254 /* The following two shared objects, minstring and maxstring, are not
2255 * actually used for their value but as a special object meaning
2256 * respectively the minimum possible string and the maximum possible
2257 * string in string comparisons for the ZRANGEBYLEX command. */
2258 shared.minstring = sdsnew("minstring");
2259 shared.maxstring = sdsnew("maxstring");
2260}
2261
2262void initServerClientMemUsageBuckets(void) {
2263 if (server.client_mem_usage_buckets)
2264 return;
2265 server.client_mem_usage_buckets = zmalloc(sizeof(clientMemUsageBucket)*CLIENT_MEM_USAGE_BUCKETS);
2266 for (int j = 0; j < CLIENT_MEM_USAGE_BUCKETS; j++) {
2267 server.client_mem_usage_buckets[j].mem_usage_sum = 0;
2268 server.client_mem_usage_buckets[j].clients = listCreate();
2269 }
2270}
2271
2272void freeServerClientMemUsageBuckets(void) {
2273 if (!server.client_mem_usage_buckets)
2274 return;
2275 for (int j = 0; j < CLIENT_MEM_USAGE_BUCKETS; j++)
2276 listRelease(server.client_mem_usage_buckets[j].clients);
2277 zfree(server.client_mem_usage_buckets);
2278 server.client_mem_usage_buckets = NULL;
2279}
2280
2281void initServerConfig(void) {
2282 int j;
2283 char *default_bindaddr[CONFIG_DEFAULT_BINDADDR_COUNT] = CONFIG_DEFAULT_BINDADDR;
2284
2285 initConfigValues();
2286 updateCachedTime(1);
2287 server.cmd_time_snapshot = server.mstime;
2288 getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
2289 server.runid[CONFIG_RUN_ID_SIZE] = '\0';
2290 changeReplicationId();
2291 clearReplicationId2();
2292 server.hz = CONFIG_DEFAULT_HZ; /* Initialize it ASAP, even if it may get
2293 updated later after loading the config.
2294 This value may be used before the server
2295 is initialized. */
2296 server.timezone = getTimeZone(); /* Initialized by tzset(). */
2297 server.configfile = NULL;
2298 server.executable = NULL;
2299 server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
2300 server.dbg_assert_keysizes = 0; /* Disabled by default */
2301 server.dbg_assert_alloc_per_slot = 0; /* Disabled by default */
2302 server.bindaddr_count = CONFIG_DEFAULT_BINDADDR_COUNT;
2303 for (j = 0; j < CONFIG_DEFAULT_BINDADDR_COUNT; j++)
2304 server.bindaddr[j] = zstrdup(default_bindaddr[j]);
2305 memset(server.listeners, 0x00, sizeof(server.listeners));
2306 server.active_expire_enabled = 1;
2307 server.allow_access_expired = 0;
2308 server.allow_access_trimmed = 0;
2309 server.skip_checksum_validation = 0;
2310 server.loading = 0;
2311 server.async_loading = 0;
2312 server.loading_rdb_used_mem = 0;
2313 server.aof_state = AOF_OFF;
2314 server.aof_rewrite_base_size = 0;
2315 server.aof_rewrite_scheduled = 0;
2316 server.aof_flush_sleep = 0;
2317 server.aof_last_fsync = time(NULL) * 1000;
2318 server.aof_cur_timestamp = 0;
2319 atomicSet(server.aof_bio_fsync_status,C_OK);
2320 server.aof_rewrite_time_last = -1;
2321 server.aof_rewrite_time_start = -1;
2322 server.aof_lastbgrewrite_status = C_OK;
2323 server.aof_delayed_fsync = 0;
2324 server.aof_fd = -1;
2325 server.aof_selected_db = -1; /* Make sure the first time will not match */
2326 server.aof_flush_postponed_start = 0;
2327 server.aof_last_incr_size = 0;
2328 server.aof_last_incr_fsync_offset = 0;
2329 server.active_defrag_running = 0;
2330 server.active_defrag_configuration_changed = 0;
2331 server.notify_keyspace_events = 0;
2332 server.blocked_clients = 0;
2333 memset(server.blocked_clients_by_type,0,
2334 sizeof(server.blocked_clients_by_type));
2335 server.shutdown_asap = 0;
2336 server.crashing = 0;
2337 server.shutdown_flags = 0;
2338 server.shutdown_mstime = 0;
2339 server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE;
2340 server.cluster_module_trim_disablers = 0;
2341 server.migrate_cached_sockets = dictCreate(&migrateCacheDictType);
2342 server.next_client_id = 1; /* Client IDs, start from 1 .*/
2343 server.page_size = sysconf(_SC_PAGESIZE);
2344 server.pause_cron = 0;
2345 server.dict_resizing = 1;
2346
2347 server.latency_tracking_info_percentiles_len = 3;
2348 server.latency_tracking_info_percentiles = zmalloc(sizeof(double)*(server.latency_tracking_info_percentiles_len));
2349 server.latency_tracking_info_percentiles[0] = 50.0; /* p50 */
2350 server.latency_tracking_info_percentiles[1] = 99.0; /* p99 */
2351 server.latency_tracking_info_percentiles[2] = 99.9; /* p999 */
2352
2353 server.lruclock = getLRUClock();
2354 resetServerSaveParams();
2355
2356 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
2357 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
2358 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
2359
2360 /* Replication related */
2361 server.masterhost = NULL;
2362 server.masterport = 6379;
2363 server.master = NULL;
2364 server.cached_master = NULL;
2365 server.master_initial_offset = -1;
2366 server.repl_state = REPL_STATE_NONE;
2367 server.repl_rdb_ch_state = REPL_RDB_CH_STATE_NONE;
2368 server.repl_num_master_disconnection = 0;
2369 server.repl_full_sync_buffer = (struct replDataBuf) {0};
2370 server.repl_transfer_tmpfile = NULL;
2371 server.repl_transfer_fd = -1;
2372 server.repl_transfer_s = NULL;
2373 server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
2374 server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
2375 server.repl_up_since = 0;
2376 server.master_repl_offset = 0;
2377 server.fsynced_reploff_pending = 0;
2378 server.repl_stream_lastio = server.unixtime;
2379 server.repl_total_sync_attempts = 0;
2380
2381 /* Replication partial resync backlog */
2382 server.repl_backlog = NULL;
2383 server.repl_no_slaves_since = time(NULL);
2384
2385 /* Failover related */
2386 server.failover_end_time = 0;
2387 server.force_failover = 0;
2388 server.target_replica_host = NULL;
2389 server.target_replica_port = 0;
2390 server.failover_state = NO_FAILOVER;
2391
2392 /* Client output buffer limits */
2393 for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
2394 server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
2395
2396 /* Linux OOM Score config */
2397 for (j = 0; j < CONFIG_OOM_COUNT; j++)
2398 server.oom_score_adj_values[j] = configOOMScoreAdjValuesDefaults[j];
2399
2400 /* Double constants initialization */
2401 R_Zero = 0.0;
2402 R_PosInf = 1.0/R_Zero;
2403 R_NegInf = -1.0/R_Zero;
2404 R_Nan = R_Zero/R_Zero;
2405
2406 /* Command table -- we initialize it here as it is part of the
2407 * initial configuration, since command names may be changed via
2408 * redis.conf using the rename-command directive. */
2409 server.commands = dictCreate(&commandTableDictType);
2410 server.orig_commands = dictCreate(&commandTableDictType);
2411 populateCommandTable();
2412
2413 /* Debugging */
2414 server.watchdog_period = 0;
2415}
2416
2417extern char **environ;
2418
2419/* Restart the server, executing the same executable that started this
2420 * instance, with the same arguments and configuration file.
2421 *
2422 * The function is designed to directly call execve() so that the new
2423 * server instance will retain the PID of the previous one.
2424 *
2425 * The list of flags, that may be bitwise ORed together, alter the
2426 * behavior of this function:
2427 *
2428 * RESTART_SERVER_NONE No flags.
2429 * RESTART_SERVER_GRACEFULLY Do a proper shutdown before restarting.
2430 * RESTART_SERVER_CONFIG_REWRITE Rewrite the config file before restarting.
2431 *
2432 * On success the function does not return, because the process turns into
2433 * a different process. On error C_ERR is returned. */
2434int restartServer(int flags, mstime_t delay) {
2435 int j;
2436
2437 /* Check if we still have accesses to the executable that started this
2438 * server instance. */
2439 if (access(server.executable,X_OK) == -1) {
2440 serverLog(LL_WARNING,"Can't restart: this process has no "
2441 "permissions to execute %s", server.executable);
2442 return C_ERR;
2443 }
2444
2445 /* Config rewriting. */
2446 if (flags & RESTART_SERVER_CONFIG_REWRITE &&
2447 server.configfile &&
2448 rewriteConfig(server.configfile, 0) == -1)
2449 {
2450 serverLog(LL_WARNING,"Can't restart: configuration rewrite process "
2451 "failed: %s", strerror(errno));
2452 return C_ERR;
2453 }
2454
2455 /* Perform a proper shutdown. We don't wait for lagging replicas though. */
2456 if (flags & RESTART_SERVER_GRACEFULLY &&
2457 prepareForShutdown(SHUTDOWN_NOW) != C_OK)
2458 {
2459 serverLog(LL_WARNING,"Can't restart: error preparing for shutdown");
2460 return C_ERR;
2461 }
2462
2463 /* Close all file descriptors, with the exception of stdin, stdout, stderr
2464 * which are useful if we restart a Redis server which is not daemonized. */
2465 for (j = 3; j < (int)server.maxclients + 1024; j++) {
2466 /* Test the descriptor validity before closing it, otherwise
2467 * Valgrind issues a warning on close(). */
2468 if (fcntl(j,F_GETFD) != -1) close(j);
2469 }
2470
2471 /* Execute the server with the original command line. */
2472 if (delay) usleep(delay*1000);
2473 zfree(server.exec_argv[0]);
2474 server.exec_argv[0] = zstrdup(server.executable);
2475 execve(server.executable,server.exec_argv,environ);
2476
2477 /* If an error occurred here, there is nothing we can do, but exit. */
2478 _exit(1);
2479
2480 return C_ERR; /* Never reached. */
2481}
2482
2483/* This function will configure the current process's oom_score_adj according
2484 * to user specified configuration. This is currently implemented on Linux
2485 * only.
2486 *
2487 * A process_class value of -1 implies OOM_CONFIG_MASTER or OOM_CONFIG_REPLICA,
2488 * depending on current role.
2489 */
2490int setOOMScoreAdj(int process_class) {
2491 if (process_class == -1)
2492 process_class = (server.masterhost ? CONFIG_OOM_REPLICA : CONFIG_OOM_MASTER);
2493
2494 serverAssert(process_class >= 0 && process_class < CONFIG_OOM_COUNT);
2495
2496#ifdef HAVE_PROC_OOM_SCORE_ADJ
2497 /* The following statics are used to indicate Redis has changed the process's oom score.
2498 * And to save the original score so we can restore it later if needed.
2499 * We need this so when we disabled oom-score-adj (also during configuration rollback
2500 * when another configuration parameter was invalid and causes a rollback after
2501 * applying a new oom-score) we can return to the oom-score value from before our
2502 * adjustments. */
2503 static int oom_score_adjusted_by_redis = 0;
2504 static int oom_score_adj_base = 0;
2505
2506 int fd;
2507 int val;
2508 char buf[64];
2509
2510 if (server.oom_score_adj != OOM_SCORE_ADJ_NO) {
2511 if (!oom_score_adjusted_by_redis) {
2512 oom_score_adjusted_by_redis = 1;
2513 /* Backup base value before enabling Redis control over oom score */
2514 fd = open("/proc/self/oom_score_adj", O_RDONLY);
2515 if (fd < 0 || read(fd, buf, sizeof(buf)) < 0) {
2516 serverLog(LL_WARNING, "Unable to read oom_score_adj: %s", strerror(errno));
2517 if (fd != -1) close(fd);
2518 return C_ERR;
2519 }
2520 oom_score_adj_base = atoi(buf);
2521 close(fd);
2522 }
2523
2524 val = server.oom_score_adj_values[process_class];
2525 if (server.oom_score_adj == OOM_SCORE_RELATIVE)
2526 val += oom_score_adj_base;
2527 if (val > 1000) val = 1000;
2528 if (val < -1000) val = -1000;
2529 } else if (oom_score_adjusted_by_redis) {
2530 oom_score_adjusted_by_redis = 0;
2531 val = oom_score_adj_base;
2532 }
2533 else {
2534 return C_OK;
2535 }
2536
2537 snprintf(buf, sizeof(buf) - 1, "%d\n", val);
2538
2539 fd = open("/proc/self/oom_score_adj", O_WRONLY);
2540 if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
2541 serverLog(LL_WARNING, "Unable to write oom_score_adj: %s", strerror(errno));
2542 if (fd != -1) close(fd);
2543 return C_ERR;
2544 }
2545
2546 close(fd);
2547 return C_OK;
2548#else
2549 /* Unsupported */
2550 return C_ERR;
2551#endif
2552}
2553
2554/* This function will try to raise the max number of open files accordingly to
2555 * the configured max number of clients. It also reserves a number of file
2556 * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
2557 * persistence, listening sockets, log files and so forth.
2558 *
2559 * If it will not be possible to set the limit accordingly to the configured
2560 * max number of clients, the function will do the reverse setting
2561 * server.maxclients to the value that we can actually handle. */
2562void adjustOpenFilesLimit(void) {
2563 rlim_t maxfiles = server.maxclients+CONFIG_MIN_RESERVED_FDS;
2564 struct rlimit limit;
2565
2566 if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
2567 serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
2568 strerror(errno));
2569 server.maxclients = 1024-CONFIG_MIN_RESERVED_FDS;
2570 } else {
2571 rlim_t oldlimit = limit.rlim_cur;
2572
2573 /* Set the max number of files if the current limit is not enough
2574 * for our needs. */
2575 if (oldlimit < maxfiles) {
2576 rlim_t bestlimit;
2577 int setrlimit_error = 0;
2578
2579 /* Try to set the file limit to match 'maxfiles' or at least
2580 * to the higher value supported less than maxfiles. */
2581 bestlimit = maxfiles;
2582 while(bestlimit > oldlimit) {
2583 rlim_t decr_step = 16;
2584
2585 limit.rlim_cur = bestlimit;
2586 limit.rlim_max = bestlimit;
2587 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
2588 setrlimit_error = errno;
2589
2590 /* We failed to set file limit to 'bestlimit'. Try with a
2591 * smaller limit decrementing by a few FDs per iteration. */
2592 if (bestlimit < decr_step) {
2593 bestlimit = oldlimit;
2594 break;
2595 }
2596 bestlimit -= decr_step;
2597 }
2598
2599 /* Assume that the limit we get initially is still valid if
2600 * our last try was even lower. */
2601 if (bestlimit < oldlimit) bestlimit = oldlimit;
2602
2603 if (bestlimit < maxfiles) {
2604 unsigned int old_maxclients = server.maxclients;
2605 server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS;
2606 /* maxclients is unsigned so may overflow: in order
2607 * to check if maxclients is now logically less than 1
2608 * we test indirectly via bestlimit. */
2609 if (bestlimit <= CONFIG_MIN_RESERVED_FDS) {
2610 serverLog(LL_WARNING,"Your current 'ulimit -n' "
2611 "of %llu is not enough for the server to start. "
2612 "Please increase your open file limit to at least "
2613 "%llu. Exiting.",
2614 (unsigned long long) oldlimit,
2615 (unsigned long long) maxfiles);
2616 exit(1);
2617 }
2618 serverLog(LL_WARNING,"You requested maxclients of %d "
2619 "requiring at least %llu max file descriptors.",
2620 old_maxclients,
2621 (unsigned long long) maxfiles);
2622 serverLog(LL_WARNING,"Server can't set maximum open files "
2623 "to %llu because of OS error: %s.",
2624 (unsigned long long) maxfiles, strerror(setrlimit_error));
2625 serverLog(LL_WARNING,"Current maximum open files is %llu. "
2626 "maxclients has been reduced to %d to compensate for "
2627 "low ulimit. "
2628 "If you need higher maxclients increase 'ulimit -n'.",
2629 (unsigned long long) bestlimit, server.maxclients);
2630 } else {
2631 serverLog(LL_NOTICE,"Increased maximum number of open files "
2632 "to %llu (it was originally set to %llu).",
2633 (unsigned long long) maxfiles,
2634 (unsigned long long) oldlimit);
2635 }
2636 }
2637 }
2638}
2639
2640/* Check that server.tcp_backlog can be actually enforced in Linux according
2641 * to the value of /proc/sys/net/core/somaxconn, or warn about it. */
2642void checkTcpBacklogSettings(void) {
2643#if defined(HAVE_PROC_SOMAXCONN)
2644 FILE *fp = fopen("/proc/sys/net/core/somaxconn","r");
2645 char buf[1024];
2646 if (!fp) return;
2647 if (fgets(buf,sizeof(buf),fp) != NULL) {
2648 int somaxconn = atoi(buf);
2649 if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
2650 serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
2651 }
2652 }
2653 fclose(fp);
2654#elif defined(HAVE_SYSCTL_KIPC_SOMAXCONN)
2655 int somaxconn, mib[3];
2656 size_t len = sizeof(int);
2657
2658 mib[0] = CTL_KERN;
2659 mib[1] = KERN_IPC;
2660 mib[2] = KIPC_SOMAXCONN;
2661
2662 if (sysctl(mib, 3, &somaxconn, &len, NULL, 0) == 0) {
2663 if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
2664 serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because kern.ipc.somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
2665 }
2666 }
2667#elif defined(HAVE_SYSCTL_KERN_SOMAXCONN)
2668 int somaxconn, mib[2];
2669 size_t len = sizeof(int);
2670
2671 mib[0] = CTL_KERN;
2672 mib[1] = KERN_SOMAXCONN;
2673
2674 if (sysctl(mib, 2, &somaxconn, &len, NULL, 0) == 0) {
2675 if (somaxconn > 0 && somaxconn < server.tcp_backlog) {
2676 serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because kern.somaxconn is set to the lower value of %d.", server.tcp_backlog, somaxconn);
2677 }
2678 }
2679#elif defined(SOMAXCONN)
2680 if (SOMAXCONN < server.tcp_backlog) {
2681 serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because SOMAXCONN is set to the lower value of %d.", server.tcp_backlog, SOMAXCONN);
2682 }
2683#endif
2684}
2685
2686void closeListener(connListener *sfd) {
2687 int j;
2688
2689 for (j = 0; j < sfd->count; j++) {
2690 if (sfd->fd[j] == -1) continue;
2691
2692 aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE);
2693 close(sfd->fd[j]);
2694 }
2695
2696 sfd->count = 0;
2697}
2698
2699/* Create an event handler for accepting new connections in TCP or TLS domain sockets.
2700 * This works atomically for all socket fds */
2701int createSocketAcceptHandler(connListener *sfd, aeFileProc *accept_handler) {
2702 int j;
2703
2704 for (j = 0; j < sfd->count; j++) {
2705 if (aeCreateFileEvent(server.el, sfd->fd[j], AE_READABLE, accept_handler,sfd) == AE_ERR) {
2706 /* Rollback */
2707 for (j = j-1; j >= 0; j--) aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE);
2708 return C_ERR;
2709 }
2710 }
2711 return C_OK;
2712}
2713
2714/* Initialize a set of file descriptors to listen to the specified 'port'
2715 * binding the addresses specified in the Redis server configuration.
2716 *
2717 * The listening file descriptors are stored in the integer array 'fds'
2718 * and their number is set in '*count'. Actually @sfd should be 'listener',
2719 * for the historical reasons, let's keep 'sfd' here.
2720 *
2721 * The addresses to bind are specified in the global server.bindaddr array
2722 * and their number is server.bindaddr_count. If the server configuration
2723 * contains no specific addresses to bind, this function will try to
2724 * bind * (all addresses) for both the IPv4 and IPv6 protocols.
2725 *
2726 * On success the function returns C_OK.
2727 *
2728 * On error the function returns C_ERR. For the function to be on
2729 * error, at least one of the server.bindaddr addresses was
2730 * impossible to bind, or no bind addresses were specified in the server
2731 * configuration but the function is not able to bind * for at least
2732 * one of the IPv4 or IPv6 protocols. */
2733int listenToPort(connListener *sfd) {
2734 int j;
2735 int port = sfd->port;
2736 char **bindaddr = sfd->bindaddr;
2737
2738 /* If we have no bind address, we don't listen on a TCP socket */
2739 if (sfd->bindaddr_count == 0) return C_OK;
2740
2741 for (j = 0; j < sfd->bindaddr_count; j++) {
2742 char* addr = bindaddr[j];
2743 int optional = *addr == '-';
2744 if (optional) addr++;
2745 if (strchr(addr,':')) {
2746 /* Bind IPv6 address. */
2747 sfd->fd[sfd->count] = anetTcp6Server(server.neterr,port,addr,server.tcp_backlog);
2748 } else {
2749 /* Bind IPv4 address. */
2750 sfd->fd[sfd->count] = anetTcpServer(server.neterr,port,addr,server.tcp_backlog);
2751 }
2752 if (sfd->fd[sfd->count] == ANET_ERR) {
2753 int net_errno = errno;
2754 serverLog(LL_WARNING,
2755 "Warning: Could not create server TCP listening socket %s:%d: %s",
2756 addr, port, server.neterr);
2757 if (net_errno == EADDRNOTAVAIL && optional)
2758 continue;
2759 if (net_errno == ENOPROTOOPT || net_errno == EPROTONOSUPPORT ||
2760 net_errno == ESOCKTNOSUPPORT || net_errno == EPFNOSUPPORT ||
2761 net_errno == EAFNOSUPPORT)
2762 continue;
2763
2764 /* Rollback successful listens before exiting */
2765 closeListener(sfd);
2766 return C_ERR;
2767 }
2768 if (server.socket_mark_id > 0) anetSetSockMarkId(NULL, sfd->fd[sfd->count], server.socket_mark_id);
2769 anetNonBlock(NULL,sfd->fd[sfd->count]);
2770 anetCloexec(sfd->fd[sfd->count]);
2771 sfd->count++;
2772 }
2773 return C_OK;
2774}
2775
2776/* Resets the stats that we expose via INFO or other means that we want
2777 * to reset via CONFIG RESETSTAT. The function is also used in order to
2778 * initialize these fields in initServer() at server startup. */
2779void resetServerStats(void) {
2780 int j;
2781
2782 server.stat_numcommands = 0;
2783 server.stat_numconnections = 0;
2784 server.stat_expiredkeys = 0;
2785 server.stat_expired_subkeys = 0;
2786 server.stat_expired_stale_perc = 0;
2787 server.stat_expired_time_cap_reached_count = 0;
2788 server.stat_expire_cycle_time_used = 0;
2789 server.stat_evictedkeys = 0;
2790 server.stat_evictedclients = 0;
2791 server.stat_evictedscripts = 0;
2792 server.stat_total_eviction_exceeded_time = 0;
2793 server.stat_last_eviction_exceeded_time = 0;
2794 server.stat_keyspace_misses = 0;
2795 server.stat_keyspace_hits = 0;
2796 server.stat_active_defrag_hits = 0;
2797 server.stat_active_defrag_misses = 0;
2798 server.stat_active_defrag_key_hits = 0;
2799 server.stat_active_defrag_key_misses = 0;
2800 server.stat_active_defrag_scanned = 0;
2801 server.stat_total_active_defrag_time = 0;
2802 server.stat_last_active_defrag_time = 0;
2803 server.stat_fork_time = 0;
2804 server.stat_fork_rate = 0;
2805 server.stat_total_forks = 0;
2806 server.stat_rejected_conn = 0;
2807 server.stat_sync_full = 0;
2808 server.stat_sync_partial_ok = 0;
2809 server.stat_sync_partial_err = 0;
2810 for (j = 0; j < IO_THREADS_MAX_NUM; j++) {
2811 atomicSet(server.stat_io_reads_processed[j], 0);
2812 atomicSet(server.stat_io_writes_processed[j], 0);
2813 }
2814 atomicSet(server.stat_client_qbuf_limit_disconnections, 0);
2815 server.stat_client_outbuf_limit_disconnections = 0;
2816 for (j = 0; j < STATS_METRIC_COUNT; j++) {
2817 server.inst_metric[j].idx = 0;
2818 server.inst_metric[j].last_sample_base = 0;
2819 server.inst_metric[j].last_sample_value = 0;
2820 memset(server.inst_metric[j].samples,0,
2821 sizeof(server.inst_metric[j].samples));
2822 }
2823 server.stat_aof_rewrites = 0;
2824 server.stat_rdb_saves = 0;
2825 server.stat_aofrw_consecutive_failures = 0;
2826 server.stat_rdb_consecutive_failures = 0;
2827 atomicSet(server.stat_net_input_bytes, 0);
2828 atomicSet(server.stat_net_output_bytes, 0);
2829 atomicSet(server.stat_net_repl_input_bytes, 0);
2830 atomicSet(server.stat_net_repl_output_bytes, 0);
2831 server.stat_unexpected_error_replies = 0;
2832 server.stat_total_error_replies = 0;
2833 server.stat_dump_payload_sanitizations = 0;
2834 server.aof_delayed_fsync = 0;
2835 server.stat_reply_buffer_shrinks = 0;
2836 server.stat_reply_buffer_expands = 0;
2837 server.stat_cluster_incompatible_ops = 0;
2838 server.stat_total_prefetch_batches = 0;
2839 server.stat_total_prefetch_entries = 0;
2840 memset(server.duration_stats, 0, sizeof(durationStats) * EL_DURATION_TYPE_NUM);
2841 server.el_cmd_cnt_max = 0;
2842 lazyfreeResetStats();
2843}
2844
/* Allow the calling thread to be cancelled at any moment: cancellation is
 * enabled and switched to asynchronous mode (the default cancelability
 * type is PTHREAD_CANCEL_DEFERRED), so that the functions killing threads
 * work reliably. Needed for the pthread_cancel() used by the fast memory
 * test of the crash report. */
void makeThreadKillable(void) {
    int prev_state, prev_type; /* Previous settings, not used. */

    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &prev_state);
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &prev_type);
}
2852
2853void initServer(void) {
2854 int j;
2855
2856 signal(SIGHUP, SIG_IGN);
2857 signal(SIGPIPE, SIG_IGN);
2858 setupSignalHandlers();
2859 ThreadsManager_init();
2860 makeThreadKillable();
2861
2862 if (server.syslog_enabled) {
2863 openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
2864 server.syslog_facility);
2865 }
2866
2867 /* Initialization after setting defaults from the config system. */
2868 server.aof_state = server.aof_enabled ? AOF_ON : AOF_OFF;
2869 server.fsynced_reploff = server.aof_enabled ? 0 : -1;
2870 server.hz = server.config_hz;
2871 server.pid = getpid();
2872 server.in_fork_child = CHILD_TYPE_NONE;
2873 server.rdb_pipe_read = -1;
2874 server.rdb_child_exit_pipe = -1;
2875 server.main_thread_id = pthread_self();
2876 server.current_client = NULL;
2877 server.errors = raxNew();
2878 server.errors_enabled = 1;
2879 server.execution_nesting = 0;
2880 server.clients = listCreate();
2881 server.clients_index = raxNew();
2882 server.clients_to_close = listCreate();
2883 server.slaves = listCreate();
2884 server.monitors = listCreate();
2885 server.clients_pending_write = listCreate();
2886 server.clients_pending_read = listCreate();
2887 server.clients_with_pending_ref_reply = listCreate();
2888 server.clients_timeout_table = raxNew();
2889 server.replication_allowed = 1;
2890 server.slaveseldb = -1; /* Force to emit the first SELECT command. */
2891 server.unblocked_clients = listCreate();
2892 server.ready_keys = listCreate();
2893 server.tracking_pending_keys = listCreate();
2894 server.pending_push_messages = listCreate();
2895 server.clients_waiting_acks = listCreate();
2896 server.get_ack_from_slaves = 0;
2897 server.paused_actions = 0;
2898 memset(server.client_pause_per_purpose, 0,
2899 sizeof(server.client_pause_per_purpose));
2900 server.postponed_clients = listCreate();
2901 server.events_processed_while_blocked = 0;
2902 server.system_memory_size = zmalloc_get_memory_size();
2903 server.blocked_last_cron = 0;
2904 server.blocking_op_nesting = 0;
2905 server.thp_enabled = 0;
2906 server.cluster_drop_packet_filter = -1;
2907 server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME;
2908 server.reply_buffer_resizing_enabled = 1;
2909 server.reply_copy_avoidance_enabled = 1;
2910 server.client_mem_usage_buckets = NULL;
2911 /* Enable per slot memory accounting only if cluster-slot-stats-enabled
2912 * includes 'mem' at startup. Memory tracking can be disabled at runtime
2913 * but cannot be re-enabled, to avoid situation where we would need to
2914 * catch up or iterate over all slots and kvobjs. */
2915 server.memory_tracking_per_slot = clusterSlotStatsEnabled(CLUSTER_SLOT_STATS_MEM);
2916 resetReplicationBuffer();
2917
2918 /* Make sure the locale is set on startup based on the config file. */
2919 if (setlocale(LC_COLLATE,server.locale_collate) == NULL) {
2920 serverLog(LL_WARNING, "Failed to configure LOCALE for invalid locale name.");
2921 exit(1);
2922 }
2923
2924 createSharedObjects();
2925 adjustOpenFilesLimit();
2926 const char *clk_msg = monotonicInit();
2927 serverLog(LL_NOTICE, "monotonic clock: %s", clk_msg);
2928 server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
2929 if (server.el == NULL) {
2930 serverLog(LL_WARNING,
2931 "Failed creating the event loop. Error message: '%s'",
2932 strerror(errno));
2933 exit(1);
2934 }
2935 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
2936
2937 /* Create the Redis databases, and initialize other internal state. */
2938 int slot_count_bits = 0;
2939 int flags = KVSTORE_ALLOCATE_DICTS_ON_DEMAND;
2940 if (server.cluster_enabled) {
2941 slot_count_bits = CLUSTER_SLOT_MASK_BITS;
2942 flags |= KVSTORE_FREE_EMPTY_DICTS;
2943 }
2944 for (j = 0; j < server.dbnum; j++) {
2945 server.db[j].keys = kvstoreCreate(&kvstoreExType, &dbDictType, slot_count_bits, flags);
2946 server.db[j].expires = kvstoreCreate(&kvstoreBaseType, &dbExpiresDictType, slot_count_bits, flags);
2947 server.db[j].subexpires = estoreCreate(&subexpiresBucketsType, slot_count_bits);
2948 server.db[j].expires_cursor = 0;
2949 server.db[j].blocking_keys = dictCreate(&keylistDictType);
2950 server.db[j].blocking_keys_unblock_on_nokey = dictCreate(&objectKeyPointerValueDictType);
2951 server.db[j].stream_claim_pending_keys = dictCreate(&objectKeyPointerValueDictType);
2952 server.db[j].stream_idmp_keys = dictCreate(&objectKeyPointerValueDictType);
2953 server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType);
2954 server.db[j].watched_keys = dictCreate(&keylistDictType);
2955 server.db[j].id = j;
2956 server.db[j].avg_ttl = 0;
2957 }
2958 evictionPoolAlloc(); /* Initialize the LRU keys pool. */
2959 /* Note that server.pubsub_channels was chosen to be a kvstore (with only one dict, which
2960 * seems odd) just to make the code cleaner by making it be the same type as server.pubsubshard_channels
2961 * (which has to be kvstore), see pubsubtype.serverPubSubChannels */
2962 server.pubsub_channels = kvstoreCreate(
2963 &kvstoreBaseType, &objToDictDictType,
2964 0, KVSTORE_ALLOCATE_DICTS_ON_DEMAND);
2965 server.pubsub_patterns = dictCreate(&objToDictDictType);
2966 server.pubsubshard_channels = kvstoreCreate(
2967 &kvstoreBaseType, &objToDictDictType,
2968 slot_count_bits, KVSTORE_ALLOCATE_DICTS_ON_DEMAND | KVSTORE_FREE_EMPTY_DICTS);
2969 server.pubsub_clients = 0;
2970 server.watching_clients = 0;
2971 server.cronloops = 0;
2972 server.in_exec = 0;
2973 server.busy_module_yield_flags = BUSY_MODULE_YIELD_NONE;
2974 server.busy_module_yield_reply = NULL;
2975 server.client_pause_in_transaction = 0;
2976 server.child_pid = -1;
2977 server.child_type = CHILD_TYPE_NONE;
2978 server.rdb_child_type = RDB_CHILD_TYPE_NONE;
2979 server.rdb_pipe_conns = NULL;
2980 server.rdb_pipe_numconns = 0;
2981 server.rdb_pipe_numconns_writing = 0;
2982 server.rdb_pipe_buff = NULL;
2983 server.rdb_pipe_bufflen = 0;
2984 server.rdb_bgsave_scheduled = 0;
2985 server.child_info_pipe[0] = -1;
2986 server.child_info_pipe[1] = -1;
2987 server.child_info_nread = 0;
2988 server.aof_buf = sdsempty();
2989 server.lastsave = time(NULL); /* At startup we consider the DB saved. */
2990 server.lastbgsave_try = 0; /* At startup we never tried to BGSAVE. */
2991 server.rdb_save_time_last = -1;
2992 server.rdb_save_time_start = -1;
2993 server.rdb_last_load_keys_expired = 0;
2994 server.rdb_last_load_keys_loaded = 0;
2995 server.dirty = 0;
2996 resetServerStats();
2997 /* A few stats we don't want to reset: server startup time, and peak mem. */
2998 server.stat_starttime = time(NULL);
2999 server.stat_peak_memory = 0;
3000 server.stat_peak_memory_time = server.unixtime;
3001 server.stat_current_cow_peak = 0;
3002 server.stat_current_cow_bytes = 0;
3003 server.stat_current_cow_updated = 0;
3004 server.stat_current_save_keys_processed = 0;
3005 server.stat_current_save_keys_total = 0;
3006 server.stat_rdb_cow_bytes = 0;
3007 server.stat_aof_cow_bytes = 0;
3008 server.stat_module_cow_bytes = 0;
3009 server.stat_module_progress = 0;
3010 for (int j = 0; j < CLIENT_TYPE_COUNT; j++)
3011 server.stat_clients_type_memory[j] = 0;
3012 server.stat_cluster_links_memory = 0;
3013 server.cron_malloc_stats.zmalloc_used = 0;
3014 server.cron_malloc_stats.process_rss = 0;
3015 server.cron_malloc_stats.allocator_allocated = 0;
3016 server.cron_malloc_stats.allocator_active = 0;
3017 server.cron_malloc_stats.allocator_resident = 0;
3018 server.repl_current_sync_attempts = 0;
3019 server.lastbgsave_status = C_OK;
3020 server.aof_last_write_status = C_OK;
3021 server.aof_last_write_errno = 0;
3022 server.repl_good_slaves_count = 0;
3023 server.last_sig_received = 0;
3024 memset(server.io_threads_clients_num, 0, sizeof(server.io_threads_clients_num));
3025 atomicSetWithSync(server.running, 0);
3026
3027 /* Initiate acl info struct */
3028 server.acl_info.invalid_cmd_accesses = 0;
3029 server.acl_info.invalid_key_accesses = 0;
3030 server.acl_info.user_auth_failures = 0;
3031 server.acl_info.invalid_channel_accesses = 0;
3032 server.acl_info.acl_access_denied_tls_cert = 0;
3033
3034 /* Initialize the shared pending command pool. */
3035 server.cmd_pool.size = 0;
3036 server.cmd_pool.capacity = PENDING_COMMAND_POOL_SIZE;
3037 server.cmd_pool.pool = zmalloc(sizeof(pendingCommand*) * PENDING_COMMAND_POOL_SIZE);
3038 server.cmd_pool.min_size = 0;
3039
3040 /* Create the timer callback, this is our way to process many background
3041 * operations incrementally, like clients timeout, eviction of unaccessed
3042 * expired keys and so forth. */
3043 if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
3044 serverPanic("Can't create event loop timers.");
3045 exit(1);
3046 }
3047
3048 /* Register a readable event for the pipe used to awake the event loop
3049 * from module threads. */
3050 if (aeCreateFileEvent(server.el, server.module_pipe[0], AE_READABLE,
3051 modulePipeReadable,NULL) == AE_ERR) {
3052 serverPanic(
3053 "Error registering the readable event for the module pipe.");
3054 }
3055
3056 /* Register before and after sleep handlers (note this needs to be done
3057 * before loading persistence since it is used by processEventsWhileBlocked. */
3058 aeSetBeforeSleepProc(server.el,beforeSleep);
3059 aeSetAfterSleepProc(server.el,afterSleep);
3060
3061 /* 32 bit instances are limited to 4GB of address space, so if there is
3062 * no explicit limit in the user provided configuration we set a limit
3063 * at 3 GB using maxmemory with 'noeviction' policy'. This avoids
3064 * useless crashes of the Redis instance for out of memory. */
3065 if (server.arch_bits == 32 && server.maxmemory == 0) {
3066 serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
3067 server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
3068 server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
3069 }
3070
3071 luaEnvInit();
3072 scriptingInit(1);
3073 if (functionsInit() == C_ERR) {
3074 serverPanic("Functions initialization failed, check the server logs.");
3075 exit(1);
3076 }
3077 slowlogInit();
3078 latencyMonitorInit();
3079
3080 /* Initialize ACL default password if it exists */
3081 ACLUpdateDefaultUserPassword(server.requirepass);
3082
3083 applyWatchdogPeriod();
3084
3085 if (server.maxmemory_clients != 0)
3086 initServerClientMemUsageBuckets();
3087
3088 prefetchCommandsBatchInit();
3089}
3090
3091void initListeners(void) {
3092 /* Setup listeners from server config for TCP/TLS/Unix */
3093 int conn_index;
3094 connListener *listener;
3095 if (server.port != 0) {
3096 conn_index = connectionIndexByType(CONN_TYPE_SOCKET);
3097 if (conn_index < 0)
3098 serverPanic("Failed finding connection listener of %s", CONN_TYPE_SOCKET);
3099 listener = &server.listeners[conn_index];
3100 listener->bindaddr = server.bindaddr;
3101 listener->bindaddr_count = server.bindaddr_count;
3102 listener->port = server.port;
3103 listener->ct = connectionByType(CONN_TYPE_SOCKET);
3104 }
3105
3106 if (server.tls_port || server.tls_replication || server.tls_cluster) {
3107 ConnectionType *ct_tls = connectionTypeTls();
3108 if (!ct_tls) {
3109 serverLog(LL_WARNING, "Failed finding TLS support.");
3110 exit(1);
3111 }
3112 if (connTypeConfigure(ct_tls, &server.tls_ctx_config, 1) == C_ERR) {
3113 serverLog(LL_WARNING, "Failed to configure TLS. Check logs for more info.");
3114 exit(1);
3115 }
3116 }
3117
3118 if (server.tls_port != 0) {
3119 conn_index = connectionIndexByType(CONN_TYPE_TLS);
3120 if (conn_index < 0)
3121 serverPanic("Failed finding connection listener of %s", CONN_TYPE_TLS);
3122 listener = &server.listeners[conn_index];
3123 listener->bindaddr = server.bindaddr;
3124 listener->bindaddr_count = server.bindaddr_count;
3125 listener->port = server.tls_port;
3126 listener->ct = connectionByType(CONN_TYPE_TLS);
3127 }
3128 if (server.unixsocket != NULL) {
3129 conn_index = connectionIndexByType(CONN_TYPE_UNIX);
3130 if (conn_index < 0)
3131 serverPanic("Failed finding connection listener of %s", CONN_TYPE_UNIX);
3132 listener = &server.listeners[conn_index];
3133 listener->bindaddr = &server.unixsocket;
3134 listener->bindaddr_count = 1;
3135 listener->ct = connectionByType(CONN_TYPE_UNIX);
3136 listener->priv = &server.unixsocketperm; /* Unix socket specified */
3137 }
3138
3139 /* create all the configured listener, and add handler to start to accept */
3140 int listen_fds = 0;
3141 for (int j = 0; j < CONN_TYPE_MAX; j++) {
3142 listener = &server.listeners[j];
3143 if (listener->ct == NULL)
3144 continue;
3145
3146 if (connListen(listener) == C_ERR) {
3147 serverLog(LL_WARNING, "Failed listening on port %u (%s), aborting.", listener->port, listener->ct->get_type(NULL));
3148 exit(1);
3149 }
3150
3151 if (createSocketAcceptHandler(listener, connAcceptHandler(listener->ct)) != C_OK)
3152 serverPanic("Unrecoverable error creating %s listener accept handler.", listener->ct->get_type(NULL));
3153
3154 listen_fds += listener->count;
3155 }
3156
3157 if (listen_fds == 0) {
3158 serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
3159 exit(1);
3160 }
3161}
3162
3163/* Some steps in server initialization need to be done last (after modules
3164 * are loaded).
3165 * Specifically, creation of threads due to a race bug in ld.so, in which
3166 * Thread Local Storage initialization collides with dlopen call.
3167 * see: https://sourceware.org/bugzilla/show_bug.cgi?id=19329 */
3168void InitServerLast(void) {
3169 bioInit();
3170 initThreadedIO();
3171 set_jemalloc_bg_thread(server.jemalloc_bg_thread);
3172 server.initial_memory_usage = zmalloc_used_memory();
3173}
3174
3175/* The purpose of this function is to try to "glue" consecutive range
3176 * key specs in order to build the legacy (first,last,step) spec
3177 * used by the COMMAND command.
3178 * By far the most common case is just one range spec (e.g. SET)
3179 * but some commands' ranges were split into two or more ranges
3180 * in order to have different flags for different keys (e.g. SMOVE,
3181 * first key is "RW ACCESS DELETE", second key is "RW INSERT").
3182 *
3183 * Additionally set the CMD_MOVABLE_KEYS flag for commands that may have key
3184 * names in their arguments, but the legacy range spec doesn't cover all of them.
3185 *
3186 * This function uses very basic heuristics and is "best effort":
3187 * 1. Only commands which have only "range" specs are considered.
3188 * 2. Only range specs with keystep of 1 are considered.
3189 * 3. The order of the range specs must be ascending (i.e.
3190 * lastkey of spec[i] == firstkey-1 of spec[i+1]).
3191 *
3192 * This function will succeed on all native Redis commands and may
3193 * fail on module commands, even if it only has "range" specs that
3194 * could actually be "glued", in the following cases:
3195 * 1. The order of "range" specs is not ascending (e.g. the spec for
3196 * the key at index 2 was added before the spec of the key at
3197 * index 1).
3198 * 2. The "range" specs have keystep >1.
3199 *
3200 * If this functions fails it means that the legacy (first,last,step)
3201 * spec used by COMMAND will show 0,0,0. This is not a dire situation
3202 * because anyway the legacy (first,last,step) spec is to be deprecated
3203 * and one should use the new key specs scheme.
3204 */
3205void populateCommandLegacyRangeSpec(struct redisCommand *c) {
3206 memset(&c->legacy_range_key_spec, 0, sizeof(c->legacy_range_key_spec));
3207
3208 /* Set the movablekeys flag if we have a GETKEYS flag for modules.
3209 * Note that for native redis commands, we always have keyspecs,
3210 * with enough information to rely on for movablekeys. */
3211 if (c->flags & CMD_MODULE_GETKEYS)
3212 c->flags |= CMD_MOVABLE_KEYS;
3213
3214 /* no key-specs, no keys, exit. */
3215 if (c->key_specs_num == 0) {
3216 return;
3217 }
3218
3219 if (c->key_specs_num == 1 &&
3220 c->key_specs[0].begin_search_type == KSPEC_BS_INDEX &&
3221 c->key_specs[0].find_keys_type == KSPEC_FK_RANGE)
3222 {
3223 /* Quick win, exactly one range spec. */
3224 c->legacy_range_key_spec = c->key_specs[0];
3225 /* If it has the incomplete flag, set the movablekeys flag on the command. */
3226 if (c->key_specs[0].flags & CMD_KEY_INCOMPLETE)
3227 c->flags |= CMD_MOVABLE_KEYS;
3228 return;
3229 }
3230
3231 int firstkey = INT_MAX, lastkey = 0;
3232 int prev_lastkey = 0;
3233 for (int i = 0; i < c->key_specs_num; i++) {
3234 if (c->key_specs[i].begin_search_type != KSPEC_BS_INDEX ||
3235 c->key_specs[i].find_keys_type != KSPEC_FK_RANGE)
3236 {
3237 /* Found an incompatible (non range) spec, skip it, and set the movablekeys flag. */
3238 c->flags |= CMD_MOVABLE_KEYS;
3239 continue;
3240 }
3241 if (c->key_specs[i].fk.range.keystep != 1 ||
3242 (prev_lastkey && prev_lastkey != c->key_specs[i].bs.index.pos-1))
3243 {
3244 /* Found a range spec that's not plain (step of 1) or not consecutive to the previous one.
3245 * Skip it, and we set the movablekeys flag. */
3246 c->flags |= CMD_MOVABLE_KEYS;
3247 continue;
3248 }
3249 if (c->key_specs[i].flags & CMD_KEY_INCOMPLETE) {
3250 /* The spec we're using is incomplete, we can use it, but we also have to set the movablekeys flag. */
3251 c->flags |= CMD_MOVABLE_KEYS;
3252 }
3253 firstkey = min(firstkey, c->key_specs[i].bs.index.pos);
3254 /* Get the absolute index for lastkey (in the "range" spec, lastkey is relative to firstkey) */
3255 int lastkey_abs_index = c->key_specs[i].fk.range.lastkey;
3256 if (lastkey_abs_index >= 0)
3257 lastkey_abs_index += c->key_specs[i].bs.index.pos;
3258 /* For lastkey we use unsigned comparison to handle negative values correctly */
3259 lastkey = max((unsigned)lastkey, (unsigned)lastkey_abs_index);
3260 prev_lastkey = lastkey;
3261 }
3262
3263 if (firstkey == INT_MAX) {
3264 /* Couldn't find range specs, the legacy range spec will remain empty, and we set the movablekeys flag. */
3265 c->flags |= CMD_MOVABLE_KEYS;
3266 return;
3267 }
3268
3269 serverAssert(firstkey != 0);
3270 serverAssert(lastkey != 0);
3271
3272 c->legacy_range_key_spec.begin_search_type = KSPEC_BS_INDEX;
3273 c->legacy_range_key_spec.bs.index.pos = firstkey;
3274 c->legacy_range_key_spec.find_keys_type = KSPEC_FK_RANGE;
3275 c->legacy_range_key_spec.fk.range.lastkey = lastkey < 0 ? lastkey : (lastkey-firstkey); /* in the "range" spec, lastkey is relative to firstkey */
3276 c->legacy_range_key_spec.fk.range.keystep = 1;
3277 c->legacy_range_key_spec.fk.range.limit = 0;
3278}
3279
3280sds catSubCommandFullname(const char *parent_name, const char *sub_name) {
3281 return sdscatfmt(sdsempty(), "%s|%s", parent_name, sub_name);
3282}
3283
3284void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name) {
3285 if (!parent->subcommands_dict)
3286 parent->subcommands_dict = dictCreate(&commandTableDictType);
3287
3288 subcommand->parent = parent; /* Assign the parent command */
3289 subcommand->id = ACLGetCommandID(subcommand->fullname); /* Assign the ID used for ACL. */
3290
3291 serverAssert(dictAdd(parent->subcommands_dict, sdsnew(declared_name), subcommand) == DICT_OK);
3292}
3293
3294/* Set implicit ACl categories (see comment above the definition of
3295 * struct redisCommand). */
3296void setImplicitACLCategories(struct redisCommand *c) {
3297 if (c->flags & CMD_WRITE)
3298 c->acl_categories |= ACL_CATEGORY_WRITE;
3299 /* Exclude scripting commands from the RO category. */
3300 if (c->flags & CMD_READONLY && !(c->acl_categories & ACL_CATEGORY_SCRIPTING))
3301 c->acl_categories |= ACL_CATEGORY_READ;
3302 if (c->flags & CMD_ADMIN)
3303 c->acl_categories |= ACL_CATEGORY_ADMIN|ACL_CATEGORY_DANGEROUS;
3304 if (c->flags & CMD_PUBSUB)
3305 c->acl_categories |= ACL_CATEGORY_PUBSUB;
3306 if (c->flags & CMD_FAST)
3307 c->acl_categories |= ACL_CATEGORY_FAST;
3308 if (c->flags & CMD_BLOCKING)
3309 c->acl_categories |= ACL_CATEGORY_BLOCKING;
3310
3311 /* If it's not @fast is @slow in this binary world. */
3312 if (!(c->acl_categories & ACL_CATEGORY_FAST))
3313 c->acl_categories |= ACL_CATEGORY_SLOW;
3314}
3315
3316/* Recursively populate the command structure.
3317 *
3318 * On success, the function return C_OK. Otherwise C_ERR is returned and we won't
3319 * add this command in the commands dict. */
3320int populateCommandStructure(struct redisCommand *c) {
3321 /* If the command marks with CMD_SENTINEL, it exists in sentinel. */
3322 if (!(c->flags & CMD_SENTINEL) && server.sentinel_mode)
3323 return C_ERR;
3324
3325 /* If the command marks with CMD_ONLY_SENTINEL, it only exists in sentinel. */
3326 if (c->flags & CMD_ONLY_SENTINEL && !server.sentinel_mode)
3327 return C_ERR;
3328
3329 /* Translate the command string flags description into an actual
3330 * set of flags. */
3331 setImplicitACLCategories(c);
3332
3333 /* We start with an unallocated histogram and only allocate memory when a command
3334 * has been issued for the first time */
3335 c->latency_histogram = NULL;
3336
3337 /* Handle the legacy range spec and the "movablekeys" flag (must be done after populating all key specs). */
3338 populateCommandLegacyRangeSpec(c);
3339
3340 /* Assign the ID used for ACL. */
3341 c->id = ACLGetCommandID(c->fullname);
3342
3343 /* Handle subcommands */
3344 if (c->subcommands) {
3345 for (int j = 0; c->subcommands[j].declared_name; j++) {
3346 struct redisCommand *sub = c->subcommands+j;
3347
3348 sub->fullname = catSubCommandFullname(c->declared_name, sub->declared_name);
3349 if (populateCommandStructure(sub) == C_ERR)
3350 continue;
3351
3352 commandAddSubcommand(c, sub, sub->declared_name);
3353 }
3354 }
3355
3356 return C_OK;
3357}
3358
3359extern struct redisCommand redisCommandTable[];
3360
3361/* Populates the Redis Command Table dict from the static table in commands.c
3362 * which is auto generated from the json files in the commands folder. */
3363void populateCommandTable(void) {
3364 int j;
3365 struct redisCommand *c;
3366
3367 for (j = 0;; j++) {
3368 c = redisCommandTable + j;
3369 if (c->declared_name == NULL)
3370 break;
3371
3372 int retval1, retval2;
3373
3374 c->fullname = sdsnew(c->declared_name);
3375 if (populateCommandStructure(c) == C_ERR)
3376 continue;
3377
3378 retval1 = dictAdd(server.commands, sdsdup(c->fullname), c);
3379 /* Populate an additional dictionary that will be unaffected
3380 * by rename-command statements in redis.conf. */
3381 retval2 = dictAdd(server.orig_commands, sdsdup(c->fullname), c);
3382 serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
3383 }
3384}
3385
3386void resetCommandTableStats(dict* commands) {
3387 struct redisCommand *c;
3388 dictEntry *de;
3389 dictIterator di;
3390
3391 dictInitSafeIterator(&di, commands);
3392 while((de = dictNext(&di)) != NULL) {
3393 c = (struct redisCommand *) dictGetVal(de);
3394 c->microseconds = 0;
3395 c->calls = 0;
3396 c->rejected_calls = 0;
3397 c->failed_calls = 0;
3398 if(c->latency_histogram) {
3399 hdr_close(c->latency_histogram);
3400 c->latency_histogram = NULL;
3401 }
3402 if (c->subcommands_dict)
3403 resetCommandTableStats(c->subcommands_dict);
3404 }
3405 dictResetIterator(&di);
3406}
3407
3408void resetErrorTableStats(void) {
3409 freeErrorsRadixTreeAsync(server.errors);
3410 server.errors = raxNew();
3411 server.errors_enabled = 1;
3412}
3413
3414/* ========================== Redis OP Array API ============================ */
3415
3416int redisOpArrayAppend(redisOpArray *oa, int dbid, robj **argv, int argc, int target) {
3417 redisOp *op;
3418 int prev_capacity = oa->capacity;
3419
3420 if (oa->numops == 0) {
3421 oa->capacity = 16;
3422 } else if (oa->numops >= oa->capacity) {
3423 oa->capacity *= 2;
3424 }
3425
3426 if (prev_capacity != oa->capacity)
3427 oa->ops = zrealloc(oa->ops,sizeof(redisOp)*oa->capacity);
3428 op = oa->ops+oa->numops;
3429 op->dbid = dbid;
3430 op->argv = argv;
3431 op->argc = argc;
3432 op->target = target;
3433 oa->numops++;
3434 return oa->numops;
3435}
3436
3437void redisOpArrayFree(redisOpArray *oa) {
3438 while(oa->numops) {
3439 int j;
3440 redisOp *op;
3441
3442 oa->numops--;
3443 op = oa->ops+oa->numops;
3444 for (j = 0; j < op->argc; j++)
3445 decrRefCount(op->argv[j]);
3446 zfree(op->argv);
3447 }
3448 /* no need to free the actual op array, we reuse the memory for future commands */
3449 serverAssert(!oa->numops);
3450}
3451
3452/* ====================== Commands lookup and execution ===================== */
3453
3454int isContainerCommandBySds(sds s) {
3455 struct redisCommand *base_cmd = dictFetchValue(server.commands, s);
3456 int has_subcommands = base_cmd && base_cmd->subcommands_dict;
3457 return has_subcommands;
3458}
3459
3460struct redisCommand *lookupSubcommand(struct redisCommand *container, sds sub_name) {
3461 return dictFetchValue(container->subcommands_dict, sub_name);
3462}
3463
3464/* Look up a command by argv and argc
3465 *
3466 * If `strict` is not 0 we expect argc to be exact (i.e. argc==2
3467 * for a subcommand and argc==1 for a top-level command)
3468 * `strict` should be used every time we want to look up a command
3469 * name (e.g. in COMMAND INFO) rather than to find the command
3470 * a user requested to execute (in processCommand).
3471 */
3472struct redisCommand *lookupCommandLogic(dict *commands, robj **argv, int argc, int strict) {
3473 struct redisCommand *base_cmd = dictFetchValue(commands, argv[0]->ptr);
3474 int has_subcommands = base_cmd && base_cmd->subcommands_dict;
3475 if (argc == 1 || !has_subcommands) {
3476 if (strict && argc != 1)
3477 return NULL;
3478 /* Note: It is possible that base_cmd->proc==NULL (e.g. CONFIG) */
3479 return base_cmd;
3480 } else { /* argc > 1 && has_subcommands */
3481 if (strict && argc != 2)
3482 return NULL;
3483 /* Note: Currently we support just one level of subcommands */
3484 return lookupSubcommand(base_cmd, argv[1]->ptr);
3485 }
3486}
3487
3488struct redisCommand *lookupCommand(robj **argv, int argc) {
3489 return lookupCommandLogic(server.commands,argv,argc,0);
3490}
3491
3492struct redisCommand *lookupCommandBySdsLogic(dict *commands, sds s) {
3493 int argc, j;
3494 sds *strings = sdssplitlen(s,sdslen(s),"|",1,&argc);
3495 if (strings == NULL)
3496 return NULL;
3497 if (argc < 1 || argc > 2) {
3498 /* Currently we support just one level of subcommands */
3499 sdsfreesplitres(strings,argc);
3500 return NULL;
3501 }
3502
3503 serverAssert(argc > 0); /* Avoid warning `-Wmaybe-uninitialized` in lookupCommandLogic() */
3504 robj objects[argc];
3505 robj *argv[argc];
3506 for (j = 0; j < argc; j++) {
3507 initStaticStringObject(objects[j],strings[j]);
3508 argv[j] = &objects[j];
3509 }
3510
3511 struct redisCommand *cmd = lookupCommandLogic(commands,argv,argc,1);
3512 sdsfreesplitres(strings,argc);
3513 return cmd;
3514}
3515
3516struct redisCommand *lookupCommandBySds(sds s) {
3517 return lookupCommandBySdsLogic(server.commands,s);
3518}
3519
3520struct redisCommand *lookupCommandByCStringLogic(dict *commands, const char *s) {
3521 struct redisCommand *cmd;
3522 sds name = sdsnew(s);
3523
3524 cmd = lookupCommandBySdsLogic(commands,name);
3525 sdsfree(name);
3526 return cmd;
3527}
3528
3529struct redisCommand *lookupCommandByCString(const char *s) {
3530 return lookupCommandByCStringLogic(server.commands,s);
3531}
3532
3533/* Lookup the command in the current table, if not found also check in
3534 * the original table containing the original command names unaffected by
3535 * redis.conf rename-command statement.
3536 *
3537 * This is used by functions rewriting the argument vector such as
3538 * rewriteClientCommandVector() in order to set client->cmd pointer
3539 * correctly even if the command was renamed. */
3540struct redisCommand *lookupCommandOrOriginal(robj **argv ,int argc) {
3541 struct redisCommand *cmd = lookupCommandLogic(server.commands, argv, argc, 0);
3542
3543 if (!cmd) cmd = lookupCommandLogic(server.orig_commands, argv, argc, 0);
3544 return cmd;
3545}
3546
3547/* Commands arriving from the master client or AOF client, should never be rejected. */
3548int mustObeyClient(client *c) {
3549 return c->id == CLIENT_ID_AOF || c->flags & CLIENT_MASTER;
3550}
3551
3552static int shouldPropagate(int target) {
3553 if (!server.replication_allowed || target == PROPAGATE_NONE || server.loading)
3554 return 0;
3555
3556 if (target & PROPAGATE_AOF) {
3557 if (server.aof_state != AOF_OFF)
3558 return 1;
3559 }
3560 if (target & PROPAGATE_REPL) {
3561 if (server.masterhost == NULL && (server.repl_backlog || listLength(server.slaves) != 0 || asmMigrateInProgress()))
3562 return 1;
3563 }
3564
3565 return 0;
3566}
3567
3568/* Propagate the specified command (in the context of the specified database id)
3569 * to AOF and Slaves.
3570 *
3571 * flags are an xor between:
3572 * + PROPAGATE_NONE (no propagation of command at all)
3573 * + PROPAGATE_AOF (propagate into the AOF file if is enabled)
3574 * + PROPAGATE_REPL (propagate into the replication link)
3575 *
3576 * This is an internal low-level function and should not be called!
3577 *
3578 * The API for propagating commands is alsoPropagate().
3579 *
3580 * dbid value of -1 is saved to indicate that the called do not want
3581 * to replicate SELECT for this command (used for database neutral commands).
3582 */
3583static void propagateNow(int dbid, robj **argv, int argc, int target) {
3584 if (!shouldPropagate(target))
3585 return;
3586
3587 /* This needs to be unreachable since the dataset should be fixed during
3588 * replica pause (otherwise data may be lost during a failover) */
3589 serverAssert(!(isPausedActions(PAUSE_ACTION_REPLICA) &&
3590 (!server.client_pause_in_transaction)));
3591
3592 if (server.aof_state != AOF_OFF && target & PROPAGATE_AOF)
3593 feedAppendOnlyFile(dbid,argv,argc);
3594 if (target & PROPAGATE_REPL) {
3595 replicationFeedSlaves(server.slaves,dbid,argv,argc);
3596 asmFeedMigrationClient(argv, argc);
3597 }
3598}
3599
3600/* Used inside commands to schedule the propagation of additional commands
3601 * after the current command is propagated to AOF / Replication.
3602 *
3603 * dbid is the database ID the command should be propagated into.
3604 * Arguments of the command to propagate are passed as an array of redis
3605 * objects pointers of len 'argc', using the 'argv' vector.
3606 *
3607 * The function does not take a reference to the passed 'argv' vector,
3608 * so it is up to the caller to release the passed argv (but it is usually
3609 * stack allocated). The function automatically increments ref count of
3610 * passed objects, so the caller does not need to. */
3611void alsoPropagate(int dbid, robj **argv, int argc, int target) {
3612 robj **argvcopy;
3613 int j;
3614
3615 if (!shouldPropagate(target))
3616 return;
3617
3618 argvcopy = zmalloc(sizeof(robj*)*argc);
3619 for (j = 0; j < argc; j++) {
3620 argvcopy[j] = argv[j];
3621 incrRefCount(argv[j]);
3622 }
3623 redisOpArrayAppend(&server.also_propagate,dbid,argvcopy,argc,target);
3624}
3625
3626/* It is possible to call the function forceCommandPropagation() inside a
3627 * Redis command implementation in order to to force the propagation of a
3628 * specific command execution into AOF / Replication. */
3629void forceCommandPropagation(client *c, int flags) {
3630 serverAssert(c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE));
3631 if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
3632 if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
3633}
3634
3635/* Avoid that the executed command is propagated at all. This way we
3636 * are free to just propagate what we want using the alsoPropagate()
3637 * API. */
3638void preventCommandPropagation(client *c) {
3639 c->flags |= CLIENT_PREVENT_PROP;
3640}
3641
3642/* AOF specific version of preventCommandPropagation(). */
3643void preventCommandAOF(client *c) {
3644 c->flags |= CLIENT_PREVENT_AOF_PROP;
3645}
3646
3647/* Replication specific version of preventCommandPropagation(). */
3648void preventCommandReplication(client *c) {
3649 c->flags |= CLIENT_PREVENT_REPL_PROP;
3650}
3651
3652/* Log the last command a client executed into the slowlog. */
3653void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration) {
3654 /* Some commands may contain sensitive data that should not be available in the slowlog. */
3655 if (cmd->flags & CMD_SKIP_SLOWLOG)
3656 return;
3657
3658 /* If command argument vector was rewritten, use the original
3659 * arguments. */
3660 robj **argv = c->original_argv ? c->original_argv : c->argv;
3661 int argc = c->original_argv ? c->original_argc : c->argc;
3662 slowlogPushEntryIfNeeded(c,argv,argc,duration);
3663}
3664
3665/* This function is called in order to update the total command histogram duration.
3666 * The latency unit is nano-seconds.
3667 * If needed it will allocate the histogram memory and trim the duration to the upper/lower tracking limits*/
3668void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist){
3669 if (duration_hist < LATENCY_HISTOGRAM_MIN_VALUE)
3670 duration_hist=LATENCY_HISTOGRAM_MIN_VALUE;
3671 if (duration_hist>LATENCY_HISTOGRAM_MAX_VALUE)
3672 duration_hist=LATENCY_HISTOGRAM_MAX_VALUE;
3673 if (*latency_histogram==NULL)
3674 hdr_init(LATENCY_HISTOGRAM_MIN_VALUE,LATENCY_HISTOGRAM_MAX_VALUE,LATENCY_HISTOGRAM_PRECISION,latency_histogram);
3675 hdr_record_value(*latency_histogram,duration_hist);
3676}
3677
3678/* Handle the alsoPropagate() API to handle commands that want to propagate
3679 * multiple separated commands. Note that alsoPropagate() is not affected
3680 * by CLIENT_PREVENT_PROP flag. */
3681static void propagatePendingCommands(void) {
3682 if (server.also_propagate.numops == 0)
3683 return;
3684
3685 int j;
3686 redisOp *rop;
3687
3688 /* If we got here it means we have finished an execution-unit.
3689 * If that unit has caused propagation of multiple commands, they
3690 * should be propagated as a transaction */
3691 int transaction = server.also_propagate.numops > 1;
3692
3693 /* In case a command that may modify random keys was run *directly*
3694 * (i.e. not from within a script, MULTI/EXEC, RM_Call, etc.) we want
3695 * to avoid using a transaction (much like active-expire) */
3696 if (server.current_client &&
3697 server.current_client->cmd &&
3698 server.current_client->cmd->flags & CMD_TOUCHES_ARBITRARY_KEYS)
3699 {
3700 transaction = 0;
3701 }
3702
3703 if (transaction) {
3704 /* We use dbid=-1 to indicate we do not want to replicate SELECT.
3705 * It'll be inserted together with the next command (inside the MULTI) */
3706 propagateNow(-1,&shared.multi,1,PROPAGATE_AOF|PROPAGATE_REPL);
3707 }
3708
3709 for (j = 0; j < server.also_propagate.numops; j++) {
3710 rop = &server.also_propagate.ops[j];
3711 serverAssert(rop->target);
3712 propagateNow(rop->dbid,rop->argv,rop->argc,rop->target);
3713 }
3714
3715 if (transaction) {
3716 /* We use dbid=-1 to indicate we do not want to replicate select */
3717 propagateNow(-1,&shared.exec,1,PROPAGATE_AOF|PROPAGATE_REPL);
3718 }
3719
3720 redisOpArrayFree(&server.also_propagate);
3721}
3722
3723/* Performs operations that should be performed after an execution unit ends.
3724 * Execution unit is a code that should be done atomically.
3725 * Execution units can be nested and are not necessarily starts with Redis command.
3726 *
3727 * For example the following is a logical unit:
3728 * active expire ->
3729 * trigger del notification of some module ->
3730 * accessing a key ->
3731 * trigger key miss notification of some other module
3732 *
3733 * What we want to achieve is that the entire execution unit will be done atomically,
3734 * currently with respect to replication and post jobs, but in the future there might
3735 * be other considerations. So we basically want the `postUnitOperations` to trigger
3736 * after the entire chain finished. */
3737void postExecutionUnitOperations(void) {
3738 if (server.execution_nesting)
3739 return;
3740
3741 firePostExecutionUnitJobs();
3742
3743 /* If we are at the top-most call() and not inside a an active module
3744 * context (e.g. within a module timer) we can propagate what we accumulated. */
3745 propagatePendingCommands();
3746
3747 /* Module subsystem post-execution-unit logic */
3748 modulePostExecutionUnitOperations();
3749}
3750
3751/* Increment the command failure counters (either rejected_calls or failed_calls).
3752 * The decision which counter to increment is done using the flags argument, options are:
3753 * * ERROR_COMMAND_REJECTED - update rejected_calls
3754 * * ERROR_COMMAND_FAILED - update failed_calls
3755 *
3756 * The function also reset the prev_err_count to make sure we will not count the same error
3757 * twice, its possible to pass a NULL cmd value to indicate that the error was counted elsewhere.
3758 *
3759 * The function returns true if stats was updated and false if not. */
3760int incrCommandStatsOnError(struct redisCommand *cmd, int flags) {
3761 /* hold the prev error count captured on the last command execution */
3762 static long long prev_err_count = 0;
3763 int res = 0;
3764 if (cmd) {
3765 if ((server.stat_total_error_replies - prev_err_count) > 0) {
3766 if (flags & ERROR_COMMAND_REJECTED) {
3767 cmd->rejected_calls++;
3768 res = 1;
3769 } else if (flags & ERROR_COMMAND_FAILED) {
3770 cmd->failed_calls++;
3771 res = 1;
3772 }
3773 }
3774 }
3775 prev_err_count = server.stat_total_error_replies;
3776 return res;
3777}
3778
3779/* Returns true if the command is not internal, or the connection is internal. */
3780static bool commandVisibleForClient(client *c, struct redisCommand *cmd) {
3781 return (!(cmd->flags & CMD_INTERNAL)) || (c->flags & CLIENT_INTERNAL);
3782}
3783
3784/* Call() is the core of Redis execution of a command.
3785 *
3786 * The following flags can be passed:
3787 * CMD_CALL_NONE No flags.
3788 * CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset
3789 * or if the client flags are forcing propagation.
3790 * CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset
3791 * or if the client flags are forcing propagation.
3792 * CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL.
3793 * CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE.
3794 *
3795 * The exact propagation behavior depends on the client flags.
3796 * Specifically:
3797 *
3798 * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
3799 * and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
3800 * in the call flags, then the command is propagated even if the
3801 * dataset was not affected by the command.
3802 * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
3803 * are set, the propagation into AOF or to slaves is not performed even
3804 * if the command modified the dataset.
3805 *
3806 * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
3807 * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
3808 * slaves propagation will never occur.
3809 *
3810 * Client flags are modified by the implementation of a given command
3811 * using the following API:
3812 *
3813 * forceCommandPropagation(client *c, int flags);
3814 * preventCommandPropagation(client *c);
3815 * preventCommandAOF(client *c);
3816 * preventCommandReplication(client *c);
3817 *
3818 */
3819void call(client *c, int flags) {
3820 long long dirty;
3821 uint64_t client_old_flags = c->flags;
3822 struct redisCommand *real_cmd = c->realcmd;
3823 client *prev_client = server.executing_client;
3824 server.executing_client = c;
3825
3826 /* When call() is issued during loading the AOF we don't want commands called
3827 * from module, exec or LUA to go into the slowlog or to populate statistics. */
3828 int update_command_stats = !isAOFLoadingContext();
3829
3830 /* We want to be aware of a client which is making a first time attempt to execute this command
3831 * and a client which is reprocessing command again (after being unblocked).
3832 * Blocked clients can be blocked in different places and not always it means the call() function has been
3833 * called. For example this is required for avoiding double logging to monitors.*/
3834 int reprocessing_command = (c->flags & CLIENT_REEXECUTING_COMMAND) ? 1 : 0;
3835
3836 /* Initialization: clear the flags that must be set by the command on
3837 * demand, and initialize the array for additional commands propagation. */
3838 c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
3839
3840 /* Redis core is in charge of propagation when the first entry point
3841 * of call() is processCommand().
3842 * The only other option to get to call() without having processCommand
3843 * as an entry point is if a module triggers RM_Call outside of call()
3844 * context (for example, in a timer).
3845 * In that case, the module is in charge of propagation. */
3846
3847 /* Call the command. */
3848 dirty = server.dirty;
3849 long long old_master_repl_offset = server.master_repl_offset;
3850 incrCommandStatsOnError(NULL, 0);
3851
3852 const long long call_timer = ustime();
3853 enterExecutionUnit(1, call_timer);
3854
3855 /* setting the CLIENT_EXECUTING_COMMAND flag so we will avoid
3856 * sending client side caching message in the middle of a command reply.
3857 * In case of blocking commands, the flag will be un-set only after successfully
3858 * re-processing and unblock the client.*/
3859 c->flags |= CLIENT_EXECUTING_COMMAND;
3860
3861 monotime monotonic_start = 0;
3862 if (monotonicGetType() == MONOTONIC_CLOCK_HW)
3863 monotonic_start = getMonotonicUs();
3864
3865 c->cmd->proc(c);
3866
3867 exitExecutionUnit();
3868
3869 /* In case client is blocked after trying to execute the command,
3870 * it means the execution is not yet completed and we MIGHT reprocess the command in the future. */
3871 if (!(c->flags & CLIENT_BLOCKED)) c->flags &= ~(CLIENT_EXECUTING_COMMAND);
3872
3873 /* In order to avoid performance implication due to querying the clock using a system call 3 times,
3874 * we use a monotonic clock, when we are sure its cost is very low, and fall back to non-monotonic call otherwise. */
3875 ustime_t duration;
3876 if (monotonicGetType() == MONOTONIC_CLOCK_HW)
3877 duration = getMonotonicUs() - monotonic_start;
3878 else
3879 duration = ustime() - call_timer;
3880
3881 c->duration += duration;
3882 dirty = server.dirty-dirty;
3883 if (dirty < 0) dirty = 0;
3884
3885 /* Update failed command calls if required. */
3886
3887 if (!incrCommandStatsOnError(real_cmd, ERROR_COMMAND_FAILED) && c->deferred_reply_errors) {
3888 /* When call is used from a module client, error stats, and total_error_replies
3889 * isn't updated since these errors, if handled by the module, are internal,
3890 * and not reflected to users. however, the commandstats does show these calls
3891 * (made by RM_Call), so it should log if they failed or succeeded. */
3892 real_cmd->failed_calls++;
3893 }
3894
3895 /* After executing command, we will close the client after writing entire
3896 * reply if it is set 'CLIENT_CLOSE_AFTER_COMMAND' flag. */
3897 if (c->flags & CLIENT_CLOSE_AFTER_COMMAND) {
3898 c->flags &= ~CLIENT_CLOSE_AFTER_COMMAND;
3899 c->flags |= CLIENT_CLOSE_AFTER_REPLY;
3900 }
3901
3902 /* Note: the code below uses the real command that was executed
3903 * c->cmd and c->lastcmd may be different, in case of MULTI-EXEC or
3904 * re-written commands such as EXPIRE, GEOADD, etc. */
3905
3906 /* Record the latency this command induced on the main thread.
3907 * unless instructed by the caller not to log. (happens when processing
3908 * a MULTI-EXEC from inside an AOF). */
3909 if (update_command_stats) {
3910 char *latency_event = (real_cmd->flags & CMD_FAST) ?
3911 "fast-command" : "command";
3912 latencyAddSampleIfNeeded(latency_event,duration/1000);
3913 if (server.execution_nesting == 0)
3914 durationAddSample(EL_DURATION_TYPE_CMD, duration);
3915 }
3916
3917 /* Log the command into the Slow log if needed.
3918 * If the client is blocked we will handle slowlog when it is unblocked. */
3919 if (update_command_stats && !(c->flags & CLIENT_BLOCKED))
3920 slowlogPushCurrentCommand(c, real_cmd, c->duration);
3921
3922 /* Send the command to clients in MONITOR mode if applicable,
3923 * since some administrative commands are considered too dangerous to be shown.
3924 * Other exceptions is a client which is unblocked and retrying to process the command
3925 * or we are currently in the process of loading AOF. */
3926 if (update_command_stats && !reprocessing_command &&
3927 !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
3928 {
3929 robj **argv = c->original_argv ? c->original_argv : c->argv;
3930 int argc = c->original_argv ? c->original_argc : c->argc;
3931 replicationFeedMonitors(c,server.monitors,c->db->id,argv,argc);
3932 }
3933
3934 /* Populate the per-command and per-slot statistics that we show in INFO commandstats and CLUSTER SLOT-STATS,
3935 * respectively. If the client is blocked we will handle latency stats and duration when it is unblocked. */
3936 if (update_command_stats && !(c->flags & CLIENT_BLOCKED)) {
3937 real_cmd->calls++;
3938 real_cmd->microseconds += c->duration;
3939 if (server.latency_tracking_enabled && !(c->flags & CLIENT_BLOCKED))
3940 updateCommandLatencyHistogram(&(real_cmd->latency_histogram), c->duration*1000);
3941 clusterSlotStatsAddCpuDuration(c, c->duration);
3942 }
3943
3944 /* Populate the per-key hotkey stats. Before updating stats for a command
3945 * we need to do some setup on the hotkeyStats structure. We only do this
3946 * once during the outer-most call in case of nesting.
3947 * NOTE: even though we update the network bytes during nested calls we
3948 * only update the duration, since the outer-most call records the whole
3949 * duration. */
3950 if (update_command_stats && !(c->flags & CLIENT_BLOCKED) &&
3951 !server.execution_nesting)
3952 {
3953 /* First we need to prepare the hotkeyStats for updates */
3954 hotkeyStatsPreCurrentCmd(server.hotkeys, c);
3955
3956 /* Update the current cmd's keys with the commands duration */
3957 hotkeyMetrics metrics = {c->duration, 0};
3958 hotkeyStatsUpdateCurrentCmd(server.hotkeys, metrics);
3959 }
3960
3961 /* The duration needs to be reset after each call except for a blocked command,
3962 * which is expected to record and reset the duration after unblocking. */
3963 if (!(c->flags & CLIENT_BLOCKED)) {
3964 c->duration = 0;
3965 }
3966
3967 /* Propagate the command into the AOF and replication link.
3968 * We never propagate EXEC explicitly, it will be implicitly
3969 * propagated if needed (see propagatePendingCommands).
3970 * Also, module commands take care of themselves */
3971 if (flags & CMD_CALL_PROPAGATE &&
3972 (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP &&
3973 c->cmd->proc != execCommand &&
3974 !(c->cmd->flags & CMD_MODULE))
3975 {
3976 int propagate_flags = PROPAGATE_NONE;
3977
3978 /* Check if the command operated changes in the data set. If so
3979 * set for replication / AOF propagation. */
3980 if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
3981
3982 /* If the client forced AOF / replication of the command, set
3983 * the flags regardless of the command effects on the data set. */
3984 if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
3985 if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
3986
3987 /* However prevent AOF / replication propagation if the command
3988 * implementation called preventCommandPropagation() or similar,
3989 * or if we don't have the call() flags to do so. */
3990 if (c->flags & CLIENT_PREVENT_REPL_PROP ||
3991 c->flags & CLIENT_MODULE_PREVENT_REPL_PROP ||
3992 !(flags & CMD_CALL_PROPAGATE_REPL))
3993 propagate_flags &= ~PROPAGATE_REPL;
3994 if (c->flags & CLIENT_PREVENT_AOF_PROP ||
3995 c->flags & CLIENT_MODULE_PREVENT_AOF_PROP ||
3996 !(flags & CMD_CALL_PROPAGATE_AOF))
3997 propagate_flags &= ~PROPAGATE_AOF;
3998
3999 /* Call alsoPropagate() only if at least one of AOF / replication
4000 * propagation is needed. */
4001 if (propagate_flags != PROPAGATE_NONE)
4002 alsoPropagate(c->db->id,c->argv,c->argc,propagate_flags);
4003 }
4004
4005 /* Restore the old replication flags, since call() can be executed
4006 * recursively. */
4007 c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
4008 c->flags |= client_old_flags &
4009 (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
4010
4011 /* If the client has keys tracking enabled for client side caching,
4012 * make sure to remember the keys it fetched via this command. For read-only
4013 * scripts, don't process the script, only the commands it executes. */
4014 if ((c->cmd->flags & CMD_READONLY) && (c->cmd->proc != evalRoCommand)
4015 && (c->cmd->proc != evalShaRoCommand) && (c->cmd->proc != fcallroCommand))
4016 {
4017 /* We use the tracking flag of the original external client that
4018 * triggered the command, but we take the keys from the actual command
4019 * being executed. */
4020 if (server.current_client &&
4021 (server.current_client->flags & CLIENT_TRACKING) &&
4022 !(server.current_client->flags & CLIENT_TRACKING_BCAST))
4023 {
4024 trackingRememberKeys(server.current_client, c);
4025 }
4026 }
4027
4028 if (!(c->flags & CLIENT_BLOCKED)) {
4029 /* Modules may call commands in cron, in which case server.current_client
4030 * is not set. */
4031 if (server.current_client) {
4032 server.current_client->commands_processed++;
4033 }
4034 server.stat_numcommands++;
4035 }
4036
4037 /* Do some maintenance job and cleanup */
4038 afterCommand(c);
4039
4040 /* The afterCommand updates the replication network bytes. At this point we
4041 * are ready to update the ingress/egress net bytes and cleanup tracking
4042 * of the current command. */
4043 if (update_command_stats && !(c->flags & CLIENT_BLOCKED)) {
4044 /* Update the current cmd's keys with the commands output bytes */
4045 hotkeyMetrics metrics =
4046 {0, c->net_output_bytes_curr_cmd + c->net_input_bytes_curr_cmd};
4047 hotkeyStatsUpdateCurrentCmd(server.hotkeys, metrics);
4048
4049 /* Just like curr cmd setup we only do the cleanup in case we are not in
4050 * a nested command. */
4051 if (!server.execution_nesting)
4052 hotkeyStatsPostCurrentCmd(server.hotkeys);
4053 }
4054
4055 /* Clear the original argv.
4056 * If the client is blocked we will handle slowlog when it is unblocked.
4057 * NOTE: we free the origin argv only after hoykeyStatsPostCurrentCmd as
4058 * hotkeyStats updates depend on original_argv. */
4059 if (!(c->flags & CLIENT_BLOCKED))
4060 freeClientOriginalArgv(c);
4061
4062 /* Remember the replication offset of the client, right after its last
4063 * command that resulted in propagation. */
4064 if (old_master_repl_offset != server.master_repl_offset)
4065 c->woff = server.master_repl_offset;
4066
4067 /* Client pause takes effect after a transaction has finished. This needs
4068 * to be located after everything is propagated. */
4069 if (!server.in_exec && server.client_pause_in_transaction) {
4070 server.client_pause_in_transaction = 0;
4071 }
4072
4073 server.executing_client = prev_client;
4074}
4075
4076/* Used when a command that is ready for execution needs to be rejected, due to
4077 * various pre-execution checks. it returns the appropriate error to the client.
4078 * If there's a transaction is flags it as dirty, and if the command is EXEC,
4079 * it aborts the transaction.
4080 * The duration is reset, since we reject the command, and it did not record.
4081 * Note: 'reply' is expected to end with \r\n */
4082void rejectCommand(client *c, robj *reply) {
4083 flagTransaction(c);
4084 c->duration = 0;
4085 if (c->cmd) c->cmd->rejected_calls++;
4086 if (c->cmd && c->cmd->proc == execCommand) {
4087 execCommandAbort(c, reply->ptr);
4088 } else {
4089 /* using addReplyError* rather than addReply so that the error can be logged. */
4090 addReplyErrorObject(c, reply);
4091 }
4092}
4093
4094void rejectCommandSds(client *c, sds s) {
4095 flagTransaction(c);
4096 c->duration = 0;
4097 if (c->cmd) c->cmd->rejected_calls++;
4098 if (c->cmd && c->cmd->proc == execCommand) {
4099 execCommandAbort(c, s);
4100 sdsfree(s);
4101 } else {
4102 /* The following frees 's'. */
4103 addReplyErrorSds(c, s);
4104 }
4105}
4106
4107void rejectCommandFormat(client *c, const char *fmt, ...) {
4108 va_list ap;
4109 va_start(ap,fmt);
4110 sds s = sdscatvprintf(sdsempty(),fmt,ap);
4111 va_end(ap);
4112 /* Make sure there are no newlines in the string, otherwise invalid protocol
4113 * is emitted (The args come from the user, they may contain any character). */
4114 sdsmapchars(s, "\r\n", " ", 2);
4115 rejectCommandSds(c, s);
4116}
4117
4118/* This is called after a command in call, we can do some maintenance job in it. */
4119void afterCommand(client *c) {
4120 /* Should be done before trackingHandlePendingKeyInvalidations so that we
4121 * reply to client before invalidating cache (makes more sense) */
4122 postExecutionUnitOperations();
4123
4124 /* Flush pending tracking invalidations. */
4125 trackingHandlePendingKeyInvalidations();
4126
4127 clusterSlotStatsAddNetworkBytesOutForUserClient(c);
4128
4129 /* Flush other pending push messages. only when we are not in nested call.
4130 * So the messages are not interleaved with transaction response. */
4131 if (!server.execution_nesting)
4132 listJoin(c->reply, server.pending_push_messages);
4133
4134 /* Assert keysizes histogram if enabled */
4135 if (unlikely(server.dbg_assert_keysizes))
4136 dbgAssertKeysizesHist(c->db);
4137
4138 /* Assert per-slot alloc_size if enabled */
4139 if (unlikely(server.dbg_assert_alloc_per_slot))
4140 dbgAssertAllocSizePerSlot(c->db);
4141}
4142
4143/* Check if c->cmd exists, fills `err` with details in case it doesn't.
4144 * Return 1 if exists. */
4145int commandCheckExistence(client *c, sds *err) {
4146 if (c->cmd)
4147 return 1;
4148 if (!err)
4149 return 0;
4150 if (isContainerCommandBySds(c->argv[0]->ptr)) {
4151 /* If we can't find the command but argv[0] by itself is a command
4152 * it means we're dealing with an invalid subcommand. Print Help. */
4153 sds cmd = sdsnew((char *)c->argv[0]->ptr);
4154 sdstoupper(cmd);
4155 *err = sdsnew(NULL);
4156
4157 if (c->argc < 2) {
4158 *err = sdscatprintf(*err, "missing subcommand. Try %s HELP.", cmd);
4159 } else {
4160 *err = sdscatprintf(*err, "unknown subcommand '%.128s'. Try %s HELP.",
4161 (char *)c->argv[1]->ptr, cmd);
4162 }
4163
4164 sdsfree(cmd);
4165 } else {
4166 *err = sdsnew(NULL);
4167 *err = sdscatprintf(*err, "unknown command '%.128s'", (char *)c->argv[0]->ptr);
4168
4169 if (c->argc >= 2) {
4170 sds args = sdsempty();
4171 for (int i = 1; i < c->argc && sdslen(args) < 128; i++)
4172 args = sdscatprintf(args, "'%.*s' ", 128 - (int)sdslen(args), (char *)c->argv[i]->ptr);
4173 *err = sdscatprintf(*err, ", with args beginning with: %s", args);
4174 sdsfree(args);
4175 }
4176 }
4177 /* Make sure there are no newlines in the string, otherwise invalid protocol
4178 * is emitted (The args come from the user, they may contain any character). */
4179 sdsmapchars(*err, "\r\n", " ", 2);
4180 return 0;
4181}
4182
4183/* Check if c->argc is valid for c->cmd, fills `err` with details in case it isn't.
4184 * Return 1 if valid. */
4185int commandCheckArity(struct redisCommand *cmd, int argc, sds *err) {
4186 if ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity)) {
4187 if (err) {
4188 *err = sdsnew(NULL);
4189 *err = sdscatprintf(*err, "wrong number of arguments for '%s' command", cmd->fullname);
4190 }
4191 return 0;
4192 }
4193
4194 return 1;
4195}
4196
4197/* If we're executing a script, try to extract a set of command flags from
4198 * it, in case it declared them. Note this is just an attempt, we don't yet
4199 * know the script command is well formed.*/
4200uint64_t getCommandFlags(client *c) {
4201 uint64_t cmd_flags = c->cmd->flags;
4202
4203 if (c->cmd->proc == fcallCommand || c->cmd->proc == fcallroCommand) {
4204 cmd_flags = fcallGetCommandFlags(c, cmd_flags);
4205 } else if (c->cmd->proc == evalCommand || c->cmd->proc == evalRoCommand ||
4206 c->cmd->proc == evalShaCommand || c->cmd->proc == evalShaRoCommand)
4207 {
4208 cmd_flags = evalGetCommandFlags(c, cmd_flags);
4209 }
4210
4211 return cmd_flags;
4212}
4213
4214void preprocessCommand(client *c, pendingCommand *pcmd) {
4215 pcmd->slot = INVALID_CLUSTER_SLOT;
4216 if (pcmd->argc == 0)
4217 return;
4218
4219 /* Check if we can reuse the previous command instead of looking it up.
4220 * The previous command is either the penultimate pending command (if it exists), or c->lastcmd. */
4221 struct redisCommand *last_cmd = pcmd->prev ? pcmd->prev->cmd : c->lastcmd;
4222
4223 if (isCommandReusable(last_cmd, pcmd->argv[0]))
4224 pcmd->cmd = last_cmd;
4225 else
4226 pcmd->cmd = lookupCommand(pcmd->argv, pcmd->argc);
4227
4228 if (!pcmd->cmd) {
4229 pcmd->read_error = CLIENT_READ_COMMAND_NOT_FOUND;
4230 return;
4231 }
4232
4233 if ((pcmd->cmd->arity > 0 && pcmd->cmd->arity != pcmd->argc) ||
4234 (pcmd->argc < -pcmd->cmd->arity))
4235 {
4236 pcmd->read_error = CLIENT_READ_BAD_ARITY;
4237 return;
4238 }
4239
4240 pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT;
4241 int num_keys = extractKeysAndSlot(pcmd->cmd, pcmd->argv, pcmd->argc,
4242 &pcmd->keys_result, &pcmd->slot);
4243 if (num_keys < 0) {
4244 /* We skip the checks below since We expect the command to be rejected in this case */
4245 return;
4246 } else if (num_keys > 0) {
4247 /* Handle cross-slot keys: mark error and reset slot. */
4248 if (pcmd->slot == CLUSTER_CROSSSLOT) {
4249 pcmd->read_error = CLIENT_READ_CROSS_SLOT;
4250 pcmd->slot = INVALID_CLUSTER_SLOT;
4251 }
4252 }
4253 pcmd->flags |= PENDING_CMD_KEYS_RESULT_VALID;
4254}
4255
4256/* If this function gets called we already read a whole
4257 * command, arguments are in the client argv/argc fields.
4258 * processCommand() execute the command or prepare the
4259 * server for a bulk read from the client.
4260 *
4261 * If C_OK is returned the client is still alive and valid and
4262 * other operations can be performed by the caller. Otherwise
4263 * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
4264int processCommand(client *c) {
4265 if (!scriptIsTimedout()) {
4266 /* Both EXEC and scripts call call() directly so there should be
4267 * no way in_exec or scriptIsRunning() is 1.
4268 * That is unless lua_timedout, in which case client may run
4269 * some commands. */
4270 serverAssert(!server.in_exec);
4271 serverAssert(!scriptIsRunning());
4272 }
4273
4274 /* in case we are starting to ProcessCommand and we already have a command we assume
4275 * this is a reprocessing of this command, so we do not want to perform some of the actions again. */
4276 int client_reprocessing_command = c->cmd ? 1 : 0;
4277
4278 /* only run command filter if not reprocessing command */
4279 if (!client_reprocessing_command) {
4280 moduleCallCommandFilters(c);
4281 reqresAppendRequest(c);
4282 }
4283
4284 /* If we're inside a module blocked context yielding that wants to avoid
4285 * processing clients, postpone the command. */
4286 if (server.busy_module_yield_flags != BUSY_MODULE_YIELD_NONE &&
4287 !(server.busy_module_yield_flags & BUSY_MODULE_YIELD_CLIENTS))
4288 {
4289 blockPostponeClient(c);
4290 return C_OK;
4291 }
4292
4293 /* Now lookup the command and check ASAP about trivial error conditions
4294 * such as wrong arity, bad command name and so forth.
4295 * In case we are reprocessing a command after it was blocked,
4296 * we do not have to repeat the same checks */
4297 if (!client_reprocessing_command) {
4298 /* check if we can reuse the last command instead of looking up if we already have that info */
4299 struct redisCommand *cmd = c->lookedcmd;
4300
4301 /* The command may have been modified by modules (e.g., in CommandFilters callbacks),
4302 * so we need to look it up again. */
4303 if (!cmd) {
4304 if (isCommandReusable(c->lastcmd, c->argv[0]))
4305 cmd = c->lastcmd;
4306 else
4307 cmd = lookupCommand(c->argv, c->argc);
4308 }
4309
4310 if (!cmd) {
4311 /* Handle possible security attacks. */
4312 if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
4313 securityWarningCommand(c);
4314 return C_ERR;
4315 }
4316 }
4317
4318 /* Internal commands seem unexistent to non-internal connections.
4319 * masters and AOF loads are implicitly internal. */
4320 if (cmd && (cmd->flags & CMD_INTERNAL) && !((c->flags & CLIENT_INTERNAL) || mustObeyClient(c))) {
4321 cmd = NULL;
4322 }
4323
4324 c->cmd = c->lastcmd = c->realcmd = cmd;
4325 sds err;
4326 if (!commandCheckExistence(c, &err)) {
4327 rejectCommandSds(c, err);
4328 return C_OK;
4329 }
4330 if (!commandCheckArity(c->cmd, c->argc, &err)) {
4331 rejectCommandSds(c, err);
4332 return C_OK;
4333 }
4334
4335
4336 /* Check if the command is marked as protected and the relevant configuration allows it */
4337 if (c->cmd->flags & CMD_PROTECTED) {
4338 if ((c->cmd->proc == debugCommand && !allowProtectedAction(server.enable_debug_cmd, c)) ||
4339 (c->cmd->proc == moduleCommand && !allowProtectedAction(server.enable_module_cmd, c)))
4340 {
4341 rejectCommandFormat(c,"%s command not allowed. If the %s option is set to \"local\", "
4342 "you can run it from a local connection, otherwise you need to set this option "
4343 "in the configuration file, and then restart the server.",
4344 c->cmd->proc == debugCommand ? "DEBUG" : "MODULE",
4345 c->cmd->proc == debugCommand ? "enable-debug-command" : "enable-module-command");
4346 return C_OK;
4347
4348 }
4349 }
4350 }
4351
4352 const uint64_t cmd_flags = getCommandFlags(c);
4353
4354 int is_read_command = (cmd_flags & CMD_READONLY) ||
4355 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_READONLY));
4356 int is_write_command = (cmd_flags & CMD_WRITE) ||
4357 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
4358 int is_denyoom_command = (cmd_flags & CMD_DENYOOM) ||
4359 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_DENYOOM));
4360 int is_denystale_command = !(cmd_flags & CMD_STALE) ||
4361 (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_STALE));
4362 int is_denyloading_command = !(cmd_flags & CMD_LOADING) ||
4363 (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_LOADING));
4364 int is_may_replicate_command = (cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)) ||
4365 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)));
4366 int is_deny_async_loading_command = (cmd_flags & CMD_NO_ASYNC_LOADING) ||
4367 (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_NO_ASYNC_LOADING));
4368 int obey_client = mustObeyClient(c);
4369
4370 if (authRequired(c)) {
4371 /* AUTH and HELLO and no auth commands are valid even in
4372 * non-authenticated state. */
4373 if (!(c->cmd->flags & CMD_NO_AUTH)) {
4374 rejectCommand(c,shared.noautherr);
4375 return C_OK;
4376 }
4377 }
4378
4379 if (c->flags & CLIENT_MULTI && c->cmd->flags & CMD_NO_MULTI) {
4380 rejectCommandFormat(c,"Command not allowed inside a transaction");
4381 return C_OK;
4382 }
4383
4384 /* Check if the user can run this command according to the current
4385 * ACLs. */
4386 int acl_errpos;
4387 int acl_retval = ACLCheckAllPerm(c,&acl_errpos);
4388 if (acl_retval != ACL_OK) {
4389 addACLLogEntry(c,acl_retval,(c->flags & CLIENT_MULTI) ? ACL_LOG_CTX_MULTI : ACL_LOG_CTX_TOPLEVEL,acl_errpos,NULL,NULL);
4390 sds msg = getAclErrorMessage(acl_retval, c->user, c->cmd, c->argv[acl_errpos]->ptr, 0);
4391 rejectCommandFormat(c, "-NOPERM %s", msg);
4392 sdsfree(msg);
4393 return C_OK;
4394 }
4395
4396 /* If cluster is enabled perform the cluster redirection here.
4397 * However we don't perform the redirection if:
4398 * 1) The sender of this command is our master.
4399 * 2) The command has no key arguments. */
4400 if (server.cluster_enabled &&
4401 !mustObeyClient(c) &&
4402 !(!(c->cmd->flags&CMD_MOVABLE_KEYS) && c->cmd->key_specs_num == 0 &&
4403 c->cmd->proc != execCommand))
4404 {
4405 int error_code;
4406 clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
4407 &c->slot,getClientCachedKeyResult(c),c->read_error,cmd_flags,&error_code);
4408 if (n == NULL || !clusterNodeIsMyself(n)) {
4409 if (c->cmd->proc == execCommand) {
4410 discardTransaction(c);
4411 } else {
4412 flagTransaction(c);
4413 }
4414 clusterRedirectClient(c,n,c->slot,error_code);
4415 c->duration = 0;
4416 c->cmd->rejected_calls++;
4417 return C_OK;
4418 }
4419 }
4420
4421 /* Check if the command keys are all in the same slot for cluster compatibility */
4422 if (server.cluster_compatibility_sample_ratio && !server.cluster_enabled &&
4423 !(!(c->cmd->flags&CMD_MOVABLE_KEYS) && c->cmd->key_specs_num == 0 &&
4424 c->cmd->proc != execCommand) && SHOULD_CLUSTER_COMPATIBILITY_SAMPLE())
4425 {
4426 c->cluster_compatibility_check_slot = -1;
4427 if (!areCommandKeysInSameSlot(c, &c->cluster_compatibility_check_slot)) {
4428 server.stat_cluster_incompatible_ops++;
4429 /* If we find cross slot keys, reset slot to -2 to indicate we won't
4430 * check this command again. That is useful for script, since we need
4431 * this variable to decide if we continue checking accessing keys. */
4432 c->cluster_compatibility_check_slot = -2;
4433 }
4434 }
4435
4436 /* Disconnect some clients if total clients memory is too high. We do this
4437 * before key eviction, after the last command was executed and consumed
4438 * some client output buffer memory. */
4439 evictClients();
4440 if (server.current_client == NULL) {
4441 /* If we evicted ourself then abort processing the command */
4442 return C_ERR;
4443 }
4444
4445 /* Handle the maxmemory directive.
4446 *
4447 * Note that we do not want to reclaim memory if we are here re-entering
4448 * the event loop since there is a busy Lua script running in timeout
4449 * condition, to avoid mixing the propagation of scripts with the
4450 * propagation of DELs due to eviction. */
4451 if (server.maxmemory && !isInsideYieldingLongCommand()) {
4452 int out_of_memory = (performEvictions() == EVICT_FAIL);
4453
4454 /* performEvictions may evict keys, so we need flush pending tracking
4455 * invalidation keys. If we don't do this, we may get an invalidation
4456 * message after we perform operation on the key, where in fact this
4457 * message belongs to the old value of the key before it gets evicted.*/
4458 trackingHandlePendingKeyInvalidations();
4459
4460 /* performEvictions may flush slave output buffers. This may result
4461 * in a slave, that may be the active client, to be freed. */
4462 if (server.current_client == NULL) return C_ERR;
4463
4464 if (out_of_memory && is_denyoom_command) {
4465 rejectCommand(c, shared.oomerr);
4466 return C_OK;
4467 }
4468
4469 /* Save out_of_memory result at command start, otherwise if we check OOM
4470 * in the first write within script, memory used by lua stack and
4471 * arguments might interfere. We need to save it for EXEC and module
4472 * calls too, since these can call EVAL, but avoid saving it during an
4473 * interrupted / yielding busy script / module. */
4474 server.pre_command_oom_state = out_of_memory;
4475 }
4476
4477 /* Make sure to use a reasonable amount of memory for client side
4478 * caching metadata. */
4479 if (server.tracking_clients) trackingLimitUsedSlots();
4480
4481 /* Don't accept write commands if there are problems persisting on disk
4482 * unless coming from our master, in which case check the replica ignore
4483 * disk write error config to either log or crash. */
4484 int deny_write_type = writeCommandsDeniedByDiskError();
4485 if (deny_write_type != DISK_ERROR_TYPE_NONE &&
4486 (is_write_command || c->cmd->proc == pingCommand))
4487 {
4488 if (obey_client) {
4489 if (!server.repl_ignore_disk_write_error && c->cmd->proc != pingCommand) {
4490 serverPanic("Replica was unable to write command to disk.");
4491 } else {
4492 static mstime_t last_log_time_ms = 0;
4493 const mstime_t log_interval_ms = 10000;
4494 if (server.mstime > last_log_time_ms + log_interval_ms) {
4495 last_log_time_ms = server.mstime;
4496 serverLog(LL_WARNING, "Replica is applying a command even though "
4497 "it is unable to write to disk.");
4498 }
4499 }
4500 } else {
4501 sds err = writeCommandsGetDiskErrorMessage(deny_write_type);
4502 /* remove the newline since rejectCommandSds adds it. */
4503 sdssubstr(err, 0, sdslen(err)-2);
4504 rejectCommandSds(c, err);
4505 return C_OK;
4506 }
4507 }
4508
4509 /* Don't accept write commands if there are not enough good slaves and
4510 * user configured the min-slaves-to-write option. */
4511 if (is_write_command && !checkGoodReplicasStatus()) {
4512 rejectCommand(c, shared.noreplicaserr);
4513 return C_OK;
4514 }
4515
4516 /* Don't accept write commands if this is a read only slave. But
4517 * accept write commands if this is our master. */
4518 if (server.masterhost && server.repl_slave_ro &&
4519 !obey_client &&
4520 is_write_command)
4521 {
4522 rejectCommand(c, shared.roslaveerr);
4523 return C_OK;
4524 }
4525
4526 /* If this node is a replica and there is a trim job due to slot migration,
4527 * we cannot process commands from the master for the slot being trimmed.
4528 * Otherwise, the trim cycle could mistakenly delete newly added keys.
4529 * In this case, the master will be blocked until the trim job finishes.
4530 * This is supposed to be a rare event as it needs to migrate slots and
4531 * import them back before the trim job is done. */
4532 if ((c->flags & CLIENT_MASTER) && is_write_command && server.cluster_enabled) {
4533 /* Check if the command is accessing keys in a slot being trimmed. */
4534 int slot_in_trim = asmGetTrimmingSlotForCommand(c->cmd, c->argv, c->argc);
4535 if (slot_in_trim != -1) {
4536 serverLog(LL_WARNING, "Master is sending command for slot %d. "
4537 "There is an trim job in progress for this slot. "
4538 "This replica cannot process this command right now. "
4539 "Blocking master client until trim job is done. ", slot_in_trim);
4540 /* Block master client */
4541 blockPostponeClientWithType(c, BLOCKED_POSTPONE_TRIM);
4542 return C_OK;
4543 }
4544 }
4545
4546 /* Only allow a subset of commands in the context of Pub/Sub if the
4547 * connection is in RESP2 mode. With RESP3 there are no limits. */
4548 if ((c->flags & CLIENT_PUBSUB && c->resp == 2) &&
4549 c->cmd->proc != pingCommand &&
4550 c->cmd->proc != subscribeCommand &&
4551 c->cmd->proc != ssubscribeCommand &&
4552 c->cmd->proc != unsubscribeCommand &&
4553 c->cmd->proc != sunsubscribeCommand &&
4554 c->cmd->proc != psubscribeCommand &&
4555 c->cmd->proc != punsubscribeCommand &&
4556 c->cmd->proc != quitCommand &&
4557 c->cmd->proc != resetCommand) {
4558 rejectCommandFormat(c,
4559 "Can't execute '%s': only (P|S)SUBSCRIBE / "
4560 "(P|S)UNSUBSCRIBE / PING / QUIT / RESET are allowed in this context",
4561 c->cmd->fullname);
4562 return C_OK;
4563 }
4564
4565 /* Only allow commands with flag "t", such as INFO, REPLICAOF and so on,
4566 * when replica-serve-stale-data is no and we are a replica with a broken
4567 * link with master. */
4568 if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
4569 server.repl_serve_stale_data == 0 &&
4570 is_denystale_command)
4571 {
4572 rejectCommand(c, shared.masterdownerr);
4573 return C_OK;
4574 }
4575
4576 /* Loading DB? Return an error if the command has not the
4577 * CMD_LOADING flag. */
4578 if (server.loading && !server.async_loading && is_denyloading_command) {
4579 rejectCommand(c, shared.loadingerr);
4580 return C_OK;
4581 }
4582
4583 /* During async-loading, block certain commands. */
4584 if (server.async_loading && is_deny_async_loading_command) {
4585 rejectCommand(c,shared.loadingerr);
4586 return C_OK;
4587 }
4588
4589 /* when a busy job is being done (script / module)
4590 * Only allow a limited number of commands.
4591 * Note that we need to allow the transactions commands, otherwise clients
4592 * sending a transaction with pipelining without error checking, may have
4593 * the MULTI plus a few initial commands refused, then the timeout
4594 * condition resolves, and the bottom-half of the transaction gets
4595 * executed, see Github PR #7022. */
4596 if (isInsideYieldingLongCommand() && !(c->cmd->flags & CMD_ALLOW_BUSY)) {
4597 if (server.busy_module_yield_flags && server.busy_module_yield_reply) {
4598 rejectCommandFormat(c, "-BUSY %s", server.busy_module_yield_reply);
4599 } else if (server.busy_module_yield_flags) {
4600 rejectCommand(c, shared.slowmoduleerr);
4601 } else if (scriptIsEval()) {
4602 rejectCommand(c, shared.slowevalerr);
4603 } else {
4604 rejectCommand(c, shared.slowscripterr);
4605 }
4606 return C_OK;
4607 }
4608
4609 /* Prevent a replica from sending commands that access the keyspace.
4610 * The main objective here is to prevent abuse of client pause check
4611 * from which replicas are exempt. */
4612 if ((c->flags & CLIENT_SLAVE) && (is_may_replicate_command || is_write_command || is_read_command)) {
4613 rejectCommandFormat(c, "Replica can't interact with the keyspace");
4614 return C_OK;
4615 }
4616
4617 /* If the server is paused, block the client until
4618 * the pause has ended. Replicas are never paused. */
4619 if (!(c->flags & CLIENT_SLAVE) &&
4620 ((isPausedActions(PAUSE_ACTION_CLIENT_ALL)) ||
4621 ((isPausedActions(PAUSE_ACTION_CLIENT_WRITE)) && is_may_replicate_command)))
4622 {
4623 blockPostponeClient(c);
4624 return C_OK;
4625 }
4626
4627 /* Exec the command */
4628 if (c->flags & CLIENT_MULTI &&
4629 c->cmd->proc != execCommand &&
4630 c->cmd->proc != discardCommand &&
4631 c->cmd->proc != multiCommand &&
4632 c->cmd->proc != watchCommand &&
4633 c->cmd->proc != quitCommand &&
4634 c->cmd->proc != resetCommand)
4635 {
4636 queueMultiCommand(c, cmd_flags);
4637 addReply(c,shared.queued);
4638 } else {
4639 int flags = CMD_CALL_FULL;
4640 call(c,flags);
4641 if (listLength(server.ready_keys) && !isInsideYieldingLongCommand())
4642 handleClientsBlockedOnKeys();
4643 }
4644 return C_OK;
4645}
4646
4647/* Checks if all keys in a command (or a MULTI-EXEC) belong to the same hash slot.
4648 * If yes, return 1, otherwise 0. If hashslot is not NULL, it will be set to the
4649 * slot of the keys. */
4650int areCommandKeysInSameSlot(client *c, int *hashslot) {
4651 int slot = -1;
4652 multiState *ms = NULL;
4653
4654 if (c->cmd->proc == execCommand) {
4655 if (!(c->flags & CLIENT_MULTI)) return 1;
4656 else ms = &c->mstate;
4657 }
4658
4659 /* If client is in multi-exec, we need to check the slot of all keys
4660 * in the transaction. */
4661 for (int i = 0; i < (ms ? ms->count : 1); i++) {
4662 struct redisCommand *cmd = ms ? ms->commands[i]->cmd : c->cmd;
4663 robj **argv = ms ? ms->commands[i]->argv : c->argv;
4664 int argc = ms ? ms->commands[i]->argc : c->argc;
4665
4666 getKeysResult result = GETKEYS_RESULT_INIT;
4667 int numkeys = getKeysFromCommand(cmd, argv, argc, &result);
4668 keyReference *keyindex = result.keys;
4669
4670 /* Check if all keys have the same slots, increment the metric if not */
4671 for (int j = 0; j < numkeys; j++) {
4672 robj *thiskey = argv[keyindex[j].pos];
4673 int thisslot = keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr));
4674 if (slot == -1) {
4675 slot = thisslot;
4676 } else if (slot != thisslot) {
4677 getKeysFreeResult(&result);
4678 return 0;
4679 }
4680 }
4681 getKeysFreeResult(&result);
4682 }
4683 if (hashslot) *hashslot = slot;
4684 return 1;
4685}
4686
4687/* ====================== Error lookup and execution ===================== */
4688
4689/* Users who abuse lua error_reply will generate a new error object on each
4690 * error call, which can make server.errors get bigger and bigger. This will
4691 * cause the server to block when calling INFO (we also return errorstats by
4692 * default). To prevent the damage it can cause, when a misuse is detected,
4693 * we will print the warning log and disable the errorstats to avoid adding
4694 * more new errors. It can be re-enabled via CONFIG RESETSTAT. */
4695#define ERROR_STATS_NUMBER 128
4696void incrementErrorCount(const char *fullerr, size_t namelen) {
4697 /* errorstats is disabled, return ASAP. */
4698 if (!server.errors_enabled) return;
4699
4700 void *result;
4701 if (!raxFind(server.errors,(unsigned char*)fullerr,namelen,&result)) {
4702 if (server.errors->numele >= ERROR_STATS_NUMBER) {
4703 sds errors = sdsempty();
4704 raxIterator ri;
4705 raxStart(&ri, server.errors);
4706 raxSeek(&ri, "^", NULL, 0);
4707 while (raxNext(&ri)) {
4708 char *tmpsafe;
4709 errors = sdscatlen(errors, getSafeInfoString((char *)ri.key, ri.key_len, &tmpsafe), ri.key_len);
4710 errors = sdscatlen(errors, ", ", 2);
4711 if (tmpsafe != NULL) zfree(tmpsafe);
4712 }
4713 sdsrange(errors, 0, -3); /* Remove final ", ". */
4714 raxStop(&ri);
4715
4716 /* Print the warning log and the contents of server.errors to the log. */
4717 serverLog(LL_WARNING,
4718 "Errorstats stopped adding new errors because the number of "
4719 "errors reached the limit, may be misuse of lua error_reply, "
4720 "please check INFO ERRORSTATS, this can be re-enabled via "
4721 "CONFIG RESETSTAT.");
4722 serverLog(LL_WARNING, "Current errors code list: %s", errors);
4723 sdsfree(errors);
4724
4725 /* Reset the errors and add a single element to indicate that it is disabled. */
4726 resetErrorTableStats();
4727 incrementErrorCount("ERRORSTATS_DISABLED", 19);
4728 server.errors_enabled = 0;
4729 return;
4730 }
4731
4732 struct redisError *error = zmalloc(sizeof(*error));
4733 error->count = 1;
4734 raxInsert(server.errors,(unsigned char*)fullerr,namelen,error,NULL);
4735 } else {
4736 struct redisError *error = result;
4737 error->count++;
4738 }
4739}
4740
4741/*================================== Shutdown =============================== */
4742
4743/* Close listening sockets. Also unlink the unix domain socket if
4744 * unlink_unix_socket is non-zero. */
4745void closeListeningSockets(int unlink_unix_socket) {
4746 int j;
4747
4748 for (int i = 0; i < CONN_TYPE_MAX; i++) {
4749 connListener *listener = &server.listeners[i];
4750 if (listener->ct == NULL)
4751 continue;
4752
4753 for (j = 0; j < listener->count; j++) close(listener->fd[j]);
4754 }
4755
4756 if (server.cluster_enabled)
4757 for (j = 0; j < server.clistener.count; j++) close(server.clistener.fd[j]);
4758 if (unlink_unix_socket && server.unixsocket) {
4759 serverLog(LL_NOTICE,"Removing the unix socket file.");
4760 if (unlink(server.unixsocket) != 0)
4761 serverLog(LL_WARNING,"Error removing the unix socket file: %s",strerror(errno));
4762 }
4763}
4764
4765/* Prepare for shutting down the server. Flags:
4766 *
4767 * - SHUTDOWN_SAVE: Save a database dump even if the server is configured not to
4768 * save any dump.
4769 *
4770 * - SHUTDOWN_NOSAVE: Don't save any database dump even if the server is
4771 * configured to save one.
4772 *
4773 * - SHUTDOWN_NOW: Don't wait for replicas to catch up before shutting down.
4774 *
4775 * - SHUTDOWN_FORCE: Ignore errors writing AOF and RDB files on disk, which
4776 * would normally prevent a shutdown.
4777 *
4778 * Unless SHUTDOWN_NOW is set and if any replicas are lagging behind, C_ERR is
4779 * returned and server.shutdown_mstime is set to a timestamp to allow a grace
4780 * period for the replicas to catch up. This is checked and handled by
4781 * serverCron() which completes the shutdown as soon as possible.
4782 *
4783 * If shutting down fails due to errors writing RDB or AOF files, C_ERR is
4784 * returned and an error is logged. If the flag SHUTDOWN_FORCE is set, these
4785 * errors are logged but ignored and C_OK is returned.
4786 *
4787 * On success, this function returns C_OK and then it's OK to call exit(0). */
4788int prepareForShutdown(int flags) {
4789 if (isShutdownInitiated()) return C_ERR;
4790
4791 /* When SHUTDOWN is called while the server is loading a dataset in
4792 * memory we need to make sure no attempt is performed to save
4793 * the dataset on shutdown (otherwise it could overwrite the current DB
4794 * with half-read data).
4795 *
4796 * Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */
4797 if (server.loading || server.sentinel_mode)
4798 flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE;
4799
4800 server.shutdown_flags = flags;
4801
4802 serverLog(LL_NOTICE,"User requested shutdown...");
4803 if (server.supervised_mode == SUPERVISED_SYSTEMD)
4804 redisCommunicateSystemd("STOPPING=1\n");
4805
4806 /* Cancel all ASM tasks before shutting down. */
4807 clusterAsmCancel(NULL, "server shutdown");
4808
4809 /* If we have any replicas, let them catch up the replication offset before
4810 * we shut down, to avoid data loss. */
4811 if (!(flags & SHUTDOWN_NOW) &&
4812 server.shutdown_timeout != 0 &&
4813 !isReadyToShutdown())
4814 {
4815 server.shutdown_mstime = server.mstime + server.shutdown_timeout * 1000;
4816 if (!isPausedActions(PAUSE_ACTION_REPLICA)) sendGetackToReplicas();
4817 pauseActions(PAUSE_DURING_SHUTDOWN,
4818 LLONG_MAX,
4819 PAUSE_ACTIONS_CLIENT_WRITE_SET);
4820 serverLog(LL_NOTICE, "Waiting for replicas before shutting down.");
4821 return C_ERR;
4822 }
4823
4824 return finishShutdown();
4825}
4826
4827static inline int isShutdownInitiated(void) {
4828 return server.shutdown_mstime != 0;
4829}
4830
4831/* Returns 0 if there are any replicas which are lagging in replication which we
4832 * need to wait for before shutting down. Returns 1 if we're ready to shut
4833 * down now. */
4834int isReadyToShutdown(void) {
4835 if (listLength(server.slaves) == 0) return 1; /* No replicas. */
4836
4837 listIter li;
4838 listNode *ln;
4839 listRewind(server.slaves, &li);
4840 while ((ln = listNext(&li)) != NULL) {
4841 client *replica = listNodeValue(ln);
4842 /* Don't count migration destination replicas. */
4843 if (replica->flags & CLIENT_ASM_MIGRATING) continue;
4844 if (replica->repl_ack_off != server.master_repl_offset) return 0;
4845 }
4846 return 1;
4847}
4848
/* Undo a pending shutdown: clear the shutdown request flag, the shutdown
 * flags, the shutdown timestamp and the last received signal, then reply to
 * the clients blocked on the shutdown and lift the pause that was set with
 * the PAUSE_DURING_SHUTDOWN purpose. */
static void cancelShutdown(void) {
    atomicSet(server.shutdown_asap, 0);
    server.shutdown_flags = 0;
    /* A zero timestamp is what isShutdownInitiated() reads as "no shutdown
     * in progress". */
    server.shutdown_mstime = 0;
    atomicSet(server.last_sig_received, 0);
    replyToClientsBlockedOnShutdown();
    unpauseActions(PAUSE_DURING_SHUTDOWN);
}
4857
4858/* Returns C_OK if shutdown was aborted and C_ERR if shutdown wasn't ongoing. */
4859int abortShutdown(void) {
4860 if (isShutdownInitiated()) {
4861 cancelShutdown();
4862 } else if (shouldShutdownAsap()) {
4863 /* Signal handler has requested shutdown, but it hasn't been initiated
4864 * yet. Just clear the flag. */
4865 atomicSet(server.shutdown_asap, 0);
4866 } else {
4867 /* Shutdown neither initiated nor requested. */
4868 return C_ERR;
4869 }
4870 serverLog(LL_NOTICE, "Shutdown manually aborted.");
4871 return C_OK;
4872}
4873
4874/* The final step of the shutdown sequence. Returns C_OK if the shutdown
4875 * sequence was successful and it's OK to call exit(). If C_ERR is returned,
4876 * it's not safe to call exit(). */
4877int finishShutdown(void) {
4878
4879 int save = server.shutdown_flags & SHUTDOWN_SAVE;
4880 int nosave = server.shutdown_flags & SHUTDOWN_NOSAVE;
4881 int force = server.shutdown_flags & SHUTDOWN_FORCE;
4882
4883 /* Log a warning for each replica that is lagging. */
4884 listIter replicas_iter;
4885 listNode *replicas_list_node;
4886 int num_replicas = 0, num_lagging_replicas = 0;
4887 listRewind(server.slaves, &replicas_iter);
4888 while ((replicas_list_node = listNext(&replicas_iter)) != NULL) {
4889 client *replica = listNodeValue(replicas_list_node);
4890 /* Don't count migration destination replicas. */
4891 if (replica->flags & CLIENT_ASM_MIGRATING) continue;
4892 num_replicas++;
4893
4894 /* We pause the IO thread this replica is running on so we avoid data
4895 * races. */
4896 int paused = 0;
4897 if (replica->running_tid != IOTHREAD_MAIN_THREAD_ID) {
4898 pauseIOThread(replica->tid);
4899 paused = 1;
4900 }
4901
4902 if (replica->repl_ack_off != server.master_repl_offset) {
4903 num_lagging_replicas++;
4904 long lag = replica->replstate == SLAVE_STATE_ONLINE ?
4905 time(NULL) - replica->repl_ack_time : 0;
4906 serverLog(LL_NOTICE,
4907 "Lagging replica %s reported offset %lld behind master, lag=%ld, state=%s.",
4908 replicationGetSlaveName(replica),
4909 server.master_repl_offset - replica->repl_ack_off,
4910 lag,
4911 replstateToString(replica->replstate));
4912 }
4913
4914 if (paused) resumeIOThread(replica->tid);
4915 }
4916 if (num_replicas > 0) {
4917 serverLog(LL_NOTICE,
4918 "%d of %d replicas are in sync when shutting down.",
4919 num_replicas - num_lagging_replicas,
4920 num_replicas);
4921 }
4922
4923 /* Kill all the Lua debugger forked sessions. */
4924 ldbKillForkedSessions();
4925
4926 /* Kill the saving child if there is a background saving in progress.
4927 We want to avoid race conditions, for instance our saving child may
4928 overwrite the synchronous saving did by SHUTDOWN. */
4929 if (server.child_type == CHILD_TYPE_RDB) {
4930 serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
4931 killRDBChild();
4932 /* Note that, in killRDBChild normally has backgroundSaveDoneHandler
4933 * doing it's cleanup, but in this case this code will not be reached,
4934 * so we need to call rdbRemoveTempFile which will close fd(in order
4935 * to unlink file actually) in background thread.
4936 * The temp rdb file fd may won't be closed when redis exits quickly,
4937 * but OS will close this fd when process exits. */
4938 rdbRemoveTempFile(server.child_pid, 0);
4939 resetChildState();
4940 }
4941
4942 /* Kill module child if there is one. */
4943 if (server.child_type == CHILD_TYPE_MODULE) {
4944 serverLog(LL_WARNING,"There is a module fork child. Killing it!");
4945 TerminateModuleForkChild(server.child_pid,0);
4946 }
4947
4948 /* Kill the AOF saving child as the AOF we already have may be longer
4949 * but contains the full dataset anyway. */
4950 if (server.child_type == CHILD_TYPE_AOF) {
4951 /* If we have AOF enabled but haven't written the AOF yet, don't
4952 * shutdown or else the dataset will be lost. */
4953 if (server.aof_state == AOF_WAIT_REWRITE) {
4954 if (force) {
4955 serverLog(LL_WARNING, "Writing initial AOF. Exit anyway.");
4956 } else {
4957 serverLog(LL_WARNING, "Writing initial AOF, can't exit.");
4958 if (server.supervised_mode == SUPERVISED_SYSTEMD)
4959 redisCommunicateSystemd("STATUS=Writing initial AOF, can't exit.\n");
4960 goto error;
4961 }
4962 }
4963 serverLog(LL_WARNING,
4964 "There is a child rewriting the AOF. Killing it!");
4965 killAppendOnlyChild();
4966 }
4967 if (server.aof_state != AOF_OFF) {
4968 /* Append only file: flush buffers and fsync() the AOF at exit */
4969 serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
4970 flushAppendOnlyFile(1);
4971 if (redis_fsync(server.aof_fd) == -1) {
4972 serverLog(LL_WARNING,"Fail to fsync the AOF file: %s.",
4973 strerror(errno));
4974 }
4975 }
4976
4977 /* Create a new RDB file before exiting. */
4978 if ((server.saveparamslen > 0 && !nosave) || save) {
4979 serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting.");
4980 if (server.supervised_mode == SUPERVISED_SYSTEMD)
4981 redisCommunicateSystemd("STATUS=Saving the final RDB snapshot\n");
4982 /* Snapshotting. Perform a SYNC SAVE and exit */
4983 rdbSaveInfo rsi, *rsiptr;
4984 rsiptr = rdbPopulateSaveInfo(&rsi);
4985 /* Keep the page cache since it's likely to restart soon */
4986 if (rdbSave(SLAVE_REQ_NONE,server.rdb_filename,rsiptr,RDBFLAGS_KEEP_CACHE) != C_OK) {
4987 /* Ooops.. error saving! The best we can do is to continue
4988 * operating. Note that if there was a background saving process,
4989 * in the next cron() Redis will be notified that the background
4990 * saving aborted, handling special stuff like slaves pending for
4991 * synchronization... */
4992 if (force) {
4993 serverLog(LL_WARNING,"Error trying to save the DB. Exit anyway.");
4994 } else {
4995 serverLog(LL_WARNING,"Error trying to save the DB, can't exit.");
4996 if (server.supervised_mode == SUPERVISED_SYSTEMD)
4997 redisCommunicateSystemd("STATUS=Error trying to save the DB, can't exit.\n");
4998 goto error;
4999 }
5000 }
5001 }
5002
5003 /* Update the end offset of current INCR AOF if possible. */
5004 updateCurIncrAofEndOffset();
5005
5006 /* Free the AOF manifest. */
5007 if (server.aof_manifest) aofManifestFree(server.aof_manifest);
5008
5009 /* Fire the shutdown modules event. */
5010 moduleFireServerEvent(REDISMODULE_EVENT_SHUTDOWN,0,NULL);
5011
5012 /* Remove the pid file if possible and needed. */
5013 if (server.daemonize || server.pidfile) {
5014 serverLog(LL_NOTICE,"Removing the pid file.");
5015 unlink(server.pidfile);
5016 }
5017
5018 /* Best effort flush of slave output buffers, so that we hopefully
5019 * send them pending writes. */
5020 flushSlavesOutputBuffers();
5021
5022 /* Close the listening sockets. Apparently this allows faster restarts. */
5023 closeListeningSockets(1);
5024
5025#if !defined(__sun)
5026 /* Unlock the cluster config file before shutdown */
5027 if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1) {
5028 flock(server.cluster_config_file_lock_fd, LOCK_UN|LOCK_NB);
5029 }
5030#endif /* __sun */
5031
5032
5033 serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
5034 server.sentinel_mode ? "Sentinel" : "Redis");
5035 return C_OK;
5036
5037error:
5038 serverLog(LL_WARNING, "Errors trying to shut down the server. Check the logs for more information.");
5039 cancelShutdown();
5040 return C_ERR;
5041}
5042
5043/*================================== Commands =============================== */
5044
5045/* Sometimes Redis cannot accept write commands because there is a persistence
5046 * error with the RDB or AOF file, and Redis is configured in order to stop
5047 * accepting writes in such situation. This function returns if such a
5048 * condition is active, and the type of the condition.
5049 *
5050 * Function return values:
5051 *
5052 * DISK_ERROR_TYPE_NONE: No problems, we can accept writes.
5053 * DISK_ERROR_TYPE_AOF: Don't accept writes: AOF errors.
5054 * DISK_ERROR_TYPE_RDB: Don't accept writes: RDB errors.
5055 */
5056int writeCommandsDeniedByDiskError(void) {
5057 if (server.stop_writes_on_bgsave_err &&
5058 server.saveparamslen > 0 &&
5059 server.lastbgsave_status == C_ERR)
5060 {
5061 return DISK_ERROR_TYPE_RDB;
5062 } else if (server.aof_state != AOF_OFF) {
5063 if (server.aof_last_write_status == C_ERR) {
5064 return DISK_ERROR_TYPE_AOF;
5065 }
5066 /* AOF fsync error. */
5067 int aof_bio_fsync_status;
5068 atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
5069 if (aof_bio_fsync_status == C_ERR) {
5070 atomicGet(server.aof_bio_fsync_errno,server.aof_last_write_errno);
5071 return DISK_ERROR_TYPE_AOF;
5072 }
5073 }
5074
5075 return DISK_ERROR_TYPE_NONE;
5076}
5077
5078sds writeCommandsGetDiskErrorMessage(int error_code) {
5079 sds ret = NULL;
5080 if (error_code == DISK_ERROR_TYPE_RDB) {
5081 ret = sdsdup(shared.bgsaveerr->ptr);
5082 } else {
5083 ret = sdscatfmt(sdsempty(),
5084 "-MISCONF Errors writing to the AOF file: %s\r\n",
5085 strerror(server.aof_last_write_errno));
5086 }
5087 return ret;
5088}
5089
5090/* The PING command. It works in a different way if the client is in
5091 * in Pub/Sub mode. */
5092void pingCommand(client *c) {
5093 /* The command takes zero or one arguments. */
5094 if (c->argc > 2) {
5095 addReplyErrorArity(c);
5096 return;
5097 }
5098
5099 if (c->flags & CLIENT_PUBSUB && c->resp == 2) {
5100 addReply(c,shared.mbulkhdr[2]);
5101 addReplyBulkCBuffer(c,"pong",4);
5102 if (c->argc == 1)
5103 addReplyBulkCBuffer(c,"",0);
5104 else
5105 addReplyBulk(c,c->argv[1]);
5106 } else {
5107 if (c->argc == 1)
5108 addReply(c,shared.pong);
5109 else
5110 addReplyBulk(c,c->argv[1]);
5111 }
5112}
5113
/* The ECHO command: reply with the first argument (argv[1]) as a bulk
 * string. */
void echoCommand(client *c) {
    addReplyBulk(c,c->argv[1]);
}
5117
5118void timeCommand(client *c) {
5119 addReplyArrayLen(c,2);
5120 addReplyBulkLongLong(c, server.unixtime);
5121 addReplyBulkLongLong(c, server.ustime-((long long)server.unixtime)*1000000);
5122}
5123
/* Association between a flag bit (or bit mask) and the name used for it in
 * replies. Tables of these entries are terminated by an entry whose name is
 * NULL (see addReplyCommandFlags()). */
typedef struct replyFlagNames {
    uint64_t flag;    /* Bit(s) tested against the flags value. */
    const char *name; /* Name emitted when the flag is set; NULL ends a table. */
} replyFlagNames;
5128
5129/* Helper function to output flags. */
5130void addReplyCommandFlags(client *c, uint64_t flags, replyFlagNames *replyFlags) {
5131 int count = 0, j=0;
5132 /* Count them so we don't have to use deferred reply. */
5133 while (replyFlags[j].name) {
5134 if (flags & replyFlags[j].flag)
5135 count++;
5136 j++;
5137 }
5138
5139 addReplySetLen(c, count);
5140 j = 0;
5141 while (replyFlags[j].name) {
5142 if (flags & replyFlags[j].flag)
5143 addReplyStatus(c, replyFlags[j].name);
5144 j++;
5145 }
5146}
5147
5148void addReplyFlagsForCommand(client *c, struct redisCommand *cmd) {
5149 replyFlagNames flagNames[] = {
5150 {CMD_WRITE, "write"},
5151 {CMD_READONLY, "readonly"},
5152 {CMD_DENYOOM, "denyoom"},
5153 {CMD_MODULE, "module"},
5154 {CMD_ADMIN, "admin"},
5155 {CMD_PUBSUB, "pubsub"},
5156 {CMD_NOSCRIPT, "noscript"},
5157 {CMD_BLOCKING, "blocking"},
5158 {CMD_LOADING, "loading"},
5159 {CMD_STALE, "stale"},
5160 {CMD_SKIP_MONITOR, "skip_monitor"},
5161 {CMD_SKIP_SLOWLOG, "skip_slowlog"},
5162 {CMD_ASKING, "asking"},
5163 {CMD_FAST, "fast"},
5164 {CMD_NO_AUTH, "no_auth"},
5165 /* {CMD_MAY_REPLICATE, "may_replicate"},, Hidden on purpose */
5166 /* {CMD_SENTINEL, "sentinel"}, Hidden on purpose */
5167 /* {CMD_ONLY_SENTINEL, "only_sentinel"}, Hidden on purpose */
5168 {CMD_NO_MANDATORY_KEYS, "no_mandatory_keys"},
5169 /* {CMD_PROTECTED, "protected"}, Hidden on purpose */
5170 {CMD_NO_ASYNC_LOADING, "no_async_loading"},
5171 {CMD_NO_MULTI, "no_multi"},
5172 {CMD_MOVABLE_KEYS, "movablekeys"},
5173 {CMD_ALLOW_BUSY, "allow_busy"},
5174 /* {CMD_TOUCHES_ARBITRARY_KEYS, "TOUCHES_ARBITRARY_KEYS"}, Hidden on purpose */
5175 {0,NULL}
5176 };
5177 addReplyCommandFlags(c, cmd->flags, flagNames);
5178}
5179
5180void addReplyDocFlagsForCommand(client *c, struct redisCommand *cmd) {
5181 replyFlagNames docFlagNames[] = {
5182 {CMD_DOC_DEPRECATED, "deprecated"},
5183 {CMD_DOC_SYSCMD, "syscmd"},
5184 {0,NULL}
5185 };
5186 addReplyCommandFlags(c, cmd->doc_flags, docFlagNames);
5187}
5188
5189void addReplyFlagsForKeyArgs(client *c, uint64_t flags) {
5190 replyFlagNames docFlagNames[] = {
5191 {CMD_KEY_RO, "RO"},
5192 {CMD_KEY_RW, "RW"},
5193 {CMD_KEY_OW, "OW"},
5194 {CMD_KEY_RM, "RM"},
5195 {CMD_KEY_ACCESS, "access"},
5196 {CMD_KEY_UPDATE, "update"},
5197 {CMD_KEY_INSERT, "insert"},
5198 {CMD_KEY_DELETE, "delete"},
5199 {CMD_KEY_NOT_KEY, "not_key"},
5200 {CMD_KEY_INCOMPLETE, "incomplete"},
5201 {CMD_KEY_VARIABLE_FLAGS, "variable_flags"},
5202 {0,NULL}
5203 };
5204 addReplyCommandFlags(c, flags, docFlagNames);
5205}
5206
/* Names of the command argument types, as emitted in the "type" field by
 * addReplyCommandArgList(), which indexes this table with the argument type.
 * Must match redisCommandArgType: the order of the entries here has to
 * follow the order of the values of that enum. */
const char *ARG_TYPE_STR[] = {
    "string",
    "integer",
    "double",
    "key",
    "pattern",
    "unix-time",
    "pure-token",
    "oneof",
    "block",
};
5219
5220void addReplyFlagsForArg(client *c, uint64_t flags) {
5221 replyFlagNames argFlagNames[] = {
5222 {CMD_ARG_OPTIONAL, "optional"},
5223 {CMD_ARG_MULTIPLE, "multiple"},
5224 {CMD_ARG_MULTIPLE_TOKEN, "multiple_token"},
5225 {0,NULL}
5226 };
5227 addReplyCommandFlags(c, flags, argFlagNames);
5228}
5229
5230void addReplyCommandArgList(client *c, struct redisCommandArg *args, int num_args) {
5231 addReplyArrayLen(c, num_args);
5232 for (int j = 0; j<num_args; j++) {
5233 /* Count our reply len so we don't have to use deferred reply. */
5234 int has_display_text = 1;
5235 long maplen = 2;
5236 if (args[j].key_spec_index != -1) maplen++;
5237 if (args[j].token) maplen++;
5238 if (args[j].summary) maplen++;
5239 if (args[j].since) maplen++;
5240 if (args[j].deprecated_since) maplen++;
5241 if (args[j].flags) maplen++;
5242 if (args[j].type == ARG_TYPE_ONEOF || args[j].type == ARG_TYPE_BLOCK) {
5243 has_display_text = 0;
5244 maplen++;
5245 }
5246 if (has_display_text) maplen++;
5247 addReplyMapLen(c, maplen);
5248
5249 addReplyBulkCString(c, "name");
5250 addReplyBulkCString(c, args[j].name);
5251
5252 addReplyBulkCString(c, "type");
5253 addReplyBulkCString(c, ARG_TYPE_STR[args[j].type]);
5254
5255 if (has_display_text) {
5256 addReplyBulkCString(c, "display_text");
5257 addReplyBulkCString(c, args[j].display_text ? args[j].display_text : args[j].name);
5258 }
5259 if (args[j].key_spec_index != -1) {
5260 addReplyBulkCString(c, "key_spec_index");
5261 addReplyLongLong(c, args[j].key_spec_index);
5262 }
5263 if (args[j].token) {
5264 addReplyBulkCString(c, "token");
5265 addReplyBulkCString(c, args[j].token);
5266 }
5267 if (args[j].summary) {
5268 addReplyBulkCString(c, "summary");
5269 addReplyBulkCString(c, args[j].summary);
5270 }
5271 if (args[j].since) {
5272 addReplyBulkCString(c, "since");
5273 addReplyBulkCString(c, args[j].since);
5274 }
5275 if (args[j].deprecated_since) {
5276 addReplyBulkCString(c, "deprecated_since");
5277 addReplyBulkCString(c, args[j].deprecated_since);
5278 }
5279 if (args[j].flags) {
5280 addReplyBulkCString(c, "flags");
5281 addReplyFlagsForArg(c, args[j].flags);
5282 }
5283 if (args[j].type == ARG_TYPE_ONEOF || args[j].type == ARG_TYPE_BLOCK) {
5284 addReplyBulkCString(c, "arguments");
5285 addReplyCommandArgList(c, args[j].subargs, args[j].num_args);
5286 }
5287 }
5288}
5289
5290#ifdef LOG_REQ_RES
5291
5292void addReplyJson(client *c, struct jsonObject *rs) {
5293 addReplyMapLen(c, rs->length);
5294
5295 for (int i = 0; i < rs->length; i++) {
5296 struct jsonObjectElement *curr = &rs->elements[i];
5297 addReplyBulkCString(c, curr->key);
5298 switch (curr->type) {
5299 case (JSON_TYPE_BOOLEAN):
5300 addReplyBool(c, curr->value.boolean);
5301 break;
5302 case (JSON_TYPE_INTEGER):
5303 addReplyLongLong(c, curr->value.integer);
5304 break;
5305 case (JSON_TYPE_STRING):
5306 addReplyBulkCString(c, curr->value.string);
5307 break;
5308 case (JSON_TYPE_OBJECT):
5309 addReplyJson(c, curr->value.object);
5310 break;
5311 case (JSON_TYPE_ARRAY):
5312 addReplyArrayLen(c, curr->value.array.length);
5313 for (int k = 0; k < curr->value.array.length; k++) {
5314 struct jsonObject *object = curr->value.array.objects[k];
5315 addReplyJson(c, object);
5316 }
5317 break;
5318 default:
5319 serverPanic("Invalid JSON type %d", curr->type);
5320 }
5321 }
5322}
5323
5324#endif
5325
5326void addReplyCommandHistory(client *c, struct redisCommand *cmd) {
5327 addReplySetLen(c, cmd->num_history);
5328 for (int j = 0; j<cmd->num_history; j++) {
5329 addReplyArrayLen(c, 2);
5330 addReplyBulkCString(c, cmd->history[j].since);
5331 addReplyBulkCString(c, cmd->history[j].changes);
5332 }
5333}
5334
5335void addReplyCommandTips(client *c, struct redisCommand *cmd) {
5336 addReplySetLen(c, cmd->num_tips);
5337 for (int j = 0; j<cmd->num_tips; j++) {
5338 addReplyBulkCString(c, cmd->tips[j]);
5339 }
5340}
5341
5342void addReplyCommandKeySpecs(client *c, struct redisCommand *cmd) {
5343 addReplySetLen(c, cmd->key_specs_num);
5344 for (int i = 0; i < cmd->key_specs_num; i++) {
5345 int maplen = 3;
5346 if (cmd->key_specs[i].notes) maplen++;
5347
5348 addReplyMapLen(c, maplen);
5349
5350 if (cmd->key_specs[i].notes) {
5351 addReplyBulkCString(c, "notes");
5352 addReplyBulkCString(c,cmd->key_specs[i].notes);
5353 }
5354
5355 addReplyBulkCString(c, "flags");
5356 addReplyFlagsForKeyArgs(c,cmd->key_specs[i].flags);
5357
5358 addReplyBulkCString(c, "begin_search");
5359 switch (cmd->key_specs[i].begin_search_type) {
5360 case KSPEC_BS_UNKNOWN:
5361 addReplyMapLen(c, 2);
5362 addReplyBulkCString(c, "type");
5363 addReplyBulkCString(c, "unknown");
5364
5365 addReplyBulkCString(c, "spec");
5366 addReplyMapLen(c, 0);
5367 break;
5368 case KSPEC_BS_INDEX:
5369 addReplyMapLen(c, 2);
5370 addReplyBulkCString(c, "type");
5371 addReplyBulkCString(c, "index");
5372
5373 addReplyBulkCString(c, "spec");
5374 addReplyMapLen(c, 1);
5375 addReplyBulkCString(c, "index");
5376 addReplyLongLong(c, cmd->key_specs[i].bs.index.pos);
5377 break;
5378 case KSPEC_BS_KEYWORD:
5379 addReplyMapLen(c, 2);
5380 addReplyBulkCString(c, "type");
5381 addReplyBulkCString(c, "keyword");
5382
5383 addReplyBulkCString(c, "spec");
5384 addReplyMapLen(c, 2);
5385 addReplyBulkCString(c, "keyword");
5386 addReplyBulkCString(c, cmd->key_specs[i].bs.keyword.keyword);
5387 addReplyBulkCString(c, "startfrom");
5388 addReplyLongLong(c, cmd->key_specs[i].bs.keyword.startfrom);
5389 break;
5390 default:
5391 serverPanic("Invalid begin_search key spec type %d", cmd->key_specs[i].begin_search_type);
5392 }
5393
5394 addReplyBulkCString(c, "find_keys");
5395 switch (cmd->key_specs[i].find_keys_type) {
5396 case KSPEC_FK_UNKNOWN:
5397 addReplyMapLen(c, 2);
5398 addReplyBulkCString(c, "type");
5399 addReplyBulkCString(c, "unknown");
5400
5401 addReplyBulkCString(c, "spec");
5402 addReplyMapLen(c, 0);
5403 break;
5404 case KSPEC_FK_RANGE:
5405 addReplyMapLen(c, 2);
5406 addReplyBulkCString(c, "type");
5407 addReplyBulkCString(c, "range");
5408
5409 addReplyBulkCString(c, "spec");
5410 addReplyMapLen(c, 3);
5411 addReplyBulkCString(c, "lastkey");
5412 addReplyLongLong(c, cmd->key_specs[i].fk.range.lastkey);
5413 addReplyBulkCString(c, "keystep");
5414 addReplyLongLong(c, cmd->key_specs[i].fk.range.keystep);
5415 addReplyBulkCString(c, "limit");
5416 addReplyLongLong(c, cmd->key_specs[i].fk.range.limit);
5417 break;
5418 case KSPEC_FK_KEYNUM:
5419 addReplyMapLen(c, 2);
5420 addReplyBulkCString(c, "type");
5421 addReplyBulkCString(c, "keynum");
5422
5423 addReplyBulkCString(c, "spec");
5424 addReplyMapLen(c, 3);
5425 addReplyBulkCString(c, "keynumidx");
5426 addReplyLongLong(c, cmd->key_specs[i].fk.keynum.keynumidx);
5427 addReplyBulkCString(c, "firstkey");
5428 addReplyLongLong(c, cmd->key_specs[i].fk.keynum.firstkey);
5429 addReplyBulkCString(c, "keystep");
5430 addReplyLongLong(c, cmd->key_specs[i].fk.keynum.keystep);
5431 break;
5432 default:
5433 serverPanic("Invalid find_keys key spec type %d", cmd->key_specs[i].begin_search_type);
5434 }
5435 }
5436}
5437
5438/* Reply with an array of sub-command using the provided reply callback. */
5439void addReplyCommandSubCommands(client *c, struct redisCommand *cmd, void (*reply_function)(client*, struct redisCommand*), int use_map) {
5440 if (!cmd->subcommands_dict || !commandVisibleForClient(c, cmd)) {
5441 addReplySetLen(c, 0);
5442 return;
5443 }
5444
5445 if (use_map)
5446 addReplyMapLen(c, dictSize(cmd->subcommands_dict));
5447 else
5448 addReplyArrayLen(c, dictSize(cmd->subcommands_dict));
5449 dictEntry *de;
5450 dictIterator di;
5451 dictInitSafeIterator(&di, cmd->subcommands_dict);
5452 while((de = dictNext(&di)) != NULL) {
5453 struct redisCommand *sub = (struct redisCommand *)dictGetVal(de);
5454 if (use_map)
5455 addReplyBulkCBuffer(c, sub->fullname, sdslen(sub->fullname));
5456 reply_function(c, sub);
5457 }
5458 dictResetIterator(&di);
5459}
5460
5461/* Output the representation of a Redis command. Used by the COMMAND command and COMMAND INFO. */
5462void addReplyCommandInfo(client *c, struct redisCommand *cmd) {
5463 if (!cmd || !commandVisibleForClient(c, cmd)) {
5464 addReplyNull(c);
5465 } else {
5466 int firstkey = 0, lastkey = 0, keystep = 0;
5467 if (cmd->legacy_range_key_spec.begin_search_type != KSPEC_BS_INVALID) {
5468 firstkey = cmd->legacy_range_key_spec.bs.index.pos;
5469 lastkey = cmd->legacy_range_key_spec.fk.range.lastkey;
5470 if (lastkey >= 0)
5471 lastkey += firstkey;
5472 keystep = cmd->legacy_range_key_spec.fk.range.keystep;
5473 }
5474
5475 addReplyArrayLen(c, 10);
5476 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
5477 addReplyLongLong(c, cmd->arity);
5478 addReplyFlagsForCommand(c, cmd);
5479 addReplyLongLong(c, firstkey);
5480 addReplyLongLong(c, lastkey);
5481 addReplyLongLong(c, keystep);
5482 addReplyCommandCategories(c, cmd);
5483 addReplyCommandTips(c, cmd);
5484 addReplyCommandKeySpecs(c, cmd);
5485 addReplyCommandSubCommands(c, cmd, addReplyCommandInfo, 0);
5486 }
5487}
5488
5489/* Output the representation of a Redis command. Used by the COMMAND DOCS. */
5490void addReplyCommandDocs(client *c, struct redisCommand *cmd) {
5491 /* Count our reply len so we don't have to use deferred reply. */
5492 long maplen = 1;
5493 if (cmd->summary) maplen++;
5494 if (cmd->since) maplen++;
5495 if (cmd->flags & CMD_MODULE) maplen++;
5496 if (cmd->complexity) maplen++;
5497 if (cmd->doc_flags) maplen++;
5498 if (cmd->deprecated_since) maplen++;
5499 if (cmd->replaced_by) maplen++;
5500 if (cmd->history) maplen++;
5501#ifdef LOG_REQ_RES
5502 if (cmd->reply_schema) maplen++;
5503#endif
5504 if (cmd->args) maplen++;
5505 if (cmd->subcommands_dict) maplen++;
5506 addReplyMapLen(c, maplen);
5507
5508 if (cmd->summary) {
5509 addReplyBulkCString(c, "summary");
5510 addReplyBulkCString(c, cmd->summary);
5511 }
5512 if (cmd->since) {
5513 addReplyBulkCString(c, "since");
5514 addReplyBulkCString(c, cmd->since);
5515 }
5516
5517 /* Always have the group, for module commands the group is always "module". */
5518 addReplyBulkCString(c, "group");
5519 addReplyBulkCString(c, commandGroupStr(cmd->group));
5520
5521 if (cmd->complexity) {
5522 addReplyBulkCString(c, "complexity");
5523 addReplyBulkCString(c, cmd->complexity);
5524 }
5525 if (cmd->flags & CMD_MODULE) {
5526 addReplyBulkCString(c, "module");
5527 addReplyBulkCString(c, moduleNameFromCommand(cmd));
5528 }
5529 if (cmd->doc_flags) {
5530 addReplyBulkCString(c, "doc_flags");
5531 addReplyDocFlagsForCommand(c, cmd);
5532 }
5533 if (cmd->deprecated_since) {
5534 addReplyBulkCString(c, "deprecated_since");
5535 addReplyBulkCString(c, cmd->deprecated_since);
5536 }
5537 if (cmd->replaced_by) {
5538 addReplyBulkCString(c, "replaced_by");
5539 addReplyBulkCString(c, cmd->replaced_by);
5540 }
5541 if (cmd->history) {
5542 addReplyBulkCString(c, "history");
5543 addReplyCommandHistory(c, cmd);
5544 }
5545#ifdef LOG_REQ_RES
5546 if (cmd->reply_schema) {
5547 addReplyBulkCString(c, "reply_schema");
5548 addReplyJson(c, cmd->reply_schema);
5549 }
5550#endif
5551 if (cmd->args) {
5552 addReplyBulkCString(c, "arguments");
5553 addReplyCommandArgList(c, cmd->args, cmd->num_args);
5554 }
5555 if (cmd->subcommands_dict) {
5556 addReplyBulkCString(c, "subcommands");
5557 addReplyCommandSubCommands(c, cmd, addReplyCommandDocs, 1);
5558 }
5559}
5560
5561/* Helper for COMMAND GETKEYS and GETKEYSANDFLAGS */
5562void getKeysSubcommandImpl(client *c, int with_flags) {
5563 struct redisCommand *cmd = lookupCommand(c->argv+2,c->argc-2);
5564 getKeysResult result = GETKEYS_RESULT_INIT;
5565 int j;
5566
5567 if (!cmd || !commandVisibleForClient(c, cmd)) {
5568 addReplyError(c,"Invalid command specified");
5569 return;
5570 } else if (!doesCommandHaveKeys(cmd)) {
5571 addReplyError(c,"The command has no key arguments");
5572 return;
5573 } else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
5574 ((c->argc-2) < -cmd->arity))
5575 {
5576 addReplyError(c,"Invalid number of arguments specified for command");
5577 return;
5578 }
5579
5580 if (!getKeysFromCommandWithSpecs(cmd,c->argv+2,c->argc-2,GET_KEYSPEC_DEFAULT,&result)) {
5581 if (cmd->flags & CMD_NO_MANDATORY_KEYS) {
5582 addReplyArrayLen(c,0);
5583 } else {
5584 addReplyError(c,"Invalid arguments specified for command");
5585 }
5586 } else {
5587 addReplyArrayLen(c,result.numkeys);
5588 for (j = 0; j < result.numkeys; j++) {
5589 if (!with_flags) {
5590 addReplyBulk(c,c->argv[result.keys[j].pos+2]);
5591 } else {
5592 addReplyArrayLen(c,2);
5593 addReplyBulk(c,c->argv[result.keys[j].pos+2]);
5594 addReplyFlagsForKeyArgs(c,result.keys[j].flags);
5595 }
5596 }
5597 }
5598 getKeysFreeResult(&result);
5599}
5600
/* COMMAND GETKEYSANDFLAGS cmd arg1 arg2 ...
 *
 * Thin wrapper calling getKeysSubcommandImpl() with with_flags set to 1, so
 * that every key is returned together with its flags. */
void commandGetKeysAndFlagsCommand(client *c) {
    getKeysSubcommandImpl(c, 1);
}
5605
/* COMMAND GETKEYS cmd arg1 arg2 ...
 *
 * Thin wrapper calling getKeysSubcommandImpl() with with_flags set to 0, so
 * that only the key names are returned. */
void getKeysSubcommand(client *c) {
    getKeysSubcommandImpl(c, 0);
}
5610
5611void genericCommandCommand(client *c, int count_only) {
5612 dictIterator di;
5613 dictEntry *de;
5614 void *len = NULL;
5615 int count = 0;
5616
5617 if (!count_only)
5618 len = addReplyDeferredLen(c);
5619
5620 dictInitIterator(&di, server.commands);
5621 while ((de = dictNext(&di)) != NULL) {
5622 struct redisCommand *cmd = dictGetVal(de);
5623 if (!commandVisibleForClient(c, cmd))
5624 continue;
5625 if (!count_only)
5626 addReplyCommandInfo(c, dictGetVal(de));
5627 count++;
5628 }
5629 dictResetIterator(&di);
5630 if (count_only)
5631 addReplyLongLong(c, count);
5632 else
5633 setDeferredArrayLen(c, len, count);
5634}
5635
/* COMMAND (no args)
 *
 * Thin wrapper calling genericCommandCommand() with count_only set to 0, so
 * that the info of every visible command is returned. */
void commandCommand(client *c) {
    genericCommandCommand(c, 0);
}
5640
/* COMMAND COUNT
 *
 * Thin wrapper calling genericCommandCommand() with count_only set to 1, so
 * that only the number of visible commands is returned. */
void commandCountCommand(client *c) {
    genericCommandCommand(c, 1);
}
5645
/* Kinds of filter supported by COMMAND LIST FILTERBY. */
typedef enum {
    COMMAND_LIST_FILTER_MODULE,  /* FILTERBY MODULE <module-name> */
    COMMAND_LIST_FILTER_ACLCAT,  /* FILTERBY ACLCAT <cat> */
    COMMAND_LIST_FILTER_PATTERN, /* FILTERBY PATTERN <pattern> */
} commandListFilterType;
5651
/* A COMMAND LIST filter: its type, its argument, and a cache for the value
 * resolved from the argument, filled in by shouldFilterFromCommandList()
 * the first time the filter is used. */
typedef struct {
    commandListFilterType type;
    sds arg; /* Module name, ACL category name or pattern, depending on type. */
    struct {
        int valid; /* Non-zero once 'u' holds the value resolved from 'arg'. */
        union {
            uint64_t aclcat;     /* ACLCAT filter: category flag, 0 if the name is invalid. */
            void *module_handle; /* MODULE filter: handle returned by moduleGetHandleByName(). */
        } u;
    } cache;
} commandListFilter;
5663
5664int shouldFilterFromCommandList(struct redisCommand *cmd, commandListFilter *filter) {
5665 switch (filter->type) {
5666 case (COMMAND_LIST_FILTER_MODULE):
5667 if (!filter->cache.valid) {
5668 filter->cache.u.module_handle = moduleGetHandleByName(filter->arg);
5669 filter->cache.valid = 1;
5670 }
5671 return !moduleIsModuleCommand(filter->cache.u.module_handle, cmd);
5672 case (COMMAND_LIST_FILTER_ACLCAT): {
5673 if (!filter->cache.valid) {
5674 filter->cache.u.aclcat = ACLGetCommandCategoryFlagByName(filter->arg);
5675 filter->cache.valid = 1;
5676 }
5677 uint64_t cat = filter->cache.u.aclcat;
5678 if (cat == 0)
5679 return 1; /* Invalid ACL category */
5680 return (!(cmd->acl_categories & cat));
5681 break;
5682 }
5683 case (COMMAND_LIST_FILTER_PATTERN):
5684 return !stringmatchlen(filter->arg, sdslen(filter->arg), cmd->fullname, sdslen(cmd->fullname), 1);
5685 default:
5686 serverPanic("Invalid filter type %d", filter->type);
5687 }
5688}
5689
5690/* COMMAND LIST FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>) */
5691void commandListWithFilter(client *c, dict *commands, commandListFilter filter, int *numcmds) {
5692 dictEntry *de;
5693 dictIterator di;
5694
5695 dictInitIterator(&di, commands);
5696 while ((de = dictNext(&di)) != NULL) {
5697 struct redisCommand *cmd = dictGetVal(de);
5698 if (commandVisibleForClient(c, cmd) && !shouldFilterFromCommandList(cmd,&filter)) {
5699 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
5700 (*numcmds)++;
5701 }
5702
5703 if (cmd->subcommands_dict) {
5704 commandListWithFilter(c, cmd->subcommands_dict, filter, numcmds);
5705 }
5706 }
5707 dictResetIterator(&di);
5708}
5709
5710/* COMMAND LIST */
5711void commandListWithoutFilter(client *c, dict *commands, int *numcmds) {
5712 dictEntry *de;
5713 dictIterator di;
5714
5715 dictInitIterator(&di, commands);
5716 while ((de = dictNext(&di)) != NULL) {
5717 struct redisCommand *cmd = dictGetVal(de);
5718 if (commandVisibleForClient(c, cmd)) {
5719 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
5720 (*numcmds)++;
5721 }
5722
5723 if (cmd->subcommands_dict) {
5724 commandListWithoutFilter(c, cmd->subcommands_dict, numcmds);
5725 }
5726 }
5727 dictResetIterator(&di);
5728}
5729
5730/* COMMAND LIST [FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>)] */
5731void commandListCommand(client *c) {
5732
5733 /* Parse options. */
5734 int i = 2, got_filter = 0;
5735 commandListFilter filter = {0};
5736 for (; i < c->argc; i++) {
5737 int moreargs = (c->argc-1) - i; /* Number of additional arguments. */
5738 char *opt = c->argv[i]->ptr;
5739 if (!strcasecmp(opt,"filterby") && moreargs == 2) {
5740 char *filtertype = c->argv[i+1]->ptr;
5741 if (!strcasecmp(filtertype,"module")) {
5742 filter.type = COMMAND_LIST_FILTER_MODULE;
5743 } else if (!strcasecmp(filtertype,"aclcat")) {
5744 filter.type = COMMAND_LIST_FILTER_ACLCAT;
5745 } else if (!strcasecmp(filtertype,"pattern")) {
5746 filter.type = COMMAND_LIST_FILTER_PATTERN;
5747 } else {
5748 addReplyErrorObject(c,shared.syntaxerr);
5749 return;
5750 }
5751 got_filter = 1;
5752 filter.arg = c->argv[i+2]->ptr;
5753 i += 2;
5754 } else {
5755 addReplyErrorObject(c,shared.syntaxerr);
5756 return;
5757 }
5758 }
5759
5760 int numcmds = 0;
5761 void *replylen = addReplyDeferredLen(c);
5762
5763 if (got_filter) {
5764 commandListWithFilter(c, server.commands, filter, &numcmds);
5765 } else {
5766 commandListWithoutFilter(c, server.commands, &numcmds);
5767 }
5768
5769 setDeferredArrayLen(c,replylen,numcmds);
5770}
5771
5772/* COMMAND INFO [<command-name> ...] */
5773void commandInfoCommand(client *c) {
5774 int i;
5775
5776 if (c->argc == 2) {
5777 genericCommandCommand(c, 0);
5778 } else {
5779 addReplyArrayLen(c, c->argc-2);
5780 for (i = 2; i < c->argc; i++) {
5781 addReplyCommandInfo(c, lookupCommandBySds(c->argv[i]->ptr));
5782 }
5783 }
5784}
5785
5786/* COMMAND DOCS [command-name [command-name ...]] */
5787void commandDocsCommand(client *c) {
5788 int i;
5789 int numcmds = 0;
5790 if (c->argc == 2) {
5791 /* Reply with an array of all commands */
5792 dictIterator di;
5793 dictEntry *de;
5794 void *replylen = addReplyDeferredLen(c);
5795 dictInitIterator(&di, server.commands);
5796 while ((de = dictNext(&di)) != NULL) {
5797 struct redisCommand *cmd = dictGetVal(de);
5798 if (commandVisibleForClient(c, cmd)) {
5799 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
5800 addReplyCommandDocs(c, cmd);
5801 numcmds++;
5802 }
5803 }
5804 dictResetIterator(&di);
5805 setDeferredMapLen(c,replylen,numcmds);
5806 } else {
5807 /* Reply with an array of the requested commands (if we find them) */
5808 void *replylen = addReplyDeferredLen(c);
5809 for (i = 2; i < c->argc; i++) {
5810 struct redisCommand *cmd = lookupCommandBySds(c->argv[i]->ptr);
5811 if (!cmd || !commandVisibleForClient(c, cmd))
5812 continue;
5813 addReplyBulkCBuffer(c, cmd->fullname, sdslen(cmd->fullname));
5814 addReplyCommandDocs(c, cmd);
5815 numcmds++;
5816 }
5817 setDeferredMapLen(c,replylen,numcmds);
5818 }
5819}
5820
5821/* COMMAND GETKEYS arg0 arg1 arg2 ... */
5822void commandGetKeysCommand(client *c) {
5823 getKeysSubcommand(c);
5824}
5825
5826/* COMMAND HELP
 *
 * Replies with a static usage listing through addReplyHelp(). The table
 * alternates a subcommand synopsis with its description lines and is
 * terminated by a NULL entry.
 *
 * NOTE(review): the LIST entry does not mention the FILTERBY clause that
 * commandListCommand() accepts, and the INFO entry reuses the DOCS wording
 * ("documentation details") for its no-argument case - confirm whether the
 * help text should be updated. */
5827void commandHelpCommand(client *c) {
5828 const char *help[] = {
5829"(no subcommand)",
5830" Return details about all Redis commands.",
5831"COUNT",
5832" Return the total number of commands in this Redis server.",
5833"LIST",
5834" Return a list of all commands in this Redis server.",
5835"INFO [<command-name> ...]",
5836" Return details about multiple Redis commands.",
5837" If no command names are given, documentation details for all",
5838" commands are returned.",
5839"DOCS [<command-name> ...]",
5840" Return documentation details about multiple Redis commands.",
5841" If no command names are given, documentation details for all",
5842" commands are returned.",
5843"GETKEYS <full-command>",
5844" Return the keys from a full Redis command.",
5845"GETKEYSANDFLAGS <full-command>",
5846" Return the keys and the access flags from a full Redis command.",
5847NULL /* Terminator for the help table. */
5848 };
5849
5850 addReplyHelp(c, help);
5851}
5852
/* Render a byte count as a short human readable string such as 100B,
 * 4.00K, 100.00M, 2.00G and so forth.
 *
 * s    - destination buffer.
 * size - capacity of 's' in bytes (passed straight to snprintf).
 * n    - the amount of bytes to describe.
 *
 * Values below 1024 are printed as a plain integer followed by 'B'. Larger
 * values are divided by the biggest power of 1024 not exceeding them and
 * printed with two decimals and a K/M/G/T/P suffix. Values of 1024^6 and
 * above fall back to the plain "<n>B" form. */
void bytesToHuman(char *s, size_t size, unsigned long long n) {
    static const char suffixes[] = "KMGTP";
    unsigned long long unit = 1024ULL;

    if (n >= unit) {
        for (size_t i = 0; i < sizeof(suffixes)-1; i++, unit <<= 10) {
            /* Pick the first unit whose next multiple is above n. */
            if (n < (unit << 10)) {
                snprintf(s,size,"%.2f%c",(double)n/(double)unit,suffixes[i]);
                return;
            }
        }
    }

    /* Either plain bytes or too large for the units above. */
    snprintf(s,size,"%lluB",n);
}
5881
5882/* Fill percentile distribution of latencies. */
5883sds fillPercentileDistributionLatencies(sds info, const char* histogram_name, struct hdr_histogram* histogram) {
5884 info = sdscatfmt(info,"latency_percentiles_usec_%s:",histogram_name);
5885 for (int j = 0; j < server.latency_tracking_info_percentiles_len; j++) {
5886 char fbuf[128];
5887 size_t len = snprintf(fbuf, sizeof(fbuf), "%f", server.latency_tracking_info_percentiles[j]);
5888 trimDoubleString(fbuf, len);
5889 info = sdscatprintf(info,"p%s=%.3f", fbuf,
5890 ((double)hdr_value_at_percentile(histogram,server.latency_tracking_info_percentiles[j]))/1000.0f);
5891 if (j != server.latency_tracking_info_percentiles_len-1)
5892 info = sdscatlen(info,",",1);
5893 }
5894 info = sdscatprintf(info,"\r\n");
5895 return info;
5896}
5897
5898const char *replstateToString(int replstate) {
5899 switch (replstate) {
5900 case SLAVE_STATE_WAIT_BGSAVE_START:
5901 case SLAVE_STATE_WAIT_BGSAVE_END:
5902 case SLAVE_STATE_WAIT_RDB_CHANNEL:
5903 return "wait_bgsave";
5904 case SLAVE_STATE_SEND_BULK_AND_STREAM:
5905 return "send_bulk_and_stream";
5906 case SLAVE_STATE_SEND_BULK:
5907 return "send_bulk";
5908 case SLAVE_STATE_ONLINE:
5909 return "online";
5910 default:
5911 return "";
5912 }
5913}
5914
5915/* Characters we sanitize on INFO output to maintain expected format.
 * getSafeInfoString() uses this set both to detect unsafe input (mempbrk)
 * and as the "from" set handed to memmapchars(). */
5916static char unsafe_info_chars[] = "#:\n\r";
5917static char unsafe_info_chars_substs[] = "____"; /* Must be same length as above; handed to memmapchars() as the replacement set. */
5918
5919/* Returns a sanitized version of s that contains no unsafe info string chars.
5920 * If no unsafe characters are found, simply returns s. Caller needs to
5921 * free tmp if it is non-null on return.
5922 */
5923const char *getSafeInfoString(const char *s, size_t len, char **tmp) {
5924 *tmp = NULL;
5925 if (mempbrk(s, len, unsafe_info_chars,sizeof(unsafe_info_chars)-1)
5926 == NULL) return s;
5927 char *new = *tmp = zmalloc(len + 1);
5928 memcpy(new, s, len);
5929 new[len] = '\0';
5930 return memmapchars(new, len, unsafe_info_chars, unsafe_info_chars_substs,
5931 sizeof(unsafe_info_chars)-1);
5932}
5933
5934sds genRedisInfoStringCommandStats(sds info, dict *commands) {
5935 struct redisCommand *c;
5936 dictEntry *de;
5937 dictIterator di;
5938 dictInitSafeIterator(&di, commands);
5939 while((de = dictNext(&di)) != NULL) {
5940 char *tmpsafe;
5941 c = (struct redisCommand *) dictGetVal(de);
5942 if (c->calls || c->failed_calls || c->rejected_calls) {
5943 info = sdscatprintf(info,
5944 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f"
5945 ",rejected_calls=%lld,failed_calls=%lld\r\n",
5946 getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe), c->calls, c->microseconds,
5947 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls),
5948 c->rejected_calls, c->failed_calls);
5949 if (tmpsafe != NULL) zfree(tmpsafe);
5950 }
5951 if (c->subcommands_dict) {
5952 info = genRedisInfoStringCommandStats(info, c->subcommands_dict);
5953 }
5954 }
5955 dictResetIterator(&di);
5956
5957 return info;
5958}
5959
5960/* Writes the ACL metrics to the info */
5961sds genRedisInfoStringACLStats(sds info) {
5962 info = sdscatprintf(info,
5963 "acl_access_denied_auth:%lld\r\n"
5964 "acl_access_denied_cmd:%lld\r\n"
5965 "acl_access_denied_key:%lld\r\n"
5966 "acl_access_denied_channel:%lld\r\n"
5967 "acl_access_denied_tls_cert:%lld\r\n",
5968 server.acl_info.user_auth_failures,
5969 server.acl_info.invalid_cmd_accesses,
5970 server.acl_info.invalid_key_accesses,
5971 server.acl_info.invalid_channel_accesses,
5972 server.acl_info.acl_access_denied_tls_cert);
5973 return info;
5974}
5975
5976sds genRedisInfoStringLatencyStats(sds info, dict *commands) {
5977 struct redisCommand *c;
5978 dictEntry *de;
5979 dictIterator di;
5980 dictInitSafeIterator(&di, commands);
5981 while((de = dictNext(&di)) != NULL) {
5982 char *tmpsafe;
5983 c = (struct redisCommand *) dictGetVal(de);
5984 if (c->latency_histogram) {
5985 info = fillPercentileDistributionLatencies(info,
5986 getSafeInfoString(c->fullname, sdslen(c->fullname), &tmpsafe),
5987 c->latency_histogram);
5988 if (tmpsafe != NULL) zfree(tmpsafe);
5989 }
5990 if (c->subcommands_dict) {
5991 info = genRedisInfoStringLatencyStats(info, c->subcommands_dict);
5992 }
5993 }
5994 dictResetIterator(&di);
5995
5996 return info;
5997}
5998
5999/* Takes a null terminated sections list, and adds them to the dict. */
6000void addInfoSectionsToDict(dict *section_dict, char **sections) {
6001 while (*sections) {
6002 sds section = sdsnew(*sections);
6003 if (dictAdd(section_dict, section, NULL)==DICT_ERR)
6004 sdsfree(section);
6005 sections++;
6006 }
6007}
6008
6009/* Cached copy of the default sections, as an optimization. */
6010static dict *cached_default_info_sections = NULL;
6011
6012void releaseInfoSectionDict(dict *sec) {
6013 if (sec != cached_default_info_sections)
6014 dictRelease(sec);
6015}
6016
6017/* Create a dictionary with unique section names to be used by genRedisInfoString.
6018 * 'argv' and 'argc' are list of arguments for INFO.
6019 * 'defaults' is an optional null terminated list of default sections.
6020 * 'out_all' and 'out_everything' are optional.
6021 * The resulting dictionary should be released with releaseInfoSectionDict. */
6022dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) {
6023 char *default_sections[] = {
6024 "server", "clients", "memory", "persistence", "stats", "replication", "threads",
6025 "cpu", "hotkeys", "module_list", "errorstats", "cluster", "keyspace", "keysizes", NULL};
6026 if (!defaults)
6027 defaults = default_sections;
6028
6029 if (argc == 0) {
6030 /* In this case we know the dict is not gonna be modified, so we cache
6031 * it as an optimization for a common case. */
6032 if (cached_default_info_sections)
6033 return cached_default_info_sections;
6034 cached_default_info_sections = dictCreate(&stringSetDictType);
6035 dictExpand(cached_default_info_sections, 16);
6036 addInfoSectionsToDict(cached_default_info_sections, defaults);
6037 return cached_default_info_sections;
6038 }
6039
6040 dict *section_dict = dictCreate(&stringSetDictType);
6041 dictExpand(section_dict, min(argc,16));
6042 for (int i = 0; i < argc; i++) {
6043 if (!strcasecmp(argv[i]->ptr,"default")) {
6044 addInfoSectionsToDict(section_dict, defaults);
6045 } else if (!strcasecmp(argv[i]->ptr,"all")) {
6046 if (out_all) *out_all = 1;
6047 } else if (!strcasecmp(argv[i]->ptr,"everything")) {
6048 if (out_everything) *out_everything = 1;
6049 if (out_all) *out_all = 1;
6050 } else {
6051 sds section = sdsnew(argv[i]->ptr);
6052 if (dictAdd(section_dict, section, NULL) != DICT_OK)
6053 sdsfree(section);
6054 }
6055 }
6056 return section_dict;
6057}
6058
6059/* sets blocking_keys to the total number of keys which has at least one client blocked on them.
6060 * sets blocking_keys_on_nokey to the total number of keys which has at least one client
6061 * blocked on them to be written or deleted.
6062 * sets watched_keys to the total number of keys which has at least on client watching on them. */
6063void totalNumberOfStatefulKeys(unsigned long *blocking_keys, unsigned long *blocking_keys_on_nokey, unsigned long *watched_keys) {
6064 unsigned long bkeys=0, bkeys_on_nokey=0, wkeys=0;
6065 for (int j = 0; j < server.dbnum; j++) {
6066 bkeys += dictSize(server.db[j].blocking_keys);
6067 bkeys_on_nokey += dictSize(server.db[j].blocking_keys_unblock_on_nokey);
6068 wkeys += dictSize(server.db[j].watched_keys);
6069 }
6070 if (blocking_keys)
6071 *blocking_keys = bkeys;
6072 if (blocking_keys_on_nokey)
6073 *blocking_keys_on_nokey = bkeys_on_nokey;
6074 if (watched_keys)
6075 *watched_keys = wkeys;
6076}
6077
6078/* Create the string returned by the INFO command. This is decoupled
6079 * by the INFO command itself as we need to report the same information
6080 * on memory corruption problems. */
6081sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
6082 sds info = sdsempty();
6083 time_t uptime = server.unixtime-server.stat_starttime;
6084 int j;
6085 int sections = 0;
6086 if (everything) all_sections = 1;
6087
6088 /* Server */
6089 if (all_sections || (dictFind(section_dict,"server") != NULL)) {
6090 static int call_uname = 1;
6091 static struct utsname name;
6092 char *mode;
6093 char *supervised;
6094
6095 if (server.cluster_enabled) mode = "cluster";
6096 else if (server.sentinel_mode) mode = "sentinel";
6097 else mode = "standalone";
6098
6099 if (server.supervised) {
6100 if (server.supervised_mode == SUPERVISED_UPSTART) supervised = "upstart";
6101 else if (server.supervised_mode == SUPERVISED_SYSTEMD) supervised = "systemd";
6102 else supervised = "unknown";
6103 } else {
6104 supervised = "no";
6105 }
6106
6107 if (sections++) info = sdscat(info,"\r\n");
6108
6109 if (call_uname) {
6110 /* Uname can be slow and is always the same output. Cache it. */
6111 uname(&name);
6112 call_uname = 0;
6113 }
6114
6115 info = sdscatfmt(info, "# Server\r\n" FMTARGS(
6116 "redis_version:%s\r\n", REDIS_VERSION,
6117 "redis_git_sha1:%s\r\n", redisGitSHA1(),
6118 "redis_git_dirty:%i\r\n", strtol(redisGitDirty(),NULL,10) > 0,
6119 "redis_build_id:%s\r\n", redisBuildIdString(),
6120 "redis_mode:%s\r\n", mode,
6121 "os:%s", name.sysname,
6122 " %s", name.release,
6123 " %s\r\n", name.machine,
6124 "arch_bits:%i\r\n", server.arch_bits,
6125 "monotonic_clock:%s\r\n", monotonicInfoString(),
6126 "multiplexing_api:%s\r\n", aeGetApiName(),
6127 "atomicvar_api:%s\r\n", REDIS_ATOMIC_API,
6128 "gcc_version:%s\r\n", GNUC_VERSION_STR,
6129 "process_id:%I\r\n", (int64_t) getpid(),
6130 "process_supervised:%s\r\n", supervised,
6131 "run_id:%s\r\n", server.runid,
6132 "tcp_port:%i\r\n", server.port ? server.port : server.tls_port,
6133 "server_time_usec:%I\r\n", (int64_t)server.ustime,
6134 "uptime_in_seconds:%I\r\n", (int64_t)uptime,
6135 "uptime_in_days:%I\r\n", (int64_t)(uptime/(3600*24)),
6136 "hz:%i\r\n", server.hz,
6137 "configured_hz:%i\r\n", server.config_hz,
6138 "lru_clock:%u\r\n", server.lruclock,
6139 "executable:%s\r\n", server.executable ? server.executable : "",
6140 "config_file:%s\r\n", server.configfile ? server.configfile : "",
6141 "io_threads_active:%i\r\n", server.io_threads_active));
6142
6143 /* Conditional properties */
6144 if (isShutdownInitiated()) {
6145 info = sdscatfmt(info,
6146 "shutdown_in_milliseconds:%I\r\n",
6147 (int64_t)(server.shutdown_mstime - commandTimeSnapshot()));
6148 }
6149
6150 /* get all the listeners information */
6151 info = getListensInfoString(info);
6152 }
6153
6154 /* Clients */
6155 if (all_sections || (dictFind(section_dict,"clients") != NULL)) {
6156 size_t maxin, maxout;
6157 unsigned long blocking_keys, blocking_keys_on_nokey, watched_keys;
6158 getExpansiveClientsInfo(&maxin,&maxout);
6159 totalNumberOfStatefulKeys(&blocking_keys, &blocking_keys_on_nokey, &watched_keys);
6160 if (sections++) info = sdscat(info,"\r\n");
6161 info = sdscatprintf(info, "# Clients\r\n" FMTARGS(
6162 "connected_clients:%lu\r\n", listLength(server.clients) - listLength(server.slaves),
6163 "cluster_connections:%lu\r\n", getClusterConnectionsCount(),
6164 "maxclients:%u\r\n", server.maxclients,
6165 "client_recent_max_input_buffer:%zu\r\n", maxin,
6166 "client_recent_max_output_buffer:%zu\r\n", maxout,
6167 "blocked_clients:%d\r\n", server.blocked_clients,
6168 "tracking_clients:%d\r\n", server.tracking_clients,
6169 "pubsub_clients:%d\r\n", server.pubsub_clients,
6170 "watching_clients:%d\r\n", server.watching_clients,
6171 "clients_in_timeout_table:%llu\r\n", (unsigned long long) raxSize(server.clients_timeout_table),
6172 "total_watched_keys:%lu\r\n", watched_keys,
6173 "total_blocking_keys:%lu\r\n", blocking_keys,
6174 "total_blocking_keys_on_nokey:%lu\r\n", blocking_keys_on_nokey));
6175 }
6176
6177 /* Memory */
6178 if (all_sections || (dictFind(section_dict,"memory") != NULL)) {
6179 char hmem[64];
6180 char peak_hmem[64];
6181 char total_system_hmem[64];
6182 char used_memory_lua_hmem[64];
6183 char used_memory_vm_total_hmem[64];
6184 char used_memory_scripts_hmem[64];
6185 char used_memory_rss_hmem[64];
6186 char maxmemory_hmem[64];
6187 size_t zmalloc_used = zmalloc_used_memory();
6188 size_t total_system_mem = server.system_memory_size;
6189 const char *evict_policy = evictPolicyToString();
6190 long long memory_lua = evalScriptsMemoryVM();
6191 long long memory_functions = functionsMemoryVM();
6192 struct redisMemOverhead *mh = getMemoryOverheadData();
6193
6194 /* Peak memory is updated from time to time by serverCron() so it
6195 * may happen that the instantaneous value is slightly bigger than
6196 * the peak value. This may confuse users, so we update the peak
6197 * if found smaller than the current memory usage. */
6198 updatePeakMemory();
6199
6200 bytesToHuman(hmem,sizeof(hmem),zmalloc_used);
6201 bytesToHuman(peak_hmem,sizeof(peak_hmem),server.stat_peak_memory);
6202 bytesToHuman(total_system_hmem,sizeof(total_system_hmem),total_system_mem);
6203 bytesToHuman(used_memory_lua_hmem,sizeof(used_memory_lua_hmem),memory_lua);
6204 bytesToHuman(used_memory_vm_total_hmem,sizeof(used_memory_vm_total_hmem),memory_functions + memory_lua);
6205 bytesToHuman(used_memory_scripts_hmem,sizeof(used_memory_scripts_hmem),mh->eval_caches + mh->functions_caches);
6206 bytesToHuman(used_memory_rss_hmem,sizeof(used_memory_rss_hmem),server.cron_malloc_stats.process_rss);
6207 bytesToHuman(maxmemory_hmem,sizeof(maxmemory_hmem),server.maxmemory);
6208
6209 if (sections++) info = sdscat(info,"\r\n");
6210 info = sdscatprintf(info, "# Memory\r\n" FMTARGS(
6211 "used_memory:%zu\r\n", zmalloc_used,
6212 "used_memory_human:%s\r\n", hmem,
6213 "used_memory_rss:%zu\r\n", server.cron_malloc_stats.process_rss,
6214 "used_memory_rss_human:%s\r\n", used_memory_rss_hmem,
6215 "used_memory_peak:%zu\r\n", server.stat_peak_memory,
6216 "used_memory_peak_human:%s\r\n", peak_hmem,
6217 "used_memory_peak_time:%jd\r\n", (intmax_t)server.stat_peak_memory_time,
6218 "used_memory_peak_perc:%.2f%%\r\n", mh->peak_perc,
6219 "used_memory_overhead:%zu\r\n", mh->overhead_total,
6220 "used_memory_startup:%zu\r\n", mh->startup_allocated,
6221 "used_memory_dataset:%zu\r\n", mh->dataset,
6222 "used_memory_dataset_perc:%.2f%%\r\n", mh->dataset_perc,
6223 "allocator_allocated:%zu\r\n", server.cron_malloc_stats.allocator_allocated,
6224 "allocator_active:%zu\r\n", server.cron_malloc_stats.allocator_active,
6225 "allocator_resident:%zu\r\n", server.cron_malloc_stats.allocator_resident,
6226 "allocator_muzzy:%zu\r\n", server.cron_malloc_stats.allocator_muzzy,
6227 "total_system_memory:%lu\r\n", (unsigned long)total_system_mem,
6228 "total_system_memory_human:%s\r\n", total_system_hmem,
6229 "used_memory_lua:%lld\r\n", memory_lua, /* deprecated, renamed to used_memory_vm_eval */
6230 "used_memory_vm_eval:%lld\r\n", memory_lua,
6231 "used_memory_lua_human:%s\r\n", used_memory_lua_hmem, /* deprecated */
6232 "used_memory_scripts_eval:%lld\r\n", (long long)mh->eval_caches,
6233 "number_of_cached_scripts:%lu\r\n", dictSize(evalScriptsDict()),
6234 "number_of_functions:%lu\r\n", functionsNum(),
6235 "number_of_libraries:%lu\r\n", functionsLibNum(),
6236 "used_memory_vm_functions:%lld\r\n", memory_functions,
6237 "used_memory_vm_total:%lld\r\n", memory_functions + memory_lua,
6238 "used_memory_vm_total_human:%s\r\n", used_memory_vm_total_hmem,
6239 "used_memory_functions:%lld\r\n", (long long)mh->functions_caches,
6240 "used_memory_scripts:%lld\r\n", (long long)mh->eval_caches + (long long)mh->functions_caches,
6241 "used_memory_scripts_human:%s\r\n", used_memory_scripts_hmem,
6242 "maxmemory:%lld\r\n", server.maxmemory,
6243 "maxmemory_human:%s\r\n", maxmemory_hmem,
6244 "maxmemory_policy:%s\r\n", evict_policy,
6245 "allocator_frag_ratio:%.2f\r\n", mh->allocator_frag,
6246 "allocator_frag_bytes:%zu\r\n", mh->allocator_frag_bytes,
6247 "allocator_rss_ratio:%.2f\r\n", mh->allocator_rss,
6248 "allocator_rss_bytes:%zd\r\n", mh->allocator_rss_bytes,
6249 "rss_overhead_ratio:%.2f\r\n", mh->rss_extra,
6250 "rss_overhead_bytes:%zd\r\n", mh->rss_extra_bytes,
6251 /* The next field (mem_fragmentation_ratio) is the total RSS
6252 * overhead, including fragmentation, but not just it. This field
6253 * (and the next one) is named like that just for backward
6254 * compatibility. */
6255 "mem_fragmentation_ratio:%.2f\r\n", mh->total_frag,
6256 "mem_fragmentation_bytes:%zd\r\n", mh->total_frag_bytes,
6257 "mem_not_counted_for_evict:%zu\r\n", freeMemoryGetNotCountedMemory(),
6258 "mem_replication_backlog:%zu\r\n", mh->repl_backlog,
6259 "mem_total_replication_buffers:%zu\r\n", server.repl_buffer_mem + server.repl_full_sync_buffer.mem_used,
6260 "mem_replica_full_sync_buffer:%zu\r\n", server.repl_full_sync_buffer.mem_used,
6261 "mem_clients_slaves:%zu\r\n", mh->clients_slaves,
6262 "mem_clients_normal:%zu\r\n", mh->clients_normal,
6263 "mem_cluster_slot_migration_output_buffer:%zu\r\n", mh->asm_migrate_output_buffer,
6264 "mem_cluster_slot_migration_input_buffer:%zu\r\n", mh->asm_import_input_buffer,
6265 "mem_cluster_slot_migration_input_buffer_peak:%zu\r\n", asmGetPeakSyncBufferSize(),
6266 "mem_cluster_links:%zu\r\n", mh->cluster_links,
6267 "mem_aof_buffer:%zu\r\n", mh->aof_buffer,
6268 "mem_allocator:%s\r\n", ZMALLOC_LIB,
6269 "mem_overhead_db_hashtable_rehashing:%zu\r\n", mh->overhead_db_hashtable_rehashing,
6270 "active_defrag_running:%d\r\n", server.active_defrag_running,
6271 "lazyfree_pending_objects:%zu\r\n", lazyfreeGetPendingObjectsCount(),
6272 "lazyfreed_objects:%zu\r\n", lazyfreeGetFreedObjectsCount()));
6273 freeMemoryOverheadData(mh);
6274 }
6275
6276 /* Persistence */
6277 if (all_sections || (dictFind(section_dict,"persistence") != NULL)) {
6278 if (sections++) info = sdscat(info,"\r\n");
6279 double fork_perc = 0;
6280 if (server.stat_module_progress) {
6281 fork_perc = server.stat_module_progress * 100;
6282 } else if (server.stat_current_save_keys_total) {
6283 fork_perc = ((double)server.stat_current_save_keys_processed / server.stat_current_save_keys_total) * 100;
6284 }
6285 int aof_bio_fsync_status;
6286 atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
6287
6288 info = sdscatprintf(info, "# Persistence\r\n" FMTARGS(
6289 "loading:%d\r\n", (int)(server.loading && !server.async_loading),
6290 "async_loading:%d\r\n", (int)server.async_loading,
6291 "current_cow_peak:%zu\r\n", server.stat_current_cow_peak,
6292 "current_cow_size:%zu\r\n", server.stat_current_cow_bytes,
6293 "current_cow_size_age:%lu\r\n", (server.stat_current_cow_updated ?
6294 (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0),
6295 "current_fork_perc:%.2f\r\n", fork_perc,
6296 "current_save_keys_processed:%zu\r\n", server.stat_current_save_keys_processed,
6297 "current_save_keys_total:%zu\r\n", server.stat_current_save_keys_total,
6298 "rdb_changes_since_last_save:%lld\r\n", server.dirty,
6299 "rdb_bgsave_in_progress:%d\r\n", server.child_type == CHILD_TYPE_RDB,
6300 "rdb_last_save_time:%jd\r\n", (intmax_t)server.lastsave,
6301 "rdb_last_bgsave_status:%s\r\n", (server.lastbgsave_status == C_OK) ? "ok" : "err",
6302 "rdb_last_bgsave_time_sec:%jd\r\n", (intmax_t)server.rdb_save_time_last,
6303 "rdb_current_bgsave_time_sec:%jd\r\n", (intmax_t)((server.child_type != CHILD_TYPE_RDB) ?
6304 -1 : time(NULL)-server.rdb_save_time_start),
6305 "rdb_saves:%lld\r\n", server.stat_rdb_saves,
6306 "rdb_saves_consecutive_failures:%lld\r\n", server.stat_rdb_consecutive_failures,
6307 "rdb_last_cow_size:%zu\r\n", server.stat_rdb_cow_bytes,
6308 "rdb_last_load_keys_expired:%lld\r\n", server.rdb_last_load_keys_expired,
6309 "rdb_last_load_keys_loaded:%lld\r\n", server.rdb_last_load_keys_loaded,
6310 "aof_enabled:%d\r\n", server.aof_state != AOF_OFF,
6311 "aof_rewrite_in_progress:%d\r\n", server.child_type == CHILD_TYPE_AOF,
6312 "aof_rewrite_scheduled:%d\r\n", server.aof_rewrite_scheduled,
6313 "aof_last_rewrite_time_sec:%jd\r\n", (intmax_t)server.aof_rewrite_time_last,
6314 "aof_current_rewrite_time_sec:%jd\r\n", (intmax_t)((server.child_type != CHILD_TYPE_AOF) ?
6315 -1 : time(NULL)-server.aof_rewrite_time_start),
6316 "aof_last_bgrewrite_status:%s\r\n", (server.aof_lastbgrewrite_status == C_OK ?
6317 "ok" : "err"),
6318 "aof_rewrites:%lld\r\n", server.stat_aof_rewrites,
6319 "aof_rewrites_consecutive_failures:%lld\r\n", server.stat_aofrw_consecutive_failures,
6320 "aof_last_write_status:%s\r\n", (server.aof_last_write_status == C_OK &&
6321 aof_bio_fsync_status == C_OK) ? "ok" : "err",
6322 "aof_last_cow_size:%zu\r\n", server.stat_aof_cow_bytes,
6323 "module_fork_in_progress:%d\r\n", server.child_type == CHILD_TYPE_MODULE,
6324 "module_fork_last_cow_size:%zu\r\n", server.stat_module_cow_bytes));
6325
6326 if (server.aof_enabled) {
6327 info = sdscatprintf(info, FMTARGS(
6328 "aof_current_size:%lld\r\n", (long long) server.aof_current_size,
6329 "aof_base_size:%lld\r\n", (long long) server.aof_rewrite_base_size,
6330 "aof_pending_rewrite:%d\r\n", server.aof_rewrite_scheduled,
6331 "aof_buffer_length:%zu\r\n", sdslen(server.aof_buf),
6332 "aof_pending_bio_fsync:%lu\r\n", bioPendingJobsOfType(BIO_AOF_FSYNC),
6333 "aof_delayed_fsync:%lu\r\n", server.aof_delayed_fsync));
6334 }
6335
6336 if (server.loading) {
6337 double perc = 0;
6338 time_t eta, elapsed;
6339 off_t remaining_bytes = 1;
6340
6341 if (server.loading_total_bytes) {
6342 perc = ((double)server.loading_loaded_bytes / server.loading_total_bytes) * 100;
6343 remaining_bytes = server.loading_total_bytes - server.loading_loaded_bytes;
6344 } else if(server.loading_rdb_used_mem) {
6345 perc = ((double)server.loading_loaded_bytes / server.loading_rdb_used_mem) * 100;
6346 remaining_bytes = server.loading_rdb_used_mem - server.loading_loaded_bytes;
6347 /* used mem is only a (bad) estimation of the rdb file size, avoid going over 100% */
6348 if (perc > 99.99) perc = 99.99;
6349 if (remaining_bytes < 1) remaining_bytes = 1;
6350 }
6351
6352 elapsed = time(NULL)-server.loading_start_time;
6353 if (elapsed == 0) {
6354 eta = 1; /* A fake 1 second figure if we don't have
6355 enough info */
6356 } else {
6357 eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
6358 }
6359
6360 info = sdscatprintf(info, FMTARGS(
6361 "loading_start_time:%jd\r\n", (intmax_t) server.loading_start_time,
6362 "loading_total_bytes:%llu\r\n", (unsigned long long) server.loading_total_bytes,
6363 "loading_rdb_used_mem:%llu\r\n", (unsigned long long) server.loading_rdb_used_mem,
6364 "loading_loaded_bytes:%llu\r\n", (unsigned long long) server.loading_loaded_bytes,
6365 "loading_loaded_perc:%.2f\r\n", perc,
6366 "loading_eta_seconds:%jd\r\n", (intmax_t)eta));
6367 }
6368 }
6369
6370 /* Threads */
6371 int stat_io_ops_processed_calculated = 0;
6372 long long stat_io_reads_processed = 0, stat_io_writes_processed = 0;
6373 long long stat_total_reads_processed = 0, stat_total_writes_processed = 0;
6374 if (all_sections || (dictFind(section_dict,"threads") != NULL)) {
6375 if (sections++) info = sdscat(info,"\r\n");
6376 info = sdscatprintf(info, "# Threads\r\n");
6377 long long reads, writes;
6378 for (j = 0; j < server.io_threads_num; j++) {
6379 atomicGet(server.stat_io_reads_processed[j], reads);
6380 atomicGet(server.stat_io_writes_processed[j], writes);
6381 info = sdscatprintf(info, "io_thread_%d:clients=%d,reads=%lld,writes=%lld\r\n",
6382 j, server.io_threads_clients_num[j], reads, writes);
6383 stat_total_reads_processed += reads;
6384 if (j != 0) stat_io_reads_processed += reads; /* Skip the main thread */
6385 stat_total_writes_processed += writes;
6386 if (j != 0) stat_io_writes_processed += writes; /* Skip the main thread */
6387 }
6388 stat_io_ops_processed_calculated = 1;
6389 }
6390
6391 /* Stats */
6392 if (all_sections || (dictFind(section_dict,"stats") != NULL)) {
6393 long long stat_net_input_bytes, stat_net_output_bytes;
6394 long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
6395 long long current_eviction_exceeded_time = server.stat_last_eviction_exceeded_time ?
6396 (long long) elapsedUs(server.stat_last_eviction_exceeded_time): 0;
6397 long long current_active_defrag_time = server.stat_last_active_defrag_time ?
6398 (long long) elapsedUs(server.stat_last_active_defrag_time): 0;
6399 long long stat_client_qbuf_limit_disconnections;
6400 atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
6401 atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
6402 atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
6403 atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
6404 atomicGet(server.stat_client_qbuf_limit_disconnections, stat_client_qbuf_limit_disconnections);
6405
6406 /* If we calculated the total reads and writes in the threads section,
6407 * we don't need to do it again, and also keep the values consistent. */
6408 if (!stat_io_ops_processed_calculated) {
6409 long long reads, writes;
6410 for (j = 0; j < server.io_threads_num; j++) {
6411 atomicGet(server.stat_io_reads_processed[j], reads);
6412 stat_total_reads_processed += reads;
6413 if (j != 0) stat_io_reads_processed += reads; /* Skip the main thread */
6414 atomicGet(server.stat_io_writes_processed[j], writes);
6415 stat_total_writes_processed += writes;
6416 if (j != 0) stat_io_writes_processed += writes; /* Skip the main thread */
6417 }
6418 }
6419
6420 if (sections++) info = sdscat(info,"\r\n");
6421 info = sdscatprintf(info, "# Stats\r\n" FMTARGS(
6422 "total_connections_received:%lld\r\n", server.stat_numconnections,
6423 "total_commands_processed:%lld\r\n", server.stat_numcommands,
6424 "instantaneous_ops_per_sec:%lld\r\n", getInstantaneousMetric(STATS_METRIC_COMMAND),
6425 "total_net_input_bytes:%lld\r\n", stat_net_input_bytes + stat_net_repl_input_bytes,
6426 "total_net_output_bytes:%lld\r\n", stat_net_output_bytes + stat_net_repl_output_bytes,
6427 "total_net_repl_input_bytes:%lld\r\n", stat_net_repl_input_bytes,
6428 "total_net_repl_output_bytes:%lld\r\n", stat_net_repl_output_bytes,
6429 "instantaneous_input_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
6430 "instantaneous_output_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
6431 "instantaneous_input_repl_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION)/1024,
6432 "instantaneous_output_repl_kbps:%.2f\r\n", (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION)/1024,
6433 "rejected_connections:%lld\r\n", server.stat_rejected_conn,
6434 "sync_full:%lld\r\n", server.stat_sync_full,
6435 "sync_partial_ok:%lld\r\n", server.stat_sync_partial_ok,
6436 "sync_partial_err:%lld\r\n", server.stat_sync_partial_err,
6437 "expired_subkeys:%lld\r\n", server.stat_expired_subkeys,
6438 "expired_keys:%lld\r\n", server.stat_expiredkeys,
6439 "expired_stale_perc:%.2f\r\n", server.stat_expired_stale_perc*100,
6440 "expired_time_cap_reached_count:%lld\r\n", server.stat_expired_time_cap_reached_count,
6441 "expire_cycle_cpu_milliseconds:%lld\r\n", server.stat_expire_cycle_time_used/1000,
6442 "evicted_keys:%lld\r\n", server.stat_evictedkeys,
6443 "evicted_clients:%lld\r\n", server.stat_evictedclients,
6444 "evicted_scripts:%lld\r\n", server.stat_evictedscripts,
6445 "total_eviction_exceeded_time:%lld\r\n", (server.stat_total_eviction_exceeded_time + current_eviction_exceeded_time) / 1000,
6446 "current_eviction_exceeded_time:%lld\r\n", current_eviction_exceeded_time / 1000,
6447 "keyspace_hits:%lld\r\n", server.stat_keyspace_hits,
6448 "keyspace_misses:%lld\r\n", server.stat_keyspace_misses,
6449 "pubsub_channels:%llu\r\n", kvstoreSize(server.pubsub_channels),
6450 "pubsub_patterns:%lu\r\n", dictSize(server.pubsub_patterns),
6451 "pubsubshard_channels:%llu\r\n", kvstoreSize(server.pubsubshard_channels),
6452 "latest_fork_usec:%lld\r\n", server.stat_fork_time,
6453 "total_forks:%lld\r\n", server.stat_total_forks,
6454 "migrate_cached_sockets:%ld\r\n", dictSize(server.migrate_cached_sockets),
6455 "slave_expires_tracked_keys:%zu\r\n", getSlaveKeyWithExpireCount(),
6456 "active_defrag_hits:%lld\r\n", server.stat_active_defrag_hits,
6457 "active_defrag_misses:%lld\r\n", server.stat_active_defrag_misses,
6458 "active_defrag_key_hits:%lld\r\n", server.stat_active_defrag_key_hits,
6459 "active_defrag_key_misses:%lld\r\n", server.stat_active_defrag_key_misses,
6460 "total_active_defrag_time:%lld\r\n", (server.stat_total_active_defrag_time + current_active_defrag_time) / 1000,
6461 "current_active_defrag_time:%lld\r\n", current_active_defrag_time / 1000,
6462 "tracking_total_keys:%lld\r\n", (unsigned long long) trackingGetTotalKeys(),
6463 "tracking_total_items:%lld\r\n", (unsigned long long) trackingGetTotalItems(),
6464 "tracking_total_prefixes:%lld\r\n", (unsigned long long) trackingGetTotalPrefixes(),
6465 "unexpected_error_replies:%lld\r\n", server.stat_unexpected_error_replies,
6466 "total_error_replies:%lld\r\n", server.stat_total_error_replies,
6467 "dump_payload_sanitizations:%lld\r\n", server.stat_dump_payload_sanitizations,
6468 "total_reads_processed:%lld\r\n", stat_total_reads_processed,
6469 "total_writes_processed:%lld\r\n", stat_total_writes_processed,
6470 "io_threaded_reads_processed:%lld\r\n", stat_io_reads_processed,
6471 "io_threaded_writes_processed:%lld\r\n", stat_io_writes_processed,
6472 "io_threaded_total_prefetch_batches:%lld\r\n", server.stat_total_prefetch_batches,
6473 "io_threaded_total_prefetch_entries:%lld\r\n", server.stat_total_prefetch_entries,
6474 "client_query_buffer_limit_disconnections:%lld\r\n", stat_client_qbuf_limit_disconnections,
6475 "client_output_buffer_limit_disconnections:%lld\r\n", server.stat_client_outbuf_limit_disconnections,
6476 "reply_buffer_shrinks:%lld\r\n", server.stat_reply_buffer_shrinks,
6477 "reply_buffer_expands:%lld\r\n", server.stat_reply_buffer_expands,
6478 "eventloop_cycles:%llu\r\n", server.duration_stats[EL_DURATION_TYPE_EL].cnt,
6479 "eventloop_duration_sum:%llu\r\n", server.duration_stats[EL_DURATION_TYPE_EL].sum,
6480 "eventloop_duration_cmd_sum:%llu\r\n", server.duration_stats[EL_DURATION_TYPE_CMD].sum,
6481 "instantaneous_eventloop_cycles_per_sec:%llu\r\n", getInstantaneousMetric(STATS_METRIC_EL_CYCLE),
6482 "instantaneous_eventloop_duration_usec:%llu\r\n", getInstantaneousMetric(STATS_METRIC_EL_DURATION)));
6483 info = genRedisInfoStringACLStats(info);
6484 if (!server.cluster_enabled && server.cluster_compatibility_sample_ratio) {
6485 info = sdscatprintf(info, "cluster_incompatible_ops:%lld\r\n", server.stat_cluster_incompatible_ops);
6486 }
6487 }
6488
6489 /* Replication */
6490 if (all_sections || (dictFind(section_dict,"replication") != NULL)) {
6491 if (sections++) info = sdscat(info,"\r\n");
6492 info = sdscatprintf(info,
6493 "# Replication\r\n"
6494 "role:%s\r\n",
6495 server.masterhost == NULL ? "master" : "slave");
6496 if (server.masterhost) {
6497 long long slave_repl_offset = 1;
6498 long long slave_read_repl_offset = 1;
6499 time_t current_disconnect_time = server.repl_down_since ?
6500 server.unixtime - server.repl_down_since : 0 ;
6501
6502 if (server.master) {
6503 slave_repl_offset = server.master->reploff;
6504 slave_read_repl_offset = server.master->read_reploff;
6505 } else if (server.cached_master) {
6506 slave_repl_offset = server.cached_master->reploff;
6507 slave_read_repl_offset = server.cached_master->read_reploff;
6508 }
6509
6510 info = sdscatprintf(info, FMTARGS(
6511 "master_host:%s\r\n", server.masterhost,
6512 "master_port:%d\r\n", server.masterport,
6513 "master_link_status:%s\r\n", (server.repl_state == REPL_STATE_CONNECTED) ? "up" : "down",
6514 "master_last_io_seconds_ago:%d\r\n", server.master ? ((int)(server.unixtime-server.master->lastinteraction)) : -1,
6515 "master_sync_in_progress:%d\r\n", server.repl_state == REPL_STATE_TRANSFER,
6516 "slave_read_repl_offset:%lld\r\n", slave_read_repl_offset,
6517 "slave_repl_offset:%lld\r\n", slave_repl_offset,
6518 "replica_full_sync_buffer_size:%zu\r\n", server.repl_full_sync_buffer.size,
6519 "replica_full_sync_buffer_peak:%zu\r\n", server.repl_full_sync_buffer.peak,
6520 "master_current_sync_attempts:%lld\r\n", server.repl_current_sync_attempts,
6521 "master_total_sync_attempts:%lld\r\n", server.repl_total_sync_attempts));
6522 if (server.repl_state == REPL_STATE_TRANSFER) {
6523 double perc = 0;
6524 if (server.repl_transfer_size) {
6525 perc = ((double)server.repl_transfer_read / server.repl_transfer_size) * 100;
6526 }
6527 info = sdscatprintf(info, FMTARGS(
6528 "master_sync_total_bytes:%lld\r\n", (long long) server.repl_transfer_size,
6529 "master_sync_read_bytes:%lld\r\n", (long long) server.repl_transfer_read,
6530 "master_sync_left_bytes:%lld\r\n", (long long) (server.repl_transfer_size - server.repl_transfer_read),
6531 "master_sync_perc:%.2f\r\n", perc,
6532 "master_sync_last_io_seconds_ago:%d\r\n", (int)(server.unixtime-server.repl_transfer_lastio)));
6533 }
6534
6535 if (server.repl_state != REPL_STATE_CONNECTED) {
6536 info = sdscatprintf(info,
6537 "master_link_down_since_seconds:%jd\r\n",
6538 server.repl_down_since ?
6539 (intmax_t)(server.unixtime-server.repl_down_since) : -1);
6540 } else {
6541 info = sdscatprintf(info, FMTARGS(
6542 "master_link_up_since_seconds:%jd\r\n",
6543 server.repl_up_since ? /* defensive code, should never be 0 when connected */
6544 (intmax_t)(server.unixtime-server.repl_up_since) : -1,
6545 "master_client_io_thread:%d\r\n", server.master->tid));
6546 }
6547 info = sdscatprintf(info, "total_disconnect_time_sec:%jd\r\n", (intmax_t)server.repl_total_disconnect_time+(current_disconnect_time));
6548
6549 info = sdscatprintf(info, FMTARGS(
6550 "slave_priority:%d\r\n", server.slave_priority,
6551 "slave_read_only:%d\r\n", server.repl_slave_ro,
6552 "replica_announced:%d\r\n", server.replica_announced));
6553 }
6554
6555 info = sdscatprintf(info,
6556 "connected_slaves:%lu\r\n",
6557 replicationLogicalReplicaCount());
6558
6559 /* If min-slaves-to-write is active, write the number of slaves
6560 * currently considered 'good'. */
6561 if (server.repl_min_slaves_to_write &&
6562 server.repl_min_slaves_max_lag) {
6563 info = sdscatprintf(info,
6564 "min_slaves_good_slaves:%d\r\n",
6565 server.repl_good_slaves_count);
6566 }
6567
6568 if (listLength(server.slaves)) {
6569 int slaveid = 0;
6570 listNode *ln;
6571 listIter li;
6572
6573 listRewind(server.slaves,&li);
6574 while((ln = listNext(&li))) {
6575 client *slave = listNodeValue(ln);
6576 char ip[NET_IP_STR_LEN], *slaveip = slave->slave_addr;
6577 int port;
6578 long lag = 0;
6579
6580 /* During rdbchannel replication, replica opens two connections.
6581 * These are distinct slaves in server.slaves list from master
6582 * POV. We don't want to list these separately. If a rdbchannel
6583 * replica has an associated main-channel replica in
6584 * server.slaves list, we'll list main channel replica only. */
6585 if (replicationCheckHasMainChannel(slave))
6586 continue;
6587
6588 /* Don't list migration destination replicas. */
6589 if (slave->flags & CLIENT_ASM_MIGRATING)
6590 continue;
6591
6592 if (!slaveip) {
6593 if (connAddrPeerName(slave->conn,ip,sizeof(ip),&port) == -1)
6594 continue;
6595 slaveip = ip;
6596 }
6597 const char *state = replstateToString(slave->replstate);
6598 if (state[0] == '\0') continue;
6599 if (slave->replstate == SLAVE_STATE_ONLINE)
6600 lag = time(NULL) - slave->repl_ack_time;
6601
6602 info = sdscatprintf(info,
6603 "slave%d:ip=%s,port=%d,state=%s,"
6604 "offset=%lld,lag=%ld,io-thread=%d\r\n",
6605 slaveid,slaveip,slave->slave_listening_port,state,
6606 slave->repl_ack_off, lag, slave->tid);
6607 slaveid++;
6608 }
6609 }
6610 info = sdscatprintf(info, FMTARGS(
6611 "master_failover_state:%s\r\n", getFailoverStateString(),
6612 "master_replid:%s\r\n", server.replid,
6613 "master_replid2:%s\r\n", server.replid2,
6614 "master_repl_offset:%lld\r\n", server.master_repl_offset,
6615 "second_repl_offset:%lld\r\n", server.second_replid_offset,
6616 "repl_backlog_active:%d\r\n", server.repl_backlog != NULL,
6617 "repl_backlog_size:%lld\r\n", server.repl_backlog_size,
6618 "repl_backlog_first_byte_offset:%lld\r\n", server.repl_backlog ? server.repl_backlog->offset : 0,
6619 "repl_backlog_histlen:%lld\r\n", server.repl_backlog ? server.repl_backlog->histlen : 0));
6620 }
6621
6622 /* CPU */
6623 if (all_sections || (dictFind(section_dict,"cpu") != NULL)) {
6624 if (sections++) info = sdscat(info,"\r\n");
6625
6626 struct rusage self_ru, c_ru;
6627 getrusage(RUSAGE_SELF, &self_ru);
6628 getrusage(RUSAGE_CHILDREN, &c_ru);
6629 info = sdscatprintf(info,
6630 "# CPU\r\n"
6631 "used_cpu_sys:%ld.%06ld\r\n"
6632 "used_cpu_user:%ld.%06ld\r\n"
6633 "used_cpu_sys_children:%ld.%06ld\r\n"
6634 "used_cpu_user_children:%ld.%06ld\r\n",
6635 (long)self_ru.ru_stime.tv_sec, (long)self_ru.ru_stime.tv_usec,
6636 (long)self_ru.ru_utime.tv_sec, (long)self_ru.ru_utime.tv_usec,
6637 (long)c_ru.ru_stime.tv_sec, (long)c_ru.ru_stime.tv_usec,
6638 (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec);
6639#ifdef RUSAGE_THREAD
6640 struct rusage m_ru;
6641 getrusage(RUSAGE_THREAD, &m_ru);
6642 info = sdscatprintf(info,
6643 "used_cpu_sys_main_thread:%ld.%06ld\r\n"
6644 "used_cpu_user_main_thread:%ld.%06ld\r\n",
6645 (long)m_ru.ru_stime.tv_sec, (long)m_ru.ru_stime.tv_usec,
6646 (long)m_ru.ru_utime.tv_sec, (long)m_ru.ru_utime.tv_usec);
6647#endif /* RUSAGE_THREAD */
6648 }
6649
6650 /* Hotkeys */
6651 if (server.hotkeys &&
6652 (all_sections || (dictFind(section_dict,"hotkeys") != NULL)))
6653 {
6654 if (sections++) info = sdscat(info,"\r\n");
6655
6656 info = sdscatprintf(info, "# Hotkeys\r\n"
6657 "hotkeys-tracking-active:%d\r\n"
6658 "hotkeys-cmd-cpu-time:%lld\r\n",
6659 server.hotkeys->active ? 1 : 0,
6660 server.hotkeys->cpu_time);
6661 }
6662
6663 /* Modules */
6664 if (all_sections || (dictFind(section_dict,"module_list") != NULL) || (dictFind(section_dict,"modules") != NULL)) {
6665 if (sections++) info = sdscat(info,"\r\n");
6666 info = sdscatprintf(info,"# Modules\r\n");
6667 info = genModulesInfoString(info);
6668 }
6669
6670 /* Command statistics */
6671 if (all_sections || (dictFind(section_dict,"commandstats") != NULL)) {
6672 if (sections++) info = sdscat(info,"\r\n");
6673 info = sdscatprintf(info, "# Commandstats\r\n");
6674 info = genRedisInfoStringCommandStats(info, server.commands);
6675 }
6676
6677 /* Error statistics */
6678 if (all_sections || (dictFind(section_dict,"errorstats") != NULL)) {
6679 if (sections++) info = sdscat(info,"\r\n");
6680 info = sdscat(info, "# Errorstats\r\n");
6681 raxIterator ri;
6682 raxStart(&ri,server.errors);
6683 raxSeek(&ri,"^",NULL,0);
6684 struct redisError *e;
6685 while(raxNext(&ri)) {
6686 char *tmpsafe;
6687 e = (struct redisError *) ri.data;
6688 info = sdscatprintf(info,
6689 "errorstat_%.*s:count=%lld\r\n",
6690 (int)ri.key_len, getSafeInfoString((char *) ri.key, ri.key_len, &tmpsafe), e->count);
6691 if (tmpsafe != NULL) zfree(tmpsafe);
6692 }
6693 raxStop(&ri);
6694 }
6695
6696 /* Latency by percentile distribution per command */
6697 if (all_sections || (dictFind(section_dict,"latencystats") != NULL)) {
6698 if (sections++) info = sdscat(info,"\r\n");
6699 info = sdscatprintf(info, "# Latencystats\r\n");
6700 if (server.latency_tracking_enabled) {
6701 info = genRedisInfoStringLatencyStats(info, server.commands);
6702 }
6703 }
6704
6705 /* Cluster */
6706 if (all_sections || (dictFind(section_dict,"cluster") != NULL)) {
6707 if (sections++) info = sdscat(info,"\r\n");
6708 info = sdscatprintf(info,
6709 "# Cluster\r\n"
6710 "cluster_enabled:%d\r\n",
6711 server.cluster_enabled);
6712 }
6713
6714 /* Key space */
6715 if (all_sections || (dictFind(section_dict,"keyspace") != NULL)) {
6716 if (sections++) info = sdscat(info,"\r\n");
6717 info = sdscatprintf(info, "# Keyspace\r\n");
6718 for (j = 0; j < server.dbnum; j++) {
6719 long long keys, vkeys, subexpiry;
6720
6721 keys = kvstoreSize(server.db[j].keys);
6722 vkeys = kvstoreSize(server.db[j].expires);
6723 subexpiry = estoreSize(server.db[j].subexpires);
6724
6725 if (keys || vkeys) {
6726 info = sdscatprintf(info,
6727 "db%d:keys=%lld,expires=%lld,avg_ttl=%lld,subexpiry=%lld\r\n",
6728 j, keys, vkeys, server.db[j].avg_ttl, subexpiry);
6729 }
6730 }
6731 }
6732
6733 /* keysizes */
6734 if (all_sections || (dictFind(section_dict,"keysizes") != NULL)) {
6735 if (sections++) info = sdscat(info,"\r\n");
6736 info = sdscatprintf(info, "# Keysizes\r\n");
6737
6738 char *typestr[] = {
6739 [OBJ_STRING] = "distrib_strings_sizes",
6740 [OBJ_LIST] = "distrib_lists_items",
6741 [OBJ_SET] = "distrib_sets_items",
6742 [OBJ_ZSET] = "distrib_zsets_items",
6743 [OBJ_HASH] = "distrib_hashes_items"
6744 };
6745 serverAssert(sizeof(typestr)/sizeof(typestr[0]) == OBJ_TYPE_BASIC_MAX);
6746
6747 for (int dbnum = 0; dbnum < server.dbnum; dbnum++) {
6748 char *expSizeLabels[] = {
6749 "0", "1", "2", "4", "8", "16", "32", "64", "128", "256", "512", /* Byte */
6750 "1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K", /* Kilo */
6751 "1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M", /* Mega */
6752 "1G", "2G", "4G", "8G", "16G", "32G", "64G", "128G", "256G", "512G", /* Giga */
6753 "1T", "2T", "4T", "8T", "16T", "32T", "64T", "128T", "256T", "512T", /* Tera */
6754 "1P", "2P", "4P", "8P", "16P", "32P", "64P", "128P", "256P", "512P", /* Peta */
6755 "1E", "2E", "4E" /* Exa */
6756 };
6757
6758 if (kvstoreSize(server.db[dbnum].keys) == 0)
6759 continue;
6760
6761 for (int type = 0; type < OBJ_TYPE_BASIC_MAX; type++) {
6762 kvstoreMetadata *meta = kvstoreGetMetadata(server.db[dbnum].keys);
6763 int64_t *kvstoreHist = meta->keysizes_hist[type];
6764 char buf[10000];
6765 int cnt = 0, buflen = 0;
6766
6767 /* Print histogram to temp buf[]. First bin is garbage */
6768 buflen += snprintf(buf + buflen, sizeof(buf) - buflen, "db%d_%s:", dbnum, typestr[type]);
6769
6770 for (int i = 0; i < MAX_KEYSIZES_BINS; i++) {
6771 if (kvstoreHist[i] == 0)
6772 continue;
6773
6774 int res = snprintf(buf + buflen, sizeof(buf) - buflen,
6775 (cnt == 0) ? "%s=%llu" : ",%s=%llu",
6776 expSizeLabels[i], (unsigned long long) kvstoreHist[i]);
6777 if (res < 0) break;
6778 buflen += res;
6779 cnt += kvstoreHist[i];
6780 }
6781
6782 /* Print the temp buf[] to the info string */
6783 if (cnt) info = sdscatprintf(info, "%s\r\n", buf);
6784 }
6785 }
6786 }
6787
6788 /* Get info from modules.
6789 * Returned when the user asked for "everything", "modules", or a specific module section.
6790 * We're not aware of the module section names here, and we rather avoid the search when we can.
6791 * so we proceed if there's a requested section name that's not found yet, or when the user asked
6792 * for "all" with any additional section names. */
6793 if (everything || dictFind(section_dict, "modules") != NULL || sections < (int)dictSize(section_dict) ||
6794 (all_sections && dictSize(section_dict)))
6795 {
6796
6797 info = modulesCollectInfo(info,
6798 everything || dictFind(section_dict, "modules") != NULL ? NULL: section_dict,
6799 0, /* not a crash report */
6800 sections);
6801 }
6802
6803 if (dictFind(section_dict, "debug") != NULL) {
6804 if (sections++) info = sdscat(info,"\r\n");
6805 info = sdscatprintf(info, "# Debug\r\n" FMTARGS(
6806 "eventloop_duration_aof_sum:%llu\r\n", server.duration_stats[EL_DURATION_TYPE_AOF].sum,
6807 "eventloop_duration_cron_sum:%llu\r\n", server.duration_stats[EL_DURATION_TYPE_CRON].sum,
6808 "eventloop_duration_max:%llu\r\n", server.duration_stats[EL_DURATION_TYPE_EL].max,
6809 "eventloop_cmd_per_cycle_max:%lld\r\n", server.el_cmd_cnt_max,
6810 "allocator_allocated_lua:%zu\r\n", server.cron_malloc_stats.lua_allocator_allocated,
6811 "allocator_active_lua:%zu\r\n", server.cron_malloc_stats.lua_allocator_active,
6812 "allocator_resident_lua:%zu\r\n", server.cron_malloc_stats.lua_allocator_resident,
6813 "allocator_frag_bytes_lua:%zu\r\n", server.cron_malloc_stats.lua_allocator_frag_smallbins_bytes));
6814 }
6815
6816 return info;
6817}
6818
6819/* INFO [<section> [<section> ...]] */
6820void infoCommand(client *c) {
6821 if (server.sentinel_mode) {
6822 sentinelInfoCommand(c);
6823 return;
6824 }
6825 int all_sections = 0;
6826 int everything = 0;
6827 dict *sections_dict = genInfoSectionDict(c->argv+1, c->argc-1, NULL, &all_sections, &everything);
6828 sds info = genRedisInfoString(sections_dict, all_sections, everything);
6829 addReplyVerbatim(c,info,sdslen(info),"txt");
6830 sdsfree(info);
6831 releaseInfoSectionDict(sections_dict);
6832 return;
6833}
6834
6835void monitorCommand(client *c) {
6836 if (c->flags & CLIENT_DENY_BLOCKING) {
6837 /**
6838 * A client that has CLIENT_DENY_BLOCKING flag on
6839 * expects a reply per command and so can't execute MONITOR. */
6840 addReplyError(c, "MONITOR isn't allowed for DENY BLOCKING client");
6841 return;
6842 }
6843
6844 /* ignore MONITOR if already slave or in monitor mode */
6845 if (c->flags & CLIENT_SLAVE) return;
6846
6847 c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
6848 listAddNodeTail(server.monitors,c);
6849 addReply(c,shared.ok);
6850}
6851
6852/* =================================== Main! ================================ */
6853
6854int checkIgnoreWarning(const char *warning) {
6855 int argc, j;
6856 sds *argv = sdssplitargs(server.ignore_warnings, &argc);
6857 if (argv == NULL)
6858 return 0;
6859
6860 for (j = 0; j < argc; j++) {
6861 char *flag = argv[j];
6862 if (!strcasecmp(flag, warning))
6863 break;
6864 }
6865 sdsfreesplitres(argv,argc);
6866 return j < argc;
6867}
6868
6869#ifdef __linux__
6870#include <sys/prctl.h>
6871/* since linux-3.5, kernel supports to set the state of the "THP disable" flag
6872 * for the calling thread. PR_SET_THP_DISABLE is defined in linux/prctl.h */
6873static int THPDisable(void) {
6874 int ret = -EINVAL;
6875
6876 if (!server.disable_thp)
6877 return ret;
6878
6879#ifdef PR_SET_THP_DISABLE
6880 ret = prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
6881#endif
6882
6883 return ret;
6884}
6885
6886void linuxMemoryWarnings(void) {
6887 sds err_msg = NULL;
6888 if (checkOvercommit(&err_msg) < 0) {
6889 serverLog(LL_WARNING,"WARNING %s", err_msg);
6890 sdsfree(err_msg);
6891 }
6892 if (checkTHPEnabled(&err_msg) < 0) {
6893 server.thp_enabled = 1;
6894 if (THPDisable() == 0) {
6895 server.thp_enabled = 0;
6896 } else {
6897 serverLog(LL_WARNING, "WARNING %s", err_msg);
6898 }
6899 sdsfree(err_msg);
6900 }
6901}
6902#endif /* __linux__ */
6903
6904void createPidFile(void) {
6905 /* If pidfile requested, but no pidfile defined, use
6906 * default pidfile path */
6907 if (!server.pidfile) server.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE);
6908
6909 /* Try to write the pid file in a best-effort way. */
6910 FILE *fp = fopen(server.pidfile,"w");
6911 if (fp) {
6912 fprintf(fp,"%d\n",(int)getpid());
6913 fclose(fp);
6914 } else {
6915 serverLog(LL_WARNING, "Failed to write PID file: %s", strerror(errno));
6916 }
6917}
6918
6919void daemonize(void) {
6920 int fd;
6921
6922 if (fork() != 0) exit(0); /* parent exits */
6923 setsid(); /* create a new session */
6924
6925 /* Every output goes to /dev/null. If Redis is daemonized but
6926 * the 'logfile' is set to 'stdout' in the configuration file
6927 * it will not log at all. */
6928 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
6929 dup2(fd, STDIN_FILENO);
6930 dup2(fd, STDOUT_FILENO);
6931 dup2(fd, STDERR_FILENO);
6932 if (fd > STDERR_FILENO) close(fd);
6933 }
6934}
6935
6936sds getVersion(void) {
6937 sds version = sdscatprintf(sdsempty(),
6938 "v=%s sha=%s:%d malloc=%s bits=%d build=%llx",
6939 REDIS_VERSION,
6940 redisGitSHA1(),
6941 atoi(redisGitDirty()) > 0,
6942 ZMALLOC_LIB,
6943 sizeof(long) == 4 ? 32 : 64,
6944 (unsigned long long) redisBuildId());
6945 return version;
6946}
6947
/* Prints the command line help to stderr and terminates the process with
 * exit status 1. */
void usage(void) {
    static const char *const help_lines[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options] [-]\n",
        " ./redis-server - (read config from stdin)\n",
        " ./redis-server -v or --version\n",
        " ./redis-server -h or --help\n",
        " ./redis-server --test-memory <megabytes>\n",
        " ./redis-server --check-system\n",
        "\n",
        "Examples:\n",
        " ./redis-server (run the server with default conf)\n",
        " echo 'maxmemory 128mb' | ./redis-server -\n",
        " ./redis-server /etc/redis/6379.conf\n",
        " ./redis-server --port 7777\n",
        " ./redis-server --port 7777 --replicaof 127.0.0.1 8888\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose -\n",
        " ./redis-server /etc/myredis.conf --loglevel verbose\n\n",
        "Sentinel mode:\n",
        " ./redis-server /etc/sentinel.conf --sentinel\n",
    };
    const size_t count = sizeof(help_lines) / sizeof(help_lines[0]);

    /* None of the lines contains a conversion specifier, so they are
     * written verbatim. */
    for (size_t i = 0; i < count; i++) {
        fputs(help_lines[i], stderr);
    }
    exit(1);
}
6968
6969void redisAsciiArt(void) {
6970#include "asciilogo.h"
6971 char *buf = zmalloc(1024*16);
6972 char *mode;
6973
6974 if (server.cluster_enabled) mode = "cluster";
6975 else if (server.sentinel_mode) mode = "sentinel";
6976 else mode = "standalone";
6977
6978 /* Show the ASCII logo if: log file is stdout AND stdout is a
6979 * tty AND syslog logging is disabled. Also show logo if the user
6980 * forced us to do so via redis.conf. */
6981 int show_logo = ((!server.syslog_enabled &&
6982 server.logfile[0] == '\0' &&
6983 isatty(fileno(stdout))) ||
6984 server.always_show_logo);
6985
6986 if (!show_logo) {
6987 serverLog(LL_NOTICE,
6988 "Running mode=%s, port=%d.",
6989 mode, server.port ? server.port : server.tls_port
6990 );
6991 } else {
6992 snprintf(buf,1024*16,ascii_logo,
6993 REDIS_VERSION,
6994 redisGitSHA1(),
6995 strtol(redisGitDirty(),NULL,10) > 0,
6996 (sizeof(long) == 8) ? "64" : "32",
6997 mode, server.port ? server.port : server.tls_port,
6998 (long) getpid()
6999 );
7000 serverLogRaw(LL_NOTICE|LL_RAW,buf);
7001 }
7002 zfree(buf);
7003}
7004
7005/* Get the server listener by type name */
7006connListener *listenerByType(const char *typename) {
7007 int conn_index;
7008
7009 conn_index = connectionIndexByType(typename);
7010 if (conn_index < 0)
7011 return NULL;
7012
7013 return &server.listeners[conn_index];
7014}
7015
7016/* Close original listener, re-create a new listener from the updated bind address & port */
7017int changeListener(connListener *listener) {
7018 /* Close old servers */
7019 closeListener(listener);
7020
7021 /* Just close the server if port disabled */
7022 if (listener->port == 0) {
7023 if (server.set_proc_title) redisSetProcTitle(NULL);
7024 return C_OK;
7025 }
7026
7027 /* Re-create listener */
7028 if (connListen(listener) != C_OK) {
7029 return C_ERR;
7030 }
7031
7032 /* Create event handlers */
7033 if (createSocketAcceptHandler(listener, listener->ct->accept_handler) != C_OK) {
7034 serverPanic("Unrecoverable error creating %s accept handler.", listener->ct->get_type(NULL));
7035 }
7036
7037 if (server.set_proc_title) redisSetProcTitle(NULL);
7038
7039 return C_OK;
7040}
7041
7042static void sigShutdownHandler(int sig) {
7043 char *msg;
7044
7045 switch (sig) {
7046 case SIGINT:
7047 msg = "Received SIGINT scheduling shutdown...";
7048 break;
7049 case SIGTERM:
7050 msg = "Received SIGTERM scheduling shutdown...";
7051 break;
7052 default:
7053 msg = "Received shutdown signal, scheduling shutdown...";
7054 };
7055
7056 /* SIGINT is often delivered via Ctrl+C in an interactive session.
7057 * If we receive the signal the second time, we interpret this as
7058 * the user really wanting to quit ASAP without waiting to persist
7059 * on disk and without waiting for lagging replicas. */
7060 if (shouldShutdownAsap() && sig == SIGINT) {
7061 serverLogRawFromHandler(LL_WARNING, "You insist... exiting now.");
7062 rdbRemoveTempFile(getpid(), 1);
7063 exit(1); /* Exit with an error since this was not a clean shutdown. */
7064 } else if (server.loading) {
7065 msg = "Received shutdown signal during loading, scheduling shutdown.";
7066 }
7067
7068 serverLogRawFromHandler(LL_WARNING, msg);
7069 atomicSet(server.shutdown_asap, 1);
7070 atomicSet(server.last_sig_received, sig);
7071}
7072
7073void setupSignalHandlers(void) {
7074 struct sigaction act;
7075
7076 sigemptyset(&act.sa_mask);
7077 act.sa_flags = 0;
7078 act.sa_handler = sigShutdownHandler;
7079 sigaction(SIGTERM, &act, NULL);
7080 sigaction(SIGINT, &act, NULL);
7081
7082 setupDebugSigHandlers();
7083}
7084
7085/* This is the signal handler for children process. It is currently useful
7086 * in order to track the SIGUSR1, that we send to a child in order to terminate
7087 * it in a clean way, without the parent detecting an error and stop
7088 * accepting writes because of a write error condition. */
7089static void sigKillChildHandler(int sig) {
7090 UNUSED(sig);
7091 int level = server.in_fork_child == CHILD_TYPE_MODULE? LL_VERBOSE: LL_WARNING;
7092 serverLogRawFromHandler(level, "Received SIGUSR1 in child, exiting now.");
7093 /* We don't want to perform any IO in the child when the parent is terminating us.
7094 * We don't know what our stack trace is, it is possible that we were called during an IO operation
7095 * If we were to do another IO operation, we might end up in a deadlock */
7096 exitFromChild(SERVER_CHILD_NOERROR_RETVAL, 1);
7097}
7098
7099void setupChildSignalHandlers(void) {
7100 struct sigaction act;
7101
7102 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
7103 * Otherwise, sa_handler is used. */
7104 sigemptyset(&act.sa_mask);
7105 act.sa_flags = 0;
7106 act.sa_handler = sigKillChildHandler;
7107 sigaction(SIGUSR1, &act, NULL);
7108}
7109
7110/* After fork, the child process will inherit the resources
7111 * of the parent process, e.g. fd(socket or flock) etc.
7112 * should close the resources not used by the child process, so that if the
7113 * parent restarts it can bind/lock despite the child possibly still running. */
7114void closeChildUnusedResourceAfterFork(void) {
7115 closeListeningSockets(0);
7116 if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
7117 close(server.cluster_config_file_lock_fd); /* don't care if this fails */
7118
7119 /* Clear server.pidfile, this is the parent pidfile which should not
7120 * be touched (or deleted) by the child (on exit / crash) */
7121 zfree(server.pidfile);
7122 server.pidfile = NULL;
7123}
7124
7125/* purpose is one of CHILD_TYPE_ types */
7126int redisFork(int purpose) {
7127 if (isMutuallyExclusiveChildType(purpose)) {
7128 if (hasActiveChildProcess()) {
7129 errno = EEXIST;
7130 return -1;
7131 }
7132
7133 openChildInfoPipe();
7134 }
7135
7136 int childpid;
7137 long long start = ustime();
7138 if ((childpid = fork()) == 0) {
7139 /* Child.
7140 *
7141 * The order of setting things up follows some reasoning:
7142 * Setup signal handlers first because a signal could fire at any time.
7143 * Adjust OOM score before everything else to assist the OOM killer if
7144 * memory resources are low.
7145 */
7146 server.in_fork_child = purpose;
7147 setupChildSignalHandlers();
7148 setOOMScoreAdj(CONFIG_OOM_BGCHILD);
7149 updateDictResizePolicy();
7150 dismissMemoryInChild();
7151 closeChildUnusedResourceAfterFork();
7152 /* Close the reading part, so that if the parent crashes, the child will
7153 * get a write error and exit. */
7154 if (server.child_info_pipe[0] != -1)
7155 close(server.child_info_pipe[0]);
7156 } else {
7157 /* Parent */
7158 if (childpid == -1) {
7159 int fork_errno = errno;
7160 if (isMutuallyExclusiveChildType(purpose)) closeChildInfoPipe();
7161 errno = fork_errno;
7162 return -1;
7163 }
7164
7165 server.stat_total_forks++;
7166 server.stat_fork_time = ustime()-start;
7167 server.stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024*1024); /* GB per second. */
7168 latencyAddSampleIfNeeded("fork",server.stat_fork_time/1000);
7169
7170 /* The child_pid and child_type are only for mutually exclusive children.
7171 * other child types should handle and store their pid's in dedicated variables.
7172 *
7173 * Today, we allows CHILD_TYPE_LDB to run in parallel with the other fork types:
7174 * - it isn't used for production, so it will not make the server be less efficient
7175 * - used for debugging, and we don't want to block it from running while other
7176 * forks are running (like RDB and AOF) */
7177 if (isMutuallyExclusiveChildType(purpose)) {
7178 server.child_pid = childpid;
7179 server.child_type = purpose;
7180 server.stat_current_cow_peak = 0;
7181 server.stat_current_cow_bytes = 0;
7182 server.stat_current_cow_updated = 0;
7183 server.stat_current_save_keys_processed = 0;
7184 server.stat_module_progress = 0;
7185 server.stat_current_save_keys_total = dbTotalServerKeyCount();
7186 }
7187
7188 updateDictResizePolicy();
7189 moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
7190 REDISMODULE_SUBEVENT_FORK_CHILD_BORN,
7191 NULL);
7192 }
7193 return childpid;
7194}
7195
7196void sendChildCowInfo(childInfoType info_type, char *pname) {
7197 sendChildInfoGeneric(info_type, 0, -1, pname);
7198}
7199
7200void sendChildInfo(childInfoType info_type, size_t keys, char *pname) {
7201 sendChildInfoGeneric(info_type, keys, -1, pname);
7202}
7203
7204/* Try to release pages back to the OS directly (bypassing the allocator),
7205 * in an effort to decrease CoW during fork. For small allocations, we can't
7206 * release any full page, so in an effort to avoid getting the size of the
7207 * allocation from the allocator (malloc_size) when we already know it's small,
7208 * we check the size_hint. If the size is not already known, passing a size_hint
7209 * of 0 will lead the checking the real size of the allocation.
7210 * Also please note that the size may be not accurate, so in order to make this
7211 * solution effective, the judgement for releasing memory pages should not be
7212 * too strict. */
7213void dismissMemory(void* ptr, size_t size_hint) {
7214 if (ptr == NULL) return;
7215
7216 /* madvise(MADV_DONTNEED) can not release pages if the size of memory
7217 * is too small, we try to release only for the memory which the size
7218 * is more than half of page size. */
7219 if (size_hint && size_hint <= server.page_size/2) return;
7220
7221 zmadvise_dontneed(ptr);
7222}
7223
7224/* Dismiss big chunks of memory inside a client structure, see dismissMemory() */
7225void dismissClientMemory(client *c) {
7226 /* Dismiss client query buffer and static reply buffer. */
7227 dismissMemory(c->buf, c->buf_usable_size);
7228 if (c->querybuf) dismissSds(c->querybuf);
7229 /* Dismiss argv array only if we estimate it contains a big buffer. */
7230 if (c->argc && c->all_argv_len_sum/c->argc >= server.page_size) {
7231 for (int i = 0; i < c->argc; i++) {
7232 dismissObject(c->argv[i], 0);
7233 }
7234 }
7235 if (c->argc) dismissMemory(c->argv, c->argc*sizeof(robj*));
7236
7237 /* Dismiss the reply array only if the average buffer size is bigger
7238 * than a page. */
7239 if (listLength(c->reply) &&
7240 c->reply_bytes/listLength(c->reply) >= server.page_size)
7241 {
7242 listIter li;
7243 listNode *ln;
7244 listRewind(c->reply, &li);
7245 while ((ln = listNext(&li))) {
7246 clientReplyBlock *bulk = listNodeValue(ln);
7247 /* Default bulk size is 16k, actually it has extra data, maybe it
7248 * occupies 20k according to jemalloc bin size if using jemalloc. */
7249 if (bulk) dismissMemory(bulk, bulk->size);
7250 }
7251 }
7252}
7253
7254/* In the child process, we don't need some buffers anymore, and these are
7255 * likely to change in the parent when there's heavy write traffic.
7256 * We dismiss them right away, to avoid CoW.
7257 * see dismissMemeory(). */
7258void dismissMemoryInChild(void) {
7259 /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
7260 if (server.thp_enabled) return;
7261
7262 /* Currently we use zmadvise_dontneed only when we use jemalloc with Linux.
7263 * so we avoid these pointless loops when they're not going to do anything. */
7264#if defined(USE_JEMALLOC) && defined(__linux__)
7265 listIter li;
7266 listNode *ln;
7267
7268 /* Dismiss replication buffer. We don't need to separately dismiss replication
7269 * backlog and replica' output buffer, because they just reference the global
7270 * replication buffer but don't cost real memory. */
7271 listRewind(server.repl_buffer_blocks, &li);
7272 while((ln = listNext(&li))) {
7273 replBufBlock *o = listNodeValue(ln);
7274 dismissMemory(o, o->size);
7275 }
7276
7277 /* Dismiss accumulated repl buffer on replica. */
7278 if (server.repl_full_sync_buffer.blocks) {
7279 listRewind(server.repl_full_sync_buffer.blocks, &li);
7280 while((ln = listNext(&li))) {
7281 replDataBufBlock *o = listNodeValue(ln);
7282 dismissMemory(o, o->size);
7283 }
7284 }
7285
7286 /* Dismiss all clients memory. */
7287 listRewind(server.clients, &li);
7288 while((ln = listNext(&li))) {
7289 client *c = listNodeValue(ln);
7290 dismissClientMemory(c);
7291 }
7292#endif
7293}
7294
7295void memtest(size_t megabytes, int passes);
7296
/* Tell whether the server should run in Sentinel mode.
 *
 * Returns 1 if 'exec_name' contains "redis-sentinel", or if any argument
 * after argv[0] is exactly "--sentinel". Returns 0 otherwise. */
int checkForSentinelMode(int argc, char **argv, char *exec_name) {
    int found = (strstr(exec_name,"redis-sentinel") != NULL);
    int idx = 1;

    while (!found && idx < argc) {
        found = (strcmp(argv[idx],"--sentinel") == 0);
        idx++;
    }
    return found;
}
7306
7307/* Function called at startup to load RDB or AOF file in memory. */
7308void loadDataFromDisk(void) {
7309 long long start = ustime();
7310 if (server.aof_state == AOF_ON) {
7311 int ret = loadAppendOnlyFiles(server.aof_manifest);
7312 if (ret == AOF_FAILED || ret == AOF_OPEN_ERR)
7313 exit(1);
7314 if (ret != AOF_NOT_EXIST)
7315 serverLog(LL_NOTICE, "DB loaded from append only file: %.3f seconds", (float)(ustime()-start)/1000000);
7316 updateReplOffsetAndResetEndOffset();
7317 } else {
7318 rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
7319 int rsi_is_valid = 0;
7320 errno = 0; /* Prevent a stale value from affecting error checking */
7321 int rdb_flags = RDBFLAGS_NONE;
7322 if (iAmMaster()) {
7323 /* Master may delete expired keys when loading, we should
7324 * propagate expire to replication backlog. */
7325 createReplicationBacklog();
7326 rdb_flags |= RDBFLAGS_FEED_REPL;
7327 }
7328 int rdb_load_ret = rdbLoad(server.rdb_filename, &rsi, rdb_flags);
7329 if (rdb_load_ret == RDB_OK) {
7330 serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
7331 (float)(ustime()-start)/1000000);
7332
7333 /* Restore the replication ID / offset from the RDB file. */
7334 if (rsi.repl_id_is_set &&
7335 rsi.repl_offset != -1 &&
7336 /* Note that older implementations may save a repl_stream_db
7337 * of -1 inside the RDB file in a wrong way, see more
7338 * information in function rdbPopulateSaveInfo. */
7339 rsi.repl_stream_db != -1)
7340 {
7341 rsi_is_valid = 1;
7342 if (!iAmMaster()) {
7343 memcpy(server.replid,rsi.repl_id,sizeof(server.replid));
7344 server.master_repl_offset = rsi.repl_offset;
7345 /* If this is a replica, create a cached master from this
7346 * information, in order to allow partial resynchronizations
7347 * with masters. */
7348 replicationCacheMasterUsingMyself();
7349 selectDb(server.cached_master,rsi.repl_stream_db);
7350 } else {
7351 /* If this is a master, we can save the replication info
7352 * as secondary ID and offset, in order to allow replicas
7353 * to partial resynchronizations with masters. */
7354 memcpy(server.replid2,rsi.repl_id,sizeof(server.replid));
7355 server.second_replid_offset = rsi.repl_offset+1;
7356 /* Rebase master_repl_offset from rsi.repl_offset. */
7357 server.master_repl_offset += rsi.repl_offset;
7358 serverAssert(server.repl_backlog);
7359 server.repl_backlog->offset = server.master_repl_offset -
7360 server.repl_backlog->histlen + 1;
7361 rebaseReplicationBuffer(rsi.repl_offset);
7362 server.repl_no_slaves_since = time(NULL);
7363 }
7364 }
7365 } else if (rdb_load_ret != RDB_NOT_EXIST) {
7366 serverLog(LL_WARNING, "Fatal error loading the DB, check server logs. Exiting.");
7367 exit(1);
7368 }
7369
7370 /* We always create replication backlog if server is a master, we need
7371 * it because we put DELs in it when loading expired keys in RDB, but
7372 * if RDB doesn't have replication info or there is no rdb, it is not
7373 * possible to support partial resynchronization, to avoid extra memory
7374 * of replication backlog, we drop it. */
7375 if (!rsi_is_valid && server.repl_backlog)
7376 freeReplicationBacklog();
7377 }
7378}
7379
7380void redisOutOfMemoryHandler(size_t allocation_size) {
7381 serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
7382 allocation_size);
7383 serverPanic("Redis aborting for OUT OF MEMORY. Allocating %zu bytes!",
7384 allocation_size);
7385}
7386
7387/* Callback for sdstemplate on proc-title-template. See redis.conf for
7388 * supported variables.
7389 */
7390static sds redisProcTitleGetVariable(const sds varname, void *arg)
7391{
7392 if (!strcmp(varname, "title")) {
7393 return sdsnew(arg);
7394 } else if (!strcmp(varname, "listen-addr")) {
7395 if (server.port || server.tls_port)
7396 return sdscatprintf(sdsempty(), "%s:%u",
7397 server.bindaddr_count ? server.bindaddr[0] : "*",
7398 server.port ? server.port : server.tls_port);
7399 else
7400 return sdscatprintf(sdsempty(), "unixsocket:%s", server.unixsocket);
7401 } else if (!strcmp(varname, "server-mode")) {
7402 if (server.cluster_enabled) return sdsnew("[cluster]");
7403 else if (server.sentinel_mode) return sdsnew("[sentinel]");
7404 else return sdsempty();
7405 } else if (!strcmp(varname, "config-file")) {
7406 return sdsnew(server.configfile ? server.configfile : "-");
7407 } else if (!strcmp(varname, "port")) {
7408 return sdscatprintf(sdsempty(), "%u", server.port);
7409 } else if (!strcmp(varname, "tls-port")) {
7410 return sdscatprintf(sdsempty(), "%u", server.tls_port);
7411 } else if (!strcmp(varname, "unixsocket")) {
7412 return sdsnew(server.unixsocket);
7413 } else
7414 return NULL; /* Unknown variable name */
7415}
7416
7417/* Expand the specified proc-title-template string and return a newly
7418 * allocated sds, or NULL. */
7419static sds expandProcTitleTemplate(const char *template, const char *title) {
7420 sds res = sdstemplate(template, redisProcTitleGetVariable, (void *) title);
7421 if (!res)
7422 return NULL;
7423 return sdstrim(res, " ");
7424}
7425/* Validate the specified template, returns 1 if valid or 0 otherwise. */
7426int validateProcTitleTemplate(const char *template) {
7427 int ok = 1;
7428 sds res = expandProcTitleTemplate(template, "");
7429 if (!res)
7430 return 0;
7431 if (sdslen(res) == 0) ok = 0;
7432 sdsfree(res);
7433 return ok;
7434}
7435
7436int redisSetProcTitle(char *title) {
7437#ifdef USE_SETPROCTITLE
7438 if (!title) title = server.exec_argv[0];
7439 sds proc_title = expandProcTitleTemplate(server.proc_title_template, title);
7440 if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */
7441
7442 setproctitle("%s", proc_title);
7443 sdsfree(proc_title);
7444#else
7445 UNUSED(title);
7446#endif
7447
7448 return C_OK;
7449}
7450
/* Forward 'cpulist' to setcpuaffinity() when the build defines
 * USE_SETCPUAFFINITY; otherwise do nothing. */
void redisSetCpuAffinity(const char *cpulist) {
#ifndef USE_SETCPUAFFINITY
    UNUSED(cpulist);
#else
    setcpuaffinity(cpulist);
#endif
}
7458
/* Send a notify message to systemd. Returns the sd_notify return code,
 * which is a positive number on success; a zero or negative code is also
 * logged as a warning. When built without HAVE_LIBSYSTEMD nothing is sent
 * and 0 is returned. */
int redisCommunicateSystemd(const char *sd_notify_msg) {
#ifndef HAVE_LIBSYSTEMD
    UNUSED(sd_notify_msg);
    return 0;
#else
    int rc = sd_notify(0, sd_notify_msg);

    if (rc < 0) {
        serverLog(LL_WARNING, "systemd supervision error: sd_notify: %d", rc);
    } else if (rc == 0) {
        serverLog(LL_WARNING, "systemd supervision error: NOTIFY_SOCKET not found!");
    }
    return rc;
#endif
}
7475
7476/* Attempt to set up upstart supervision. Returns 1 if successful. */
7477static int redisSupervisedUpstart(void) {
7478 const char *upstart_job = getenv("UPSTART_JOB");
7479
7480 if (!upstart_job) {
7481 serverLog(LL_WARNING,
7482 "upstart supervision requested, but UPSTART_JOB not found!");
7483 return 0;
7484 }
7485
7486 serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness.");
7487 raise(SIGSTOP);
7488 unsetenv("UPSTART_JOB");
7489 return 1;
7490}
7491
7492/* Attempt to set up systemd supervision. Returns 1 if successful. */
7493static int redisSupervisedSystemd(void) {
7494#ifndef HAVE_LIBSYSTEMD
7495 serverLog(LL_WARNING,
7496 "systemd supervision requested or auto-detected, but Redis is compiled without libsystemd support!");
7497 return 0;
7498#else
7499 if (redisCommunicateSystemd("STATUS=Redis is loading...\n") <= 0)
7500 return 0;
7501 serverLog(LL_NOTICE,
7502 "Supervised by systemd. Please make sure you set appropriate values for TimeoutStartSec and TimeoutStopSec in your service unit.");
7503 return 1;
7504#endif
7505}
7506
7507int redisIsSupervised(int mode) {
7508 int ret = 0;
7509
7510 if (mode == SUPERVISED_AUTODETECT) {
7511 if (getenv("UPSTART_JOB")) {
7512 serverLog(LL_VERBOSE, "Upstart supervision detected.");
7513 mode = SUPERVISED_UPSTART;
7514 } else if (getenv("NOTIFY_SOCKET")) {
7515 serverLog(LL_VERBOSE, "Systemd supervision detected.");
7516 mode = SUPERVISED_SYSTEMD;
7517 }
7518 }
7519
7520 switch (mode) {
7521 case SUPERVISED_UPSTART:
7522 ret = redisSupervisedUpstart();
7523 break;
7524 case SUPERVISED_SYSTEMD:
7525 ret = redisSupervisedSystemd();
7526 break;
7527 default:
7528 break;
7529 }
7530
7531 if (ret)
7532 server.supervised_mode = mode;
7533
7534 return ret;
7535}
7536
7537int iAmMaster(void) {
7538 return ((!server.cluster_enabled && server.masterhost == NULL) ||
7539 (server.cluster_enabled && clusterNodeIsMaster(getMyClusterNode())));
7540}
7541
7542#ifdef REDIS_TEST
7543#include "testhelp.h"
7544#include "intset.h" /* Compact integer set structure */
7545
7546int __failed_tests = 0;
7547int __test_num = 0;
7548
7549/* The flags are the following:
7550* --accurate: Runs tests with more iterations.
7551* --large-memory: Enables tests that consume more than 100mb. */
7552typedef int redisTestProc(int argc, char **argv, int flags);
7553int bitopsTest(int argc, char **argv, int flags);
7554int zsetTest(int argc, char **argv, int flags);
7555struct redisTest {
7556 char *name;
7557 redisTestProc *proc;
7558 int failed;
7559} redisTests[] = {
7560 {"ziplist", ziplistTest},
7561 {"quicklist", quicklistTest},
7562 {"intset", intsetTest},
7563 {"zipmap", zipmapTest},
7564 {"sha1test", sha1Test},
7565 {"util", utilTest},
7566 {"endianconv", endianconvTest},
7567 {"crc64", crc64Test},
7568 {"zmalloc", zmalloc_test},
7569 {"sds", sdsTest},
7570 {"mstr", mstrTest},
7571 {"dict", dictTest},
7572 {"listpack", listpackTest},
7573 {"kvstore", kvstoreTest},
7574 {"fwtree", fwtreeTest},
7575 {"estore", estoreTest},
7576 {"ebuckets", ebucketsTest},
7577 {"bitmap", bitopsTest},
7578 {"rax", raxTest},
7579 {"zset", zsetTest},
7580 {"topk", chkTopKTest},
7581};
7582redisTestProc *getTestProcByName(const char *name) {
7583 int numtests = sizeof(redisTests)/sizeof(struct redisTest);
7584 for (int j = 0; j < numtests; j++) {
7585 if (!strcasecmp(name,redisTests[j].name)) {
7586 return redisTests[j].proc;
7587 }
7588 }
7589 return NULL;
7590}
7591#endif
7592
7593int main(int argc, char **argv) {
7594 struct timeval tv;
7595 int j;
7596 char config_from_stdin = 0;
7597
7598#ifdef REDIS_TEST
7599 monotonicInit(); /* Required for dict tests, that are relying on monotime during dict rehashing. */
7600 if (argc >= 3 && !strcasecmp(argv[1], "test")) {
7601 int flags = 0;
7602 for (j = 3; j < argc; j++) {
7603 char *arg = argv[j];
7604 if (!strcasecmp(arg, "--accurate")) flags |= REDIS_TEST_ACCURATE;
7605 else if (!strcasecmp(arg, "--large-memory")) flags |= REDIS_TEST_LARGE_MEMORY;
7606 else if (!strcasecmp(arg, "--valgrind")) flags |= REDIS_TEST_VALGRIND;
7607 else if (!strcasecmp(arg, "--verbose")) flags |= REDIS_TEST_VERBOSE;
7608 }
7609
7610 if (!strcasecmp(argv[2], "all")) {
7611 int numtests = sizeof(redisTests)/sizeof(struct redisTest);
7612 for (j = 0; j < numtests; j++) {
7613 redisTests[j].failed = (redisTests[j].proc(argc,argv,flags) != 0);
7614 }
7615
7616 /* Report tests result */
7617 int failed_num = 0;
7618 for (j = 0; j < numtests; j++) {
7619 if (redisTests[j].failed) {
7620 failed_num++;
7621 printf("[failed] Test - %s\n", redisTests[j].name);
7622 } else {
7623 printf("[ok] Test - %s\n", redisTests[j].name);
7624 }
7625 }
7626
7627 printf("%d tests, %d passed, %d failed\n", numtests,
7628 numtests-failed_num, failed_num);
7629
7630 return failed_num == 0 ? 0 : 1;
7631 } else {
7632 redisTestProc *proc = getTestProcByName(argv[2]);
7633 if (!proc) return -1; /* test not found */
7634 return proc(argc,argv,flags);
7635 }
7636
7637 return 0;
7638 }
7639#endif
7640
7641 /* We need to initialize our libraries, and the server configuration. */
7642#ifdef INIT_SETPROCTITLE_REPLACEMENT
7643 spt_init(argc, argv);
7644#endif
7645 tzset(); /* Populates 'timezone' global. */
7646 zmalloc_set_oom_handler(redisOutOfMemoryHandler);
7647
7648 /* To achieve entropy, in case of containers, their time() and getpid() can
7649 * be the same. But value of tv_usec is fast enough to make the difference */
7650 gettimeofday(&tv,NULL);
7651 srand(time(NULL)^getpid()^tv.tv_usec);
7652 srandom(time(NULL)^getpid()^tv.tv_usec);
7653 init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
7654 crc64_init();
7655
7656 /* Store umask value. Because umask(2) only offers a set-and-get API we have
7657 * to reset it and restore it back. We do this early to avoid a potential
7658 * race condition with threads that could be creating files or directories.
7659 */
7660 umask(server.umask = umask(0777));
7661
7662 uint8_t hashseed[16];
7663 getRandomBytes(hashseed,sizeof(hashseed));
7664 dictSetHashFunctionSeed(hashseed);
7665
7666 char *exec_name = strrchr(argv[0], '/');
7667 if (exec_name == NULL) exec_name = argv[0];
7668 server.sentinel_mode = checkForSentinelMode(argc,argv, exec_name);
7669 initServerConfig();
7670 ACLInit(); /* The ACL subsystem must be initialized ASAP because the
7671 basic networking code and client creation depends on it. */
7672 moduleInitModulesSystem();
7673 connTypeInitialize();
7674 keyMetaInit();
7675
7676 /* Store the executable path and arguments in a safe place in order
7677 * to be able to restart the server later. */
7678 server.executable = getAbsolutePath(argv[0]);
7679 server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
7680 server.exec_argv[argc] = NULL;
7681 for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
7682
7683 /* We need to init sentinel right now as parsing the configuration file
7684 * in sentinel mode will have the effect of populating the sentinel
7685 * data structures with master nodes to monitor. */
7686 if (server.sentinel_mode) {
7687 initSentinelConfig();
7688 initSentinel();
7689 }
7690
7691 /* Check if we need to start in redis-check-rdb/aof mode. We just execute
7692 * the program main. However the program is part of the Redis executable
7693 * so that we can easily execute an RDB check on loading errors. */
7694 if (strstr(exec_name,"redis-check-rdb") != NULL)
7695 redis_check_rdb_main(argc,argv,NULL);
7696 else if (strstr(exec_name,"redis-check-aof") != NULL)
7697 redis_check_aof_main(argc,argv);
7698
7699 if (argc >= 2) {
7700 j = 1; /* First option to parse in argv[] */
7701 sds options = sdsempty();
7702
7703 /* Handle special options --help and --version */
7704 if (strcmp(argv[1], "-v") == 0 ||
7705 strcmp(argv[1], "--version") == 0)
7706 {
7707 sds version = getVersion();
7708 printf("Redis server %s\n", version);
7709 sdsfree(version);
7710 exit(0);
7711 }
7712 if (strcmp(argv[1], "--help") == 0 ||
7713 strcmp(argv[1], "-h") == 0) usage();
7714 if (strcmp(argv[1], "--test-memory") == 0) {
7715 if (argc == 3) {
7716 memtest(atoi(argv[2]),50);
7717 exit(0);
7718 } else {
7719 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
7720 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
7721 exit(1);
7722 }
7723 } if (strcmp(argv[1], "--check-system") == 0) {
7724 exit(syscheck() ? 0 : 1);
7725 }
7726 /* Parse command line options
7727 * Precedence wise, File, stdin, explicit options -- last config is the one that matters.
7728 *
7729 * First argument is the config file name? */
7730 if (argv[1][0] != '-') {
7731 /* Replace the config file in server.exec_argv with its absolute path. */
7732 server.configfile = getAbsolutePath(argv[1]);
7733 zfree(server.exec_argv[1]);
7734 server.exec_argv[1] = zstrdup(server.configfile);
7735 j = 2; // Skip this arg when parsing options
7736 }
7737 sds *argv_tmp;
7738 int argc_tmp;
7739 int handled_last_config_arg = 1;
7740 while(j < argc) {
7741 /* Either first or last argument - Should we read config from stdin? */
7742 if (argv[j][0] == '-' && argv[j][1] == '\0' && (j == 1 || j == argc-1)) {
7743 config_from_stdin = 1;
7744 }
7745 /* All the other options are parsed and conceptually appended to the
7746 * configuration file. For instance --port 6380 will generate the
7747 * string "port 6380\n" to be parsed after the actual config file
7748 * and stdin input are parsed (if they exist).
7749 * Only consider that if the last config has at least one argument. */
7750 else if (handled_last_config_arg && argv[j][0] == '-' && argv[j][1] == '-') {
7751 /* Option name */
7752 if (sdslen(options)) options = sdscat(options,"\n");
7753 /* argv[j]+2 for removing the preceding `--` */
7754 options = sdscat(options,argv[j]+2);
7755 options = sdscat(options," ");
7756
7757 argv_tmp = sdssplitargs(argv[j], &argc_tmp);
7758 if (argc_tmp == 1) {
7759 /* Means that we only have one option name, like --port or "--port " */
7760 handled_last_config_arg = 0;
7761
7762 if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
7763 !strcasecmp(argv[j], "--save"))
7764 {
7765 /* Special case: handle some things like `--save --config value`.
7766 * In this case, if next argument starts with `--`, we will reset
7767 * handled_last_config_arg flag and append an empty "" config value
7768 * to the options, so it will become `--save "" --config value`.
7769 * We are doing it to be compatible with pre 7.0 behavior (which we
7770 * break it in #10660, 7.0.1), since there might be users who generate
7771 * a command line from an array and when it's empty that's what they produce. */
7772 options = sdscat(options, "\"\"");
7773 handled_last_config_arg = 1;
7774 }
7775 else if ((j == argc-1) && !strcasecmp(argv[j], "--save")) {
7776 /* Special case: when empty save is the last argument.
7777 * In this case, we append an empty "" config value to the options,
7778 * so it will become `--save ""` and will follow the same reset thing. */
7779 options = sdscat(options, "\"\"");
7780 }
7781 else if ((j != argc-1) && argv[j+1][0] == '-' && argv[j+1][1] == '-' &&
7782 !strcasecmp(argv[j], "--sentinel"))
7783 {
7784 /* Special case: handle some things like `--sentinel --config value`.
7785 * It is a pseudo config option with no value. In this case, if next
7786 * argument starts with `--`, we will reset handled_last_config_arg flag.
7787 * We are doing it to be compatible with pre 7.0 behavior (which we
7788 * break it in #10660, 7.0.1). */
7789 options = sdscat(options, "");
7790 handled_last_config_arg = 1;
7791 }
7792 else if ((j == argc-1) && !strcasecmp(argv[j], "--sentinel")) {
7793 /* Special case: when --sentinel is the last argument.
7794 * It is a pseudo config option with no value. In this case, do nothing.
7795 * We are doing it to be compatible with pre 7.0 behavior (which we
7796 * break it in #10660, 7.0.1). */
7797 options = sdscat(options, "");
7798 }
7799 } else {
7800 /* Means that we are passing both config name and it's value in the same arg,
7801 * like "--port 6380", so we need to reset handled_last_config_arg flag. */
7802 handled_last_config_arg = 1;
7803 }
7804 sdsfreesplitres(argv_tmp, argc_tmp);
7805 } else {
7806 /* Option argument */
7807 options = sdscatrepr(options,argv[j],strlen(argv[j]));
7808 options = sdscat(options," ");
7809 handled_last_config_arg = 1;
7810 }
7811 j++;
7812 }
7813
7814 loadServerConfig(server.configfile, config_from_stdin, options);
7815 if (server.sentinel_mode) loadSentinelConfigFromQueue();
7816 sdsfree(options);
7817 }
7818 if (server.sentinel_mode) sentinelCheckConfigFile();
7819
7820 /* Do system checks */
7821#ifdef __linux__
7822 linuxMemoryWarnings();
7823 sds err_msg = NULL;
7824 if (checkXenClocksource(&err_msg) < 0) {
7825 serverLog(LL_WARNING, "WARNING %s", err_msg);
7826 sdsfree(err_msg);
7827 }
7828#if defined (__arm64__)
7829 int ret;
7830 if ((ret = checkLinuxMadvFreeForkBug(&err_msg)) <= 0) {
7831 if (ret < 0) {
7832 serverLog(LL_WARNING, "WARNING %s", err_msg);
7833 sdsfree(err_msg);
7834 } else
7835 serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
7836 "Your system could be affected, please report this error.");
7837 if (!checkIgnoreWarning("ARM64-COW-BUG")) {
7838 serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
7839 "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
7840 exit(1);
7841 }
7842 }
7843#endif /* __arm64__ */
7844#endif /* __linux__ */
7845
7846 /* Daemonize if needed */
7847 server.supervised = redisIsSupervised(server.supervised_mode);
7848 int background = server.daemonize && !server.supervised;
7849 if (background) daemonize();
7850
7851 serverLog(LL_NOTICE, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
7852 serverLog(LL_NOTICE,
7853 "Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
7854 REDIS_VERSION,
7855 (sizeof(long) == 8) ? 64 : 32,
7856 redisGitSHA1(),
7857 strtol(redisGitDirty(),NULL,10) > 0,
7858 (int)getpid());
7859
7860 if (argc == 1) {
7861 serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/redis.conf", argv[0]);
7862 } else {
7863 serverLog(LL_NOTICE, "Configuration loaded");
7864 }
7865
7866 initServer();
7867 if (background || server.pidfile) createPidFile();
7868 if (server.set_proc_title) redisSetProcTitle(NULL);
7869 redisAsciiArt();
7870 checkTcpBacklogSettings();
7871 if (server.cluster_enabled) {
7872 /* clusterCommonInit() initializes slot-stats required by clusterInit() */
7873 clusterCommonInit();
7874 clusterInit();
7875 }
7876 if (!server.sentinel_mode) {
7877 moduleInitModulesSystemLast();
7878 moduleLoadInternalModules();
7879 moduleLoadFromQueue();
7880 }
7881 ACLLoadUsersAtStartup();
7882 initListeners();
7883 if (server.cluster_enabled) {
7884 clusterInitLast();
7885 }
7886 InitServerLast();
7887
7888 if (!server.sentinel_mode) {
7889 /* Things not needed when running in Sentinel mode. */
7890 serverLog(LL_NOTICE,"Server initialized");
7891 aofLoadManifestFromDisk();
7892 loadDataFromDisk();
7893 aofOpenIfNeededOnServerStart();
7894 aofDelHistoryFiles();
7895 /* While loading data, we delay applying "appendonly" config change.
7896 * If there was a config change while we were inside loadDataFromDisk()
7897 * above, we'll apply it here. */
7898 applyAppendOnlyConfig();
7899
7900 if (server.cluster_enabled) {
7901 serverAssert(verifyClusterConfigWithData() == C_OK);
7902 }
7903
7904 for (j = 0; j < CONN_TYPE_MAX; j++) {
7905 connListener *listener = &server.listeners[j];
7906 if (listener->ct == NULL)
7907 continue;
7908
7909 serverLog(LL_NOTICE,"Ready to accept connections %s", listener->ct->get_type(NULL));
7910 }
7911
7912 if (server.supervised_mode == SUPERVISED_SYSTEMD) {
7913 if (!server.masterhost) {
7914 redisCommunicateSystemd("STATUS=Ready to accept connections\n");
7915 } else {
7916 redisCommunicateSystemd("STATUS=Ready to accept connections in read-only mode. Waiting for MASTER <-> REPLICA sync\n");
7917 }
7918 redisCommunicateSystemd("READY=1\n");
7919 }
7920 } else {
7921 sentinelIsRunning();
7922 if (server.supervised_mode == SUPERVISED_SYSTEMD) {
7923 redisCommunicateSystemd("STATUS=Ready to accept connections\n");
7924 redisCommunicateSystemd("READY=1\n");
7925 }
7926 }
7927
7928 /* Warning the user about suspicious maxmemory setting. */
7929 if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
7930 serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
7931 }
7932
7933 redisSetCpuAffinity(server.server_cpulist);
7934 setOOMScoreAdj(-1);
7935
7936 aeMain(server.el);
7937 aeDeleteEventLoop(server.el);
7938 return 0;
7939}
7940
7941/* The End */