summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/src/evict.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/src/evict.c')
-rw-r--r--examples/redis-unstable/src/evict.c764
1 files changed, 0 insertions, 764 deletions
diff --git a/examples/redis-unstable/src/evict.c b/examples/redis-unstable/src/evict.c
deleted file mode 100644
index e287ede..0000000
--- a/examples/redis-unstable/src/evict.c
+++ /dev/null
@@ -1,764 +0,0 @@
-/* Maxmemory directive handling (LRU eviction and other policies).
- *
- * ----------------------------------------------------------------------------
- *
- * Copyright (c) 2009-Present, Redis Ltd.
- * All rights reserved.
- *
- * Licensed under your choice of (a) the Redis Source Available License 2.0
- * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
- * GNU Affero General Public License v3 (AGPLv3).
- */
-
-#include "server.h"
-#include "bio.h"
-#include "atomicvar.h"
-#include "script.h"
-#include "cluster.h"
-#include "cluster_asm.h"
-#include <math.h>
-
-/* ----------------------------------------------------------------------------
- * Data structures
- * --------------------------------------------------------------------------*/
-
-/* To improve the quality of the LRU approximation we take a set of keys
- * that are good candidate for eviction across performEvictions() calls.
- *
- * Entries inside the eviction pool are taken ordered by idle time, putting
- * greater idle times to the right (ascending order).
- *
- * When an LFU policy is used instead, a reverse frequency indication is used
- * instead of the idle time, so that we still evict by larger value (larger
- * inverse frequency means to evict keys with the least frequent accesses).
- *
- * Empty entries have the key pointer set to NULL. */
-#define EVPOOL_SIZE 16
-#define EVPOOL_CACHED_SDS_SIZE 255
-struct evictionPoolEntry {
- unsigned long long idle; /* Object idle time (inverse frequency for LFU) */
- sds key; /* Key name. */
- sds cached; /* Cached SDS object for key name. */
- int dbid; /* Key DB number. */
- int slot; /* Slot. */
-};
-
-static struct evictionPoolEntry *EvictionPoolLRU;
-
-/* ----------------------------------------------------------------------------
- * Implementation of eviction, aging and LRU
- * --------------------------------------------------------------------------*/
-
-/* Return the LRU clock, based on the clock resolution. This is a time
- * in a reduced-bits format that can be used to set and check the
- * object->lru field of redisObject structures. */
-unsigned int getLRUClock(void) {
- return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
-}
-
-/* This function is used to obtain the current LRU clock.
- * If the current resolution is lower than the frequency we refresh the
- * LRU clock (as it should be in production servers) we return the
- * precomputed value, otherwise we need to resort to a system call. */
-unsigned int LRU_CLOCK(void) {
- unsigned int lruclock;
- if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
- lruclock = server.lruclock;
- } else {
- lruclock = getLRUClock();
- }
- return lruclock;
-}
-
-/* Given an object returns the min number of milliseconds the object was never
- * requested, using an approximated LRU algorithm. */
-unsigned long long estimateObjectIdleTime(robj *o) {
- unsigned long long lruclock = LRU_CLOCK();
- if (lruclock >= o->lru) {
- return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
- } else {
- return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
- LRU_CLOCK_RESOLUTION;
- }
-}
-
-/* During atomic slot migration, keys that are being imported are in an
- * intermediate state. We cannot evict them and therefore skip them. */
-static int randomEvictionShouldSkipDictIndex(int didx) {
- return !clusterCanAccessKeysInSlot(didx);
-}
-
-/* LRU approximation algorithm
- *
- * Redis uses an approximation of the LRU algorithm that runs in constant
- * memory. Every time there is a key to expire, we sample N keys (with
- * N very small, usually in around 5) to populate a pool of best keys to
- * evict of M keys (the pool size is defined by EVPOOL_SIZE).
- *
- * The N keys sampled are added in the pool of good keys to expire (the one
- * with an old access time) if they are better than one of the current keys
- * in the pool.
- *
- * After the pool is populated, the best key we have in the pool is expired.
- * However note that we don't remove keys from the pool when they are deleted
- * so the pool may contain keys that no longer exist.
- *
- * When we try to evict a key, and all the entries in the pool don't exist
- * we populate it again. This time we'll be sure that the pool has at least
- * one key that can be evicted, if there is at least one key that can be
- * evicted in the whole database. */
-
-/* Create a new eviction pool. */
-void evictionPoolAlloc(void) {
- struct evictionPoolEntry *ep;
- int j;
-
- ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
- for (j = 0; j < EVPOOL_SIZE; j++) {
- ep[j].idle = 0;
- ep[j].key = NULL;
- ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
- ep[j].dbid = 0;
- }
- EvictionPoolLRU = ep;
-}
-
-/* This is a helper function for performEvictions(), it is used in order
- * to populate the evictionPool with a few entries every time we want to
- * expire a key. Keys with idle time bigger than one of the current
- * keys are added. Keys are always added if there are free entries.
- *
- * We insert keys on place in ascending order, so keys with the smaller
- * idle time are on the left, and keys with the higher idle time on the
- * right. */
-int evictionPoolPopulate(redisDb *db, kvstore *samplekvs, struct evictionPoolEntry *pool) {
- int j, k, count;
- dictEntry *samples[server.maxmemory_samples];
-
- /* Don't retry, since we will call evictionPoolPopulate multiple times if needed. */
- int slot = kvstoreGetFairRandomDictIndex(samplekvs, randomEvictionShouldSkipDictIndex, 1, 0);
- if (slot == -1) return 0;
- count = kvstoreDictGetSomeKeys(samplekvs,slot,samples,server.maxmemory_samples);
- for (j = 0; j < count; j++) {
- unsigned long long idle;
-
- dictEntry *de = samples[j];
- kvobj *kv = dictGetKV(de);
- sds key = kvobjGetKey(kv);
-
- /* Calculate the idle time according to the policy. This is called
- * idle just because the code initially handled LRU, but is in fact
- * just a score where a higher score means better candidate. */
- if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LRM)) {
- idle = estimateObjectIdleTime(kv);
- } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
- /* When we use an LRU policy, we sort the keys by idle time
- * so that we expire keys starting from greater idle time.
- * However when the policy is an LFU one, we have a frequency
- * estimation, and we want to evict keys with lower frequency
- * first. So inside the pool we put objects using the inverted
- * frequency subtracting the actual frequency to the maximum
- * frequency of 255. */
- idle = 255-LFUDecrAndReturn(kv);
- } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
- /* In this case the sooner the expire the better. */
- idle = ULLONG_MAX - kvobjGetExpire(kv);
- } else {
- serverPanic("Unknown eviction policy in evictionPoolPopulate()");
- }
-
- /* Insert the element inside the pool.
- * First, find the first empty bucket or the first populated
- * bucket that has an idle time smaller than our idle time. */
- k = 0;
- while (k < EVPOOL_SIZE &&
- pool[k].key &&
- pool[k].idle < idle) k++;
- if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
- /* Can't insert if the element is < the worst element we have
- * and there are no empty buckets. */
- continue;
- } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
- /* Inserting into empty position. No setup needed before insert. */
- } else {
- /* Inserting in the middle. Now k points to the first element
- * greater than the element to insert. */
- if (pool[EVPOOL_SIZE-1].key == NULL) {
- /* Free space on the right? Insert at k shifting
- * all the elements from k to end to the right. */
-
- /* Save SDS before overwriting. */
- sds cached = pool[EVPOOL_SIZE-1].cached;
- memmove(pool+k+1,pool+k,
- sizeof(pool[0])*(EVPOOL_SIZE-k-1));
- pool[k].cached = cached;
- } else {
- /* No free space on right? Insert at k-1 */
- k--;
- /* Shift all elements on the left of k (included) to the
- * left, so we discard the element with smaller idle time. */
- sds cached = pool[0].cached; /* Save SDS before overwriting. */
- if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
- memmove(pool,pool+1,sizeof(pool[0])*k);
- pool[k].cached = cached;
- }
- }
-
- /* Try to reuse the cached SDS string allocated in the pool entry,
- * because allocating and deallocating this object is costly
- * (according to the profiler, not my fantasy. Remember:
- * premature optimization bla bla bla. */
- int klen = sdslen(key);
- if (klen > EVPOOL_CACHED_SDS_SIZE) {
- pool[k].key = sdsdup(key);
- } else {
- memcpy(pool[k].cached,key,klen+1);
- sdssetlen(pool[k].cached,klen);
- pool[k].key = pool[k].cached;
- }
- pool[k].idle = idle;
- pool[k].dbid = db->id;
- pool[k].slot = slot;
- }
-
- return count;
-}
-
-/* ----------------------------------------------------------------------------
- * LFU (Least Frequently Used) implementation.
-
- * We have 24 total bits of space in each object in order to implement
- * an LFU (Least Frequently Used) eviction policy, since we re-use the
- * LRU field for this purpose.
- *
- * We split the 24 bits into two fields:
- *
- * 16 bits 8 bits
- * +------------------+--------+
- * + Last access time | LOG_C |
- * +------------------+--------+
- *
- * LOG_C is a logarithmic counter that provides an indication of the access
- * frequency. However this field must also be decremented otherwise what used
- * to be a frequently accessed key in the past, will remain ranked like that
- * forever, while we want the algorithm to adapt to access pattern changes.
- *
- * So the remaining 16 bits are used in order to store the "access time",
- * a reduced-precision Unix time (we take 16 bits of the time converted
- * in minutes since we don't care about wrapping around) where the LOG_C
- * counter decays every minute by default (depends on lfu-decay-time).
- *
- * New keys don't start at zero, in order to have the ability to collect
- * some accesses before being trashed away, so they start at LFU_INIT_VAL.
- * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL
- * when incrementing the key, so that keys starting at LFU_INIT_VAL
- * (or having a smaller value) have a very high chance of being incremented
- * on access. (The chance depends on counter and lfu-log-factor.)
- *
- * During decrement, the value of the logarithmic counter is decremented by
- * one when lfu-decay-time minutes elapsed.
- * --------------------------------------------------------------------------*/
-
-/* Return the current time in minutes, just taking the least significant
- * 16 bits. The returned time is suitable to be stored as LDT (last access
- * time) for the LFU implementation. */
-unsigned long LFUGetTimeInMinutes(void) {
- return (server.unixtime/60) & 65535;
-}
-
-/* Given an object ldt (last access time), compute the minimum number of minutes
- * that elapsed since the last access. Handle overflow (ldt greater than
- * the current 16 bits minutes time) considering the time as wrapping
- * exactly once. */
-unsigned long LFUTimeElapsed(unsigned long ldt) {
- unsigned long now = LFUGetTimeInMinutes();
- if (now >= ldt) return now-ldt;
- return 65535-ldt+now;
-}
-
-/* Logarithmically increment a counter. The greater is the current counter value
- * the less likely is that it gets really incremented. Saturate it at 255. */
-uint8_t LFULogIncr(uint8_t counter) {
- if (counter == 255) return 255;
- double r = (double)rand()/RAND_MAX;
- double baseval = counter - LFU_INIT_VAL;
- if (baseval < 0) baseval = 0;
- double p = 1.0/(baseval*server.lfu_log_factor+1);
- if (r < p) counter++;
- return counter;
-}
-
-/* If the object's ldt (last access time) is reached, decrement the LFU counter but
- * do not update LFU fields of the object, we update the access time
- * and counter in an explicit way when the object is really accessed.
- * And we will decrement the counter according to the times of
- * elapsed time than server.lfu_decay_time.
- * Return the object frequency counter.
- *
- * This function is used in order to scan the dataset for the best object
- * to fit: as we check for the candidate, we incrementally decrement the
- * counter of the scanned objects if needed. */
-unsigned long LFUDecrAndReturn(robj *o) {
- unsigned long ldt = o->lru >> 8;
- unsigned long counter = o->lru & 255;
- unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
- if (num_periods)
- counter = (num_periods > counter) ? 0 : counter - num_periods;
- return counter;
-}
-
-/* We don't want to count AOF buffers and slaves output buffers as
- * used memory: the eviction should use mostly data size, because
- * it can cause feedback-loop when we push DELs into them, putting
- * more and more DELs will make them bigger, if we count them, we
- * need to evict more keys, and then generate more DELs, maybe cause
- * massive eviction loop, even all keys are evicted.
- *
- * This function returns the sum of AOF and replication buffer. */
-size_t freeMemoryGetNotCountedMemory(void) {
- size_t overhead = 0;
-
- /* Since all replicas and replication backlog share global replication
- * buffer, we think only the part of exceeding backlog size is the extra
- * separate consumption of replicas.
- *
- * Note that although the backlog is also initially incrementally grown
- * (pushing DELs consumes memory), it'll eventually stop growing and
- * remain constant in size, so even if its creation will cause some
- * eviction, it's capped, and also here to stay (no resonance effect)
- *
- * Note that, because we trim backlog incrementally in the background,
- * backlog size may exceeds our setting if slow replicas that reference
- * vast replication buffer blocks disconnect. To avoid massive eviction
- * loop, we don't count the delayed freed replication backlog into used
- * memory even if there are no replicas, i.e. we still regard this memory
- * as replicas'. */
- if ((long long)server.repl_buffer_mem > server.repl_backlog_size) {
- /* We use list structure to manage replication buffer blocks, so backlog
- * also occupies some extra memory, we can't know exact blocks numbers,
- * we only get approximate size according to per block size. */
- size_t extra_approx_size =
- (server.repl_backlog_size/PROTO_REPLY_CHUNK_BYTES + 1) *
- (sizeof(replBufBlock)+sizeof(listNode));
- size_t counted_mem = server.repl_backlog_size + extra_approx_size;
- if (server.repl_buffer_mem > counted_mem) {
- overhead += (server.repl_buffer_mem - counted_mem);
- }
- }
-
- /* The migrate client is like a replica, we also push DELs into it when
- * evicting keys belonging to the migrating slot, so we don't count its
- * output buffer to avoid eviction loop. */
- overhead += asmGetMigrateOutputBufferSize();
-
- if (server.aof_state != AOF_OFF) {
- overhead += sdsAllocSize(server.aof_buf);
- }
- return overhead;
-}
-
-/* Get the memory status from the point of view of the maxmemory directive:
- * if the memory used is under the maxmemory setting then C_OK is returned.
- * Otherwise, if we are over the memory limit, the function returns
- * C_ERR.
- *
- * The function may return additional info via reference, only if the
- * pointers to the respective arguments is not NULL. Certain fields are
- * populated only when C_ERR is returned:
- *
- * 'total' total amount of bytes used.
- * (Populated both for C_ERR and C_OK)
- *
- * 'logical' the amount of memory used minus the slaves/AOF buffers.
- * (Populated when C_ERR is returned)
- *
- * 'tofree' the amount of memory that should be released
- * in order to return back into the memory limits.
- * (Populated when C_ERR is returned)
- *
- * 'level' this usually ranges from 0 to 1, and reports the amount of
- * memory currently used. May be > 1 if we are over the memory
- * limit.
- * (Populated both for C_ERR and C_OK)
- */
-int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
- size_t mem_reported, mem_used, mem_tofree;
-
- /* Check if we are over the memory usage limit. If we are not, no need
- * to subtract the slaves output buffers. We can just return ASAP. */
- mem_reported = zmalloc_used_memory();
- if (total) *total = mem_reported;
-
- /* We may return ASAP if there is no need to compute the level. */
- if (!server.maxmemory) {
- if (level) *level = 0;
- return C_OK;
- }
- if (mem_reported <= server.maxmemory && !level) return C_OK;
-
- /* Remove the size of slaves output buffers and AOF buffer from the
- * count of used memory. */
- mem_used = mem_reported;
- size_t overhead = freeMemoryGetNotCountedMemory();
- mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
-
- /* Compute the ratio of memory usage. */
- if (level) *level = (float)mem_used / (float)server.maxmemory;
-
- if (mem_reported <= server.maxmemory) return C_OK;
-
- /* Check if we are still over the memory limit. */
- if (mem_used <= server.maxmemory) return C_OK;
-
- /* Compute how much memory we need to free. */
- mem_tofree = mem_used - server.maxmemory;
-
- if (logical) *logical = mem_used;
- if (tofree) *tofree = mem_tofree;
-
- return C_ERR;
-}
-
-/* Return 1 if used memory is more than maxmemory after allocating more memory,
- * return 0 if not. Redis may reject user's requests or evict some keys if used
- * memory exceeds maxmemory, especially, when we allocate huge memory at once. */
-int overMaxmemoryAfterAlloc(size_t moremem) {
- if (!server.maxmemory) return 0; /* No limit. */
-
- /* Check quickly. */
- size_t mem_used = zmalloc_used_memory();
- if (mem_used + moremem <= server.maxmemory) return 0;
-
- size_t overhead = freeMemoryGetNotCountedMemory();
- mem_used = (mem_used > overhead) ? mem_used - overhead : 0;
- return mem_used + moremem > server.maxmemory;
-}
-
-/* The evictionTimeProc is started when "maxmemory" has been breached and
- * could not immediately be resolved. This will spin the event loop with short
- * eviction cycles until the "maxmemory" condition has resolved or there are no
- * more evictable items. */
-static int isEvictionProcRunning = 0;
-static int evictionTimeProc(
- struct aeEventLoop *eventLoop, long long id, void *clientData) {
- UNUSED(eventLoop);
- UNUSED(id);
- UNUSED(clientData);
-
- if (performEvictions() == EVICT_RUNNING) return 0; /* keep evicting */
-
- /* For EVICT_OK - things are good, no need to keep evicting.
- * For EVICT_FAIL - there is nothing left to evict. */
- isEvictionProcRunning = 0;
- return AE_NOMORE;
-}
-
-void startEvictionTimeProc(void) {
- if (!isEvictionProcRunning) {
- isEvictionProcRunning = 1;
- aeCreateTimeEvent(server.el, 0,
- evictionTimeProc, NULL, NULL);
- }
-}
-
-/* Check if it's safe to perform evictions.
- * Returns 1 if evictions can be performed
- * Returns 0 if eviction processing should be skipped
- */
-static int isSafeToPerformEvictions(void) {
- /* - There must be no script in timeout condition.
- * - Nor we are loading data right now. */
- if (isInsideYieldingLongCommand() || server.loading) return 0;
-
- /* By default replicas should ignore maxmemory
- * and just be masters exact copies. */
- if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0;
-
- /* Disable eviction during slot migration import to avoid delays and errors
- * caused by failed evictions. A special client is created for data import,
- * identified by the CLIENT_MASTER and CLIENT_ASM_IMPORTING flags. */
- if (server.current_client && server.current_client->flags & CLIENT_MASTER &&
- server.current_client->flags & CLIENT_ASM_IMPORTING)
- return 0;
-
- /* If 'evict' action is paused, for whatever reason, then return false */
- if (isPausedActionsWithUpdate(PAUSE_ACTION_EVICT)) return 0;
-
- return 1;
-}
-
-/* Algorithm for converting tenacity (0-100) to a time limit. */
-static unsigned long evictionTimeLimitUs(void) {
- serverAssert(server.maxmemory_eviction_tenacity >= 0);
- serverAssert(server.maxmemory_eviction_tenacity <= 100);
-
- if (server.maxmemory_eviction_tenacity <= 10) {
- /* A linear progression from 0..500us */
- return 50uL * server.maxmemory_eviction_tenacity;
- }
-
- if (server.maxmemory_eviction_tenacity < 100) {
- /* A 15% geometric progression, resulting in a limit of ~2 min at tenacity==99 */
- return (unsigned long)(500.0 * pow(1.15, server.maxmemory_eviction_tenacity - 10.0));
- }
-
- return ULONG_MAX; /* No limit to eviction time */
-}
-
-/* Check that memory usage is within the current "maxmemory" limit. If over
- * "maxmemory", attempt to free memory by evicting data (if it's safe to do so).
- *
- * It's possible for Redis to suddenly be significantly over the "maxmemory"
- * setting. This can happen if there is a large allocation (like a hash table
- * resize) or even if the "maxmemory" setting is manually adjusted. Because of
- * this, it's important to evict for a managed period of time - otherwise Redis
- * would become unresponsive while evicting.
- *
- * The goal of this function is to improve the memory situation - not to
- * immediately resolve it. In the case that some items have been evicted but
- * the "maxmemory" limit has not been achieved, an aeTimeProc will be started
- * which will continue to evict items until memory limits are achieved or
- * nothing more is evictable.
- *
- * This should be called before execution of commands. If EVICT_FAIL is
- * returned, commands which will result in increased memory usage should be
- * rejected.
- *
- * Returns:
- * EVICT_OK - memory is OK or it's not possible to perform evictions now
- * EVICT_RUNNING - memory is over the limit, but eviction is still processing
- * EVICT_FAIL - memory is over the limit, and there's nothing to evict
- * */
-int performEvictions(void) {
- /* Note, we don't goto update_metrics here because this check skips eviction
- * as if it wasn't triggered. it's a fake EVICT_OK. */
- if (!isSafeToPerformEvictions()) return EVICT_OK;
-
- int keys_freed = 0;
- size_t mem_reported, mem_tofree;
- long long mem_freed; /* May be negative */
- mstime_t latency;
- int slaves = listLength(server.slaves);
- int result = EVICT_FAIL;
-
- if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK) {
- result = EVICT_OK;
- goto update_metrics;
- }
-
- if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) {
- result = EVICT_FAIL; /* We need to free memory, but policy forbids. */
- goto update_metrics;
- }
-
- unsigned long eviction_time_limit_us = evictionTimeLimitUs();
-
- mem_freed = 0;
-
- latencyStartMonitor(latency);
-
- monotime evictionTimer;
- elapsedStart(&evictionTimer);
-
- /* Try to smoke-out bugs (server.also_propagate should be empty here) */
- serverAssert(server.also_propagate.numops == 0);
- /* Evictions are performed on random keys that have nothing to do with the current command slot. */
-
- while (mem_freed < (long long)mem_tofree) {
- int j, k, i;
- static unsigned int next_db = 0;
- sds bestkey = NULL;
- int bestdbid;
- redisDb *db;
- dictEntry *de;
-
- if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_LRM) ||
- server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
- {
- struct evictionPoolEntry *pool = EvictionPoolLRU;
- while (bestkey == NULL) {
- unsigned long total_keys = 0;
- unsigned long total_sampled_keys = 0;
-
- /* We don't want to make local-db choices when expiring keys,
- * so to start populate the eviction pool sampling keys from
- * every DB. */
- for (i = 0; i < server.dbnum; i++) {
- db = server.db+i;
- kvstore *kvs;
- if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
- kvs = db->keys;
- } else {
- kvs = db->expires;
- }
- unsigned long sampled_keys = 0;
- unsigned long current_db_keys = kvstoreSize(kvs);
- if (current_db_keys == 0) continue;
-
- total_keys += current_db_keys;
- int l = kvstoreNumNonEmptyDicts(kvs);
- /* Do not exceed the number of non-empty slots when looping. */
- while (l--) {
- sampled_keys += evictionPoolPopulate(db, kvs, pool);
- total_sampled_keys += sampled_keys;
- /* We have sampled enough keys in the current db, exit the loop. */
- if (sampled_keys >= (unsigned long) server.maxmemory_samples)
- break;
- /* If there are not a lot of keys in the current db, dict/s may be very
- * sparsely populated, exit the loop without meeting the sampling
- * requirement. */
- if (current_db_keys < (unsigned long) server.maxmemory_samples*10)
- break;
- }
- }
- if (!total_keys) break; /* No keys to evict. */
-
- /* If we iterated all the DBs and all non-empty slot dicts, then
- * did not sample any key, stop sampling. */
- if (!total_sampled_keys) break;
-
- /* Go backward from best to worst element to evict. */
- for (k = EVPOOL_SIZE-1; k >= 0; k--) {
- if (pool[k].key == NULL) continue;
- bestdbid = pool[k].dbid;
-
- kvstore *kvs;
- if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
- kvs = server.db[bestdbid].keys;
- } else {
- kvs = server.db[bestdbid].expires;
- }
- de = kvstoreDictFind(kvs, pool[k].slot, pool[k].key);
-
- /* Remove the entry from the pool. */
- if (pool[k].key != pool[k].cached)
- sdsfree(pool[k].key);
- pool[k].key = NULL;
- pool[k].idle = 0;
-
- /* If the key exists, is our pick. Otherwise it is
- * a ghost and we need to try the next element. */
- if (de) {
- bestkey = kvobjGetKey(dictGetKV(de));
- break;
- } else {
- /* Ghost... Iterate again. */
- }
- }
- }
- }
-
- /* volatile-random and allkeys-random policy */
- else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
- server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
- {
- /* When evicting a random key, we try to evict a key for
- * each DB, so we use the static 'next_db' variable to
- * incrementally visit all DBs. */
- for (i = 0; i < server.dbnum; i++) {
- j = (++next_db) % server.dbnum;
- db = server.db+j;
- kvstore *kvs;
- if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) {
- kvs = db->keys;
- } else {
- kvs = db->expires;
- }
- int slot = kvstoreGetFairRandomDictIndex(kvs, randomEvictionShouldSkipDictIndex, 16, 0);
- if (slot == -1) continue;
- de = kvstoreDictGetRandomKey(kvs, slot);
- if (de) {
- kvobj *kv = dictGetKV(de);
- bestkey = kvobjGetKey(kv);
- bestdbid = j;
- break;
- }
- }
- }
-
- /* Finally remove the selected key. */
- if (bestkey) {
- long long key_mem_freed;
- db = server.db+bestdbid;
-
- enterExecutionUnit(1, 0);
- robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
- deleteEvictedKeyAndPropagate(db, keyobj, &key_mem_freed);
- decrRefCount(keyobj);
- exitExecutionUnit();
- /* Propagate the DEL command */
- postExecutionUnitOperations();
-
- mem_freed += key_mem_freed;
- keys_freed++;
-
- if (keys_freed % 16 == 0) {
- /* When the memory to free starts to be big enough, we may
- * start spending so much time here that is impossible to
- * deliver data to the replicas fast enough, so we force the
- * transmission here inside the loop. */
- if (slaves) flushSlavesOutputBuffers();
-
- /* Normally our stop condition is the ability to release
- * a fixed, pre-computed amount of memory. However when we
- * are deleting objects in another thread, it's better to
- * check, from time to time, if we already reached our target
- * memory, since the "mem_freed" amount is computed only
- * across the dbAsyncDelete() call, while the thread can
- * release the memory all the time. */
- if (server.lazyfree_lazy_eviction) {
- if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
- break;
- }
- }
-
- /* After some time, exit the loop early - even if memory limit
- * hasn't been reached. If we suddenly need to free a lot of
- * memory, don't want to spend too much time here. */
- if (elapsedUs(evictionTimer) > eviction_time_limit_us) {
- // We still need to free memory - start eviction timer proc
- startEvictionTimeProc();
- break;
- }
- }
- } else {
- goto cant_free; /* nothing to free... */
- }
- }
- /* at this point, the memory is OK, or we have reached the time limit */
- result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK;
-
-cant_free:
- if (result == EVICT_FAIL) {
- /* At this point, we have run out of evictable items. It's possible
- * that some items are being freed in the lazyfree thread. Perform a
- * short wait here if such jobs exist, but don't wait long. */
- mstime_t lazyfree_latency;
- latencyStartMonitor(lazyfree_latency);
- while (bioPendingJobsOfType(BIO_LAZY_FREE) &&
- elapsedUs(evictionTimer) < eviction_time_limit_us) {
- if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
- result = EVICT_OK;
- break;
- }
- usleep(eviction_time_limit_us < 1000 ? eviction_time_limit_us : 1000);
- }
- latencyEndMonitor(lazyfree_latency);
- latencyAddSampleIfNeeded("eviction-lazyfree",lazyfree_latency);
- }
-
- latencyEndMonitor(latency);
- latencyAddSampleIfNeeded("eviction-cycle",latency);
-
-update_metrics:
- if (result == EVICT_RUNNING || result == EVICT_FAIL) {
- if (server.stat_last_eviction_exceeded_time == 0)
- elapsedStart(&server.stat_last_eviction_exceeded_time);
- } else if (result == EVICT_OK) {
- if (server.stat_last_eviction_exceeded_time != 0) {
- server.stat_total_eviction_exceeded_time += elapsedUs(server.stat_last_eviction_exceeded_time);
- server.stat_last_eviction_exceeded_time = 0;
- }
- }
- return result;
-}