diff options
Diffstat (limited to 'examples/redis-unstable/src/evict.c')
| -rw-r--r-- | examples/redis-unstable/src/evict.c | 764 |
1 files changed, 0 insertions, 764 deletions
diff --git a/examples/redis-unstable/src/evict.c b/examples/redis-unstable/src/evict.c deleted file mode 100644 index e287ede..0000000 --- a/examples/redis-unstable/src/evict.c +++ /dev/null @@ -1,764 +0,0 @@ -/* Maxmemory directive handling (LRU eviction and other policies). - * - * ---------------------------------------------------------------------------- - * - * Copyright (c) 2009-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of (a) the Redis Source Available License 2.0 - * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the - * GNU Affero General Public License v3 (AGPLv3). - */ - -#include "server.h" -#include "bio.h" -#include "atomicvar.h" -#include "script.h" -#include "cluster.h" -#include "cluster_asm.h" -#include <math.h> - -/* ---------------------------------------------------------------------------- - * Data structures - * --------------------------------------------------------------------------*/ - -/* To improve the quality of the LRU approximation we take a set of keys - * that are good candidate for eviction across performEvictions() calls. - * - * Entries inside the eviction pool are taken ordered by idle time, putting - * greater idle times to the right (ascending order). - * - * When an LFU policy is used instead, a reverse frequency indication is used - * instead of the idle time, so that we still evict by larger value (larger - * inverse frequency means to evict keys with the least frequent accesses). - * - * Empty entries have the key pointer set to NULL. */ -#define EVPOOL_SIZE 16 -#define EVPOOL_CACHED_SDS_SIZE 255 -struct evictionPoolEntry { - unsigned long long idle; /* Object idle time (inverse frequency for LFU) */ - sds key; /* Key name. */ - sds cached; /* Cached SDS object for key name. */ - int dbid; /* Key DB number. */ - int slot; /* Slot. */ -}; - -static struct evictionPoolEntry *EvictionPoolLRU; - -/* ---------------------------------------------------------------------------- - * Implementation of eviction, aging and LRU - * --------------------------------------------------------------------------*/ - -/* Return the LRU clock, based on the clock resolution. This is a time - * in a reduced-bits format that can be used to set and check the - * object->lru field of redisObject structures. */ -unsigned int getLRUClock(void) { - return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX; -} - -/* This function is used to obtain the current LRU clock. - * If the current resolution is lower than the frequency we refresh the - * LRU clock (as it should be in production servers) we return the - * precomputed value, otherwise we need to resort to a system call. */ -unsigned int LRU_CLOCK(void) { - unsigned int lruclock; - if (1000/server.hz <= LRU_CLOCK_RESOLUTION) { - lruclock = server.lruclock; - } else { - lruclock = getLRUClock(); - } - return lruclock; -} - -/* Given an object returns the min number of milliseconds the object was never - * requested, using an approximated LRU algorithm. */ -unsigned long long estimateObjectIdleTime(robj *o) { - unsigned long long lruclock = LRU_CLOCK(); - if (lruclock >= o->lru) { - return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION; - } else { - return (lruclock + (LRU_CLOCK_MAX - o->lru)) * - LRU_CLOCK_RESOLUTION; - } -} - -/* During atomic slot migration, keys that are being imported are in an - * intermediate state. We cannot evict them and therefore skip them. */ -static int randomEvictionShouldSkipDictIndex(int didx) { - return !clusterCanAccessKeysInSlot(didx); -} - -/* LRU approximation algorithm - * - * Redis uses an approximation of the LRU algorithm that runs in constant - * memory. Every time there is a key to expire, we sample N keys (with - * N very small, usually in around 5) to populate a pool of best keys to - * evict of M keys (the pool size is defined by EVPOOL_SIZE). - * - * The N keys sampled are added in the pool of good keys to expire (the one - * with an old access time) if they are better than one of the current keys - * in the pool. - * - * After the pool is populated, the best key we have in the pool is expired. - * However note that we don't remove keys from the pool when they are deleted - * so the pool may contain keys that no longer exist. - * - * When we try to evict a key, and all the entries in the pool don't exist - * we populate it again. This time we'll be sure that the pool has at least - * one key that can be evicted, if there is at least one key that can be - * evicted in the whole database. */ - -/* Create a new eviction pool. */ -void evictionPoolAlloc(void) { - struct evictionPoolEntry *ep; - int j; - - ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE); - for (j = 0; j < EVPOOL_SIZE; j++) { - ep[j].idle = 0; - ep[j].key = NULL; - ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE); - ep[j].dbid = 0; - } - EvictionPoolLRU = ep; -} - -/* This is a helper function for performEvictions(), it is used in order - * to populate the evictionPool with a few entries every time we want to - * expire a key. Keys with idle time bigger than one of the current - * keys are added. Keys are always added if there are free entries. - * - * We insert keys on place in ascending order, so keys with the smaller - * idle time are on the left, and keys with the higher idle time on the - * right. */ -int evictionPoolPopulate(redisDb *db, kvstore *samplekvs, struct evictionPoolEntry *pool) { - int j, k, count; - dictEntry *samples[server.maxmemory_samples]; - - /* Don't retry, since we will call evictionPoolPopulate multiple times if needed. */ - int slot = kvstoreGetFairRandomDictIndex(samplekvs, randomEvictionShouldSkipDictIndex, 1, 0); - if (slot == -1) return 0; - count = kvstoreDictGetSomeKeys(samplekvs,slot,samples,server.maxmemory_samples); - for (j = 0; j < count; j++) { - unsigned long long idle; - - dictEntry *de = samples[j]; - kvobj *kv = dictGetKV(de); - sds key = kvobjGetKey(kv); - - /* Calculate the idle time according to the policy. This is called - * idle just because the code initially handled LRU, but is in fact - * just a score where a higher score means better candidate. */ - if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LRM)) { - idle = estimateObjectIdleTime(kv); - } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { - /* When we use an LRU policy, we sort the keys by idle time - * so that we expire keys starting from greater idle time. - * However when the policy is an LFU one, we have a frequency - * estimation, and we want to evict keys with lower frequency - * first. So inside the pool we put objects using the inverted - * frequency subtracting the actual frequency to the maximum - * frequency of 255. */ - idle = 255-LFUDecrAndReturn(kv); - } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { - /* In this case the sooner the expire the better. */ - idle = ULLONG_MAX - kvobjGetExpire(kv); - } else { - serverPanic("Unknown eviction policy in evictionPoolPopulate()"); - } - - /* Insert the element inside the pool. - * First, find the first empty bucket or the first populated - * bucket that has an idle time smaller than our idle time. */ - k = 0; - while (k < EVPOOL_SIZE && - pool[k].key && - pool[k].idle < idle) k++; - if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) { - /* Can't insert if the element is < the worst element we have - * and there are no empty buckets. */ - continue; - } else if (k < EVPOOL_SIZE && pool[k].key == NULL) { - /* Inserting into empty position. No setup needed before insert. */ - } else { - /* Inserting in the middle. Now k points to the first element - * greater than the element to insert. */ - if (pool[EVPOOL_SIZE-1].key == NULL) { - /* Free space on the right? Insert at k shifting - * all the elements from k to end to the right. */ - - /* Save SDS before overwriting. */ - sds cached = pool[EVPOOL_SIZE-1].cached; - memmove(pool+k+1,pool+k, - sizeof(pool[0])*(EVPOOL_SIZE-k-1)); - pool[k].cached = cached; - } else { - /* No free space on right? Insert at k-1 */ - k--; - /* Shift all elements on the left of k (included) to the - * left, so we discard the element with smaller idle time. */ - sds cached = pool[0].cached; /* Save SDS before overwriting. */ - if (pool[0].key != pool[0].cached) sdsfree(pool[0].key); - memmove(pool,pool+1,sizeof(pool[0])*k); - pool[k].cached = cached; - } - } - - /* Try to reuse the cached SDS string allocated in the pool entry, - * because allocating and deallocating this object is costly - * (according to the profiler, not my fantasy. Remember: - * premature optimization bla bla bla. */ - int klen = sdslen(key); - if (klen > EVPOOL_CACHED_SDS_SIZE) { - pool[k].key = sdsdup(key); - } else { - memcpy(pool[k].cached,key,klen+1); - sdssetlen(pool[k].cached,klen); - pool[k].key = pool[k].cached; - } - pool[k].idle = idle; - pool[k].dbid = db->id; - pool[k].slot = slot; - } - - return count; -} - -/* ---------------------------------------------------------------------------- - * LFU (Least Frequently Used) implementation. - - * We have 24 total bits of space in each object in order to implement - * an LFU (Least Frequently Used) eviction policy, since we re-use the - * LRU field for this purpose. - * - * We split the 24 bits into two fields: - * - * 16 bits 8 bits - * +------------------+--------+ - * + Last access time | LOG_C | - * +------------------+--------+ - * - * LOG_C is a logarithmic counter that provides an indication of the access - * frequency. However this field must also be decremented otherwise what used - * to be a frequently accessed key in the past, will remain ranked like that - * forever, while we want the algorithm to adapt to access pattern changes. - * - * So the remaining 16 bits are used in order to store the "access time", - * a reduced-precision Unix time (we take 16 bits of the time converted - * in minutes since we don't care about wrapping around) where the LOG_C - * counter decays every minute by default (depends on lfu-decay-time). - * - * New keys don't start at zero, in order to have the ability to collect - * some accesses before being trashed away, so they start at LFU_INIT_VAL. - * The logarithmic increment performed on LOG_C takes care of LFU_INIT_VAL - * when incrementing the key, so that keys starting at LFU_INIT_VAL - * (or having a smaller value) have a very high chance of being incremented - * on access. (The chance depends on counter and lfu-log-factor.) - * - * During decrement, the value of the logarithmic counter is decremented by - * one when lfu-decay-time minutes elapsed. - * --------------------------------------------------------------------------*/ - -/* Return the current time in minutes, just taking the least significant - * 16 bits. The returned time is suitable to be stored as LDT (last access - * time) for the LFU implementation. */ -unsigned long LFUGetTimeInMinutes(void) { - return (server.unixtime/60) & 65535; -} - -/* Given an object ldt (last access time), compute the minimum number of minutes - * that elapsed since the last access. Handle overflow (ldt greater than - * the current 16 bits minutes time) considering the time as wrapping - * exactly once. */ -unsigned long LFUTimeElapsed(unsigned long ldt) { - unsigned long now = LFUGetTimeInMinutes(); - if (now >= ldt) return now-ldt; - return 65535-ldt+now; -} - -/* Logarithmically increment a counter. The greater is the current counter value - * the less likely is that it gets really incremented. Saturate it at 255. */ -uint8_t LFULogIncr(uint8_t counter) { - if (counter == 255) return 255; - double r = (double)rand()/RAND_MAX; - double baseval = counter - LFU_INIT_VAL; - if (baseval < 0) baseval = 0; - double p = 1.0/(baseval*server.lfu_log_factor+1); - if (r < p) counter++; - return counter; -} - -/* If the object's ldt (last access time) is reached, decrement the LFU counter but - * do not update LFU fields of the object, we update the access time - * and counter in an explicit way when the object is really accessed. - * And we will decrement the counter according to the times of - * elapsed time than server.lfu_decay_time. - * Return the object frequency counter. - * - * This function is used in order to scan the dataset for the best object - * to fit: as we check for the candidate, we incrementally decrement the - * counter of the scanned objects if needed. */ -unsigned long LFUDecrAndReturn(robj *o) { - unsigned long ldt = o->lru >> 8; - unsigned long counter = o->lru & 255; - unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0; - if (num_periods) - counter = (num_periods > counter) ? 0 : counter - num_periods; - return counter; -} - -/* We don't want to count AOF buffers and slaves output buffers as - * used memory: the eviction should use mostly data size, because - * it can cause feedback-loop when we push DELs into them, putting - * more and more DELs will make them bigger, if we count them, we - * need to evict more keys, and then generate more DELs, maybe cause - * massive eviction loop, even all keys are evicted. - * - * This function returns the sum of AOF and replication buffer. */ -size_t freeMemoryGetNotCountedMemory(void) { - size_t overhead = 0; - - /* Since all replicas and replication backlog share global replication - * buffer, we think only the part of exceeding backlog size is the extra - * separate consumption of replicas. - * - * Note that although the backlog is also initially incrementally grown - * (pushing DELs consumes memory), it'll eventually stop growing and - * remain constant in size, so even if its creation will cause some - * eviction, it's capped, and also here to stay (no resonance effect) - * - * Note that, because we trim backlog incrementally in the background, - * backlog size may exceeds our setting if slow replicas that reference - * vast replication buffer blocks disconnect. To avoid massive eviction - * loop, we don't count the delayed freed replication backlog into used - * memory even if there are no replicas, i.e. we still regard this memory - * as replicas'. */ - if ((long long)server.repl_buffer_mem > server.repl_backlog_size) { - /* We use list structure to manage replication buffer blocks, so backlog - * also occupies some extra memory, we can't know exact blocks numbers, - * we only get approximate size according to per block size. */ - size_t extra_approx_size = - (server.repl_backlog_size/PROTO_REPLY_CHUNK_BYTES + 1) * - (sizeof(replBufBlock)+sizeof(listNode)); - size_t counted_mem = server.repl_backlog_size + extra_approx_size; - if (server.repl_buffer_mem > counted_mem) { - overhead += (server.repl_buffer_mem - counted_mem); - } - } - - /* The migrate client is like a replica, we also push DELs into it when - * evicting keys belonging to the migrating slot, so we don't count its - * output buffer to avoid eviction loop. */ - overhead += asmGetMigrateOutputBufferSize(); - - if (server.aof_state != AOF_OFF) { - overhead += sdsAllocSize(server.aof_buf); - } - return overhead; -} - -/* Get the memory status from the point of view of the maxmemory directive: - * if the memory used is under the maxmemory setting then C_OK is returned. - * Otherwise, if we are over the memory limit, the function returns - * C_ERR. - * - * The function may return additional info via reference, only if the - * pointers to the respective arguments is not NULL. Certain fields are - * populated only when C_ERR is returned: - * - * 'total' total amount of bytes used. - * (Populated both for C_ERR and C_OK) - * - * 'logical' the amount of memory used minus the slaves/AOF buffers. - * (Populated when C_ERR is returned) - * - * 'tofree' the amount of memory that should be released - * in order to return back into the memory limits. - * (Populated when C_ERR is returned) - * - * 'level' this usually ranges from 0 to 1, and reports the amount of - * memory currently used. May be > 1 if we are over the memory - * limit. - * (Populated both for C_ERR and C_OK) - */ -int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) { - size_t mem_reported, mem_used, mem_tofree; - - /* Check if we are over the memory usage limit. If we are not, no need - * to subtract the slaves output buffers. We can just return ASAP. */ - mem_reported = zmalloc_used_memory(); - if (total) *total = mem_reported; - - /* We may return ASAP if there is no need to compute the level. */ - if (!server.maxmemory) { - if (level) *level = 0; - return C_OK; - } - if (mem_reported <= server.maxmemory && !level) return C_OK; - - /* Remove the size of slaves output buffers and AOF buffer from the - * count of used memory. */ - mem_used = mem_reported; - size_t overhead = freeMemoryGetNotCountedMemory(); - mem_used = (mem_used > overhead) ? mem_used-overhead : 0; - - /* Compute the ratio of memory usage. */ - if (level) *level = (float)mem_used / (float)server.maxmemory; - - if (mem_reported <= server.maxmemory) return C_OK; - - /* Check if we are still over the memory limit. */ - if (mem_used <= server.maxmemory) return C_OK; - - /* Compute how much memory we need to free. */ - mem_tofree = mem_used - server.maxmemory; - - if (logical) *logical = mem_used; - if (tofree) *tofree = mem_tofree; - - return C_ERR; -} - -/* Return 1 if used memory is more than maxmemory after allocating more memory, - * return 0 if not. Redis may reject user's requests or evict some keys if used - * memory exceeds maxmemory, especially, when we allocate huge memory at once. */ -int overMaxmemoryAfterAlloc(size_t moremem) { - if (!server.maxmemory) return 0; /* No limit. */ - - /* Check quickly. */ - size_t mem_used = zmalloc_used_memory(); - if (mem_used + moremem <= server.maxmemory) return 0; - - size_t overhead = freeMemoryGetNotCountedMemory(); - mem_used = (mem_used > overhead) ? mem_used - overhead : 0; - return mem_used + moremem > server.maxmemory; -} - -/* The evictionTimeProc is started when "maxmemory" has been breached and - * could not immediately be resolved. This will spin the event loop with short - * eviction cycles until the "maxmemory" condition has resolved or there are no - * more evictable items. */ -static int isEvictionProcRunning = 0; -static int evictionTimeProc( - struct aeEventLoop *eventLoop, long long id, void *clientData) { - UNUSED(eventLoop); - UNUSED(id); - UNUSED(clientData); - - if (performEvictions() == EVICT_RUNNING) return 0; /* keep evicting */ - - /* For EVICT_OK - things are good, no need to keep evicting. - * For EVICT_FAIL - there is nothing left to evict. */ - isEvictionProcRunning = 0; - return AE_NOMORE; -} - -void startEvictionTimeProc(void) { - if (!isEvictionProcRunning) { - isEvictionProcRunning = 1; - aeCreateTimeEvent(server.el, 0, - evictionTimeProc, NULL, NULL); - } -} - -/* Check if it's safe to perform evictions. - * Returns 1 if evictions can be performed - * Returns 0 if eviction processing should be skipped - */ -static int isSafeToPerformEvictions(void) { - /* - There must be no script in timeout condition. - * - Nor we are loading data right now. */ - if (isInsideYieldingLongCommand() || server.loading) return 0; - - /* By default replicas should ignore maxmemory - * and just be masters exact copies. */ - if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0; - - /* Disable eviction during slot migration import to avoid delays and errors - * caused by failed evictions. A special client is created for data import, - * identified by the CLIENT_MASTER and CLIENT_ASM_IMPORTING flags. */ - if (server.current_client && server.current_client->flags & CLIENT_MASTER && - server.current_client->flags & CLIENT_ASM_IMPORTING) - return 0; - - /* If 'evict' action is paused, for whatever reason, then return false */ - if (isPausedActionsWithUpdate(PAUSE_ACTION_EVICT)) return 0; - - return 1; -} - -/* Algorithm for converting tenacity (0-100) to a time limit. */ -static unsigned long evictionTimeLimitUs(void) { - serverAssert(server.maxmemory_eviction_tenacity >= 0); - serverAssert(server.maxmemory_eviction_tenacity <= 100); - - if (server.maxmemory_eviction_tenacity <= 10) { - /* A linear progression from 0..500us */ - return 50uL * server.maxmemory_eviction_tenacity; - } - - if (server.maxmemory_eviction_tenacity < 100) { - /* A 15% geometric progression, resulting in a limit of ~2 min at tenacity==99 */ - return (unsigned long)(500.0 * pow(1.15, server.maxmemory_eviction_tenacity - 10.0)); - } - - return ULONG_MAX; /* No limit to eviction time */ -} - -/* Check that memory usage is within the current "maxmemory" limit. If over - * "maxmemory", attempt to free memory by evicting data (if it's safe to do so). - * - * It's possible for Redis to suddenly be significantly over the "maxmemory" - * setting. This can happen if there is a large allocation (like a hash table - * resize) or even if the "maxmemory" setting is manually adjusted. Because of - * this, it's important to evict for a managed period of time - otherwise Redis - * would become unresponsive while evicting. - * - * The goal of this function is to improve the memory situation - not to - * immediately resolve it. In the case that some items have been evicted but - * the "maxmemory" limit has not been achieved, an aeTimeProc will be started - * which will continue to evict items until memory limits are achieved or - * nothing more is evictable. - * - * This should be called before execution of commands. If EVICT_FAIL is - * returned, commands which will result in increased memory usage should be - * rejected. - * - * Returns: - * EVICT_OK - memory is OK or it's not possible to perform evictions now - * EVICT_RUNNING - memory is over the limit, but eviction is still processing - * EVICT_FAIL - memory is over the limit, and there's nothing to evict - * */ -int performEvictions(void) { - /* Note, we don't goto update_metrics here because this check skips eviction - * as if it wasn't triggered. it's a fake EVICT_OK. */ - if (!isSafeToPerformEvictions()) return EVICT_OK; - - int keys_freed = 0; - size_t mem_reported, mem_tofree; - long long mem_freed; /* May be negative */ - mstime_t latency; - int slaves = listLength(server.slaves); - int result = EVICT_FAIL; - - if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK) { - result = EVICT_OK; - goto update_metrics; - } - - if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) { - result = EVICT_FAIL; /* We need to free memory, but policy forbids. */ - goto update_metrics; - } - - unsigned long eviction_time_limit_us = evictionTimeLimitUs(); - - mem_freed = 0; - - latencyStartMonitor(latency); - - monotime evictionTimer; - elapsedStart(&evictionTimer); - - /* Try to smoke-out bugs (server.also_propagate should be empty here) */ - serverAssert(server.also_propagate.numops == 0); - /* Evictions are performed on random keys that have nothing to do with the current command slot. */ - - while (mem_freed < (long long)mem_tofree) { - int j, k, i; - static unsigned int next_db = 0; - sds bestkey = NULL; - int bestdbid; - redisDb *db; - dictEntry *de; - - if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_LRM) || - server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) - { - struct evictionPoolEntry *pool = EvictionPoolLRU; - while (bestkey == NULL) { - unsigned long total_keys = 0; - unsigned long total_sampled_keys = 0; - - /* We don't want to make local-db choices when expiring keys, - * so to start populate the eviction pool sampling keys from - * every DB. */ - for (i = 0; i < server.dbnum; i++) { - db = server.db+i; - kvstore *kvs; - if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { - kvs = db->keys; - } else { - kvs = db->expires; - } - unsigned long sampled_keys = 0; - unsigned long current_db_keys = kvstoreSize(kvs); - if (current_db_keys == 0) continue; - - total_keys += current_db_keys; - int l = kvstoreNumNonEmptyDicts(kvs); - /* Do not exceed the number of non-empty slots when looping. */ - while (l--) { - sampled_keys += evictionPoolPopulate(db, kvs, pool); - total_sampled_keys += sampled_keys; - /* We have sampled enough keys in the current db, exit the loop. */ - if (sampled_keys >= (unsigned long) server.maxmemory_samples) - break; - /* If there are not a lot of keys in the current db, dict/s may be very - * sparsely populated, exit the loop without meeting the sampling - * requirement. */ - if (current_db_keys < (unsigned long) server.maxmemory_samples*10) - break; - } - } - if (!total_keys) break; /* No keys to evict. */ - - /* If we iterated all the DBs and all non-empty slot dicts, then - * did not sample any key, stop sampling. */ - if (!total_sampled_keys) break; - - /* Go backward from best to worst element to evict. */ - for (k = EVPOOL_SIZE-1; k >= 0; k--) { - if (pool[k].key == NULL) continue; - bestdbid = pool[k].dbid; - - kvstore *kvs; - if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { - kvs = server.db[bestdbid].keys; - } else { - kvs = server.db[bestdbid].expires; - } - de = kvstoreDictFind(kvs, pool[k].slot, pool[k].key); - - /* Remove the entry from the pool. */ - if (pool[k].key != pool[k].cached) - sdsfree(pool[k].key); - pool[k].key = NULL; - pool[k].idle = 0; - - /* If the key exists, is our pick. Otherwise it is - * a ghost and we need to try the next element. */ - if (de) { - bestkey = kvobjGetKey(dictGetKV(de)); - break; - } else { - /* Ghost... Iterate again. */ - } - } - } - } - - /* volatile-random and allkeys-random policy */ - else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM || - server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM) - { - /* When evicting a random key, we try to evict a key for - * each DB, so we use the static 'next_db' variable to - * incrementally visit all DBs. */ - for (i = 0; i < server.dbnum; i++) { - j = (++next_db) % server.dbnum; - db = server.db+j; - kvstore *kvs; - if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) { - kvs = db->keys; - } else { - kvs = db->expires; - } - int slot = kvstoreGetFairRandomDictIndex(kvs, randomEvictionShouldSkipDictIndex, 16, 0); - if (slot == -1) continue; - de = kvstoreDictGetRandomKey(kvs, slot); - if (de) { - kvobj *kv = dictGetKV(de); - bestkey = kvobjGetKey(kv); - bestdbid = j; - break; - } - } - } - - /* Finally remove the selected key. */ - if (bestkey) { - long long key_mem_freed; - db = server.db+bestdbid; - - enterExecutionUnit(1, 0); - robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); - deleteEvictedKeyAndPropagate(db, keyobj, &key_mem_freed); - decrRefCount(keyobj); - exitExecutionUnit(); - /* Propagate the DEL command */ - postExecutionUnitOperations(); - - mem_freed += key_mem_freed; - keys_freed++; - - if (keys_freed % 16 == 0) { - /* When the memory to free starts to be big enough, we may - * start spending so much time here that is impossible to - * deliver data to the replicas fast enough, so we force the - * transmission here inside the loop. */ - if (slaves) flushSlavesOutputBuffers(); - - /* Normally our stop condition is the ability to release - * a fixed, pre-computed amount of memory. However when we - * are deleting objects in another thread, it's better to - * check, from time to time, if we already reached our target - * memory, since the "mem_freed" amount is computed only - * across the dbAsyncDelete() call, while the thread can - * release the memory all the time. */ - if (server.lazyfree_lazy_eviction) { - if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { - break; - } - } - - /* After some time, exit the loop early - even if memory limit - * hasn't been reached. If we suddenly need to free a lot of - * memory, don't want to spend too much time here. */ - if (elapsedUs(evictionTimer) > eviction_time_limit_us) { - // We still need to free memory - start eviction timer proc - startEvictionTimeProc(); - break; - } - } - } else { - goto cant_free; /* nothing to free... */ - } - } - /* at this point, the memory is OK, or we have reached the time limit */ - result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK; - -cant_free: - if (result == EVICT_FAIL) { - /* At this point, we have run out of evictable items. It's possible - * that some items are being freed in the lazyfree thread. Perform a - * short wait here if such jobs exist, but don't wait long. */ - mstime_t lazyfree_latency; - latencyStartMonitor(lazyfree_latency); - while (bioPendingJobsOfType(BIO_LAZY_FREE) && - elapsedUs(evictionTimer) < eviction_time_limit_us) { - if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { - result = EVICT_OK; - break; - } - usleep(eviction_time_limit_us < 1000 ? eviction_time_limit_us : 1000); - } - latencyEndMonitor(lazyfree_latency); - latencyAddSampleIfNeeded("eviction-lazyfree",lazyfree_latency); - } - - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("eviction-cycle",latency); - -update_metrics: - if (result == EVICT_RUNNING || result == EVICT_FAIL) { - if (server.stat_last_eviction_exceeded_time == 0) - elapsedStart(&server.stat_last_eviction_exceeded_time); - } else if (result == EVICT_OK) { - if (server.stat_last_eviction_exceeded_time != 0) { - server.stat_total_eviction_exceeded_time += elapsedUs(server.stat_last_eviction_exceeded_time); - server.stat_last_eviction_exceeded_time = 0; - } - } - return result; -} |
