diff options
Diffstat (limited to 'examples/redis-unstable/src/dict.c')
| -rw-r--r-- | examples/redis-unstable/src/dict.c | 2340 |
1 file changed, 2340 insertions, 0 deletions
diff --git a/examples/redis-unstable/src/dict.c b/examples/redis-unstable/src/dict.c new file mode 100644 index 0000000..d0885ff --- /dev/null +++ b/examples/redis-unstable/src/dict.c | |||
| @@ -0,0 +1,2340 @@ | |||
| 1 | /* Hash Tables Implementation. | ||
| 2 | * | ||
| 3 | * This file implements in memory hash tables with insert/del/replace/find/ | ||
| 4 | * get-random-element operations. Hash tables will auto resize if needed | ||
| 5 | * tables of power of two in size are used, collisions are handled by | ||
| 6 | * chaining. See the source code for more information... :) | ||
| 7 | * | ||
| 8 | * Copyright (c) 2006-Present, Redis Ltd. | ||
| 9 | * All rights reserved. | ||
| 10 | * | ||
| 11 | * Licensed under your choice of (a) the Redis Source Available License 2.0 | ||
| 12 | * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the | ||
| 13 | * GNU Affero General Public License v3 (AGPLv3). | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include "fmacros.h" | ||
| 17 | |||
| 18 | #include <stdio.h> | ||
| 19 | #include <stdlib.h> | ||
| 20 | #include <stdint.h> | ||
| 21 | #include <string.h> | ||
| 22 | #include <stdarg.h> | ||
| 23 | #include <limits.h> | ||
| 24 | #include <sys/time.h> | ||
| 25 | #include <stddef.h> | ||
| 26 | |||
| 27 | #include "dict.h" | ||
| 28 | #include "zmalloc.h" | ||
| 29 | #include "redisassert.h" | ||
| 30 | #include "monotonic.h" | ||
| 31 | #include "util.h" | ||
| 32 | |||
| 33 | /* Using dictSetResizeEnabled() we make possible to disable | ||
| 34 | * resizing and rehashing of the hash table as needed. This is very important | ||
| 35 | * for Redis, as we use copy-on-write and don't want to move too much memory | ||
| 36 | * around when there is a child performing saving operations. | ||
| 37 | * | ||
| 38 | * Note that even when dict_can_resize is set to DICT_RESIZE_AVOID, not all | ||
| 39 | * resizes are prevented: | ||
| 40 | * - A hash table is still allowed to expand if the ratio between the number | ||
| 41 | * of elements and the buckets >= dict_force_resize_ratio. | ||
| 42 | * - A hash table is still allowed to shrink if the ratio between the number | ||
| 43 | * of elements and the buckets <= 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio). */ | ||
| 44 | static dictResizeEnable dict_can_resize = DICT_RESIZE_ENABLE; | ||
| 45 | static unsigned int dict_force_resize_ratio = 4; | ||
| 46 | |||
| 47 | /* -------------------------- types ----------------------------------------- */ | ||
/* A chained hash-table entry holding a key and a value. The field order is
 * load-bearing: 'next' first and 'key' second, so that dictEntry and
 * dictEntryNoValue can be accessed interchangeably through tagged pointers
 * (see the "dictEntry pointer bit tricks" section below). */
struct dictEntry {
    struct dictEntry *next;     /* Must be first */
    void *key;                  /* Must be second */
    union {
        void *val;              /* Pointer value (the common case). */
        uint64_t u64;           /* Inline unsigned integer value. */
        int64_t s64;            /* Inline signed integer value. */
        double d;               /* Inline double value. */
    } v;
};

/* Compact entry used by no_value dicts: same prefix layout as dictEntry but
 * without the value union, saving memory per key. */
typedef struct dictEntryNoValue {
    dictEntry *next;            /* Must be first */
    void *key;                  /* Must be second */
} dictEntryNoValue;

/* Compile-time proof that the two entry layouts share the same prefix, which
 * the tagged-pointer code below relies on. */
static_assert(offsetof(dictEntry, next) == offsetof(dictEntryNoValue, next), "dictEntry & dictEntryNoValue next not aligned");
static_assert(offsetof(dictEntry, key) == offsetof(dictEntryNoValue, key), "dictEntry & dictEntryNoValue key not aligned");
| 66 | |||
| 67 | /* -------------------------- private prototypes ---------------------------- */ | ||
| 68 | |||
| 69 | static int _dictExpandIfNeeded(dict *d); | ||
| 70 | static void _dictShrinkIfNeeded(dict *d); | ||
| 71 | static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx); | ||
| 72 | static signed char _dictNextExp(unsigned long size); | ||
| 73 | static int _dictInit(dict *d, dictType *type); | ||
| 74 | static dictEntryLink dictGetNextLink(dictEntry *de); | ||
| 75 | static void dictSetNext(dictEntry *de, dictEntry *next); | ||
| 76 | static int dictDefaultCompare(dictCmpCache *cache, const void *key1, const void *key2); | ||
| 77 | static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLink *bucket); | ||
| 78 | dictEntryLink dictFindLinkForInsert(dict *d, const void *key, dictEntry **existing); | ||
| 79 | static dictEntry *dictInsertKeyAtLink(dict *d, void *key __stored_key, dictEntryLink link); | ||
| 80 | |||
| 81 | /* -------------------------- unused --------------------------- */ | ||
| 82 | void dictSetSignedIntegerVal(dictEntry *de, int64_t val); | ||
| 83 | int64_t dictGetSignedIntegerVal(const dictEntry *de); | ||
| 84 | double dictIncrDoubleVal(dictEntry *de, double val); | ||
| 85 | void *dictEntryMetadata(dictEntry *de); | ||
| 86 | int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val); | ||
| 87 | |||
| 88 | /* -------------------------- misc inline functions -------------------------------- */ | ||
| 89 | |||
| 90 | typedef int (*keyCmpFunc)(dictCmpCache *cache, const void *key1, const void *key2); | ||
| 91 | static inline keyCmpFunc dictGetCmpFunc(dict *d) { | ||
| 92 | if (d->type->keyCompare) | ||
| 93 | return d->type->keyCompare; | ||
| 94 | return dictDefaultCompare; | ||
| 95 | } | ||
| 96 | |||
| 97 | static const void *dictStoredKey2Key(dict *d, const void *key __stored_key) { | ||
| 98 | return (d->type->keyFromStoredKey) ? d->type->keyFromStoredKey(key) : key; | ||
| 99 | } | ||
| 100 | |||
| 101 | /* -------------------------- hash functions -------------------------------- */ | ||
| 102 | |||
| 103 | static uint8_t dict_hash_function_seed[16]; | ||
| 104 | |||
/* Install the 16-byte seed used by the SipHash-based hash functions below.
 * 'seed' must point to at least sizeof(dict_hash_function_seed) bytes. */
void dictSetHashFunctionSeed(uint8_t *seed) {
    memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}
| 108 | |||
/* The default hashing function uses SipHash implementation
 * in siphash.c. */

uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);

/* Hash 'len' bytes of 'key' with seeded SipHash (seed set via
 * dictSetHashFunctionSeed). */
uint64_t dictGenHashFunction(const void *key, size_t len) {
    return siphash(key, len, dict_hash_function_seed);
}
| 118 | |||
/* Case-insensitive variant of dictGenHashFunction (delegates to
 * siphash_nocase, which folds case while hashing). */
uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
    return siphash_nocase(buf,len,dict_hash_function_seed);
}
| 122 | |||
| 123 | /* --------------------- dictEntry pointer bit tricks ---------------------- */ | ||
| 124 | |||
| 125 | /* The 3 least significant bits in a pointer to a dictEntry determines what the | ||
| 126 | * pointer actually points to. If the least bit is set, it's a key. Otherwise, | ||
| 127 | * the bit pattern of the least 3 significant bits mark the kind of entry. */ | ||
| 128 | |||
| 129 | #define ENTRY_PTR_MASK 7 /* 111 */ | ||
| 130 | #define ENTRY_PTR_NORMAL 0 /* 000 : If a pointer to an entry with value. */ | ||
| 131 | #define ENTRY_PTR_IS_ODD_KEY 1 /* XX1 : If a pointer to odd key address (must be 1). */ | ||
| 132 | #define ENTRY_PTR_IS_EVEN_KEY 2 /* 010 : If a pointer to even key address. (must be 2 or 4). */ | ||
| 133 | #define ENTRY_PTR_UNUSED 4 /* 100 : Unused. */ | ||
| 134 | |||
/* Returns non-zero if the entry pointer is a tagged pointer to a bare key
 * (either key tag bit set), rather than to an allocated entry. Returns 0
 * otherwise. Note the result is the raw tag bits, i.e. truthy but not
 * necessarily exactly 1. */
static inline int entryIsKey(const dictEntry *de) {
    return ((uintptr_t)de & (ENTRY_PTR_IS_ODD_KEY | ENTRY_PTR_IS_EVEN_KEY));
}
| 140 | |||
| 141 | /* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns | ||
| 142 | * 0 otherwise. */ | ||
| 143 | static inline int entryIsNormal(const dictEntry *de) { | ||
| 144 | return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NORMAL; | ||
| 145 | } | ||
| 146 | |||
| 147 | /* Creates an entry without a value field. */ | ||
| 148 | static inline dictEntry *createEntryNoValue(void *key __stored_key, dictEntry *next) { | ||
| 149 | dictEntryNoValue *entry = zmalloc(sizeof(*entry)); | ||
| 150 | entry->key = key; | ||
| 151 | entry->next = next; | ||
| 152 | return (dictEntry *) entry; | ||
| 153 | } | ||
| 154 | |||
| 155 | static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) { | ||
| 156 | assert(((uintptr_t)ptr & ENTRY_PTR_MASK) == 0); | ||
| 157 | return (dictEntry *)(void *)((uintptr_t)ptr | bits); | ||
| 158 | } | ||
| 159 | |||
| 160 | static inline void *decodeMaskedPtr(const dictEntry *de) { | ||
| 161 | return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK); | ||
| 162 | } | ||
| 163 | |||
| 164 | /* Encode a key pointer for storage in a no_value dict bucket. | ||
| 165 | * For odd keys (like SDS strings), the key can be stored directly. | ||
| 166 | * For even keys, we need to tag it with ENTRY_PTR_IS_EVEN_KEY. */ | ||
| 167 | static inline dictEntry *encodeEntryKey(dict *d, void *key) { | ||
| 168 | if (d->type->keys_are_odd) { | ||
| 169 | debugAssert(((uintptr_t)key & ENTRY_PTR_IS_ODD_KEY) == ENTRY_PTR_IS_ODD_KEY); | ||
| 170 | return key; | ||
| 171 | } else { | ||
| 172 | return encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY); | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | /* Decodes the pointer to an entry without value, when you know it is an entry | ||
| 177 | * without value. Hint: Use entryIsNoValue to check. */ | ||
| 178 | static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) { | ||
| 179 | return decodeMaskedPtr(de); | ||
| 180 | } | ||
| 181 | |||
| 182 | /* Returns 1 if the entry has a value field and 0 otherwise. */ | ||
| 183 | static inline int entryHasValue(const dictEntry *de) { | ||
| 184 | return entryIsNormal(de); | ||
| 185 | } | ||
| 186 | |||
| 187 | /* ----------------------------- API implementation ------------------------- */ | ||
| 188 | |||
| 189 | /* Reset hash table parameters already initialized with _dictInit()*/ | ||
| 190 | static void _dictReset(dict *d, int htidx) | ||
| 191 | { | ||
| 192 | d->ht_table[htidx] = NULL; | ||
| 193 | d->ht_size_exp[htidx] = -1; | ||
| 194 | d->ht_used[htidx] = 0; | ||
| 195 | } | ||
| 196 | |||
| 197 | /* Create a new hash table */ | ||
| 198 | dict *dictCreate(dictType *type) | ||
| 199 | { | ||
| 200 | size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes(NULL) : 0; | ||
| 201 | dict *d = zmalloc(sizeof(*d)+metasize); | ||
| 202 | if (metasize > 0) { | ||
| 203 | memset(dictMetadata(d), 0, metasize); | ||
| 204 | } | ||
| 205 | _dictInit(d,type); | ||
| 206 | return d; | ||
| 207 | } | ||
| 208 | |||
/* Change dictType of dict to another one with metadata support
 * Rest of dictType's values must stay the same */
void dictTypeAddMeta(dict **d, dictType *typeWithMeta) {
    /* Verify new dictType is compatible with the old one */
    dictType toCmp = *typeWithMeta;
    /* Ignore 'dictMetadataBytes' and 'onDictRelease' in comparison by
     * overwriting them with the current type's values before memcmp. */
    toCmp.dictMetadataBytes = (*d)->type->dictMetadataBytes;
    toCmp.onDictRelease = (*d)->type->onDictRelease;
    /* NOTE(review): memcmp over a struct also compares padding bytes —
     * presumed benign since dictType holds only pointers/flags; confirm if
     * the layout ever changes. */
    assert(memcmp((*d)->type, &toCmp, sizeof(dictType)) == 0); /* The rest of the dictType fields must be the same */

    /* Grow the dict allocation to make room for the trailing metadata area.
     * Caller's pointer is updated in place since zrealloc may move it. */
    *d = zrealloc(*d, sizeof(dict) + typeWithMeta->dictMetadataBytes(*d));
    (*d)->type = typeWithMeta;
}
| 222 | |||
| 223 | /* Initialize the hash table */ | ||
| 224 | int _dictInit(dict *d, dictType *type) | ||
| 225 | { | ||
| 226 | _dictReset(d, 0); | ||
| 227 | _dictReset(d, 1); | ||
| 228 | d->type = type; | ||
| 229 | d->rehashidx = -1; | ||
| 230 | d->pauserehash = 0; | ||
| 231 | d->pauseAutoResize = 0; | ||
| 232 | return DICT_OK; | ||
| 233 | } | ||
| 234 | |||
/* Resize or create the hash table,
 * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
 * Returns DICT_OK if resize was performed, and DICT_ERR if skipped.
 *
 * The sequencing here is deliberate: table 1 is installed and rehashingStarted
 * / bucketChanged are fired even for a first initialization, then immediately
 * torn down again if table 0 turns out to be empty. Do not reorder. */
int _dictResize(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* We can't rehash twice if rehashing is ongoing. */
    assert(!dictIsRehashing(d));

    /* the new hash table */
    dictEntry **new_ht_table;
    unsigned long new_ht_used;
    signed char new_ht_size_exp = _dictNextExp(size);

    /* Detect overflows: the rounded-up power of two must still hold 'size'
     * and the bucket array byte count must not wrap. */
    size_t newsize = DICTHT_SIZE(new_ht_size_exp);
    if (newsize < size || newsize * sizeof(dictEntry*) < newsize)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (new_ht_size_exp == d->ht_size_exp[0]) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    if (malloc_failed) {
        new_ht_table = ztrycalloc(newsize*sizeof(dictEntry*));
        *malloc_failed = new_ht_table == NULL;
        if (*malloc_failed)
            return DICT_ERR;
    } else
        new_ht_table = zcalloc(newsize*sizeof(dictEntry*));

    new_ht_used = 0;

    /* Prepare a second hash table for incremental rehashing.
     * We do this even for the first initialization, so that we can trigger the
     * rehashingStarted more conveniently, we will clean it up right after. */
    d->ht_size_exp[1] = new_ht_size_exp;
    d->ht_used[1] = new_ht_used;
    d->ht_table[1] = new_ht_table;
    d->rehashidx = 0;
    if (d->type->rehashingStarted) d->type->rehashingStarted(d);
    if (d->type->bucketChanged)
        d->type->bucketChanged(d, DICTHT_SIZE(d->ht_size_exp[1]));

    /* Is this the first initialization or is the first hash table empty? If so
     * it's not really a rehashing, we can just set the first hash table so that
     * it can accept keys. */
    if (d->ht_table[0] == NULL || d->ht_used[0] == 0) {
        if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
        if (d->type->bucketChanged)
            d->type->bucketChanged(d, -(long long)DICTHT_SIZE(d->ht_size_exp[0]));
        if (d->ht_table[0]) zfree(d->ht_table[0]);
        /* Promote the new table to slot 0 and clear slot 1. */
        d->ht_size_exp[0] = new_ht_size_exp;
        d->ht_used[0] = new_ht_used;
        d->ht_table[0] = new_ht_table;
        _dictReset(d, 1);
        d->rehashidx = -1;
        return DICT_OK;
    }

    /* Force a full rehashing of the dictionary */
    if (d->type->force_full_rehash) {
        while (dictRehash(d, 1000)) {
            /* Continue rehashing */
        }
    }
    return DICT_OK;
}
| 304 | |||
| 305 | int _dictExpand(dict *d, unsigned long size, int* malloc_failed) { | ||
| 306 | /* the size is invalid if it is smaller than the size of the hash table | ||
| 307 | * or smaller than the number of elements already inside the hash table */ | ||
| 308 | if (dictIsRehashing(d) || d->ht_used[0] > size || DICTHT_SIZE(d->ht_size_exp[0]) >= size) | ||
| 309 | return DICT_ERR; | ||
| 310 | return _dictResize(d, size, malloc_failed); | ||
| 311 | } | ||
| 312 | |||
| 313 | /* return DICT_ERR if expand was not performed */ | ||
| 314 | int dictExpand(dict *d, unsigned long size) { | ||
| 315 | return _dictExpand(d, size, NULL); | ||
| 316 | } | ||
| 317 | |||
| 318 | /* return DICT_ERR if expand failed due to memory allocation failure */ | ||
| 319 | int dictTryExpand(dict *d, unsigned long size) { | ||
| 320 | int malloc_failed = 0; | ||
| 321 | _dictExpand(d, size, &malloc_failed); | ||
| 322 | return malloc_failed? DICT_ERR : DICT_OK; | ||
| 323 | } | ||
| 324 | |||
| 325 | /* return DICT_ERR if shrink was not performed */ | ||
| 326 | int dictShrink(dict *d, unsigned long size) { | ||
| 327 | /* the size is invalid if it is bigger than the size of the hash table | ||
| 328 | * or smaller than the number of elements already inside the hash table */ | ||
| 329 | if (dictIsRehashing(d) || d->ht_used[0] > size || DICTHT_SIZE(d->ht_size_exp[0]) <= size) | ||
| 330 | return DICT_ERR; | ||
| 331 | return _dictResize(d, size, NULL); | ||
| 332 | } | ||
| 333 | |||
/* Helper function for `dictRehash` and `dictBucketRehash` which rehashes all the keys
 * in a bucket at index `idx` from the old to the new hash HT. */
static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) {
    dictEntry *de = d->ht_table[0][idx];
    uint64_t h;
    dictEntry *nextde;
    while (de) {
        /* Save the chain successor before 'de' is relinked/replaced below. */
        nextde = dictGetNext(de);
        void *storedKey = dictGetKey(de);
        /* Get the index in the new hash table */
        if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
            /* Expanding: must recompute the hash from the lookup key. */
            const void *key = dictStoredKey2Key(d, storedKey);
            h = dictGetHash(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
        } else {
            /* We're shrinking the table. The tables sizes are powers of
             * two, so we simply mask the bucket index in the larger table
             * to get the bucket index in the smaller table. */
            h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
        }
        if (d->type->no_value) {
            if (!d->ht_table[1][h]) {
                /* The destination bucket is empty, allowing the key to be stored
                 * directly without allocating a dictEntry. If an old entry was
                 * previously allocated, free its memory. */
                if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));

                de = encodeEntryKey(d, storedKey);

            } else if (entryIsKey(de)) {
                /* We don't have an allocated entry but we need one. */
                de = createEntryNoValue(storedKey, d->ht_table[1][h]);
            } else {
                /* Already an allocated no-value entry: just relink it. */
                dictSetNext(de, d->ht_table[1][h]);
            }
        } else {
            /* Normal entries are always allocated: push onto the new chain. */
            dictSetNext(de, d->ht_table[1][h]);
        }
        d->ht_table[1][h] = de;
        d->ht_used[0]--;
        d->ht_used[1]++;
        de = nextde;
    }
    d->ht_table[0][idx] = NULL;
}
| 378 | |||
/* This checks if we already rehashed the whole table and if more rehashing is required.
 * Returns 1 (and finalizes the rehash) when table 0 is fully drained, 0
 * otherwise. Finalization order matters: notify callbacks, free the old
 * bucket array, promote table 1 into slot 0, then reset slot 1. */
static int dictCheckRehashingCompleted(dict *d) {
    if (d->ht_used[0] != 0) return 0;

    if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
    if (d->type->bucketChanged)
        d->type->bucketChanged(d, -(long long)DICTHT_SIZE(d->ht_size_exp[0]));
    zfree(d->ht_table[0]);
    /* Copy the new ht onto the old one */
    d->ht_table[0] = d->ht_table[1];
    d->ht_used[0] = d->ht_used[1];
    d->ht_size_exp[0] = d->ht_size_exp[1];
    _dictReset(d, 1);
    d->rehashidx = -1;
    return 1;
}
| 395 | |||
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
    unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
    if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
    /* If dict_can_resize is DICT_RESIZE_AVOID, we want to avoid rehashing.
     * - If expanding, the threshold is dict_force_resize_ratio which is 4.
     * - If shrinking, the threshold is 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) which is 1/32. */
    if (dict_can_resize == DICT_RESIZE_AVOID &&
        ((s1 > s0 && s1 < dict_force_resize_ratio * s0) ||
         (s1 < s0 && s0 < HASHTABLE_MIN_FILL * dict_force_resize_ratio * s1)))
    {
        return 0;
    }

    while(n-- && d->ht_used[0] != 0) {
        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
        /* Skip runs of empty buckets, bailing out early (still rehashing)
         * once the empty-visit budget is spent. */
        while(d->ht_table[0][d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        /* Move all the keys in this bucket from the old to the new hash HT */
        rehashEntriesInBucketAtIndex(d, d->rehashidx);
        d->rehashidx++;
    }

    /* If the drain finished, finalize and report "no more work". */
    return !dictCheckRehashingCompleted(d);
}
| 435 | |||
| 436 | long long timeInMilliseconds(void) { | ||
| 437 | struct timeval tv; | ||
| 438 | |||
| 439 | gettimeofday(&tv,NULL); | ||
| 440 | return (((long long)tv.tv_sec)*1000)+(tv.tv_usec/1000); | ||
| 441 | } | ||
| 442 | |||
| 443 | /* Rehash in us+"delta" microseconds. The value of "delta" is larger | ||
| 444 | * than 0, and is smaller than 1000 in most cases. The exact upper bound | ||
| 445 | * depends on the running time of dictRehash(d,100).*/ | ||
| 446 | int dictRehashMicroseconds(dict *d, uint64_t us) { | ||
| 447 | if (d->pauserehash > 0) return 0; | ||
| 448 | |||
| 449 | monotime timer; | ||
| 450 | elapsedStart(&timer); | ||
| 451 | int rehashes = 0; | ||
| 452 | |||
| 453 | while(dictRehash(d,100)) { | ||
| 454 | rehashes += 100; | ||
| 455 | if (elapsedUs(timer) >= us) break; | ||
| 456 | } | ||
| 457 | return rehashes; | ||
| 458 | } | ||
| 459 | |||
| 460 | /* This function performs just a step of rehashing, and only if hashing has | ||
| 461 | * not been paused for our hash table. When we have iterators in the | ||
| 462 | * middle of a rehashing we can't mess with the two hash tables otherwise | ||
| 463 | * some elements can be missed or duplicated. | ||
| 464 | * | ||
| 465 | * This function is called by common lookup or update operations in the | ||
| 466 | * dictionary so that the hash table automatically migrates from H1 to H2 | ||
| 467 | * while it is actively used. */ | ||
| 468 | static void _dictRehashStep(dict *d) { | ||
| 469 | if (d->pauserehash == 0) dictRehash(d,1); | ||
| 470 | } | ||
| 471 | |||
/* Performs rehashing on a single bucket (index 'idx' of table 0).
 * Returns 1 if the bucket was rehashed, 0 if skipped (paused, forbidden, not
 * rehashing, or below the DICT_RESIZE_AVOID thresholds). The threshold logic
 * mirrors dictRehash(). */
int _dictBucketRehash(dict *d, uint64_t idx) {
    if (d->pauserehash != 0) return 0;
    unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
    unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
    if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
    /* If dict_can_resize is DICT_RESIZE_AVOID, we want to avoid rehashing.
     * - If expanding, the threshold is dict_force_resize_ratio which is 4.
     * - If shrinking, the threshold is 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) which is 1/32. */
    if (dict_can_resize == DICT_RESIZE_AVOID &&
        ((s1 > s0 && s1 < dict_force_resize_ratio * s0) ||
         (s1 < s0 && s0 < HASHTABLE_MIN_FILL * dict_force_resize_ratio * s1)))
    {
        return 0;
    }
    rehashEntriesInBucketAtIndex(d, idx);
    /* Finalize the rehash if this happened to be the last occupied bucket. */
    dictCheckRehashingCompleted(d);
    return 1;
}
| 491 | |||
| 492 | /* Add an element to the target hash table */ | ||
| 493 | int dictAdd(dict *d, void *key __stored_key, void *val) | ||
| 494 | { | ||
| 495 | dictEntry *entry = dictAddRaw(d,key,NULL); | ||
| 496 | |||
| 497 | if (!entry) return DICT_ERR; | ||
| 498 | if (!d->type->no_value) dictSetVal(d, entry, val); | ||
| 499 | return DICT_OK; | ||
| 500 | } | ||
| 501 | |||
| 502 | int dictCompareKeys(dict *d, const void *key1, const void *key2) { | ||
| 503 | dictCmpCache cache = {0}; | ||
| 504 | keyCmpFunc cmpFunc = dictGetCmpFunc(d); | ||
| 505 | return cmpFunc(&cache, key1, key2); | ||
| 506 | } | ||
| 507 | |||
| 508 | /* Low level add or find: | ||
| 509 | * This function adds the entry but instead of setting a value returns the | ||
| 510 | * dictEntry structure to the user, that will make sure to fill the value | ||
| 511 | * field as they wish. | ||
| 512 | * | ||
| 513 | * This function is also directly exposed to the user API to be called | ||
| 514 | * mainly in order to store non-pointers inside the hash value, example: | ||
| 515 | * | ||
| 516 | * entry = dictAddRaw(dict,mykey,NULL); | ||
| 517 | * if (entry != NULL) dictSetSignedIntegerVal(entry,1000); | ||
| 518 | * | ||
| 519 | * Return values: | ||
| 520 | * | ||
| 521 | * If key already exists NULL is returned, and "*existing" is populated | ||
| 522 | * with the existing entry if existing is not NULL. | ||
| 523 | * | ||
| 524 | * If key was added, the hash entry is returned to be manipulated by the caller. | ||
| 525 | */ | ||
| 526 | dictEntry *dictAddRaw(dict *d, void *key __stored_key, dictEntry **existing) | ||
| 527 | { | ||
| 528 | /* Get the position for the new key or NULL if the key already exists. */ | ||
| 529 | void *position = dictFindLinkForInsert(d, dictStoredKey2Key(d, key), existing); | ||
| 530 | if (!position) return NULL; | ||
| 531 | |||
| 532 | /* Dup the key if necessary. */ | ||
| 533 | if (d->type->keyDup) key = d->type->keyDup(d, key); | ||
| 534 | |||
| 535 | return dictInsertKeyAtLink(d, key, position); | ||
| 536 | } | ||
| 537 | |||
/* Adds a key in the dict's hashtable at the link returned by a preceding
 * call to dictFindLinkForInsert(). This is a low level function which allows
 * splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
 * used instead. It assumes that dictExpandIfNeeded() was called before. */
dictEntry *dictInsertKeyAtLink(dict *d, void *key __stored_key, dictEntryLink link) {
    dictEntryLink bucket = link; /* It's a bucket, but the API hides that. */
    dictEntry *entry;
    /* If rehashing is ongoing, we insert in table 1, otherwise in table 0.
     * Assert that the provided bucket is the right table. */
    int htidx = dictIsRehashing(d) ? 1 : 0;
    assert(bucket >= &d->ht_table[htidx][0] &&
           bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]);
    if (d->type->no_value) {
        if (!*bucket) {
            /* We can store the key directly in the destination bucket without
             * allocating dictEntry.
             */
            entry = encodeEntryKey(d, key);
            assert(entryIsKey(entry));
        } else {
            /* Allocate an entry without value. */
            entry = createEntryNoValue(key, *bucket);
        }
    } else {
        /* Allocate the memory and store the new entry.
         * Insert the element in top, with the assumption that in a database
         * system it is more likely that recently added entries are accessed
         * more frequently. */
        entry = zmalloc(sizeof(*entry));
        assert(entryIsNormal(entry)); /* Check alignment of allocation */
        entry->key = key;
        entry->next = *bucket;
    }
    /* Link the new head entry into the bucket and account for it. */
    *bucket = entry;
    d->ht_used[htidx]++;

    return entry;
}
| 576 | |||
/* Add or Overwrite:
 * Add an element, discarding the old value if the key already exists.
 * Return 1 if the key was added from scratch, 0 if there was already an
 * element with such key and dictReplace() just performed a value update
 * operation. */
int dictReplace(dict *d, void *key __stored_key, void *val)
{
    dictEntry *entry, *existing;

    /* Try to add the element. If the key
     * does not exists dictAdd will succeed. */
    entry = dictAddRaw(d,key,&existing);
    if (entry) {
        dictSetVal(d, entry, val);
        return 1;
    }

    /* Set the new value and free the old one. Note that it is important
     * to do that in this order, as the value may just be exactly the same
     * as the previous one. In this context, think to reference counting,
     * you want to increment (set), and then decrement (free), and not the
     * reverse. */
    void *oldval = dictGetVal(existing);
    dictSetVal(d, existing, val);
    if (d->type->valDestructor)
        d->type->valDestructor(d, oldval);
    return 0;
}
| 605 | |||
| 606 | /* Add or Find: | ||
| 607 | * dictAddOrFind() is simply a version of dictAddRaw() that always | ||
| 608 | * returns the hash entry of the specified key, even if the key already | ||
| 609 | * exists and can't be added (in that case the entry of the already | ||
| 610 | * existing key is returned.) | ||
| 611 | * | ||
| 612 | * See dictAddRaw() for more information. */ | ||
| 613 | dictEntry *dictAddOrFind(dict *d, void *key __stored_key) { | ||
| 614 | dictEntry *entry, *existing; | ||
| 615 | entry = dictAddRaw(d,key,&existing); | ||
| 616 | return entry ? entry : existing; | ||
| 617 | } | ||
| 618 | |||
/* Search and remove an element. This is a helper function for
 * dictDelete() and dictUnlink(), please check the top comment
 * of those functions. Returns the (possibly freed, when !nofree) entry on
 * success, NULL when the key was not found. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
    dictCmpCache cmpCache = {0};
    uint64_t h, idx;
    dictEntry *he, *prevHe;
    int table;

    /* dict is empty */
    if (dictSize(d) == 0) return NULL;

    h = dictGetHash(d, key);
    idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);

    /* Rehash the hash table if needed */
    _dictRehashStepIfNeeded(d,idx);

    keyCmpFunc cmpFunc = dictGetCmpFunc(d);

    for (table = 0; table <= 1; table++) {
        /* Buckets of table 0 below rehashidx were already migrated to
         * table 1, so skip scanning them there. */
        if (table == 0 && (long)idx < d->rehashidx) continue;
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        he = d->ht_table[table][idx];
        prevHe = NULL;
        while(he) {
            const void *he_key = dictStoredKey2Key(d, dictGetKey(he));
            /* Fast path: pointer equality; otherwise the comparator. */
            if (key == he_key || cmpFunc(&cmpCache, key, he_key)) {
                /* Unlink the element from the list */
                if (prevHe)
                    dictSetNext(prevHe, dictGetNext(he));
                else
                    d->ht_table[table][idx] = dictGetNext(he);
                if (!nofree) {
                    dictFreeUnlinkedEntry(d, he);
                }
                d->ht_used[table]--;
                _dictShrinkIfNeeded(d);
                return he;
            }
            prevHe = he;
            he = dictGetNext(he);
        }
        /* Only consult the second table while a rehash is in progress. */
        if (!dictIsRehashing(d)) break;
    }
    return NULL; /* not found */
}
| 666 | |||
| 667 | /* Remove an element, returning DICT_OK on success or DICT_ERR if the | ||
| 668 | * element was not found. */ | ||
| 669 | int dictDelete(dict *ht, const void *key) { | ||
| 670 | return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR; | ||
| 671 | } | ||
| 672 | |||
| 673 | /* Remove an element from the table, but without actually releasing | ||
| 674 | * the key, value and dictionary entry. The dictionary entry is returned | ||
| 675 | * if the element was found (and unlinked from the table), and the user | ||
| 676 | * should later call `dictFreeUnlinkedEntry()` with it in order to release it. | ||
| 677 | * Otherwise if the key is not found, NULL is returned. | ||
| 678 | * | ||
| 679 | * This function is useful when we want to remove something from the hash | ||
| 680 | * table but want to use its value before actually deleting the entry. | ||
| 681 | * Without this function the pattern would require two lookups: | ||
| 682 | * | ||
| 683 | * entry = dictFind(...); | ||
| 684 | * // Do something with entry | ||
| 685 | * dictDelete(dictionary,entry); | ||
| 686 | * | ||
| 687 | * Thanks to this function it is possible to avoid this, and use | ||
| 688 | * instead: | ||
| 689 | * | ||
| 690 | * entry = dictUnlink(dictionary,entry); | ||
| 691 | * // Do something with entry | ||
| 692 | * dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again. | ||
| 693 | */ | ||
| 694 | dictEntry *dictUnlink(dict *d, const void *key) { | ||
| 695 | return dictGenericDelete(d,key,1); | ||
| 696 | } | ||
| 697 | |||
| 698 | /* You need to call this function to really free the entry after a call | ||
| 699 | * to dictUnlink(). It's safe to call this function with 'he' = NULL. */ | ||
| 700 | void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { | ||
| 701 | if (he == NULL) return; | ||
| 702 | dictFreeKey(d, he); | ||
| 703 | dictFreeVal(d, he); | ||
| 704 | if (!entryIsKey(he)) zfree(decodeMaskedPtr(he)); | ||
| 705 | } | ||
| 706 | |||
/* Destroy an entire dictionary table ('htidx' selects the main table 0 or
 * the rehashing target table 1): releases every entry, frees the bucket
 * array, and resets the table to its empty state. */
int _dictClear(dict *d, int htidx, void(callback)(dict*)) {
    unsigned long i;

    /* Free all the elements. Stop early once the table is known empty
     * (ht_used reaches zero). */
    for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {
        dictEntry *he, *nextHe;
        /* Invoke the callback once every 65536 visited buckets so callers
         * can perform periodic work during a long clear. Beware: for tables
         * smaller than that it is never called at all. */
        if (callback && i != 0 && (i & 65535) == 0) callback(d);

        if ((he = d->ht_table[htidx][i]) == NULL) continue;
        while(he) {
            nextHe = dictGetNext(he);
            dictFreeKey(d, he);
            dictFreeVal(d, he);
            /* Entries encoded directly as keys have no separate allocation. */
            if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
            d->ht_used[htidx]--;
            he = nextHe;
        }
    }
    /* Free the table and the allocated cache structure */
    zfree(d->ht_table[htidx]);
    /* Re-initialize the table */
    _dictReset(d, htidx);
    return DICT_OK; /* never fails */
}
| 734 | |||
/* Clear & Release the hash table: notify the type callbacks first so any
 * external bookkeeping stays consistent, then free every entry of both
 * tables and finally the dict structure itself. */
void dictRelease(dict *d)
{
    /* Someone may be monitoring a dict that started rehashing, before
     * destroying the dict fake completion. */
    if (dictIsRehashing(d) && d->type->rehashingCompleted)
        d->type->rehashingCompleted(d);

    /* Subtract the size of all buckets. */
    if (d->type->bucketChanged)
        d->type->bucketChanged(d, -(long long)dictBuckets(d));

    if (d->type->onDictRelease)
        d->type->onDictRelease(d);

    /* Clear both the main table and the (possibly empty) rehash target. */
    _dictClear(d,0,NULL);
    _dictClear(d,1,NULL);
    zfree(d);
}
| 754 | |||
/* Finds a given key. Like dictFindLink(), yet search bucket even if dict is empty.
 *
 * Returns dictEntryLink reference if found. Otherwise, return NULL.
 *
 * bucket - return pointer to bucket that the key was mapped. unless dict is empty.
 */
static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLink *bucket) {
    dictCmpCache cmpCache = {0};
    dictEntryLink link;
    uint64_t idx;
    int table;

    if (bucket) {
        *bucket = NULL;
    } else {
        /* If dict is empty and no need to find bucket, return NULL */
        if (dictSize(d) == 0) return NULL;
    }

    const uint64_t hash = dictGetHash(d, key);
    idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
    keyCmpFunc cmpFunc = dictGetCmpFunc(d);

    /* Rehash the hash table if needed */
    _dictRehashStepIfNeeded(d,idx);

    /* Consult the second table only while an incremental rehash is active. */
    int tables = (dictIsRehashing(d)) ? 2 : 1;
    for (table = 0; table < tables; table++) {
        /* Buckets of table 0 below rehashidx were already migrated to
         * table 1, so they can be skipped. */
        if (table == 0 && (long)idx < d->rehashidx) continue;
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);

        /* 'link' starts at the bucket slot itself, then walks the chain via
         * each entry's next-field address, so a match returns the incoming
         * pointer of the found entry. */
        link = &(d->ht_table[table][idx]);
        if (bucket) *bucket = link;
        while(link && *link) {
            const void *visitedKey = dictStoredKey2Key(d, dictGetKey(*link));

            /* Pointer equality avoids the comparison callback. */
            if (key == visitedKey || cmpFunc( &cmpCache, key, visitedKey))
                return link;

            link = dictGetNextLink(*link);
        }
    }
    return NULL;
}
| 799 | |||
| 800 | dictEntry *dictFind(dict *d, const void *key) | ||
| 801 | { | ||
| 802 | dictEntryLink link = dictFindLink(d, key, NULL); | ||
| 803 | return (link) ? *link : NULL; | ||
| 804 | } | ||
| 805 | |||
/* Finds the dictEntry using pointer and pre-calculated hash.
 * oldkey is a dead pointer and should not be accessed.
 * the hash value should be provided using dictGetHash.
 * no string / key comparison is performed.
 * return value is a pointer to the dictEntry if found, or NULL if not found. */
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash) {
    dictEntry *he;
    unsigned long idx, table;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    for (table = 0; table <= 1; table++) {
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        /* During rehashing, table-0 buckets below rehashidx are empty. */
        if (table == 0 && (long)idx < d->rehashidx) continue;
        he = d->ht_table[table][idx];
        while(he) {
            /* Identity comparison only: the old key's memory may be freed,
             * so it must never be dereferenced. */
            if (oldptr == dictGetKey(he))
                return he;
            he = dictGetNext(he);
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}
| 829 | |||
| 830 | /* Find a key and return its dictEntryLink reference. Otherwise, return NULL | ||
| 831 | * | ||
| 832 | * A dictEntryLink pointer being used to find preceding dictEntry of searched item. | ||
| 833 | * It is Useful for deletion, addition, unlinking and updating, especially for | ||
| 834 | * dict configured with 'no_value'. In such cases returning only `dictEntry` from | ||
| 835 | * a lookup may be insufficient since it might be opt-out to be the object itself. | ||
| 836 | * By locating preceding dictEntry (dictEntryLink) these ops can be properly handled. | ||
| 837 | * | ||
| 838 | * After calling link = dictFindLink(...), any necessary updates based on returned | ||
| 839 | * link or bucket must be performed immediately after by calling dictSetKeyAtLink() | ||
| 840 | * without any intervening operations on given dict. Otherwise, `dictEntryLink` may | ||
| 841 | * become invalid. Example with kvobj of replacing key with new key: | ||
| 842 | * | ||
| 843 | * link = dictFindLink(d, key, &bucket, 0); | ||
| 844 | * ... Do something, but don't modify the dict ... | ||
| 845 | * // assert(link != NULL); | ||
| 846 | * dictSetKeyAtLink(d, kv, &link, 0); | ||
| 847 | * | ||
| 848 | * To add new value (If no space for the new key, dict will be expanded by | ||
| 849 | * dictSetKeyAtLink() and bucket will be looked up again.): | ||
| 850 | * | ||
| 851 | * link = dictFindLink(d, key, &bucket); | ||
| 852 | * ... Do something, but don't modify the dict ... | ||
| 853 | * // assert(link == NULL); | ||
| 854 | * dictSetKeyAtLink(d, kv, &bucket, 1); | ||
| 855 | * | ||
| 856 | * bucket - return link to bucket that the key was mapped. unless dict is empty. | ||
| 857 | */ | ||
| 858 | dictEntryLink dictFindLink(dict *d, const void *key, dictEntryLink *bucket) { | ||
| 859 | if (bucket) *bucket = NULL; | ||
| 860 | if (unlikely(dictSize(d) == 0)) | ||
| 861 | return NULL; | ||
| 862 | |||
| 863 | return dictFindLinkInternal(d, key, bucket); | ||
| 864 | } | ||
| 865 | |||
/* Set the key with link
 *
 * link: - When `newItem` is set, `link` points to the bucket of the key.
 *       - When `newItem` is not set, `link` points to the link of the key.
 *       - If *link is NULL, dictFindLink() will be called to locate the key.
 *       - On return, get updated, by need, to the inserted key.
 *
 * newItem: 1 = Add a key with a new dictEntry.
 *          0 = Set a key to an existing dictEntry.
 */
void dictSetKeyAtLink(dict *d, void *key __stored_key, dictEntryLink *link, int newItem) {
    dictEntryLink dummy = NULL;
    if (link == NULL) link = &dummy;
    /* Duplicate the key if the dict type provides a keyDup method. */
    void *addedKey = (d->type->keyDup) ? d->type->keyDup(d, key) : key;

    if (newItem) {
        /* Snapshot both table sizes: if dictExpandIfNeeded() reallocates
         * either table, any previously obtained link is stale. */
        signed char snap[2] = {d->ht_size_exp[0], d->ht_size_exp[1] };

        /* Make room if needed for the new key */
        dictExpandIfNeeded(d);

        /* Lookup key's link if tables reallocated or if given link is set to NULL */
        if (snap[0] != d->ht_size_exp[0] || snap[1] != d->ht_size_exp[1] || *link == NULL) {
            dictEntryLink bucket;
            /* Bypass dictFindLink() to search bucket even if dict is empty!!! */
            *link = dictFindLinkInternal(d, dictStoredKey2Key(d, key), &bucket);
            /* The key must not already exist when adding a new item. */
            assert(bucket != NULL);
            assert(*link == NULL);
            *link = bucket; /* On newItem the link should be the bucket */
        }
        dictInsertKeyAtLink(d, addedKey, *link);
        return;
    }

    /* Setting key of existing dictEntry (newItem == 0)*/

    if (*link == NULL) {
        *link = dictFindLink(d, key, NULL);
        assert(*link != NULL);
    }

    dictEntry **de = *link;
    if (entryIsKey(*de)) {
        /* `de` opt-out to be actually a key. Replace key but keep the lsb flags */
        *de = encodeEntryKey(d, addedKey);
    } else {
        /* either dictEntry or dictEntryNoValue */
        (*de)->key = addedKey;
    }
}
| 916 | |||
| 917 | void *dictFetchValue(dict *d, const void *key) { | ||
| 918 | dictEntry *he; | ||
| 919 | |||
| 920 | he = dictFind(d,key); | ||
| 921 | return he ? dictGetVal(he) : NULL; | ||
| 922 | } | ||
| 923 | |||
/* Find an element from the table. A link is returned if the element is found, and
 * the user should later call `dictTwoPhaseUnlinkFree` with it in order to unlink
 * and release it. Otherwise if the key is not found, NULL is returned. These two
 * functions should be used in pair.
 * `dictTwoPhaseUnlinkFind` pauses rehash and `dictTwoPhaseUnlinkFree` resumes rehash.
 *
 * We can use like this:
 *
 * dictEntryLink link = dictTwoPhaseUnlinkFind(db->dict,key->ptr, &table);
 * // Do something, but we can't modify the dict
 * dictTwoPhaseUnlinkFree(db->dict, link, table); // We don't need to lookup again
 *
 * If we want to find an entry before delete this entry, this an optimization to avoid
 * dictFind followed by dictDelete. i.e. the first API is a find, and it gives some info
 * to the second one to avoid repeating the lookup
 */
dictEntryLink dictTwoPhaseUnlinkFind(dict *d, const void *key, int *table_index) {
    dictCmpCache cmpCache = {0};
    uint64_t h, idx, table;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    h = dictGetHash(d, key);
    keyCmpFunc cmpFunc = dictGetCmpFunc(d);

    for (table = 0; table <= 1; table++) {
        idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        /* During rehashing, table-0 buckets below rehashidx are empty. */
        if (table == 0 && (long)idx < d->rehashidx) continue;
        /* Walk the chain through next-field addresses so the returned link
         * is the incoming pointer of the matched entry. */
        dictEntry **ref = &d->ht_table[table][idx];
        while (ref && *ref) {
            const void *de_key = dictStoredKey2Key(d, dictGetKey(*ref));
            if (key == de_key || cmpFunc(&cmpCache, key, de_key)) {
                /* Report which table holds the entry and pause rehashing so
                 * the link stays valid until dictTwoPhaseUnlinkFree(). */
                *table_index = table;
                dictPauseRehashing(d);
                return ref;
            }
            ref = dictGetNextLink(*ref);
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}
| 967 | |||
/* Second phase of dictTwoPhaseUnlinkFind(): unlink the entry referenced by
 * 'plink' from table 'table_index', release it, and resume the rehashing
 * that the find phase paused. Safe to call with a NULL / empty link. */
void dictTwoPhaseUnlinkFree(dict *d, dictEntryLink plink, int table_index) {
    if (plink == NULL || *plink == NULL) return;
    dictEntry *de = *plink;
    d->ht_used[table_index]--;

    /* Splice the entry out of its chain before freeing it. */
    *plink = dictGetNext(de);
    dictFreeKey(d, de);
    dictFreeVal(d, de);
    /* Entries encoded directly as keys have no separate allocation. */
    if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
    _dictShrinkIfNeeded(d);
    dictResumeRehashing(d);
}
| 980 | |||
| 981 | void dictSetKey(dict *d, dictEntry* de, void *key __stored_key) { | ||
| 982 | assert(!d->type->no_value); | ||
| 983 | if (d->type->keyDup) | ||
| 984 | de->key = d->type->keyDup(d, key); | ||
| 985 | else | ||
| 986 | de->key = key; | ||
| 987 | } | ||
| 988 | |||
/* Set the value of the entry, duplicating it if a valDup method is defined. */
void dictSetVal(dict *d, dictEntry *de, void *val) {
    assert(entryHasValue(de));
    de->v.val = d->type->valDup ? d->type->valDup(d, val) : val;
}

/* Store a signed 64-bit integer directly in the entry's value union. */
void dictSetSignedIntegerVal(dictEntry *de, int64_t val) {
    assert(entryHasValue(de));
    de->v.s64 = val;
}

/* Store an unsigned 64-bit integer directly in the entry's value union. */
void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val) {
    assert(entryHasValue(de));
    de->v.u64 = val;
}

/* Store a double directly in the entry's value union. */
void dictSetDoubleVal(dictEntry *de, double val) {
    assert(entryHasValue(de));
    de->v.d = val;
}

/* Add 'val' to the entry's signed integer value; returns the updated value. */
int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val) {
    assert(entryHasValue(de));
    return de->v.s64 += val;
}

/* Add 'val' to the entry's unsigned integer value; returns the updated value. */
uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val) {
    assert(entryHasValue(de));
    return de->v.u64 += val;
}

/* Add 'val' to the entry's double value; returns the updated value. */
double dictIncrDoubleVal(dictEntry *de, double val) {
    assert(entryHasValue(de));
    return de->v.d += val;
}
| 1023 | |||
/* Returns non-zero when the entry pointer actually encodes a bare key
 * ('no_value' optimization) rather than an allocated dictEntry. */
int dictEntryIsKey(const dictEntry *de) {
    return entryIsKey(de);
}

/* Return the key of the entry, decoding the pointer-tagged representations
 * used by 'no_value' dicts. */
void *dictGetKey(const dictEntry *de) {
    /* if entryIsKey() */
    if ((uintptr_t)de & ENTRY_PTR_IS_ODD_KEY) return (void *) de;
    if ((uintptr_t)de & ENTRY_PTR_IS_EVEN_KEY) return decodeMaskedPtr(de);
    /* Regular entry */
    return de->key;
}

/* Return the entry's value as a generic pointer. */
void *dictGetVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.val;
}

/* Return the entry's value as a signed 64-bit integer. */
int64_t dictGetSignedIntegerVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.s64;
}

/* Return the entry's value as an unsigned 64-bit integer. */
uint64_t dictGetUnsignedIntegerVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.u64;
}

/* Return the entry's value as a double. */
double dictGetDoubleVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return de->v.d;
}

/* Returns a mutable reference to the value as a double within the entry. */
double *dictGetDoubleValPtr(dictEntry *de) {
    assert(entryHasValue(de));
    return &de->v.d;
}
| 1061 | |||
/* Returns the 'next' field of the entry or NULL if the entry doesn't have a
 * 'next' field. */
dictEntry *dictGetNext(const dictEntry *de) {
    if (entryIsKey(de)) return NULL; /* there's no next */
    /* Must come after entryIsKey() check */
    return de->next;
}

/* Returns a pointer to the 'next' field in the entry or NULL if the entry
 * doesn't have a next field. */
static dictEntryLink dictGetNextLink(dictEntry *de) {
    if (entryIsKey(de)) return NULL;
    /* Must come after entryIsKey() check */
    return &de->next;
}

/* Set the 'next' pointer of an entry. Must not be called on an entry encoded
 * as a bare key, which carries no next field. */
static void dictSetNext(dictEntry *de, dictEntry *next) {
    assert(!entryIsKey(de));
    /* dictEntryNoValue & dictEntry are layout-compatible */
    de->next = next;
}
| 1083 | |||
| 1084 | /* Returns the memory usage in bytes of the dict, excluding the size of the keys | ||
| 1085 | * and values. */ | ||
| 1086 | size_t dictMemUsage(const dict *d) { | ||
| 1087 | return dictSize(d) * sizeof(dictEntry) + | ||
| 1088 | dictBuckets(d) * sizeof(dictEntry*); | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | size_t dictEntryMemUsage(int noValueDict) { | ||
| 1092 | return (noValueDict) ? sizeof(dictEntryNoValue) :sizeof(dictEntry); | ||
| 1093 | } | ||
| 1094 | |||
/* A fingerprint is a 64 bit number that represents the state of the dictionary
 * at a given time, it's just a few dict properties xored together.
 * When an unsafe iterator is initialized, we get the dict fingerprint, and check
 * the fingerprint again when the iterator is released.
 * If the two fingerprints are different it means that the user of the iterator
 * performed forbidden operations against the dictionary while iterating. */
unsigned long long dictFingerprint(dict *d) {
    unsigned long long integers[6], hash = 0;
    int j;

    /* Capture table pointers, size exponents and element counts of both
     * tables: any structural mutation changes at least one of these. */
    integers[0] = (long) d->ht_table[0];
    integers[1] = d->ht_size_exp[0];
    integers[2] = d->ht_used[0];
    integers[3] = (long) d->ht_table[1];
    integers[4] = d->ht_size_exp[1];
    integers[5] = d->ht_used[1];

    /* We hash N integers by summing every successive integer with the integer
     * hashing of the previous sum. Basically:
     *
     * Result = hash(hash(hash(int1)+int2)+int3) ...
     *
     * This way the same set of integers in a different order will (likely) hash
     * to a different number. */
    for (j = 0; j < 6; j++) {
        hash += integers[j];
        /* For the hashing step we use Tomas Wang's 64 bit integer hash. */
        hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;
        hash = hash ^ (hash >> 24);
        hash = (hash + (hash << 3)) + (hash << 8); // hash * 265
        hash = hash ^ (hash >> 14);
        hash = (hash + (hash << 2)) + (hash << 4); // hash * 21
        hash = hash ^ (hash >> 28);
        hash = hash + (hash << 31);
    }
    return hash;
}
| 1132 | |||
/* Initialize a (non-safe) iterator over 'd' in caller-provided memory.
 * With a non-safe iterator the dict must not be modified during iteration:
 * dictNext() records a fingerprint on first use that dictResetIterator()
 * verifies with an assertion. */
void dictInitIterator(dictIterator *iter, dict *d)
{
    iter->d = d;
    iter->table = 0;
    iter->index = -1; /* -1 marks "iteration not started yet" */
    iter->safe = 0;
    iter->entry = NULL;
    iter->nextEntry = NULL;
}
| 1142 | |||
/* Initialize a safe iterator: dictNext() pauses rehashing for the iterator's
 * lifetime (resumed by dictResetIterator()), so entries may be deleted while
 * iterating. */
void dictInitSafeIterator(dictIterator *iter, dict *d)
{
    dictInitIterator(iter, d);
    iter->safe = 1;
}
| 1148 | |||
| 1149 | void dictResetIterator(dictIterator *iter) | ||
| 1150 | { | ||
| 1151 | if (!(iter->index == -1 && iter->table == 0)) { | ||
| 1152 | if (iter->safe) | ||
| 1153 | dictResumeRehashing(iter->d); | ||
| 1154 | else | ||
| 1155 | assert(iter->fingerprint == dictFingerprint(iter->d)); | ||
| 1156 | } | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | dictIterator *dictGetIterator(dict *d) | ||
| 1160 | { | ||
| 1161 | dictIterator *iter = zmalloc(sizeof(*iter)); | ||
| 1162 | dictInitIterator(iter, d); | ||
| 1163 | return iter; | ||
| 1164 | } | ||
| 1165 | |||
| 1166 | dictIterator *dictGetSafeIterator(dict *d) { | ||
| 1167 | dictIterator *i = dictGetIterator(d); | ||
| 1168 | |||
| 1169 | i->safe = 1; | ||
| 1170 | return i; | ||
| 1171 | } | ||
| 1172 | |||
/* Return the next entry of the iterator, or NULL when the whole dict has
 * been visited. The very first call performs the safety bookkeeping: safe
 * iterators pause rehashing, unsafe ones record a fingerprint checked at
 * reset time. */
dictEntry *dictNext(dictIterator *iter)
{
    while (1) {
        if (iter->entry == NULL) {
            /* First call ever on this iterator? */
            if (iter->index == -1 && iter->table == 0) {
                if (iter->safe)
                    dictPauseRehashing(iter->d);
                else
                    iter->fingerprint = dictFingerprint(iter->d);

                /* skip the rehashed slots in table[0] */
                if (dictIsRehashing(iter->d)) {
                    iter->index = iter->d->rehashidx - 1;
                }
            }
            /* Move to the next bucket, possibly switching to table 1. */
            iter->index++;
            if (iter->index >= (long) DICTHT_SIZE(iter->d->ht_size_exp[iter->table])) {
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                } else {
                    break;
                }
            }
            iter->entry = iter->d->ht_table[iter->table][iter->index];
        } else {
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = dictGetNext(iter->entry);
            return iter->entry;
        }
    }
    return NULL;
}
| 1210 | |||
/* Release an iterator obtained from dictGetIterator()/dictGetSafeIterator():
 * performs the reset bookkeeping (resume rehashing or fingerprint check) and
 * frees the iterator itself. */
void dictReleaseIterator(dictIterator *iter)
{
    dictResetIterator(iter);
    zfree(iter);
}
| 1216 | |||
/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms.
 *
 * Note: the bucket is picked uniformly but the element inside its chain is
 * then picked uniformly within that chain, so elements in longer chains are
 * individually less likely; see dictGetFairRandomKey() for a smoother
 * distribution. Returns NULL if the dict is empty. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    unsigned long h;
    int listlen, listele;

    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);
    if (dictIsRehashing(d)) {
        unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
        do {
            /* We are sure there are no elements in indexes from 0
             * to rehashidx-1 */
            h = d->rehashidx + (randomULong() % (dictBuckets(d) - d->rehashidx));
            /* Indexes >= s0 fall into the second table. */
            he = (h >= s0) ? d->ht_table[1][h - s0] : d->ht_table[0][h];
        } while(he == NULL);
    } else {
        unsigned long m = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
        do {
            h = randomULong() & m;
            he = d->ht_table[0][h];
        } while(he == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    listlen = 0;
    orighe = he;
    while(he) {
        he = dictGetNext(he);
        listlen++;
    }
    listele = random() % listlen;
    he = orighe;
    while(listele--) he = dictGetNext(he);
    return he;
}
| 1258 | |||
/* This function samples the dictionary to return a few keys from random
 * locations.
 *
 * It does not guarantee to return all the keys specified in 'count', nor
 * it does guarantee to return non-duplicated elements, however it will make
 * some effort to do both things.
 *
 * Returned pointers to hash table entries are stored into 'des' that
 * points to an array of dictEntry pointers. The array must have room for
 * at least 'count' elements, that is the argument we pass to the function
 * to tell how many random elements we need.
 *
 * The function returns the number of items stored into 'des', that may
 * be less than 'count' if the hash table has less than 'count' elements
 * inside, or if not enough elements were found in a reasonable amount of
 * steps.
 *
 * Note that this function is not suitable when you need a good distribution
 * of the returned items, but only when you need to "sample" a given number
 * of continuous elements to run some kind of algorithm or to produce
 * statistics. However the function is much faster than dictGetRandomKey()
 * at producing N elements. */
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
    unsigned long j; /* internal hash table id, 0 or 1. */
    unsigned long tables; /* 1 or 2 tables? */
    unsigned long stored = 0, maxsizemask;
    unsigned long maxsteps;

    if (dictSize(d) < count) count = dictSize(d);
    /* Bound the total bucket visits so a sparse table cannot loop forever. */
    maxsteps = count*10;

    /* Try to do a rehashing work proportional to 'count'. */
    for (j = 0; j < count; j++) {
        if (dictIsRehashing(d))
            _dictRehashStep(d);
        else
            break;
    }

    tables = dictIsRehashing(d) ? 2 : 1;
    /* Use the mask of the larger table so indices cover both. */
    maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[0]);
    if (tables > 1 && maxsizemask < DICTHT_SIZE_MASK(d->ht_size_exp[1]))
        maxsizemask = DICTHT_SIZE_MASK(d->ht_size_exp[1]);

    /* Pick a random point inside the larger table. */
    unsigned long i = randomULong() & maxsizemask;
    unsigned long emptylen = 0; /* Continuous empty entries so far. */
    while(stored < count && maxsteps--) {
        for (j = 0; j < tables; j++) {
            /* Invariant of the dict.c rehashing: up to the indexes already
             * visited in ht[0] during the rehashing, there are no populated
             * buckets, so we can skip ht[0] for indexes between 0 and idx-1. */
            if (tables == 2 && j == 0 && i < (unsigned long) d->rehashidx) {
                /* Moreover, if we are currently out of range in the second
                 * table, there will be no elements in both tables up to
                 * the current rehashing index, so we jump if possible.
                 * (this happens when going from big to small table). */
                if (i >= DICTHT_SIZE(d->ht_size_exp[1]))
                    i = d->rehashidx;
                else
                    continue;
            }
            if (i >= DICTHT_SIZE(d->ht_size_exp[j])) continue; /* Out of range for this table. */
            dictEntry *he = d->ht_table[j][i];

            /* Count contiguous empty buckets, and jump to other
             * locations if they reach 'count' (with a minimum of 5). */
            if (he == NULL) {
                emptylen++;
                if (emptylen >= 5 && emptylen > count) {
                    i = randomULong() & maxsizemask;
                    emptylen = 0;
                }
            } else {
                emptylen = 0;
                while (he) {
                    /* Collect all the elements of the buckets found non empty while iterating.
                     * To avoid the issue of being unable to sample the end of a long chain,
                     * we utilize the Reservoir Sampling algorithm to optimize the sampling process.
                     * This means that even when the maximum number of samples has been reached,
                     * we continue sampling until we reach the end of the chain.
                     * See https://en.wikipedia.org/wiki/Reservoir_sampling. */
                    if (stored < count) {
                        des[stored] = he;
                    } else {
                        /* Replace a previous sample with decreasing
                         * probability as more elements are seen. */
                        unsigned long r = randomULong() % (stored + 1);
                        if (r < count) des[r] = he;
                    }

                    he = dictGetNext(he);
                    stored++;
                }
                if (stored >= count) goto end;
            }
        }
        i = (i+1) & maxsizemask;
    }

end:
    /* 'stored' may exceed 'count' because whole chains are consumed. */
    return stored > count ? count : stored;
}
| 1360 | |||
| 1361 | |||
/* Reallocate the dictEntry, key and value allocations in a bucket using the
 * provided allocation functions in order to defrag them.
 *
 * Each defrag callback returns the new location of the allocation, or NULL
 * when the allocation was not moved (as implied by the NULL-checks below);
 * the corresponding pointer in the chain is rewritten only on a move. */
static void dictDefragBucket(dict *d, dictEntry **bucketref, dictDefragFunctions *defragfns) {
    dictDefragAllocFunction *defragalloc = defragfns->defragAlloc;
    dictDefragAllocFunction *defragkey = defragfns->defragKey;
    dictDefragAllocFunction *defragval = defragfns->defragVal;
    while (bucketref && *bucketref) {
        dictEntry *de = *bucketref, *newde = NULL;
        void *newkey = defragkey ? defragkey(dictGetKey(de)) : NULL;

        if (d->type->no_value) {
            if (entryIsKey(de)) {
                /* The "entry" is the key itself (pointer-tagged): only the
                 * bucket slot needs rewriting, preserving the tag bits. */
                if (newkey) *bucketref = encodeEntryKey(d, newkey);
            } else {
                dictEntryNoValue *entry = decodeEntryNoValue(de), *newentry;
                if ((newentry = defragalloc(entry))) {
                    newde = (dictEntry *) newentry;
                    entry = newentry;
                }
                if (newkey) entry->key = newkey;
            }
        } else {
            void *newval = defragval ? defragval(dictGetVal(de)) : NULL;
            assert(entryIsNormal(de));
            newde = defragalloc(de);
            if (newde) de = newde;
            if (newkey) de->key = newkey;
            if (newval) de->v.val = newval;
        }
        /* Re-link the (possibly moved) entry into the chain. */
        if (newde) {
            *bucketref = newde;
        }
        bucketref = dictGetNextLink(*bucketref);
    }
}
| 1397 | |||
| 1398 | /* This is like dictGetRandomKey() from the POV of the API, but will do more | ||
| 1399 | * work to ensure a better distribution of the returned element. | ||
| 1400 | * | ||
| 1401 | * This function improves the distribution because the dictGetRandomKey() | ||
| 1402 | * problem is that it selects a random bucket, then it selects a random | ||
| 1403 | * element from the chain in the bucket. However elements being in different | ||
| 1404 | * chain lengths will have different probabilities of being reported. With | ||
| 1405 | * this function instead what we do is to consider a "linear" range of the table | ||
| 1406 | * that may be constituted of N buckets with chains of different lengths | ||
| 1407 | * appearing one after the other. Then we report a random element in the range. | ||
| 1408 | * In this way we smooth away the problem of different chain lengths. */ | ||
| 1409 | #define GETFAIR_NUM_ENTRIES 15 | ||
| 1410 | dictEntry *dictGetFairRandomKey(dict *d) { | ||
| 1411 | dictEntry *entries[GETFAIR_NUM_ENTRIES]; | ||
| 1412 | unsigned int count = dictGetSomeKeys(d,entries,GETFAIR_NUM_ENTRIES); | ||
| 1413 | /* Note that dictGetSomeKeys() may return zero elements in an unlucky | ||
| 1414 | * run() even if there are actually elements inside the hash table. So | ||
| 1415 | * when we get zero, we call the true dictGetRandomKey() that will always | ||
| 1416 | * yield the element if the hash table has at least one. */ | ||
| 1417 | if (count == 0) return dictGetRandomKey(d); | ||
| 1418 | unsigned int idx = rand() % count; | ||
| 1419 | return entries[idx]; | ||
| 1420 | } | ||
| 1421 | |||
/* Function to reverse bits. Algorithm from:
 * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {
    /* Swap groups of progressively smaller width: first the two word halves,
     * then the quarters inside each half, and so on down to single bits.
     * 'mask' always selects the low portion of each group. */
    unsigned long mask = ~0UL;
    for (unsigned long shift = (CHAR_BIT * sizeof(v)) >> 1; shift > 0; shift >>= 1) {
        mask ^= (mask << shift);
        v = ((v >> shift) & mask) | ((v << shift) & ~mask);
    }
    return v;
}
| 1433 | |||
| 1434 | /* dictScan() is used to iterate over the elements of a dictionary. | ||
| 1435 | * | ||
| 1436 | * Iterating works the following way: | ||
| 1437 | * | ||
| 1438 | * 1) Initially you call the function using a cursor (v) value of 0. | ||
| 1439 | * 2) The function performs one step of the iteration, and returns the | ||
| 1440 | * new cursor value you must use in the next call. | ||
| 1441 | * 3) When the returned cursor is 0, the iteration is complete. | ||
| 1442 | * | ||
| 1443 | * The function guarantees all elements present in the | ||
| 1444 | * dictionary get returned between the start and end of the iteration. | ||
| 1445 | * However it is possible some elements get returned multiple times. | ||
| 1446 | * | ||
| 1447 | * For every element returned, the callback argument 'fn' is | ||
| 1448 | * called with 'privdata' as first argument and the dictionary entry | ||
| 1449 | * 'de' as second argument. | ||
| 1450 | * | ||
| 1451 | * HOW IT WORKS. | ||
| 1452 | * | ||
| 1453 | * The iteration algorithm was designed by Pieter Noordhuis. | ||
| 1454 | * The main idea is to increment a cursor starting from the higher order | ||
| 1455 | * bits. That is, instead of incrementing the cursor normally, the bits | ||
| 1456 | * of the cursor are reversed, then the cursor is incremented, and finally | ||
| 1457 | * the bits are reversed again. | ||
| 1458 | * | ||
| 1459 | * This strategy is needed because the hash table may be resized between | ||
| 1460 | * iteration calls. | ||
| 1461 | * | ||
| 1462 | * dict.c hash tables are always power of two in size, and they | ||
| 1463 | * use chaining, so the position of an element in a given table is given | ||
| 1464 | * by computing the bitwise AND between Hash(key) and SIZE-1 | ||
| 1465 | * (where SIZE-1 is always the mask that is equivalent to taking the rest | ||
| 1466 | * of the division between the Hash of the key and SIZE). | ||
| 1467 | * | ||
| 1468 | * For example if the current hash table size is 16, the mask is | ||
| 1469 | * (in binary) 1111. The position of a key in the hash table will always be | ||
| 1470 | * the last four bits of the hash output, and so forth. | ||
| 1471 | * | ||
| 1472 | * WHAT HAPPENS IF THE TABLE CHANGES IN SIZE? | ||
| 1473 | * | ||
| 1474 | * If the hash table grows, elements can go anywhere in one multiple of | ||
| 1475 | * the old bucket: for example let's say we already iterated with | ||
| 1476 | * a 4 bit cursor 1100 (the mask is 1111 because hash table size = 16). | ||
| 1477 | * | ||
| 1478 | * If the hash table will be resized to 64 elements, then the new mask will | ||
| 1479 | * be 111111. The new buckets you obtain by substituting in ??1100 | ||
| 1480 | * with either 0 or 1 can be targeted only by keys we already visited | ||
| 1481 | * when scanning the bucket 1100 in the smaller hash table. | ||
| 1482 | * | ||
| 1483 | * By iterating the higher bits first, because of the inverted counter, the | ||
| 1484 | * cursor does not need to restart if the table size gets bigger. It will | ||
| 1485 | * continue iterating using cursors without '1100' at the end, and also | ||
| 1486 | * without any other combination of the final 4 bits already explored. | ||
| 1487 | * | ||
| 1488 | * Similarly when the table size shrinks over time, for example going from | ||
| 1489 | * 16 to 8, if a combination of the lower three bits (the mask for size 8 | ||
| 1490 | * is 111) were already completely explored, it would not be visited again | ||
| 1491 | * because we are sure we tried, for example, both 0111 and 1111 (all the | ||
| 1492 | * variations of the higher bit) so we don't need to test it again. | ||
| 1493 | * | ||
| 1494 | * WAIT... YOU HAVE *TWO* TABLES DURING REHASHING! | ||
| 1495 | * | ||
| 1496 | * Yes, this is true, but we always iterate the smaller table first, then | ||
| 1497 | * we test all the expansions of the current cursor into the larger | ||
| 1498 | * table. For example if the current cursor is 101 and we also have a | ||
| 1499 | * larger table of size 16, we also test (0)101 and (1)101 inside the larger | ||
| 1500 | * table. This reduces the problem back to having only one table, where | ||
| 1501 | * the larger one, if it exists, is just an expansion of the smaller one. | ||
| 1502 | * | ||
| 1503 | * LIMITATIONS | ||
| 1504 | * | ||
| 1505 | * This iterator is completely stateless, and this is a huge advantage, | ||
| 1506 | * including no additional memory used. | ||
| 1507 | * | ||
| 1508 | * The disadvantages resulting from this design are: | ||
| 1509 | * | ||
| 1510 | * 1) It is possible we return elements more than once. However this is usually | ||
| 1511 | * easy to deal with in the application level. | ||
| 1512 | * 2) The iterator must return multiple elements per call, as it needs to always | ||
| 1513 | * return all the keys chained in a given bucket, and all the expansions, so | ||
| 1514 | * we are sure we don't miss keys moving during rehashing. | ||
| 1515 | * 3) The reverse cursor is somewhat hard to understand at first, but this | ||
| 1516 | * comment is supposed to help. | ||
| 1517 | */ | ||
/* Plain scan entry point (see the long HOW IT WORKS comment above): simply
 * dictScanDefrag() with no defrag callbacks. */
unsigned long dictScan(dict *d,
                       unsigned long v,
                       dictScanFunction *fn,
                       void *privdata)
{
    return dictScanDefrag(d, v, fn, NULL, privdata);
}
| 1525 | |||
/* Visit every entry in a single bucket chain, calling 'fn' on each one.
 * If 'defragfns' is not NULL the bucket is defragmented first (entries may be
 * reallocated before being visited). 'plink' is passed to the callback as the
 * link pointing at the visited entry, which allows the callback to unlink
 * (delete) the entry; the loop detects such a deletion by checking whether
 * '*plink' still points at the visited entry. */
void dictScanDefragBucket(dict *d,dictScanFunction *fn,
                          dictDefragFunctions *defragfns,
                          void *privdata,
                          dictEntry **bucketref) {
    dictEntry **plink, *de, *next;

    /* Emit entries at bucket */
    if (defragfns) dictDefragBucket(d, bucketref, defragfns);

    de = *bucketref;
    plink = bucketref;
    while (de) {
        /* Save 'next' before the callback runs, in case it deletes 'de'. */
        next = dictGetNext(de);
        fn(privdata, de, plink);

        if (!next) break; /* if last element, break */

        /* if `*plink` still pointing to 'de', then it means that the
         * visited item wasn't deleted by fn(); only then is it safe to
         * advance the link into 'de' itself. */
        if (*plink == de)
            plink = &(de->next);

        de = next;
    }
}
| 1551 | |||
/* Like dictScan, but additionally reallocates the memory used by the dict
 * entries using the provided allocation function. This feature was added for
 * the active defrag feature.
 *
 * The 'defragfns' callbacks are called with a pointer to memory that callback
 * can reallocate. The callbacks should return a new memory address or NULL,
 * where NULL means that no reallocation happened and the old memory is still
 * valid.
 *
 * Returns the next cursor to pass on the following call, or 0 when the
 * iteration is complete (see the reverse-cursor explanation above dictScan). */
unsigned long dictScanDefrag(dict *d,
                             unsigned long v,
                             dictScanFunction *fn,
                             dictDefragFunctions *defragfns,
                             void *privdata)
{
    int htidx0, htidx1;
    unsigned long m0, m1;

    if (dictSize(d) == 0) return 0;

    /* This is needed in case the scan callback tries to do dictFind or alike. */
    dictPauseRehashing(d);

    if (!dictIsRehashing(d)) {
        /* Single-table case: visit the bucket the cursor points at, then
         * advance the reversed cursor over the masked bits only. */
        htidx0 = 0;
        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);
        dictScanDefragBucket(d, fn, defragfns, privdata, &d->ht_table[htidx0][v & m0]);

        /* Set unmasked bits so incrementing the reversed cursor
         * operates on the masked bits */
        v |= ~m0;

        /* Increment the reverse cursor */
        v = rev(v);
        v++;
        v = rev(v);

    } else {
        htidx0 = 0;
        htidx1 = 1;

        /* Make sure t0 is the smaller and t1 is the bigger table */
        if (DICTHT_SIZE(d->ht_size_exp[htidx0]) > DICTHT_SIZE(d->ht_size_exp[htidx1])) {
            htidx0 = 1;
            htidx1 = 0;
        }

        m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);
        m1 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx1]);

        /* Visit the cursor's bucket in the smaller table first... */
        dictScanDefragBucket(d, fn, defragfns, privdata, &d->ht_table[htidx0][v & m0]);

        /* Iterate over indices in larger table that are the expansion
         * of the index pointed to by the cursor in the smaller table */
        do {
            dictScanDefragBucket(d, fn, defragfns, privdata, &d->ht_table[htidx1][v & m1]);

            /* Increment the reverse cursor not covered by the smaller mask.*/
            v |= ~m1;
            v = rev(v);
            v++;
            v = rev(v);

            /* Continue while bits covered by mask difference is non-zero */
        } while (v & (m0 ^ m1));
    }

    dictResumeRehashing(d);

    return v;
}
| 1622 | |||
| 1623 | /* ------------------------- private functions ------------------------------ */ | ||
| 1624 | |||
/* Because we may need to allocate huge memory chunk at once when dict
 * resizes, we will check this allocation is allowed or not if the dict
 * type has resizeAllowed member function.
 *
 * 'size' is the requested element count; the callback receives the byte size
 * of the bucket array that would be allocated, plus the current load factor
 * of table 0. Returns non-zero when resizing is permitted. */
static int dictTypeResizeAllowed(dict *d, size_t size) {
    if (d->type->resizeAllowed == NULL) return 1;
    return d->type->resizeAllowed(
                    DICTHT_SIZE(_dictNextExp(size)) * sizeof(dictEntry*),
                    (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
}
| 1634 | |||
/* Returning DICT_OK indicates a successful expand or the dictionary is undergoing rehashing,
 * and there is nothing else we need to do about this dictionary currently. While DICT_ERR indicates
 * that expand has not been triggered (maybe try shrinking?) */
int dictExpandIfNeeded(dict *d) {
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) {
        dictExpand(d, DICT_HT_INITIAL_SIZE);
        return DICT_OK;
    }

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
         d->ht_used[0] >= DICTHT_SIZE(d->ht_size_exp[0])) ||
        (dict_can_resize != DICT_RESIZE_FORBID &&
         d->ht_used[0] >= dict_force_resize_ratio * DICTHT_SIZE(d->ht_size_exp[0])))
    {
        /* The dict type may still veto the (possibly huge) allocation. */
        if (dictTypeResizeAllowed(d, d->ht_used[0] + 1))
            dictExpand(d, d->ht_used[0] + 1);
        return DICT_OK;
    }
    return DICT_ERR;
}
| 1663 | |||
| 1664 | /* Expand the hash table if needed (OK=Expanded, ERR=Not expanded) */ | ||
| 1665 | static int _dictExpandIfNeeded(dict *d) { | ||
| 1666 | /* Automatic resizing is disallowed. Return */ | ||
| 1667 | if (d->pauseAutoResize > 0) return DICT_ERR; | ||
| 1668 | |||
| 1669 | return dictExpandIfNeeded(d); | ||
| 1670 | } | ||
| 1671 | |||
/* Returning DICT_OK indicates a successful shrinking or the dictionary is undergoing rehashing,
 * and there is nothing else we need to do about this dictionary currently. While DICT_ERR indicates
 * that shrinking has not been triggered (maybe try expanding?) */
int dictShrinkIfNeeded(dict *d) {
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */
    if (DICTHT_SIZE(d->ht_size_exp[0]) <= DICT_HT_INITIAL_SIZE) return DICT_OK;

    /* If we reached below 1:8 elements/buckets ratio, and we are allowed to resize
     * the hash table (global setting) or we should avoid it but the ratio is below 1:32,
     * we'll trigger a resize of the hash table. */
    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
         d->ht_used[0] * HASHTABLE_MIN_FILL <= DICTHT_SIZE(d->ht_size_exp[0])) ||
        (dict_can_resize != DICT_RESIZE_FORBID &&
         d->ht_used[0] * HASHTABLE_MIN_FILL * dict_force_resize_ratio <= DICTHT_SIZE(d->ht_size_exp[0])))
    {
        /* The dict type may still veto the reallocation. */
        if (dictTypeResizeAllowed(d, d->ht_used[0]))
            dictShrink(d, d->ht_used[0]);
        return DICT_OK;
    }
    return DICT_ERR;
}
| 1696 | |||
| 1697 | static void _dictShrinkIfNeeded(dict *d) | ||
| 1698 | { | ||
| 1699 | /* Automatic resizing is disallowed. Return */ | ||
| 1700 | if (d->pauseAutoResize > 0) return; | ||
| 1701 | |||
| 1702 | dictShrinkIfNeeded(d); | ||
| 1703 | } | ||
| 1704 | |||
/* Perform one opportunistic step of incremental rehashing, if rehashing is
 * in progress and not paused. 'visitedIdx' is the ht0 bucket index the caller
 * is about to touch, letting us rehash that exact bucket while it is hot in
 * the CPU cache. */
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx) {
    if ((!dictIsRehashing(d)) || (d->pauserehash != 0))
        return;
    /* rehashing not in progress if rehashidx == -1 */
    if ((long)visitedIdx >= d->rehashidx && d->ht_table[0][visitedIdx]) {
        /* If we have a valid hash entry at `idx` in ht0, we perform
         * rehash on the bucket at `idx` (being more CPU cache friendly) */
        _dictBucketRehash(d, visitedIdx);
    } else {
        /* If the hash entry is not in ht0, we rehash the buckets based
         * on the rehashidx (not CPU cache friendly). */
        dictRehash(d,1);
    }
}
| 1719 | |||
/* Our hash table capability is a power of two. Return the exponent E of the
 * smallest power of two >= 'size' (i.e. ceil(log2(size))), clamped to the
 * initial exponent below and to the width of 'long' above. */
static signed char _dictNextExp(unsigned long size)
{
    if (size <= DICT_HT_INITIAL_SIZE) return DICT_HT_INITIAL_EXP;
    /* Cap the exponent so 1UL << exp cannot overflow a long. */
    if (size >= LONG_MAX) return (8*sizeof(long)-1);

    /* clzl(size-1) counts the leading zero bits; width minus that count is
     * the position of the highest set bit of size-1, plus one. */
    return 8*sizeof(long) - __builtin_clzl(size-1);
}
| 1728 | |||
/* Finds and returns the link within the dict where the provided key should
 * be inserted using dictInsertKeyAtLink() if the key does not already exist in
 * the dict. If the key exists in the dict, NULL is returned and the optional
 * 'existing' entry pointer is populated, if provided.
 *
 * As a side effect this may perform one incremental rehash step and may
 * trigger an automatic expansion of the table. */
dictEntryLink dictFindLinkForInsert(dict *d, const void *key, dictEntry **existing) {
    unsigned long idx, table;
    dictCmpCache cmpCache = {0};
    dictEntry *he;
    uint64_t hash = dictGetHash(d, key);
    if (existing) *existing = NULL;
    idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);

    /* Rehash the hash table if needed */
    _dictRehashStepIfNeeded(d,idx);

    /* Expand the hash table if needed */
    _dictExpandIfNeeded(d);
    keyCmpFunc cmpFunc = dictGetCmpFunc(d);

    /* Look for the key in both tables (only table 0 when not rehashing). */
    for (table = 0; table <= 1; table++) {
        /* Buckets below rehashidx in ht0 were already migrated to ht1. */
        if (table == 0 && (long)idx < d->rehashidx) continue;
        idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
        /* Search if this slot does not already contain the given key */
        he = d->ht_table[table][idx];
        while(he) {
            const void *he_key = dictStoredKey2Key(d, dictGetKey(he));
            /* Pointer identity is a cheap fast path before the comparator. */
            if (key == he_key || cmpFunc(&cmpCache, key, he_key)) {
                if (existing) *existing = he;
                return NULL;
            }
            he = dictGetNext(he);
        }
        if (!dictIsRehashing(d)) break;
    }

    /* If we are in the process of rehashing the hash table, the bucket is
     * always returned in the context of the second (new) hash table. */
    dictEntry **bucket = &d->ht_table[dictIsRehashing(d) ? 1 : 0][idx];
    return bucket;
}
| 1769 | |||
| 1770 | |||
/* Remove all entries from both tables, invoking 'callback' periodically (it
 * may be NULL) as _dictClear() progresses, and reset the dict to its
 * pristine non-rehashing state. */
void dictEmpty(dict *d, void(callback)(dict*)) {
    /* Someone may be monitoring a dict that started rehashing, before
     * destroying the dict fake completion. */
    if (dictIsRehashing(d) && d->type->rehashingCompleted)
        d->type->rehashingCompleted(d);

    /* Subtract the size of all buckets. */
    if (d->type->bucketChanged)
        d->type->bucketChanged(d, -(long long)dictBuckets(d));

    _dictClear(d,0,callback);
    _dictClear(d,1,callback);
    /* Back to "not rehashing" with no outstanding pauses. */
    d->rehashidx = -1;
    d->pauserehash = 0;
    d->pauseAutoResize = 0;
}
| 1787 | |||
/* Set the global resize policy (ENABLE / AVOID / FORBID) consulted by the
 * automatic expand/shrink heuristics above. */
void dictSetResizeEnabled(dictResizeEnable enable) {
    dict_can_resize = enable;
}
| 1791 | |||
/* Hash 'key' using the dict type's hash function.
 * Compiler inlines this for internal calls within dict.c (verified with -O3). */
uint64_t dictGetHash(dict *d, const void *key) {
    return d->type->hashFunction(key);
}
| 1796 | |||
/* Provides the old and new ht size for a given dictionary during rehashing. This method
 * should only be invoked during initialization/rehashing. */
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) {
    /* Invalid method usage if rehashing isn't ongoing. */
    assert(dictIsRehashing(d));
    *from_size = DICTHT_SIZE(d->ht_size_exp[0]);  /* source table */
    *to_size = DICTHT_SIZE(d->ht_size_exp[1]);    /* destination table */
}
| 1805 | |||
| 1806 | /* ------------------------------- Debugging ---------------------------------*/ | ||
| 1807 | #define DICT_STATS_VECTLEN 50 | ||
/* Release a dictStats object allocated by dictGetStatsHt(). */
void dictFreeStats(dictStats *stats) {
    zfree(stats->clvector);
    zfree(stats);
}
| 1812 | |||
| 1813 | void dictCombineStats(dictStats *from, dictStats *into) { | ||
| 1814 | into->buckets += from->buckets; | ||
| 1815 | into->maxChainLen = (from->maxChainLen > into->maxChainLen) ? from->maxChainLen : into->maxChainLen; | ||
| 1816 | into->totalChainLen += from->totalChainLen; | ||
| 1817 | into->htSize += from->htSize; | ||
| 1818 | into->htUsed += from->htUsed; | ||
| 1819 | for (int i = 0; i < DICT_STATS_VECTLEN; i++) { | ||
| 1820 | into->clvector[i] += from->clvector[i]; | ||
| 1821 | } | ||
| 1822 | } | ||
| 1823 | |||
/* Collect statistics for table 'htidx' (0 = main, 1 = rehashing target).
 * When 'full' is zero only the cheap size/used counters are filled; when
 * non-zero every bucket is walked to build the chain-length histogram.
 * The caller owns the returned object and must free it with dictFreeStats(). */
dictStats *dictGetStatsHt(dict *d, int htidx, int full) {
    unsigned long *clvector = zcalloc(sizeof(unsigned long) * DICT_STATS_VECTLEN);
    dictStats *stats = zcalloc(sizeof(dictStats));
    stats->htidx = htidx;
    stats->clvector = clvector;
    stats->htSize = DICTHT_SIZE(d->ht_size_exp[htidx]);
    stats->htUsed = d->ht_used[htidx];
    if (!full) return stats;
    /* Compute stats. */
    for (unsigned long i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]); i++) {
        dictEntry *he;

        if (d->ht_table[htidx][i] == NULL) {
            /* Slot 0 of the histogram counts empty buckets. */
            clvector[0]++;
            continue;
        }
        stats->buckets++;
        /* For each hash entry on this slot... */
        unsigned long chainlen = 0;
        he = d->ht_table[htidx][i];
        while(he) {
            chainlen++;
            he = dictGetNext(he);
        }
        /* Chains longer than the vector are lumped into the last slot. */
        clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
        if (chainlen > stats->maxChainLen) stats->maxChainLen = chainlen;
        stats->totalChainLen += chainlen;
    }

    return stats;
}
| 1855 | |||
| 1856 | /* Generates human readable stats. */ | ||
| 1857 | size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full) { | ||
| 1858 | if (stats->htUsed == 0) { | ||
| 1859 | return snprintf(buf,bufsize, | ||
| 1860 | "Hash table %d stats (%s):\n" | ||
| 1861 | "No stats available for empty dictionaries\n", | ||
| 1862 | stats->htidx, (stats->htidx == 0) ? "main hash table" : "rehashing target"); | ||
| 1863 | } | ||
| 1864 | size_t l = 0; | ||
| 1865 | l += snprintf(buf + l, bufsize - l, | ||
| 1866 | "Hash table %d stats (%s):\n" | ||
| 1867 | " table size: %lu\n" | ||
| 1868 | " number of elements: %lu\n", | ||
| 1869 | stats->htidx, (stats->htidx == 0) ? "main hash table" : "rehashing target", | ||
| 1870 | stats->htSize, stats->htUsed); | ||
| 1871 | if (full) { | ||
| 1872 | l += snprintf(buf + l, bufsize - l, | ||
| 1873 | " different slots: %lu\n" | ||
| 1874 | " max chain length: %lu\n" | ||
| 1875 | " avg chain length (counted): %.02f\n" | ||
| 1876 | " avg chain length (computed): %.02f\n" | ||
| 1877 | " Chain length distribution:\n", | ||
| 1878 | stats->buckets, stats->maxChainLen, | ||
| 1879 | (float) stats->totalChainLen / stats->buckets, (float) stats->htUsed / stats->buckets); | ||
| 1880 | |||
| 1881 | for (unsigned long i = 0; i < DICT_STATS_VECTLEN - 1; i++) { | ||
| 1882 | if (stats->clvector[i] == 0) continue; | ||
| 1883 | if (l >= bufsize) break; | ||
| 1884 | l += snprintf(buf + l, bufsize - l, | ||
| 1885 | " %ld: %ld (%.02f%%)\n", | ||
| 1886 | i, stats->clvector[i], ((float) stats->clvector[i] / stats->htSize) * 100); | ||
| 1887 | } | ||
| 1888 | } | ||
| 1889 | |||
| 1890 | /* Make sure there is a NULL term at the end. */ | ||
| 1891 | buf[bufsize-1] = '\0'; | ||
| 1892 | /* Unlike snprintf(), return the number of characters actually written. */ | ||
| 1893 | return strlen(buf); | ||
| 1894 | } | ||
| 1895 | |||
/* Write human-readable stats for the whole dict into 'buf': the main table,
 * plus the rehashing target table when a rehash is in progress. The output is
 * always NUL-terminated and truncated to 'bufsize'. */
void dictGetStats(char *buf, size_t bufsize, dict *d, int full) {
    size_t l;
    char *orig_buf = buf;
    size_t orig_bufsize = bufsize;

    dictStats *mainHtStats = dictGetStatsHt(d, 0, full);
    l = dictGetStatsMsg(buf, bufsize, mainHtStats, full);
    dictFreeStats(mainHtStats);
    /* Append the second table's report after the first one. */
    buf += l;
    bufsize -= l;
    if (dictIsRehashing(d) && bufsize > 0) {
        dictStats *rehashHtStats = dictGetStatsHt(d, 1, full);
        dictGetStatsMsg(buf, bufsize, rehashHtStats, full);
        dictFreeStats(rehashHtStats);
    }
    /* Make sure there is a NULL term at the end. */
    orig_buf[orig_bufsize-1] = '\0';
}
| 1914 | |||
| 1915 | static int dictDefaultCompare(dictCmpCache *cache, const void *key1, const void *key2) { | ||
| 1916 | (void)(cache); /*unused*/ | ||
| 1917 | return key1 == key2; | ||
| 1918 | } | ||
| 1919 | |||
| 1920 | /* ------------------------------- Benchmark ---------------------------------*/ | ||
| 1921 | |||
| 1922 | #ifdef REDIS_TEST | ||
| 1923 | #include "testhelp.h" | ||
| 1924 | |||
| 1925 | #define UNUSED(V) ((void) V) | ||
| 1926 | #define TEST(name) printf("test — %s\n", name); | ||
| 1927 | |||
/* Benchmark hash callback: hash the key as a NUL-terminated C string. */
uint64_t hashCallback(const void *key) {
    return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
}
| 1931 | |||
| 1932 | int compareCallback(dictCmpCache *cache, const void *key1, const void *key2) { | ||
| 1933 | int l1,l2; | ||
| 1934 | UNUSED(cache); | ||
| 1935 | |||
| 1936 | l1 = strlen((char*)key1); | ||
| 1937 | l2 = strlen((char*)key2); | ||
| 1938 | if (l1 != l2) return 0; | ||
| 1939 | return memcmp(key1, key2, l1) == 0; | ||
| 1940 | } | ||
| 1941 | |||
/* Benchmark destructor callback: keys/values are zmalloc'd strings. */
void freeCallback(dict *d, void *val) {
    UNUSED(d);

    zfree(val);
}
| 1947 | |||
/* Return a freshly zmalloc'd decimal string for 'value'; caller frees. */
char *stringFromLongLong(long long value) {
    char tmp[32];
    int len = snprintf(tmp, sizeof(tmp), "%lld", value);
    char *s = zmalloc(len + 1);
    memcpy(s, tmp, len + 1); /* +1 copies the NUL terminator too */
    return s;
}
| 1959 | |||
/* Return a zmalloc'd random substring (length 100..500) of a lazily
 * initialized 10000-char random printable-ASCII string; caller frees.
 * Note: uses static state, so this is not thread-safe (test-only code). */
char *stringFromSubstring(void) {
#define LARGE_STRING_SIZE 10000
#define MIN_STRING_SIZE 100
#define MAX_STRING_SIZE 500
    static char largeString[LARGE_STRING_SIZE + 1];
    static int init = 0;
    if (init == 0) {
        /* Generate a large string */
        for (size_t i = 0; i < LARGE_STRING_SIZE; i++) {
            /* Random printable ASCII character (33 to 126) */
            largeString[i] = 33 + (rand() % 94);
        }
        /* Null-terminate the large string */
        largeString[LARGE_STRING_SIZE] = '\0';
        init = 1;
    }
    /* Randomly choose a size between minSize and maxSize */
    size_t substringSize = MIN_STRING_SIZE + (rand() % (MAX_STRING_SIZE - MIN_STRING_SIZE + 1));
    size_t startIndex = rand() % (LARGE_STRING_SIZE - substringSize + 1);
    /* Allocate memory for the substring (+1 for null terminator) */
    char *s = zmalloc(substringSize + 1);
    memcpy(s, largeString + startIndex, substringSize); // Copy the substring
    s[substringSize] = '\0'; // Null-terminate the string
    return s;
}
| 1985 | |||
/* dict type used by the benchmark/tests below: string keys hashed with
 * hashCallback, compared with compareCallback and freed with freeCallback
 * (remaining callbacks unused). */
dictType BenchmarkDictType = {
    hashCallback,
    NULL,
    NULL,
    compareCallback,
    freeCallback,
    NULL,
    NULL
};
| 1995 | |||
| 1996 | #define start_benchmark() start = timeInMilliseconds() | ||
| 1997 | #define end_benchmark(msg) do { \ | ||
| 1998 | elapsed = timeInMilliseconds()-start; \ | ||
| 1999 | printf(msg ": %ld items in %lld ms\n", count, elapsed); \ | ||
| 2000 | } while(0) | ||
| 2001 | |||
| 2002 | /* ./redis-server test dict [<count> | --accurate] */ | ||
| 2003 | int dictTest(int argc, char **argv, int flags) { | ||
| 2004 | long j; | ||
| 2005 | long long start, elapsed; | ||
| 2006 | int retval; | ||
| 2007 | dict *d = dictCreate(&BenchmarkDictType); | ||
| 2008 | dictEntry* de = NULL; | ||
| 2009 | dictEntry* existing = NULL; | ||
| 2010 | long count = 0; | ||
| 2011 | unsigned long new_dict_size, current_dict_used, remain_keys; | ||
| 2012 | int accurate = (flags & REDIS_TEST_ACCURATE); | ||
| 2013 | |||
| 2014 | if (argc == 4) { | ||
| 2015 | if (accurate) { | ||
| 2016 | count = 5000000; | ||
| 2017 | } else { | ||
| 2018 | count = strtol(argv[3],NULL,10); | ||
| 2019 | } | ||
| 2020 | } else { | ||
| 2021 | count = 5000; | ||
| 2022 | } | ||
| 2023 | |||
| 2024 | TEST("Add 16 keys and verify dict resize is ok") { | ||
| 2025 | dictSetResizeEnabled(DICT_RESIZE_ENABLE); | ||
| 2026 | for (j = 0; j < 16; j++) { | ||
| 2027 | retval = dictAdd(d,stringFromLongLong(j),(void*)j); | ||
| 2028 | assert(retval == DICT_OK); | ||
| 2029 | } | ||
| 2030 | while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); | ||
| 2031 | assert(dictSize(d) == 16); | ||
| 2032 | assert(dictBuckets(d) == 16); | ||
| 2033 | } | ||
| 2034 | |||
| 2035 | TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to (dict_force_resize_ratio * 16)") { | ||
| 2036 | /* Use DICT_RESIZE_AVOID to disable the dict resize, and pad | ||
| 2037 | * the number of keys to (dict_force_resize_ratio * 16), so we can satisfy | ||
| 2038 | * dict_force_resize_ratio in next test. */ | ||
| 2039 | dictSetResizeEnabled(DICT_RESIZE_AVOID); | ||
| 2040 | for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) { | ||
| 2041 | retval = dictAdd(d,stringFromLongLong(j),(void*)j); | ||
| 2042 | assert(retval == DICT_OK); | ||
| 2043 | } | ||
| 2044 | current_dict_used = dict_force_resize_ratio * 16; | ||
| 2045 | assert(dictSize(d) == current_dict_used); | ||
| 2046 | assert(dictBuckets(d) == 16); | ||
| 2047 | } | ||
| 2048 | |||
| 2049 | TEST("Add one more key, trigger the dict resize") { | ||
| 2050 | retval = dictAdd(d,stringFromLongLong(current_dict_used),(void*)(current_dict_used)); | ||
| 2051 | assert(retval == DICT_OK); | ||
| 2052 | current_dict_used++; | ||
| 2053 | new_dict_size = 1UL << _dictNextExp(current_dict_used); | ||
| 2054 | assert(dictSize(d) == current_dict_used); | ||
| 2055 | assert(DICTHT_SIZE(d->ht_size_exp[0]) == 16); | ||
| 2056 | assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size); | ||
| 2057 | |||
| 2058 | /* Wait for rehashing. */ | ||
| 2059 | dictSetResizeEnabled(DICT_RESIZE_ENABLE); | ||
| 2060 | while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000); | ||
| 2061 | assert(dictSize(d) == current_dict_used); | ||
| 2062 | assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size); | ||
| 2063 | assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0); | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | TEST("Delete keys until we can trigger shrink in next test") { | ||
| 2067 | /* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. */ | ||
| 2068 | for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) { | ||
| 2069 | char *key = stringFromLongLong(j); | ||
| 2070 | retval = dictDelete(d, key); | ||
| 2071 | zfree(key); | ||
| 2072 | assert(retval == DICT_OK); | ||
| 2073 | } | ||
| 2074 | current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1; | ||
| 2075 | assert(dictSize(d) == current_dict_used); | ||
| 2076 | assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size); | ||
| 2077 | assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0); | ||
| 2078 | } | ||
| 2079 | |||
    TEST("Delete one more key, trigger the dict resize") {
        /* Drop below the minimum fill ratio: this single deletion should
         * start a shrink to the next power of two that fits the keys. */
        current_dict_used--;
        char *key = stringFromLongLong(current_dict_used);
        retval = dictDelete(d, key);
        zfree(key);
        unsigned long oldDictSize = new_dict_size;
        new_dict_size = 1UL << _dictNextExp(current_dict_used);
        assert(retval == DICT_OK);
        assert(dictSize(d) == current_dict_used);
        /* Shrink in progress: old table still in ht[0], smaller target
         * table allocated in ht[1]. */
        assert(DICTHT_SIZE(d->ht_size_exp[0]) == oldDictSize);
        assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);

        /* Wait for rehashing. */
        while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
        assert(dictSize(d) == current_dict_used);
        /* After the rehash the shrunken table is ht[0] and ht[1] is gone. */
        assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
        assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
    }
| 2098 | |||
    TEST("Empty the dictionary and add 128 keys") {
        /* Reset to a known state for the DICT_RESIZE_AVOID tests below:
         * 128 keys in a fully-rehashed 128-bucket table. */
        dictEmpty(d, NULL);
        for (j = 0; j < 128; j++) {
            retval = dictAdd(d,stringFromLongLong(j),(void*)j);
            assert(retval == DICT_OK);
        }
        /* Drive any pending incremental rehash to completion. */
        while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
        assert(dictSize(d) == 128);
        assert(dictBuckets(d) == 128);
    }
| 2109 | |||
    TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") {
        /* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce
         * the number of keys until we can trigger shrinking in next test. */
        dictSetResizeEnabled(DICT_RESIZE_AVOID);
        /* Under DICT_RESIZE_AVOID a shrink only happens once fill drops
         * below 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio); keep
         * exactly one key more than that threshold. */
        remain_keys = DICTHT_SIZE(d->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1;
        for (j = remain_keys; j < 128; j++) {
            char *key = stringFromLongLong(j);
            retval = dictDelete(d, key);
            zfree(key);
            assert(retval == DICT_OK);
        }
        current_dict_used = remain_keys;
        assert(dictSize(d) == remain_keys);
        /* No shrink has happened yet: bucket count is unchanged. */
        assert(dictBuckets(d) == 128);
    }
| 2125 | |||
    TEST("Delete one more key, trigger the dict resize") {
        /* Cross the forced-shrink threshold: even with DICT_RESIZE_AVOID
         * still in effect, this deletion must start shrinking the table. */
        current_dict_used--;
        char *key = stringFromLongLong(current_dict_used);
        retval = dictDelete(d, key);
        zfree(key);
        new_dict_size = 1UL << _dictNextExp(current_dict_used);
        assert(retval == DICT_OK);
        assert(dictSize(d) == current_dict_used);
        /* Shrink in progress: 128-bucket table in ht[0], new one in ht[1]. */
        assert(DICTHT_SIZE(d->ht_size_exp[0]) == 128);
        assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);

        /* Wait for rehashing. Re-enable normal resizing first so the
         * incremental rehash can run to completion. */
        dictSetResizeEnabled(DICT_RESIZE_ENABLE);
        while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
        assert(dictSize(d) == current_dict_used);
        assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
        assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
    }
| 2144 | |||
    TEST("Restore to original state") {
        /* Clear all keys and re-enable resizing so the benchmarks below
         * run against a dict with default behavior. */
        dictEmpty(d, NULL);
        dictSetResizeEnabled(DICT_RESIZE_ENABLE);
    }
    /* Fixed seed: makes the random-substring workload reproducible. */
    srand(12345);
    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Create a dynamically allocated substring */
        char *key = stringFromSubstring();

        /* Insert the range directly from the large string */
        de = dictAddRaw(d, key, &existing);
        /* Either a new entry was created, or the key was already there. */
        assert(de != NULL || existing != NULL);
        /* If key already exists NULL is returned so we need to free the temp key string */
        if (de == NULL) zfree(key);
    }
    end_benchmark("Inserting random substrings (100-500B) from large string with symbols");
    /* Random substrings may collide, so the dict can hold fewer than
     * count entries -- never more. */
    assert((long)dictSize(d) <= count);
    dictEmpty(d, NULL);
| 2164 | |||
    start_benchmark();
    for (j = 0; j < count; j++) {
        /* On success the dict takes ownership of the heap-allocated key
         * (no zfree here; compare the duplicate-key path above). */
        retval = dictAdd(d,stringFromLongLong(j),(void*)j);
        assert(retval == DICT_OK);
    }
    end_benchmark("Inserting via dictAdd() non existing");
    assert((long)dictSize(d) == count);

    dictEmpty(d, NULL);

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* NULL 'existing' out-param: all keys are fresh, so duplicate
         * reporting is not needed here. */
        de = dictAddRaw(d,stringFromLongLong(j),NULL);
        assert(de != NULL);
    }
    end_benchmark("Inserting via dictAddRaw() non existing");
    assert((long)dictSize(d) == count);

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Every key is already present: each call must report the
         * existing entry and perform no insertion. */
        void *key = stringFromLongLong(j);
        de = dictAddRaw(d,key,&existing);
        assert(existing != NULL);
        /* The dict kept its own copy; free the probe string. */
        zfree(key);
    }
    end_benchmark("Inserting via dictAddRaw() existing (no insertion)");
    assert((long)dictSize(d) == count);

    /* Wait for rehashing. */
    while (dictIsRehashing(d)) {
        dictRehashMicroseconds(d,100*1000);
    }
| 2197 | |||
    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Sequential keys: every lookup must hit. */
        char *key = stringFromLongLong(j);
        dictEntry *de = dictFind(d,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Linear access of existing elements");

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* Second pass over the same keys (warm-cache measurement). */
        char *key = stringFromLongLong(j);
        dictEntry *de = dictFind(d,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Linear access of existing elements (2nd round)");

    start_benchmark();
    for (j = 0; j < count; j++) {
        /* rand() % count always names an inserted key, so lookups hit. */
        char *key = stringFromLongLong(rand() % count);
        dictEntry *de = dictFind(d,key);
        assert(de != NULL);
        zfree(key);
    }
    end_benchmark("Random access of existing elements");
| 2224 | |||
    start_benchmark();
    for (j = 0; j < count; j++) {
        /* dictGetRandomKey() on a non-empty dict always yields an entry. */
        dictEntry *de = dictGetRandomKey(d);
        assert(de != NULL);
    }
    end_benchmark("Accessing random keys");

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(rand() % count);
        /* Corrupt the first character so the key cannot match anything
         * inserted (presumably all-digit strings -- 'X' never occurs). */
        key[0] = 'X';
        dictEntry *de = dictFind(d,key);
        assert(de == NULL);
        zfree(key);
    }
    end_benchmark("Accessing missing");

    start_benchmark();
    for (j = 0; j < count; j++) {
        char *key = stringFromLongLong(j);
        retval = dictDelete(d,key);
        assert(retval == DICT_OK);
        /* Mutate our buffer into a distinct key and hand ownership of it
         * to the dict via dictAdd(). */
        key[0] += 17; /* Change first number to letter. */
        retval = dictAdd(d,key,(void*)j);
        assert(retval == DICT_OK);
    }
    end_benchmark("Removing and adding");
    dictRelease(d);
| 2253 | |||
| 2254 | TEST("Use dict without values (no_value=1)") { | ||
| 2255 | dictType dt = BenchmarkDictType; | ||
| 2256 | dt.no_value = 1; | ||
| 2257 | |||
| 2258 | /* Allocate array of size count and fill it with keys (stringFromLongLong(j) */ | ||
| 2259 | char **lookupKeys = zmalloc(sizeof(char*) * count); | ||
| 2260 | for (long j = 0; j < count; j++) | ||
| 2261 | lookupKeys[j] = stringFromLongLong(j); | ||
| 2262 | |||
| 2263 | |||
| 2264 | /* Add keys without values. */ | ||
| 2265 | dict *d = dictCreate(&dt); | ||
| 2266 | for (j = 0; j < count; j++) { | ||
| 2267 | retval = dictAdd(d,lookupKeys[j],NULL); | ||
| 2268 | assert(retval == DICT_OK); | ||
| 2269 | } | ||
| 2270 | |||
| 2271 | /* Now, we should be able to find the keys. */ | ||
| 2272 | for (j = 0; j < count; j++) { | ||
| 2273 | dictEntry *de = dictFind(d,lookupKeys[j]); | ||
| 2274 | assert(de != NULL); | ||
| 2275 | } | ||
| 2276 | |||
| 2277 | /* Find non exists keys. */ | ||
| 2278 | for (j = 0; j < count; j++) { | ||
| 2279 | /* Temporarily override first char of key */ | ||
| 2280 | char tmp = lookupKeys[j][0]; | ||
| 2281 | lookupKeys[j][0] = 'X'; | ||
| 2282 | dictEntry *de = dictFind(d,lookupKeys[j]); | ||
| 2283 | lookupKeys[j][0] = tmp; | ||
| 2284 | assert(de == NULL); | ||
| 2285 | } | ||
| 2286 | |||
| 2287 | dictRelease(d); | ||
| 2288 | zfree(lookupKeys); | ||
| 2289 | } | ||
| 2290 | |||
| 2291 | TEST("Test dictFindLink() functionality") { | ||
| 2292 | dictType dt = BenchmarkDictType; | ||
| 2293 | dict *d = dictCreate(&dt); | ||
| 2294 | |||
| 2295 | /* find in empty dict */ | ||
| 2296 | dictEntryLink link = dictFindLink(d, "key", NULL); | ||
| 2297 | assert(link == NULL); | ||
| 2298 | |||
| 2299 | /* Add keys to dict and test */ | ||
| 2300 | for (j = 0; j < 10; j++) { | ||
| 2301 | /* Add another key to dict */ | ||
| 2302 | char *key = stringFromLongLong(j); | ||
| 2303 | retval = dictAdd(d, key, (void*)j); | ||
| 2304 | assert(retval == DICT_OK); | ||
| 2305 | /* find existing keys with dictFindLink() */ | ||
| 2306 | dictEntryLink link = dictFindLink(d, key, NULL); | ||
| 2307 | assert(link != NULL); | ||
| 2308 | assert(*link != NULL); | ||
| 2309 | assert(dictGetKey(*link) != NULL); | ||
| 2310 | |||
| 2311 | /* Test that the key found is the correct one */ | ||
| 2312 | void *foundKey = dictGetKey(*link); | ||
| 2313 | assert(compareCallback( NULL, foundKey, key)); | ||
| 2314 | |||
| 2315 | /* Test finding a non-existing key */ | ||
| 2316 | char *nonExistingKey = stringFromLongLong(j + 10); | ||
| 2317 | link = dictFindLink(d, nonExistingKey, NULL); | ||
| 2318 | assert(link == NULL); | ||
| 2319 | |||
| 2320 | /* Test with bucket parameter */ | ||
| 2321 | dictEntryLink bucket = NULL; | ||
| 2322 | link = dictFindLink(d, key, &bucket); | ||
| 2323 | assert(link != NULL); | ||
| 2324 | assert(bucket != NULL); | ||
| 2325 | |||
| 2326 | /* Test bucket parameter with non-existing key */ | ||
| 2327 | link = dictFindLink(d, nonExistingKey, &bucket); | ||
| 2328 | assert(link == NULL); | ||
| 2329 | assert(bucket != NULL); /* Bucket should still be set even for non-existing keys */ | ||
| 2330 | |||
| 2331 | /* Clean up */ | ||
| 2332 | zfree(nonExistingKey); | ||
| 2333 | } | ||
| 2334 | |||
| 2335 | dictRelease(d); | ||
| 2336 | } | ||
| 2337 | |||
| 2338 | return 0; | ||
| 2339 | } | ||
| 2340 | #endif | ||
