summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/src/dict.h
blob: 25e4cf1bd2349e9696a4d415087c278f7ed713c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
/* Hash Tables Implementation.
 *
 * This file implements in-memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables will auto-resize if needed
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
 *
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 *
 * Dict usage of pointer tagging
 * -----------------------------
 * In the "normal" case (no_value=0), a dict slot contains only a pointer to a 
 * dictEntry, and dictEntry holds untagged pointers to key and value. But when a 
 * dict is used as a set (no_value=1), we optimize by storing direct key pointers 
 * when possible, avoiding dictEntry allocation. This happens when A bucket contains 
 * only one key, or at the tail of a collision chain. Redis dicts uses pointer 
 * tagging, to identify direct key pointers from dictEntry pointers, i.e embedding 
 * metadata in the lowest three bits of pointers. This requires 8-byte alignment, 
 * which zmalloc() guarantees on both 32-bit and 64-bit systems (via jemalloc/tcmalloc, 
 * or standard malloc with explicit PREFIX_SIZE=8).
 * 
 * Besides of distinguishing direct key pointers from dictEntry pointers, we also 
 * need to distinguish between even and odd key pointers that being stored in the 
 * dict. Therefore, we use the following tagging scheme:
 * - dictEntry pointer: Points to a dictEntry structure (8-byte aligned). Left intact: 
 *   ENTRY_PTR_NORMAL=000
 * - Odd-address key (keys_are_odd=1): Direct pointer to a 
 *   key with odd address (e.g., all SDS strings), Left intact: 
 *   ENTRY_PTR_IS_ODD_KEY=XX1
 * - Even-address key  (keys_are_odd=0): Direct pointer to a key with 
 *   even address. Since 8-byte alignment yields bits = 000, same as dictEntry, 
 *   we tag it by setting bit 1 which results with: 
 *   ENTRY_PTR_IS_EVEN_KEY=010.
 */

#ifndef __DICT_H
#define __DICT_H

#include "mt19937-64.h"
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>

#define DICT_OK 0
#define DICT_ERR 1

/* Hash table parameters */
#define HASHTABLE_MIN_FILL        8      /* Minimal hash table fill 12.5%(100/8) */

/* stored-key vs. key
 * ------------------
 * If dictType.keyFromStoredKey is non-NULL, then dict distinguishes between the
 * lookup key and the actual stored-key object. In this case, "key" is used to 
 * locate entries, while "storedKey" is the actual element stored in the dict.
 * If dictType.keyFromStoredKey is NULL, the lookup "key" and the stored-key are the
 * same. This API is primarily relevant for no_value=1 dicts, where the key and value
 * might be packed together. When values are stored separately, this identity 
 * distinction does not arise. The marker __stored_key is used to indicate that 
 * the pointer refers to the stored-key rather than the lookup key.
 */
#define __stored_key

typedef struct dictEntry dictEntry; /* opaque */
typedef struct dict dict;
typedef dictEntry **dictEntryLink; /* See description of dictFindLink() */

/* Searching for a key in a dict may involve few comparisons.
 * If extracting the looked-up key is expensive (e.g., sdslen(), kvobjGetKey()),  
 * caching can be used to reduce those repetitive computations.  
 *  
 * This struct, passed to the comparison function as temporary caching, if 
 * needed by the function across comparison of a given lookup. 
 * for the looked-up key and resets before each new lookup. */
typedef struct dictCmpCache {
    int useCache;
    
    union {
        uint64_t u64;
        int64_t i64;
        int i;
        size_t sz;
        void *p;
    } data[2];
} dictCmpCache;

typedef struct dictType {
    /* Callbacks */
    uint64_t (*hashFunction)(const void *key);
    void *(*keyDup)(dict *d, const void *key __stored_key);
    void *(*valDup)(dict *d, const void *obj);
    int (*keyCompare)(dictCmpCache *cache, const void *key1, const void *key2);
    void (*keyDestructor)(dict *d, void *key __stored_key);
    void (*valDestructor)(dict *d, void *obj);
    int (*resizeAllowed)(size_t moreMem, double usedRatio);
    /* Invoked at the start of dict initialization/rehashing (old and new ht are already created) */
    void (*rehashingStarted)(dict *d);
    /* Invoked at the end of dict initialization/rehashing of all the entries from old to new ht. Both ht still exists
     * and are cleaned up after this callback.  */
    void (*rehashingCompleted)(dict *d);
    /* Invoked when the size of the dictionary changes.
     * The `delta` parameter can be positive (size increase) or negative (size decrease). */
    void (*bucketChanged)(dict *d, long long delta);
    /* Allow a dict to carry extra caller-defined metadata. The
     * extra memory is initialized to 0 when a dict is allocated. */
    size_t (*dictMetadataBytes)(dict *d);

    /* Data */
    void *userdata;

    /* Flags */
    /* The 'no_value' flag, if set, indicates that values are not used, i.e. the
     * dict is a set. When this flag is set, it's not possible to access the
     * value of a dictEntry and it's also impossible to use dictSetKey(). It 
     * enables an optimization to store a key directly without an allocating 
     * dictEntry in between, if it is the only key in the bucket. */
    unsigned int no_value:1;
    /* This flag is required for `no_value` optimization since the optimization
     * reuses LSB bits as metadata */ 
    unsigned int keys_are_odd:1;

    /* Ensures that the entire hash table is rehashed at once if set. */
    unsigned int force_full_rehash:1;
    
    /* Callback to extract key from stored-key object. When set, the dict can
     * store keys in one format (e.g., a structure) but look them up using a
     * different format, extracted from the stored-key. (e.g., sds or integer). 
     * Set to NULL if key and stored-key object are the same. Relevant only for
     * no_value=1 dicts. */
    const void *(*keyFromStoredKey)(const void *key __stored_key);

    /* Optional callback called when the dict is destroyed. */
    void (*onDictRelease)(dict *d);
} dictType;

#define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp))
#define DICTHT_SIZE_MASK(exp) ((exp) == -1 ? 0 : (DICTHT_SIZE(exp))-1)

struct dict {
    dictType *type;

    dictEntry **ht_table[2];
    unsigned long ht_used[2];

    long rehashidx; /* rehashing not in progress if rehashidx == -1 */

    /* Note: pauserehash is a full unsigned so iterator increments
     * don't perform RMW on the same storage unit as other bitfields. */
    unsigned pauserehash; /* If >0 rehashing is paused */

    /* Keep small vars at end for optimal (minimal) struct padding */
    signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
    int16_t pauseAutoResize;  /* If >0 automatic resizing is disallowed (<0 indicates coding error) */
    void *metadata[];
};

/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    unsigned long long fingerprint;
} dictIterator;

typedef struct dictStats {
    int htidx;
    unsigned long buckets;
    unsigned long maxChainLen;
    unsigned long totalChainLen;
    unsigned long htSize;
    unsigned long htUsed;
    unsigned long *clvector;
} dictStats;

typedef void (dictScanFunction)(void *privdata, const dictEntry *de, dictEntry **plink);
typedef void *(dictDefragAllocFunction)(void *ptr);
typedef struct {
    dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */
    dictDefragAllocFunction *defragKey;   /* Defrag-realloc keys (optional) */
    dictDefragAllocFunction *defragVal;   /* Defrag-realloc values (optional) */
} dictDefragFunctions;

/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_EXP      2
#define DICT_HT_INITIAL_SIZE     (1<<(DICT_HT_INITIAL_EXP))

/* ------------------------------- Macros ------------------------------------*/
#define dictFreeVal(d, entry) do {                     \
    if ((d)->type->valDestructor)                      \
        (d)->type->valDestructor((d), dictGetVal(entry)); \
   } while(0)

#define dictFreeKey(d, entry) \
    if ((d)->type->keyDestructor) \
        (d)->type->keyDestructor((d), dictGetKey(entry))

#define dictMetadata(d) (&(d)->metadata)
#define dictMetadataSize(d) ((d)->type->dictMetadataBytes \
                             ? (d)->type->dictMetadataBytes(d) : 0)

#define dictBuckets(d) (DICTHT_SIZE((d)->ht_size_exp[0])+DICTHT_SIZE((d)->ht_size_exp[1]))
#define dictSize(d) ((d)->ht_used[0]+(d)->ht_used[1])
#define dictIsEmpty(d) ((d)->ht_used[0] == 0 && (d)->ht_used[1] == 0)
#define dictIsRehashing(d) ((d)->rehashidx != -1)
#define dictPauseRehashing(d) ((d)->pauserehash++)
#define dictResumeRehashing(d) ((d)->pauserehash--)
#define dictIsRehashingPaused(d) ((d)->pauserehash > 0)
#define dictPauseAutoResize(d) ((d)->pauseAutoResize++)
#define dictResumeAutoResize(d) ((d)->pauseAutoResize--)

/* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */
#if ULONG_MAX >= 0xffffffffffffffff
#define randomULong() ((unsigned long) genrand64_int64())
#else
#define randomULong() random()
#endif

typedef enum {
    DICT_RESIZE_ENABLE,
    DICT_RESIZE_AVOID,
    DICT_RESIZE_FORBID,
} dictResizeEnable;

/* API */
dict *dictCreate(dictType *type);
void dictTypeAddMeta(dict **d, dictType *typeWithMeta);
int dictExpand(dict *d, unsigned long size);
int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size);
int dictAdd(dict *d, void *key __stored_key, void *val);
dictEntry *dictAddRaw(dict *d, void *key __stored_key, dictEntry **existing);
dictEntry *dictAddOrFind(dict *d, void *key __stored_key);
int dictReplace(dict *d, void *key __stored_key, void *val);
int dictDelete(dict *d, const void *key);
dictEntry *dictUnlink(dict *d, const void *key);
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
dictEntryLink dictTwoPhaseUnlinkFind(dict *d, const void *key, int *table_index);
void dictTwoPhaseUnlinkFree(dict *d, dictEntryLink llink, int table_index);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash);
int dictShrinkIfNeeded(dict *d);
int dictExpandIfNeeded(dict *d);
void *dictGetKey(const dictEntry *de);
int dictEntryIsKey(const dictEntry *de);
int dictCompareKeys(dict *d, const void *key1, const void *key2);
size_t dictMemUsage(const dict *d);
size_t dictEntryMemUsage(int noValueDict);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
void dictInitIterator(dictIterator *iter, dict *d);
void dictInitSafeIterator(dictIterator *iter, dict *d);
void dictResetIterator(dictIterator *iter);
dictEntry *dictNext(dictIterator *iter);
dictEntry *dictGetNext(const dictEntry *de);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
dictEntry *dictGetFairRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d, int full);
uint64_t dictGenHashFunction(const void *key, size_t len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len);
void dictEmpty(dict *d, void(callback)(dict*));
void dictSetResizeEnabled(dictResizeEnable enable);
int dictRehash(dict *d, int n);
int dictRehashMicroseconds(dict *d, uint64_t us);
void dictSetHashFunctionSeed(uint8_t *seed);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);

size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);
dictStats* dictGetStatsHt(dict *d, int htidx, int full);
void dictCombineStats(dictStats *from, dictStats *into);
void dictFreeStats(dictStats *stats);

dictEntryLink dictFindLink(dict *d, const void *key, dictEntryLink *bucket);
void dictSetKeyAtLink(dict *d, void *key __stored_key, dictEntryLink *link, int newItem);

/* API relevant only when dict is used as a hash-map (no_value=0) */ 
void dictSetKey(dict *d, dictEntry* de, void *key __stored_key);
void dictSetVal(dict *d, dictEntry *de, void *val);
void *dictGetVal(const dictEntry *de);
void dictSetDoubleVal(dictEntry *de, double val);
double dictGetDoubleVal(const dictEntry *de);
double *dictGetDoubleValPtr(dictEntry *de);
void *dictFetchValue(dict *d, const void *key);
void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val);
uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val);
uint64_t dictGetUnsignedIntegerVal(const dictEntry *de);

#define dictForEach(d, ty, m, ...) do { \
    dictIterator di; \
    dictEntry *de; \
    dictInitIterator(&di, d); \
    while ((de = dictNext(&di)) != NULL) { \
        ty *m = dictGetVal(de); \
        do { \
            __VA_ARGS__ \
        } while(0); \
    } \
    dictResetIterator(&di); \
} while(0);

#ifdef REDIS_TEST
int dictTest(int argc, char *argv[], int flags);
#endif

#endif /* __DICT_H */