aboutsummaryrefslogtreecommitdiff
path: root/examples/redis-unstable/src/zmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/src/zmalloc.c')
-rw-r--r--examples/redis-unstable/src/zmalloc.c1217
1 files changed, 1217 insertions, 0 deletions
diff --git a/examples/redis-unstable/src/zmalloc.c b/examples/redis-unstable/src/zmalloc.c
new file mode 100644
index 0000000..21d5749
--- /dev/null
+++ b/examples/redis-unstable/src/zmalloc.c
@@ -0,0 +1,1217 @@
1/* zmalloc - total amount of allocated memory aware version of malloc()
2 *
3 * Copyright (c) 2009-Present, Redis Ltd.
4 * All rights reserved.
5 *
6 * Licensed under your choice of (a) the Redis Source Available License 2.0
7 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
8 * GNU Affero General Public License v3 (AGPLv3).
9 */
10
11#include "fmacros.h"
12#include "config.h"
13#include "solarisfixes.h"
14
15#include <stdio.h>
16#include <stdlib.h>
17#include <stdint.h>
18#include <unistd.h>
19
20#ifdef __linux__
21#include <sys/mman.h>
22#endif
23
/* Release 'ptr' with the C library's own free().
 *
 * Some buffers (for instance the ones returned by backtrace_symbols()) must
 * be released with the original libc implementation. This wrapper has to be
 * defined before zmalloc.h is included, because that header may shadow
 * free() when jemalloc or another non standard allocator is in use. */
void zlibc_free(void *ptr) {
    free(ptr);
}
31
32#include <string.h>
33#include "zmalloc.h"
34#include "atomicvar.h"
35#include "redisassert.h"
36
/* Mark a parameter or variable as intentionally unused. */
#define UNUSED(x) ((void)(x))

/* PREFIX_SIZE is the number of bytes placed in front of every allocation to
 * remember its size. Nothing is needed when HAVE_MALLOC_SIZE is defined;
 * otherwise the header is a size_t, widened so that we use at least 8 bytes
 * alignment on all systems. */
#if defined(HAVE_MALLOC_SIZE)
#define PREFIX_SIZE (0)
#elif SIZE_MAX < 0xffffffffffffffffull
#define PREFIX_SIZE 8
#else
#define PREFIX_SIZE (sizeof(size_t))
#endif

/* When using the libc allocator, use a minimum allocation size to match the
 * jemalloc behavior that doesn't return NULL in this case: a request for
 * zero bytes is turned into a request for sizeof(long) bytes. */
#define MALLOC_MIN_SIZE(x) ((x) > 0 ? (x) : sizeof(long))
54
/* Route the plain malloc-family names used in this file to the configured
 * allocator. With neither USE_TCMALLOC nor USE_JEMALLOC defined the names
 * are left untouched. */
#if defined(USE_TCMALLOC)
/* tcmalloc: explicitly override malloc/free etc. */
#define malloc(size) tc_malloc(size)
#define calloc(count,size) tc_calloc(count,size)
#define realloc(ptr,size) tc_realloc(ptr,size)
#define free(ptr) tc_free(ptr)
#elif defined(USE_JEMALLOC)
/* jemalloc: explicitly override malloc/free etc., plus the *allocx family. */
#define malloc(size) je_malloc(size)
#define calloc(count,size) je_calloc(count,size)
#define realloc(ptr,size) je_realloc(ptr,size)
#define free(ptr) je_free(ptr)
#define mallocx(size,flags) je_mallocx(size,flags)
#define rallocx(ptr,size,flags) je_rallocx(ptr,size,flags)
#define dallocx(ptr,flags) je_dallocx(ptr,flags)
#if defined(HAVE_ALLOC_WITH_USIZE)
/* Allocation entry points that also report sizes through the extra
 * 'usize' out-parameters. */
void *je_malloc_with_usize(size_t size, size_t *usize);
void *je_calloc_with_usize(size_t num, size_t size, size_t *usize);
void *je_realloc_with_usize(void *ptr, size_t size, size_t *old_usize, size_t *new_usize);
void je_free_with_usize(void *ptr, size_t *usize);
#define malloc_with_usize(size,usize) je_malloc_with_usize(size,usize)
#define calloc_with_usize(num,size,usize) je_calloc_with_usize(num,size,usize)
#define realloc_with_usize(ptr,size,old_usize,new_usize) je_realloc_with_usize(ptr,size,old_usize,new_usize)
#define free_with_usize(ptr,usize) je_free_with_usize(ptr,usize)
#endif
#endif
81
82#define MAX_THREADS 16 /* Keep it a power of 2 so we can use '&' instead of '%'. */
83#define THREAD_MASK (MAX_THREADS - 1)
84#define PEAK_CHECK_THRESHOLD (1024 * 100) /* 100KB */
85
86typedef struct used_memory_entry {
87 redisAtomic long long used_memory;
88 redisAtomic long long last_peak_check;
89 char padding[CACHE_LINE_SIZE - sizeof(long long) - sizeof(long long)];
90} used_memory_entry;
91
92static __attribute__((aligned(CACHE_LINE_SIZE))) used_memory_entry used_memory[MAX_THREADS];
93static redisAtomic size_t num_active_threads = 0;
94static redisAtomic size_t zmalloc_peak = 0;
95static redisAtomic time_t zmalloc_peak_time = 0;
96static __thread long my_thread_index = -1;
97
98static inline void init_my_thread_index(void) {
99 if (unlikely(my_thread_index == -1)) {
100 atomicGetIncr(num_active_threads, my_thread_index, 1);
101 my_thread_index &= THREAD_MASK;
102 }
103}
104
105static void update_zmalloc_stat_alloc(long long bytes_delta) {
106 init_my_thread_index();
107
108 /* Per-thread allocation counter and the last counter value at which we ran a
109 * global peak check (throttles how often we call zmalloc_used_memory()). */
110 long long thread_used, thread_last_peak_check_used;
111 atomicIncrGet(used_memory[my_thread_index].used_memory, thread_used, bytes_delta);
112 atomicGet(used_memory[my_thread_index].last_peak_check, thread_last_peak_check_used);
113
114 /* Only run the (expensive) global used/peak check after this thread's
115 * allocation counter has advanced enough since the last check. */
116 if (unlikely(thread_used - thread_last_peak_check_used > PEAK_CHECK_THRESHOLD)) {
117 /* Snapshot of global used memory across all threads. */
118 size_t used_mem = zmalloc_used_memory();
119
120 /* Current published global peak. */
121 size_t published_peak;
122 atomicGet(zmalloc_peak, published_peak);
123
124 if (used_mem > published_peak) {
125 /* Try to publish `used_mem` as the new global peak.
126 *
127 * Another thread may update `zmalloc_peak` concurrently. Use a CAS loop:
128 * on failure, `old_peak` is refreshed with the latest peak value, and we
129 * retry only while our snapshot still exceeds it. */
130 size_t old_peak = published_peak;
131 while (used_mem > old_peak && !atomicCompareExchange(size_t, zmalloc_peak, old_peak, used_mem)) {
132 /* CAS failed: `old_peak` now holds the current `zmalloc_peak`. */
133 }
134
135 /* If we raised the peak, record when it was reached. */
136 if (used_mem > old_peak) {
137 atomicSet(zmalloc_peak_time, time(NULL));
138 }
139 }
140
141 /* Record the thread counter value at which we last ran a global peak check,
142 * to throttle future checks for this thread. */
143 atomicSet(used_memory[my_thread_index].last_peak_check, thread_used);
144 }
145}
146
147static void update_zmalloc_stat_free(long long num) {
148 init_my_thread_index();
149 atomicDecr(used_memory[my_thread_index].used_memory, num);
150}
151
/* Default out-of-memory handler: report the failed request size on stderr,
 * flush it, and abort the process. */
static void zmalloc_default_oom(size_t size) {
    fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n", size);
    fflush(stderr);
    abort();
}

/* Handler invoked by the panicking allocation functions on failure.
 * Replaceable through zmalloc_set_oom_handler(). */
static void (*zmalloc_oom_handler)(size_t) = zmalloc_default_oom;
160
161#ifdef HAVE_MALLOC_SIZE
/* Hook called by the *_usable allocation functions on the pointer they are
 * about to return. This implementation hands 'ptr' back unchanged and
 * ignores 'size'. */
void *extend_to_usable(void *ptr, size_t size) {
    (void)size;
    return ptr;
}
166#endif
167
168/* Try allocating memory, and return NULL if failed.
169 * '*usable' is set to the usable size if non NULL. */
170static inline void *ztrymalloc_usable_internal(size_t size, size_t *usable) {
171 /* Possible overflow, return NULL, so that the caller can panic or handle a failed allocation. */
172 if (size >= SIZE_MAX/2) return NULL;
173#ifdef HAVE_ALLOC_WITH_USIZE
174 void *ptr = malloc_with_usize(MALLOC_MIN_SIZE(size)+PREFIX_SIZE, &size);
175#else
176 void *ptr = malloc(MALLOC_MIN_SIZE(size)+PREFIX_SIZE);
177#endif
178 if (!ptr) return NULL;
179#ifdef HAVE_ALLOC_WITH_USIZE
180 update_zmalloc_stat_alloc(size);
181 if (usable) *usable = size;
182 return ptr;
183#elif HAVE_MALLOC_SIZE
184 size = zmalloc_size(ptr);
185 update_zmalloc_stat_alloc(size);
186 if (usable) *usable = size;
187 return ptr;
188#else
189 size = MALLOC_MIN_SIZE(size);
190 *((size_t*)ptr) = size;
191 update_zmalloc_stat_alloc(size+PREFIX_SIZE);
192 if (usable) *usable = size;
193 return (char*)ptr+PREFIX_SIZE;
194#endif
195}
196
/* Try allocating memory, and return NULL if failed.
 * '*usable' is set to the usable size if non NULL (0 on failure). */
void *ztrymalloc_usable(size_t size, size_t *usable) {
    size_t got = 0;
    void *mem = ztrymalloc_usable_internal(size, &got);
#ifdef HAVE_MALLOC_SIZE
    mem = extend_to_usable(mem, got);
#endif
    if (usable != NULL) *usable = got;
    return mem;
}
206
/* Allocate 'size' bytes, calling the out-of-memory handler with 'size' if
 * the allocation fails. */
void *zmalloc(size_t size) {
    void *mem = ztrymalloc_usable_internal(size, NULL);
    if (mem == NULL) zmalloc_oom_handler(size);
    return mem;
}
213
/* Allocate 'size' bytes without panicking: NULL is returned on failure. */
void *ztrymalloc(size_t size) {
    return ztrymalloc_usable_internal(size, NULL);
}
219
/* Allocate 'size' bytes, calling the out-of-memory handler on failure.
 * '*usable' is set to the usable size if non NULL. */
void *zmalloc_usable(size_t size, size_t *usable) {
    size_t got = 0;
    void *mem = ztrymalloc_usable_internal(size, &got);
    if (mem == NULL) zmalloc_oom_handler(size);
#ifdef HAVE_MALLOC_SIZE
    if (mem != NULL) mem = extend_to_usable(mem, got);
#endif
    if (usable != NULL) *usable = got;
    return mem;
}
232
233#if defined(USE_JEMALLOC)
234void *zmalloc_with_flags(size_t size, int flags) {
235 if (size >= SIZE_MAX/2) zmalloc_oom_handler(size);
236 void *ptr = mallocx(size+PREFIX_SIZE, flags);
237 if (!ptr) zmalloc_oom_handler(size);
238 update_zmalloc_stat_alloc(zmalloc_size(ptr));
239 return ptr;
240}
241
/* Resize 'ptr' to 'size' bytes through rallocx() with the given 'flags'.
 *
 * - A NULL 'ptr' behaves like zmalloc_with_flags().
 * - A zero 'size' frees 'ptr' and returns NULL.
 * - An oversized request (possible overflow) frees 'ptr' and calls the
 *   out-of-memory handler; NULL is returned if the handler returns.
 * - If rallocx() fails the out-of-memory handler is called as well. */
void *zrealloc_with_flags(void *ptr, size_t size, int flags) {
    /* Not freeing anything, just redirect to malloc. */
    if (ptr == NULL) return zmalloc_with_flags(size, flags);

    /* Not allocating anything, just redirect to free. */
    if (size == 0) {
        zfree_with_flags(ptr, flags);
        return NULL;
    }

    if (size >= SIZE_MAX/2) {
        zfree_with_flags(ptr, flags);
        zmalloc_oom_handler(size);
        return NULL;
    }

    size_t before = zmalloc_size(ptr);
    void *moved = rallocx(ptr, size, flags);
    if (moved == NULL) {
        zmalloc_oom_handler(size);
        return NULL;
    }

    /* Swap the old size for the new one in the accounting. */
    update_zmalloc_stat_free(before);
    update_zmalloc_stat_alloc(zmalloc_size(moved));
    return moved;
}
272
/* Free 'ptr' through dallocx() with the given 'flags', removing its size
 * from the accounting first. NULL is ignored. */
void zfree_with_flags(void *ptr, int flags) {
    if (ptr != NULL) {
        update_zmalloc_stat_free(zmalloc_size(ptr));
        dallocx(ptr, flags);
    }
}
278#endif
279
280/* Allocation and free functions that bypass the thread cache
281 * and go straight to the allocator arena bins.
282 * Currently implemented only for jemalloc. Used for online defragmentation. */
283#if (defined(USE_JEMALLOC) && defined(HAVE_DEFRAG))
284void *zmalloc_no_tcache(size_t size) {
285 if (size >= SIZE_MAX/2) zmalloc_oom_handler(size);
286 void *ptr = mallocx(size+PREFIX_SIZE, MALLOCX_TCACHE_NONE);
287 if (!ptr) zmalloc_oom_handler(size);
288 update_zmalloc_stat_alloc(zmalloc_size(ptr));
289 return ptr;
290}
291
292void zfree_no_tcache(void *ptr) {
293 if (ptr == NULL) return;
294 update_zmalloc_stat_free(zmalloc_size(ptr));
295 dallocx(ptr, MALLOCX_TCACHE_NONE);
296}
297#endif
298
299/* Try allocating memory and zero it, and return NULL if failed.
300 * '*usable' is set to the usable size if non NULL. */
301static inline void *ztrycalloc_usable_internal(size_t size, size_t *usable) {
302 /* Possible overflow, return NULL, so that the caller can panic or handle a failed allocation. */
303 if (size >= SIZE_MAX/2) return NULL;
304#ifdef HAVE_ALLOC_WITH_USIZE
305 void *ptr = calloc_with_usize(1, MALLOC_MIN_SIZE(size)+PREFIX_SIZE, &size);
306#else
307 void *ptr = calloc(1, MALLOC_MIN_SIZE(size)+PREFIX_SIZE);
308#endif
309 if (ptr == NULL) return NULL;
310
311#ifdef HAVE_ALLOC_WITH_USIZE
312 update_zmalloc_stat_alloc(size);
313 if (usable) *usable = size;
314 return ptr;
315#elif HAVE_MALLOC_SIZE
316 size = zmalloc_size(ptr);
317 update_zmalloc_stat_alloc(size);
318 if (usable) *usable = size;
319 return ptr;
320#else
321 size = MALLOC_MIN_SIZE(size);
322 *((size_t*)ptr) = size;
323 update_zmalloc_stat_alloc(size+PREFIX_SIZE);
324 if (usable) *usable = size;
325 return (char*)ptr+PREFIX_SIZE;
326#endif
327}
328
/* Try allocating zeroed memory, and return NULL if failed.
 * '*usable' is set to the usable size if non NULL (0 on failure). */
void *ztrycalloc_usable(size_t size, size_t *usable) {
    size_t got = 0;
    void *mem = ztrycalloc_usable_internal(size, &got);
#ifdef HAVE_MALLOC_SIZE
    mem = extend_to_usable(mem, got);
#endif
    if (usable != NULL) *usable = got;
    return mem;
}
338
/* Allocate zeroed memory for 'num' elements of 'size' bytes, or panic.
 * This wrapper exists to offer a calloc() compatible signature.
 *
 * The out-of-memory handler is called with SIZE_MAX when 'size' is zero
 * (which also protects the division below) or when num*size would wrap. */
void *zcalloc_num(size_t num, size_t size) {
    if (size == 0 || num > SIZE_MAX/size) {
        zmalloc_oom_handler(SIZE_MAX);
        return NULL;
    }

    size_t total = num*size;
    void *mem = ztrycalloc_usable_internal(total, NULL);
    if (mem == NULL) zmalloc_oom_handler(total);
    return mem;
}
352
/* Allocate 'size' zeroed bytes, calling the out-of-memory handler with
 * 'size' if the allocation fails. */
void *zcalloc(size_t size) {
    void *mem = ztrycalloc_usable_internal(size, NULL);
    if (mem == NULL) zmalloc_oom_handler(size);
    return mem;
}
359
/* Allocate 'size' zeroed bytes without panicking: NULL is returned on
 * failure. */
void *ztrycalloc(size_t size) {
    return ztrycalloc_usable_internal(size, NULL);
}
365
/* Allocate 'size' zeroed bytes, calling the out-of-memory handler on
 * failure. '*usable' is set to the usable size if non NULL. */
void *zcalloc_usable(size_t size, size_t *usable) {
    size_t got = 0;
    void *mem = ztrycalloc_usable_internal(size, &got);
    if (mem == NULL) zmalloc_oom_handler(size);
#ifdef HAVE_MALLOC_SIZE
    mem = extend_to_usable(mem, got);
#endif
    if (usable != NULL) *usable = got;
    return mem;
}
378
379/* Try reallocating memory, and return NULL if failed.
380 * '*usable' is set to the usable size if non NULL
381 * '*old_usable' is set to the previous usable size if non NULL. */
382static inline void *ztryrealloc_usable_internal(void *ptr, size_t size, size_t *usable, size_t *old_usable) {
383#ifndef HAVE_MALLOC_SIZE
384 void *realptr;
385#endif
386 size_t oldsize, dummy;
387 void *newptr;
388
389 if (!usable) usable = &dummy;
390 if (!old_usable) old_usable = &dummy;
391
392 /* not allocating anything, just redirect to free. */
393 if (size == 0 && ptr != NULL) {
394 zfree_usable(ptr, &oldsize);
395 *usable = 0;
396 *old_usable = oldsize;
397 return NULL;
398 }
399 /* Not freeing anything, just redirect to malloc. */
400 if (ptr == NULL) {
401 *old_usable = 0;
402 return ztrymalloc_usable(size, usable);
403 }
404
405 /* Possible overflow, return NULL, so that the caller can panic or handle a failed allocation. */
406 if (size >= SIZE_MAX/2) {
407 zfree_usable(ptr, &oldsize);
408 *usable = 0;
409 *old_usable = oldsize;
410 return NULL;
411 }
412#ifdef HAVE_ALLOC_WITH_USIZE
413 newptr = realloc_with_usize(ptr, size, &oldsize, &size);
414 if (newptr == NULL) {
415 *usable = 0;
416 *old_usable = oldsize;
417 return NULL;
418 }
419 update_zmalloc_stat_free(oldsize);
420 update_zmalloc_stat_alloc(size);
421 *usable = size;
422 *old_usable = oldsize;
423 return newptr;
424#elif HAVE_MALLOC_SIZE
425 oldsize = zmalloc_size(ptr);
426 newptr = realloc(ptr,size);
427 if (newptr == NULL) {
428 *usable = 0;
429 *old_usable = oldsize;
430 return NULL;
431 }
432
433 update_zmalloc_stat_free(oldsize);
434 size = zmalloc_size(newptr);
435 update_zmalloc_stat_alloc(size);
436 *usable = size;
437 *old_usable = oldsize;
438 return newptr;
439#else
440 realptr = (char*)ptr-PREFIX_SIZE;
441 oldsize = *((size_t*)realptr);
442 newptr = realloc(realptr,size+PREFIX_SIZE);
443 if (newptr == NULL) {
444 *usable = 0;
445 *old_usable = oldsize;
446 return NULL;
447 }
448
449 *((size_t*)newptr) = size;
450 update_zmalloc_stat_free(oldsize);
451 update_zmalloc_stat_alloc(size);
452 *usable = size;
453 *old_usable = oldsize;
454 return (char*)newptr+PREFIX_SIZE;
455#endif
456}
457
/* Try reallocating memory, and return NULL if failed.
 * '*usable' is set to the new usable size if non NULL.
 * '*old_usable' is set to the previous usable size if non NULL. */
void *ztryrealloc_usable(void *ptr, size_t size, size_t *usable, size_t *old_usable) {
    size_t got = 0;
    void *moved = ztryrealloc_usable_internal(ptr, size, &got, old_usable);
#ifdef HAVE_MALLOC_SIZE
    moved = extend_to_usable(moved, got);
#endif
    if (usable != NULL) *usable = got;
    return moved;
}
467
/* Reallocate memory or panic. The out-of-memory handler is called when the
 * reallocation returns NULL for a non-zero 'size'; a zero 'size' frees the
 * block and legitimately yields NULL. */
void *zrealloc(void *ptr, size_t size) {
    void *moved = ztryrealloc_usable_internal(ptr, size, NULL, NULL);
    if (moved == NULL && size != 0) zmalloc_oom_handler(size);
    return moved;
}
474
/* Reallocate memory without panicking: NULL is returned on failure. */
void *ztryrealloc(void *ptr, size_t size) {
    return ztryrealloc_usable_internal(ptr, size, NULL, NULL);
}
480
/* Reallocate memory or panic.
 * '*old_usable' is set to the previous usable size if non NULL.
 * '*usable' is set to the new usable size if non NULL.
 * The out-of-memory handler is called when the reallocation returns NULL
 * for a non-zero 'size'. */
void *zrealloc_usable(void *ptr, size_t size, size_t *usable, size_t *old_usable) {
    size_t got = 0;
    void *moved = ztryrealloc_usable(ptr, size, &got, old_usable);
    if (moved == NULL && size != 0) zmalloc_oom_handler(size);
#ifdef HAVE_MALLOC_SIZE
    moved = extend_to_usable(moved, got);
#endif
    if (usable != NULL) *usable = got;
    return moved;
}
494
495/* Provide zmalloc_size() for systems where this function is not provided by
496 * malloc itself, given that in that case we store a header with this
497 * information as the first bytes of every allocation. */
498#ifndef HAVE_MALLOC_SIZE
499size_t zmalloc_size(void *ptr) {
500 void *realptr = (char*)ptr-PREFIX_SIZE;
501 size_t size = *((size_t*)realptr);
502 return size+PREFIX_SIZE;
503}
504size_t zmalloc_usable_size(void *ptr) {
505 return zmalloc_size(ptr)-PREFIX_SIZE;
506}
507#endif
508
509void zfree(void *ptr) {
510 if (ptr == NULL) return;
511
512#ifdef HAVE_ALLOC_WITH_USIZE
513 size_t oldsize;
514 free_with_usize(ptr, &oldsize);
515 update_zmalloc_stat_free(oldsize);
516#elif HAVE_MALLOC_SIZE
517 update_zmalloc_stat_free(zmalloc_size(ptr));
518 free(ptr);
519#else
520 size_t oldsize;
521 void *realptr = (char*)ptr-PREFIX_SIZE;
522 oldsize = *((size_t*)realptr);
523 update_zmalloc_stat_free(oldsize+PREFIX_SIZE);
524 free(realptr);
525#endif
526}
527
528/* Similar to zfree, '*usable' is set to the usable size being freed. */
529void zfree_usable(void *ptr, size_t *usable) {
530 size_t oldsize;
531#ifndef HAVE_MALLOC_SIZE
532 void *realptr;
533#endif
534
535 if (ptr == NULL) {
536 if (usable) *usable = 0;
537 return;
538 }
539
540#ifdef HAVE_ALLOC_WITH_USIZE
541 free_with_usize(ptr, &oldsize);
542 update_zmalloc_stat_free(oldsize);
543#elif HAVE_MALLOC_SIZE
544 update_zmalloc_stat_free(oldsize = zmalloc_size(ptr));
545 free(ptr);
546#else
547 realptr = (char*)ptr-PREFIX_SIZE;
548 oldsize = *((size_t*)realptr);
549 update_zmalloc_stat_free(oldsize+PREFIX_SIZE);
550 free(realptr);
551#endif
552 if (usable) *usable = oldsize;
553}
554
/* Duplicate the NUL-terminated string 's' into memory obtained with
 * zmalloc_usable(). '*usable' is set to the usable size if non NULL. */
char *zstrdup_usable(const char *s, size_t *usable) {
    size_t bytes = strlen(s)+1; /* Include the terminator. */
    char *copy = zmalloc_usable(bytes, usable);

    memcpy(copy, s, bytes);
    return copy;
}
562
/* Duplicate the NUL-terminated string 's'; see zstrdup_usable(). */
char *zstrdup(const char *s) {
    char *copy = zstrdup_usable(s, NULL);
    return copy;
}
566
567size_t zmalloc_used_memory(void) {
568 size_t local_num_active_threads;
569 long long total_mem = 0;
570 atomicGet(num_active_threads,local_num_active_threads);
571 if (local_num_active_threads > MAX_THREADS) {
572 local_num_active_threads = MAX_THREADS;
573 }
574 for (size_t i = 0; i < local_num_active_threads; ++i) {
575 long long thread_used_mem;
576 atomicGet(used_memory[i].used_memory, thread_used_mem);
577 total_mem += thread_used_mem;
578 }
579 return total_mem;
580}
581
582size_t zmalloc_get_peak_memory(void) {
583 size_t peak;
584 atomicGet(zmalloc_peak, peak);
585 return peak;
586}
587
588time_t zmalloc_get_peak_memory_time(void) {
589 time_t t;
590 atomicGet(zmalloc_peak_time, t);
591 return t;
592}
593
594void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {
595 zmalloc_oom_handler = oom_handler;
596}
597
/* Use 'MADV_DONTNEED' to release memory to operating system quickly.
 * We do that in a fork child process to avoid CoW when the parent modifies
 * these shared pages.
 *
 * Only the whole pages fully contained in the allocation are advised.
 * Without jemalloc on Linux this function does nothing. */
void zmadvise_dontneed(void *ptr) {
#if defined(USE_JEMALLOC) && defined(__linux__)
    static size_t page_size = 0;
    if (page_size == 0) page_size = sysconf(_SC_PAGESIZE);
    size_t mask = page_size - 1;

    size_t remaining = zmalloc_size(ptr);
    if (remaining < page_size) return;

    /* Round the start of the range up to the next page boundary: memory
     * can only be released in units of whole pages. */
    char *first_page = (char *)(((size_t)ptr+mask) & ~mask);
    remaining -= (first_page-(char*)ptr);
    if (remaining < page_size) return;

    /* Trim the length down to a whole number of pages. */
    madvise((void *)first_page, remaining&~mask, MADV_DONTNEED);
#else
    (void)(ptr);
#endif
}
622
623/* Get the RSS information in an OS-specific way.
624 *
625 * WARNING: the function zmalloc_get_rss() is not designed to be fast
626 * and may not be called in the busy loops where Redis tries to release
627 * memory expiring or swapping out objects.
628 *
629 * For this kind of "fast RSS reporting" usages use instead the
630 * function RedisEstimateRSS() that is a much faster (and less precise)
631 * version of the function. */
632
633#if defined(HAVE_PROC_STAT)
634#include <sys/types.h>
635#include <sys/stat.h>
636#include <fcntl.h>
637#endif
638
/* Get the i'th field from "/proc/self/stat" as a long long.
 * Note that 'i' is 1 based, as it appears in the 'proc' man page.
 *
 * Returns 1 and stores the value in '*res' on success. Returns 0 if the
 * file cannot be read, the field does not exist, or the field is not a
 * complete decimal number (including an empty field). Fields 1 and 2 (pid
 * and process name) cannot be requested. Without HAVE_PROC_STAT the
 * function always returns 0. */
int get_proc_stat_ll(int i, long long *res) {
#if defined(HAVE_PROC_STAT)
    char buf[4096];
    int fd, l;
    char *p, *x;

    if ((fd = open("/proc/self/stat",O_RDONLY)) == -1) return 0;
    l = read(fd,buf,sizeof(buf)-1);
    close(fd);
    if (l <= 0) return 0;
    buf[l] = '\0';
    if (buf[l-1] == '\n') buf[l-1] = '\0';

    /* Skip pid and process name (surrounded with parentheses). The name
     * itself may contain ')' so search for the last one. */
    p = strrchr(buf, ')');
    if (!p) return 0;
    p++;
    while (*p == ' ') p++;
    if (*p == '\0') return 0;

    /* 'p' now points at field 3, so 'i - 3' separators are left to skip. */
    i -= 3;
    if (i < 0) return 0;

    while (i--) {
        p = strchr(p, ' ');
        if (!p) return 0;
        p++;
    }

    /* Isolate the field and convert it. */
    x = strchr(p,' ');
    if (x) *x = '\0';

    *res = strtoll(p,&x,10);
    /* 'x == p' means no digits were consumed (empty field): without this
     * check an empty field would be reported as a successful 0. */
    if (x == p || *x != '\0') return 0;
    return 1;
#else
    UNUSED(i);
    UNUSED(res);
    return 0;
#endif
}
681
682#if defined(HAVE_PROC_STAT)
/* RSS from /proc/self/stat: the 24th field multiplied by the page size.
 * Returns 0 if the field cannot be read. */
size_t zmalloc_get_rss(void) {
    long long rss_pages;
    int page = sysconf(_SC_PAGESIZE);

    /* RSS is the 24th field in /proc/<pid>/stat */
    if (!get_proc_stat_ll(24, &rss_pages)) return 0;
    return rss_pages * page;
}
692#elif defined(HAVE_TASKINFO)
693#include <sys/types.h>
694#include <sys/sysctl.h>
695#include <mach/task.h>
696#include <mach/mach_init.h>
697
698size_t zmalloc_get_rss(void) {
699 task_t task = MACH_PORT_NULL;
700 struct task_basic_info t_info;
701 mach_msg_type_number_t t_info_count = TASK_BASIC_INFO_COUNT;
702
703 if (task_for_pid(current_task(), getpid(), &task) != KERN_SUCCESS)
704 return 0;
705 task_info(task, TASK_BASIC_INFO, (task_info_t)&t_info, &t_info_count);
706
707 return t_info.resident_size;
708}
709#elif defined(__FreeBSD__) || defined(__DragonFly__)
710#include <sys/types.h>
711#include <sys/sysctl.h>
712#include <sys/user.h>
713
714size_t zmalloc_get_rss(void) {
715 struct kinfo_proc info;
716 size_t infolen = sizeof(info);
717 int mib[4];
718 mib[0] = CTL_KERN;
719 mib[1] = KERN_PROC;
720 mib[2] = KERN_PROC_PID;
721 mib[3] = getpid();
722
723 if (sysctl(mib, 4, &info, &infolen, NULL, 0) == 0)
724#if defined(__FreeBSD__)
725 return (size_t)info.ki_rssize * getpagesize();
726#else
727 return (size_t)info.kp_vm_rssize * getpagesize();
728#endif
729
730 return 0L;
731}
732#elif defined(__NetBSD__) || defined(__OpenBSD__)
733#include <sys/types.h>
734#include <sys/sysctl.h>
735
736#if defined(__OpenBSD__)
737#define kinfo_proc2 kinfo_proc
738#define KERN_PROC2 KERN_PROC
739#define __arraycount(a) (sizeof(a) / sizeof(a[0]))
740#endif
741
742size_t zmalloc_get_rss(void) {
743 struct kinfo_proc2 info;
744 size_t infolen = sizeof(info);
745 int mib[6];
746 mib[0] = CTL_KERN;
747 mib[1] = KERN_PROC2;
748 mib[2] = KERN_PROC_PID;
749 mib[3] = getpid();
750 mib[4] = sizeof(info);
751 mib[5] = 1;
752 if (sysctl(mib, __arraycount(mib), &info, &infolen, NULL, 0) == 0)
753 return (size_t)info.p_vm_rssize * getpagesize();
754
755 return 0L;
756}
757#elif defined(__HAIKU__)
758#include <OS.h>
759
760size_t zmalloc_get_rss(void) {
761 area_info info;
762 thread_info th;
763 size_t rss = 0;
764 ssize_t cookie = 0;
765
766 if (get_thread_info(find_thread(0), &th) != B_OK)
767 return 0;
768
769 while (get_next_area_info(th.team, &cookie, &info) == B_OK)
770 rss += info.ram_size;
771
772 return rss;
773}
774#elif defined(HAVE_PSINFO)
775#include <unistd.h>
776#include <sys/procfs.h>
777#include <fcntl.h>
778
779size_t zmalloc_get_rss(void) {
780 struct prpsinfo info;
781 char filename[256];
782 int fd;
783
784 snprintf(filename,256,"/proc/%ld/psinfo",(long) getpid());
785
786 if ((fd = open(filename,O_RDONLY)) == -1) return 0;
787 if (ioctl(fd, PIOCPSINFO, &info) == -1) {
788 close(fd);
789 return 0;
790 }
791
792 close(fd);
793 return info.pr_rssize;
794}
795#else
/* Fallback for systems where the RSS cannot be obtained in an OS-specific
 * way: report the memory usage tracked by zmalloc itself.
 *
 * Fragmentation will therefore always appear to be 1 (no fragmentation). */
size_t zmalloc_get_rss(void) {
    size_t estimate = zmalloc_used_memory();
    return estimate;
}
804#endif
805
806#if defined(USE_JEMALLOC)
807
/* Compute the total memory wasted in fragmentation inside small arena bins.
 * Done by summing the memory in unused regs in all slabs of all small bins.
 *
 * Pass in arena to get the information of the specified arena, otherwise
 * pass in MALLCTL_ARENAS_ALL to get all. */
size_t zmalloc_get_frag_smallbins_by_arena(unsigned int arena) {
    unsigned nbins;
    size_t len, wasted = 0;

    /* The mallctl names are translated to MIBs once, up front, so that the
     * loop below needs no snprintf or name parsing. */
    size_t size_mib[8], nregs_mib[8], curregs_mib[8], curslabs_mib[8];
    size_t size_miblen = 8, nregs_miblen = 8, curregs_miblen = 8, curslabs_miblen = 8;

    len = sizeof(unsigned);
    assert(!je_mallctl("arenas.nbins", &nbins, &len, NULL, 0));

    assert(!je_mallctlnametomib("arenas.bin.0.size", size_mib, &size_miblen));
    assert(!je_mallctlnametomib("arenas.bin.0.nregs", nregs_mib, &nregs_miblen));
    assert(!je_mallctlnametomib("stats.arenas.0.bins.0.curregs", curregs_mib, &curregs_miblen));
    assert(!je_mallctlnametomib("stats.arenas.0.bins.0.curslabs", curslabs_mib, &curslabs_miblen));

    /* The arena component of the stats MIBs is the same for every bin. */
    curregs_mib[2] = arena;
    curslabs_mib[2] = arena;

    for (unsigned bin = 0; bin < nbins; bin++) {
        size_t used_regs, slabs, reg_size;
        uint32_t regs_per_slab;

        /* The size of the current bin */
        size_mib[2] = bin;
        len = sizeof(size_t);
        assert(!je_mallctlbymib(size_mib, size_miblen, &reg_size, &len, NULL, 0));

        /* Number of used regions in the bin */
        curregs_mib[4] = bin;
        len = sizeof(size_t);
        assert(!je_mallctlbymib(curregs_mib, curregs_miblen, &used_regs, &len, NULL, 0));

        /* Number of regions per slab */
        nregs_mib[2] = bin;
        len = sizeof(uint32_t);
        assert(!je_mallctlbymib(nregs_mib, nregs_miblen, &regs_per_slab, &len, NULL, 0));

        /* Number of current slabs in the bin */
        curslabs_mib[4] = bin;
        len = sizeof(size_t);
        assert(!je_mallctlbymib(curslabs_mib, curslabs_miblen, &slabs, &len, NULL, 0));

        /* Unused regions of this bin, in bytes, added to the total. */
        wasted += ((regs_per_slab * slabs) - used_regs) * reg_size;
    }

    return wasted;
}
863
864/* Compute the total memory wasted in fragmentation of inside small arena bins.
865 * Done by summing the memory in unused regs in all slabs of all small bins. */
866size_t zmalloc_get_frag_smallbins(void) {
867 return zmalloc_get_frag_smallbins_by_arena(MALLCTL_ARENAS_ALL);
868}
869
870/* Get memory allocation information from allocator.
871 *
872 * refresh_stats indicates whether to refresh cached statistics.
873 * For the meaning of the other parameters, please refer to the function implementation
874 * and INFO's allocator_* in redis-doc. */
875int zmalloc_get_allocator_info(int refresh_stats, size_t *allocated, size_t *active, size_t *resident,
876 size_t *retained, size_t *muzzy, size_t *frag_smallbins_bytes)
877{
878 size_t sz;
879 *allocated = *resident = *active = 0;
880
881 /* Update the statistics cached by mallctl. */
882 if (refresh_stats) {
883 uint64_t epoch = 1;
884 sz = sizeof(epoch);
885 je_mallctl("epoch", &epoch, &sz, &epoch, sz);
886 }
887
888 sz = sizeof(size_t);
889 /* Unlike RSS, this does not include RSS from shared libraries and other non
890 * heap mappings. */
891 je_mallctl("stats.resident", resident, &sz, NULL, 0);
892 /* Unlike resident, this doesn't not include the pages jemalloc reserves
893 * for re-use (purge will clean that). */
894 je_mallctl("stats.active", active, &sz, NULL, 0);
895 /* Unlike zmalloc_used_memory, this matches the stats.resident by taking
896 * into account all allocations done by this process (not only zmalloc). */
897 je_mallctl("stats.allocated", allocated, &sz, NULL, 0);
898
899 /* Retained memory is memory released by `madvised(..., MADV_DONTNEED)`, which is not part
900 * of RSS or mapped memory, and doesn't have a strong association with physical memory in the OS.
901 * It is still part of the VM-Size, and may be used again in later allocations. */
902 if (retained) {
903 *retained = 0;
904 je_mallctl("stats.retained", retained, &sz, NULL, 0);
905 }
906
907 /* Unlike retained, Muzzy representats memory released with `madvised(..., MADV_FREE)`.
908 * These pages will show as RSS for the process, until the OS decides to re-use them. */
909 if (muzzy) {
910 char buf[100];
911 size_t pmuzzy, page;
912 snprintf(buf, sizeof(buf), "stats.arenas.%u.pmuzzy", MALLCTL_ARENAS_ALL);
913 assert(!je_mallctl(buf, &pmuzzy, &sz, NULL, 0));
914 assert(!je_mallctl("arenas.page", &page, &sz, NULL, 0));
915 *muzzy = pmuzzy * page;
916 }
917
918 /* Total size of consumed meomry in unused regs in small bins (AKA external fragmentation). */
919 *frag_smallbins_bytes = zmalloc_get_frag_smallbins();
920 return 1;
921}
922
/* Get the memory allocation information of the specified arena from the
 * allocator.
 *
 * refresh_stats indicates whether to refresh cached statistics.
 * For the meaning of the other parameters please refer to the comments
 * below and to INFO's allocator_* in redis-doc. Always returns 1. */
int zmalloc_get_allocator_info_by_arena(unsigned int arena, int refresh_stats, size_t *allocated,
                                        size_t *active, size_t *resident, size_t *frag_smallbins_bytes)
{
    char name[100];
    size_t len;
    *allocated = *resident = *active = 0;

    /* Update the statistics cached by mallctl. */
    if (refresh_stats) {
        uint64_t epoch = 1;
        len = sizeof(epoch);
        je_mallctl("epoch", &epoch, &len, &epoch, len);
    }

    len = sizeof(size_t);
    /* Unlike RSS, this does not include RSS from shared libraries and other
     * non heap mappings. */
    snprintf(name, sizeof(name), "stats.arenas.%u.small.resident", arena);
    je_mallctl(name, resident, &len, NULL, 0);

    /* Unlike resident, this does not include the pages jemalloc reserves
     * for re-use (purge will clean that). */
    size_t active_pages, page_bytes;
    snprintf(name, sizeof(name), "stats.arenas.%u.pactive", arena);
    assert(!je_mallctl(name, &active_pages, &len, NULL, 0));
    assert(!je_mallctl("arenas.page", &page_bytes, &len, NULL, 0));
    *active = active_pages * page_bytes;

    /* Unlike zmalloc_used_memory, this matches the stats.resident by taking
     * into account all allocations done by this process (not only zmalloc).
     * The value is the sum of the small and large allocations. */
    size_t small_allocated, large_allocated;
    snprintf(name, sizeof(name), "stats.arenas.%u.small.allocated", arena);
    assert(!je_mallctl(name, &small_allocated, &len, NULL, 0));
    *allocated += small_allocated;
    snprintf(name, sizeof(name), "stats.arenas.%u.large.allocated", arena);
    assert(!je_mallctl(name, &large_allocated, &len, NULL, 0));
    *allocated += large_allocated;

    /* Total size of consumed memory in unused regs in small bins (AKA
     * external fragmentation). */
    *frag_smallbins_bytes = zmalloc_get_frag_smallbins_by_arena(arena);
    return 1;
}
968
969
/* Switch jemalloc's "background_thread" mallctl option on (enable != 0)
 * or off (enable == 0). The mallctl result is not checked. */
void set_jemalloc_bg_thread(int enable) {
    /* Letting jemalloc purge asynchronously is required when there's no
     * traffic after flushdb. */
    char flag = enable ? 1 : 0;
    je_mallctl("background_thread", NULL, 0, &flag, sizeof(flag));
}
976
/* Ask jemalloc to return all unused (reserved) pages to the OS.
 *
 * Returns 0 when both mallctl calls succeed, -1 otherwise. */
int jemalloc_purge(void) {
    unsigned arena_count = 0;
    size_t len = sizeof(arena_count);

    if (je_mallctl("arenas.narenas", &arena_count, &len, NULL, 0) != 0)
        return -1;

    /* The purge request is addressed to the index equal to arenas.narenas. */
    char ctl_name[32];
    snprintf(ctl_name, sizeof(ctl_name), "arena.%u.purge", arena_count);
    return (je_mallctl(ctl_name, NULL, 0, NULL, 0) == 0) ? 0 : -1;
}
989
990#else
991
/* Stub variant from the #else branch, i.e. the build where the
 * je_mallctl()-based implementation is not compiled.
 *
 * Every mandatory output (allocated, active, resident, frag_smallbins_bytes)
 * is set to 0; the optional outputs (retained, muzzy) are zeroed only when a
 * non-NULL pointer is supplied. refresh_stats is ignored. Always returns 1. */
int zmalloc_get_allocator_info(int refresh_stats, size_t *allocated, size_t *active, size_t *resident,
                               size_t *retained, size_t *muzzy, size_t *frag_smallbins_bytes)
{
    UNUSED(refresh_stats);

    /* Mandatory outputs. */
    *frag_smallbins_bytes = 0;
    *active = 0;
    *resident = 0;
    *allocated = 0;

    /* Optional outputs. */
    if (retained != NULL) {
        *retained = 0;
    }
    if (muzzy != NULL) {
        *muzzy = 0;
    }
    return 1;
}
1001
/* Stub variant from the #else branch, i.e. the build where the
 * je_mallctl()-based implementation is not compiled.
 *
 * arena and refresh_stats are ignored; all four outputs are set to 0.
 * Always returns 1. */
int zmalloc_get_allocator_info_by_arena(unsigned int arena, int refresh_stats, size_t *allocated,
                                        size_t *active, size_t *resident, size_t *frag_smallbins_bytes)
{
    UNUSED(arena);
    UNUSED(refresh_stats);

    *frag_smallbins_bytes = 0;
    *active = 0;
    *resident = 0;
    *allocated = 0;
    return 1;
}
1010
1011
/* Stub variant from the #else branch: there is nothing to configure, so the
 * argument is ignored and the call does nothing. */
void set_jemalloc_bg_thread(int enable) {
    (void)enable;
}
1015
/* Stub variant from the #else branch: nothing is purged.
 *
 * Always returns 0, the same value the je_mallctl()-based variant uses
 * for success. */
int jemalloc_purge(void) {
    /* Nothing to release here. */
    return 0;
}
1019
1020#endif
1021
1022#if defined(__APPLE__)
/* For proc_pidinfo() used later in zmalloc_get_smap_bytes_by_field().
 * Note that this file cannot be included in zmalloc.h because it includes
 * a Darwin queue.h file where there is a "LIST_HEAD" macro (!) defined
 * conflicting with Redis user code. */
1027#include <libproc.h>
1028#endif
1029
/* Get the sum of the specified field (converted from kb to bytes) in
 * /proc/self/smaps. The field must be specified with trailing ":" as it
 * appears in the smaps output.
 *
 * If a pid is specified, the information is extracted for such a pid,
 * otherwise if pid is -1 the information reported is about the
 * current process.
 *
 * Example: zmalloc_get_smap_bytes_by_field("Rss:",-1);
 */
1040#if defined(HAVE_PROC_SMAPS)
/* Sum, in bytes, every occurrence of `field` found in the smaps file of a
 * process.
 *
 * field: line prefix to match, including the trailing ":" (e.g. "Rss:").
 * pid:   process to inspect; -1 selects /proc/self/smaps.
 *
 * Returns the accumulated value (smaps reports kB, so each value is
 * multiplied by 1024), or 0 when the smaps file cannot be opened or no
 * line matches. */
size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
    char line[1024];
    size_t bytes = 0;
    size_t flen = strlen(field);
    FILE *fp;

    if (pid == -1) {
        fp = fopen("/proc/self/smaps","r");
    } else {
        char filename[128];
        snprintf(filename,sizeof(filename),"/proc/%ld/smaps",pid);
        fp = fopen(filename,"r");
    }

    if (!fp) return 0;
    while(fgets(line,sizeof(line),fp) != NULL) {
        if (strncmp(line,field,flen) != 0) continue;

        /* Only account for lines carrying a "kB" unit. The unit is searched
         * for after the field name, so a 'k' inside the name itself is not
         * mistaken for it. */
        if (strchr(line+flen,'k') == NULL) continue;

        /* Parse and scale with unsigned arithmetic: a signed long product
         * could overflow where long is 32 bits wide. The conversion stops
         * at the first non-digit, so the unit needs no stripping. */
        bytes += (size_t)strtoull(line+flen,NULL,10) * 1024;
    }
    fclose(fp);
    return bytes;
}
1068#else
/* Variant compiled when HAVE_PROC_SMAPS is not defined.
 *
 * On Apple platforms the value comes from the libproc proc_pidinfo() call.
 * That API reports page counts, so they are multiplied by the page size to
 * obtain bytes. "AnonHugePages:" always yields 0 as the THP feature is not
 * supported on this platform. Unknown fields and a failed query yield 0
 * too, and so does every call on any other platform.
 *
 * A pid of -1 means the current process. */
size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
#if defined(__APPLE__)
    struct proc_regioninfo region;
    long target = (pid == -1) ? (long)getpid() : pid;

    if (proc_pidinfo(target, PROC_PIDREGIONINFO, 0, &region,
                     PROC_PIDREGIONINFO_SIZE) != PROC_PIDREGIONINFO_SIZE)
    {
        return 0;
    }

    int page_bytes = getpagesize();
    if (strcmp(field, "Private_Dirty:") == 0)
        return (size_t)region.pri_pages_dirtied * page_bytes;
    if (strcmp(field, "Rss:") == 0)
        return (size_t)region.pri_pages_resident * page_bytes;

    /* "AnonHugePages:" and any other field: nothing to report. */
    return 0;
#else
    (void)field;
    (void)pid;
    return 0;
#endif
}
1098#endif
1099
/* Return the total number of bytes in pages marked as Private Dirty for the
 * given pid. The pid is forwarded unchanged to
 * zmalloc_get_smap_bytes_by_field() together with the "Private_Dirty:" field.
 *
 * Note: depending on the platform and memory footprint of the process, this
 * call can be slow, exceeding 1000ms!
 */
size_t zmalloc_get_private_dirty(long pid) {
    size_t dirty_bytes = zmalloc_get_smap_bytes_by_field("Private_Dirty:", pid);
    return dirty_bytes;
}
1108
/* Returns the size of physical memory (RAM) in bytes, or 0 when the size
 * cannot be determined (unknown OS, no usable query method, or the query
 * itself failed).
 * It looks ugly, but this is the cleanest way to achieve cross platform results.
 * Cleaned up from:
 *
 * http://nadeausoftware.com/articles/2012/09/c_c_tip_how_get_physical_memory_size_system
 *
 * Note that this function:
 * 1) Was released under the following CC attribution license:
 *    http://creativecommons.org/licenses/by/3.0/deed.en_US.
 * 2) Was originally implemented by David Robert Nadeau.
 * 3) Was modified for Redis by Matt Stancliff.
 * 4) This note exists in order to comply with the original license.
 */
size_t zmalloc_get_memory_size(void) {
#if defined(__unix__) || defined(__unix) || defined(unix) || \
    (defined(__APPLE__) && defined(__MACH__))
#if defined(CTL_HW) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM64))
    int mib[2];
    mib[0] = CTL_HW;
#if defined(HW_MEMSIZE)
    mib[1] = HW_MEMSIZE;            /* OSX. --------------------- */
#elif defined(HW_PHYSMEM64)
    mib[1] = HW_PHYSMEM64;          /* NetBSD, OpenBSD. --------- */
#endif
    int64_t size = 0;               /* 64-bit */
    size_t len = sizeof(size);
    if (sysctl( mib, 2, &size, &len, NULL, 0) == 0)
        return (size_t)size;
    return 0L;          /* Failed? */

#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
    /* FreeBSD, Linux, OpenBSD, and Solaris. -------------------- */
    /* sysconf() reports errors with -1; check before casting so a failure
     * yields 0 like the sysctl() branches instead of a huge bogus size. */
    long pages = sysconf(_SC_PHYS_PAGES);
    long page_size = sysconf(_SC_PAGESIZE);
    if (pages <= 0 || page_size <= 0)
        return 0L;      /* Failed? */
    return (size_t)pages * (size_t)page_size;

#elif defined(CTL_HW) && (defined(HW_PHYSMEM) || defined(HW_REALMEM))
    /* DragonFly BSD, FreeBSD, NetBSD, OpenBSD, and OSX. -------- */
    int mib[2];
    mib[0] = CTL_HW;
#if defined(HW_REALMEM)
    mib[1] = HW_REALMEM;        /* FreeBSD. ----------------- */
#elif defined(HW_PHYSMEM)
    mib[1] = HW_PHYSMEM;        /* Others. ------------------ */
#endif
    unsigned int size = 0;      /* 32-bit */
    size_t len = sizeof(size);
    if (sysctl(mib, 2, &size, &len, NULL, 0) == 0)
        return (size_t)size;
    return 0L;          /* Failed? */
#else
    return 0L;          /* Unknown method to get the data. */
#endif
#else
    return 0L;          /* Unknown OS. */
#endif
}
1164
1165#ifdef REDIS_TEST
1166#include "testhelp.h"
1167#include "redisassert.h"
1168
1169#define TEST(name) printf("test — %s\n", name);
1170
1171int zmalloc_test(int argc, char **argv, int flags) {
1172 void *ptr, *ptr2;
1173
1174 UNUSED(argc);
1175 UNUSED(argv);
1176 UNUSED(flags);
1177
1178 printf("Malloc prefix size: %d\n", (int) PREFIX_SIZE);
1179
1180 TEST("Initial used memory is 0") {
1181 assert(zmalloc_used_memory() == 0);
1182 }
1183
1184 TEST("Allocated 123 bytes") {
1185 ptr = zmalloc(123);
1186 printf("Allocated 123 bytes; used: %zu\n", zmalloc_used_memory());
1187 }
1188
1189 TEST("Reallocated to 456 bytes") {
1190 ptr = zrealloc(ptr, 456);
1191 printf("Reallocated to 456 bytes; used: %zu\n", zmalloc_used_memory());
1192 }
1193
1194 TEST("Callocated 123 bytes") {
1195 ptr2 = zcalloc(123);
1196 printf("Callocated 123 bytes; used: %zu\n", zmalloc_used_memory());
1197 }
1198
1199 TEST("Freed pointers") {
1200 zfree(ptr);
1201 zfree(ptr2);
1202 printf("Freed pointers; used: %zu\n", zmalloc_used_memory());
1203 }
1204
1205 TEST("Allocated 0 bytes") {
1206 ptr = zmalloc(0);
1207 printf("Allocated 0 bytes; used: %zu\n", zmalloc_used_memory());
1208 zfree(ptr);
1209 }
1210
1211 TEST("At the end used memory is 0") {
1212 assert(zmalloc_used_memory() == 0);
1213 }
1214
1215 return 0;
1216}
1217#endif