Diffstat (limited to 'examples/redis-unstable/deps/jemalloc/src/hpa.c')
| -rw-r--r-- | examples/redis-unstable/deps/jemalloc/src/hpa.c | 1044 |
1 file changed, 0 insertions, 1044 deletions
diff --git a/examples/redis-unstable/deps/jemalloc/src/hpa.c b/examples/redis-unstable/deps/jemalloc/src/hpa.c
deleted file mode 100644
index 7e2aeba..0000000
--- a/examples/redis-unstable/deps/jemalloc/src/hpa.c
+++ /dev/null
@@ -1,1044 +0,0 @@
-#include "jemalloc/internal/jemalloc_preamble.h"
-#include "jemalloc/internal/jemalloc_internal_includes.h"
-
-#include "jemalloc/internal/hpa.h"
-
-#include "jemalloc/internal/fb.h"
-#include "jemalloc/internal/witness.h"
-
-#define HPA_EDEN_SIZE (128 * HUGEPAGE)
-
-static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
-    bool *deferred_work_generated);
-static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
-    size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
-static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
-static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated);
-static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated);
-static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
-    edata_list_active_t *list, bool *deferred_work_generated);
-static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
-
-bool
-hpa_supported() {
-#ifdef _WIN32
-	/*
-	 * At least until the API and implementation are somewhat settled, we
-	 * don't want to try to debug the VM subsystem on the hardest-to-test
-	 * platform.
-	 */
-	return false;
-#endif
-	if (!pages_can_hugify) {
-		return false;
-	}
-	/*
-	 * We fundamentally rely on an address-space-hungry growth strategy for
-	 * hugepages.
-	 */
-	if (LG_SIZEOF_PTR != 3) {
-		return false;
-	}
-	/*
-	 * If we couldn't detect the value of HUGEPAGE, HUGEPAGE_PAGES becomes
-	 * this sentinel value -- see the comment in pages.h.
-	 */
-	if (HUGEPAGE_PAGES == 1) {
-		return false;
-	}
-	return true;
-}
-
-static void
-hpa_do_consistency_checks(hpa_shard_t *shard) {
-	assert(shard->base != NULL);
-}
-
-bool
-hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) {
-	/* malloc_conf processing should have filtered out these cases. */
-	assert(hpa_supported());
-	bool err;
-	err = malloc_mutex_init(&central->grow_mtx, "hpa_central_grow",
-	    WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive);
-	if (err) {
-		return true;
-	}
-	err = malloc_mutex_init(&central->mtx, "hpa_central",
-	    WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive);
-	if (err) {
-		return true;
-	}
-	central->base = base;
-	central->eden = NULL;
-	central->eden_len = 0;
-	central->age_counter = 0;
-	central->hooks = *hooks;
-	return false;
-}
-
-static hpdata_t *
-hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) {
-	return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t),
-	    CACHELINE);
-}
-
-hpdata_t *
-hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
-    bool *oom) {
-	/* Don't yet support big allocations; these should get filtered out. */
-	assert(size <= HUGEPAGE);
-	/*
-	 * Should only try to extract from the central allocator if the local
-	 * shard is exhausted. We should hold the grow_mtx on that shard.
-	 */
-	witness_assert_positive_depth_to_rank(
-	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW);
-
-	malloc_mutex_lock(tsdn, &central->grow_mtx);
-	*oom = false;
-
-	hpdata_t *ps = NULL;
-
-	/* Is eden a perfect fit? */
-	if (central->eden != NULL && central->eden_len == HUGEPAGE) {
-		ps = hpa_alloc_ps(tsdn, central);
-		if (ps == NULL) {
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-		hpdata_init(ps, central->eden, central->age_counter++);
-		central->eden = NULL;
-		central->eden_len = 0;
-		malloc_mutex_unlock(tsdn, &central->grow_mtx);
-		return ps;
-	}
-
-	/*
-	 * We're about to try to allocate from eden by splitting. If eden is
-	 * NULL, we have to allocate it too. Otherwise, we just have to
-	 * allocate an edata_t for the new psset.
-	 */
-	if (central->eden == NULL) {
-		/*
-		 * During development, we're primarily concerned with systems
-		 * with overcommit. Eventually, we should be more careful here.
-		 */
-		bool commit = true;
-		/* Allocate address space, bailing if we fail. */
-		void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE,
-		    &commit);
-		if (new_eden == NULL) {
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-		ps = hpa_alloc_ps(tsdn, central);
-		if (ps == NULL) {
-			pages_unmap(new_eden, HPA_EDEN_SIZE);
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-		central->eden = new_eden;
-		central->eden_len = HPA_EDEN_SIZE;
-	} else {
-		/* Eden is already nonempty; only need an edata for ps. */
-		ps = hpa_alloc_ps(tsdn, central);
-		if (ps == NULL) {
-			*oom = true;
-			malloc_mutex_unlock(tsdn, &central->grow_mtx);
-			return NULL;
-		}
-	}
-	assert(ps != NULL);
-	assert(central->eden != NULL);
-	assert(central->eden_len > HUGEPAGE);
-	assert(central->eden_len % HUGEPAGE == 0);
-	assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden);
-
-	hpdata_init(ps, central->eden, central->age_counter++);
-
-	char *eden_char = (char *)central->eden;
-	eden_char += HUGEPAGE;
-	central->eden = (void *)eden_char;
-	central->eden_len -= HUGEPAGE;
-
-	malloc_mutex_unlock(tsdn, &central->grow_mtx);
-
-	return ps;
-}
-
-bool
-hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
-    base_t *base, edata_cache_t *edata_cache, unsigned ind,
-    const hpa_shard_opts_t *opts) {
-	/* malloc_conf processing should have filtered out these cases. */
-	assert(hpa_supported());
-	bool err;
-	err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow",
-	    WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive);
-	if (err) {
-		return true;
-	}
-	err = malloc_mutex_init(&shard->mtx, "hpa_shard",
-	    WITNESS_RANK_HPA_SHARD, malloc_mutex_rank_exclusive);
-	if (err) {
-		return true;
-	}
-
-	assert(edata_cache != NULL);
-	shard->central = central;
-	shard->base = base;
-	edata_cache_fast_init(&shard->ecf, edata_cache);
-	psset_init(&shard->psset);
-	shard->age_counter = 0;
-	shard->ind = ind;
-	shard->emap = emap;
-
-	shard->opts = *opts;
-
-	shard->npending_purge = 0;
-	nstime_init_zero(&shard->last_purge);
-
-	shard->stats.npurge_passes = 0;
-	shard->stats.npurges = 0;
-	shard->stats.nhugifies = 0;
-	shard->stats.ndehugifies = 0;
-
-	/*
-	 * Fill these in last, so that if an hpa_shard gets used despite
-	 * initialization failing, we'll at least crash instead of just
-	 * operating on corrupted data.
-	 */
-	shard->pai.alloc = &hpa_alloc;
-	shard->pai.alloc_batch = &hpa_alloc_batch;
-	shard->pai.expand = &hpa_expand;
-	shard->pai.shrink = &hpa_shrink;
-	shard->pai.dalloc = &hpa_dalloc;
-	shard->pai.dalloc_batch = &hpa_dalloc_batch;
-	shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work;
-
-	hpa_do_consistency_checks(shard);
-
-	return false;
-}
-
-/*
- * Note that the stats functions here follow the usual stats naming conventions;
- * "merge" obtains the stats from some live instance of an object, while "accum"
- * only combines the stats from one stats object into another. Hence the lack of
- * locking here.
- */
-static void
-hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst,
-    hpa_shard_nonderived_stats_t *src) {
-	dst->npurge_passes += src->npurge_passes;
-	dst->npurges += src->npurges;
-	dst->nhugifies += src->nhugifies;
-	dst->ndehugifies += src->ndehugifies;
-}
-
-void
-hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) {
-	psset_stats_accum(&dst->psset_stats, &src->psset_stats);
-	hpa_shard_nonderived_stats_accum(&dst->nonderived_stats,
-	    &src->nonderived_stats);
-}
-
-void
-hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
-    hpa_shard_stats_t *dst) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_lock(tsdn, &shard->grow_mtx);
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	psset_stats_accum(&dst->psset_stats, &shard->psset.stats);
-	hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats);
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-}
-
-static bool
-hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
-	/*
-	 * Note that this needs to be >= rather than just >, because of the
-	 * important special case in which the hugification threshold is exactly
-	 * HUGEPAGE.
-	 */
-	return hpdata_nactive_get(ps) * PAGE
-	    >= shard->opts.hugification_threshold;
-}
-
-static size_t
-hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	return psset_ndirty(&shard->psset) - shard->npending_purge;
-}
-
-static size_t
-hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	if (shard->opts.dirty_mult == (fxp_t)-1) {
-		return (size_t)-1;
-	}
-	return fxp_mul_frac(psset_nactive(&shard->psset),
-	    shard->opts.dirty_mult);
-}
-
-static bool
-hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
-	if (to_hugify == NULL) {
-		return false;
-	}
-	return hpa_adjusted_ndirty(tsdn, shard)
-	    + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard);
-}
-
-static bool
-hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) {
-		return true;
-	}
-	if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
-		return true;
-	}
-	return false;
-}
-
-static void
-hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
-    hpdata_t *ps) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	if (hpdata_changing_state_get(ps)) {
-		hpdata_purge_allowed_set(ps, false);
-		hpdata_disallow_hugify(ps);
-		return;
-	}
-	/*
-	 * Hugepages are distinctly costly to purge, so try to avoid it unless
-	 * they're *particularly* full of dirty pages. Eventually, we should
-	 * use a smarter / more dynamic heuristic for situations where we have
-	 * to manually hugify.
-	 *
-	 * In situations where we don't manually hugify, this problem is
-	 * reduced. The "bad" situation we're trying to avoid is one that's
-	 * common in some Linux configurations (where both enabled and defrag
-	 * are set to madvise) that can lead to long latency spikes on the first
-	 * access after a hugification. The ideal policy in such configurations
-	 * is probably time-based for both purging and hugifying; only hugify a
-	 * hugepage if it's met the criteria for some extended period of time,
-	 * and only dehugify it if it's failed to meet the criteria for an
-	 * extended period of time. When background threads are on, we should
-	 * try to take this hit on one of them, as well.
-	 *
-	 * I think the ideal setting is THP always enabled, and defrag set to
-	 * deferred; in that case we don't need any explicit calls on the
-	 * allocator's end at all; we just try to pack allocations in a
-	 * hugepage-friendly manner and let the OS hugify in the background.
-	 */
-	hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
-	if (hpa_good_hugification_candidate(shard, ps)
-	    && !hpdata_huge_get(ps)) {
-		nstime_t now;
-		shard->central->hooks.curtime(&now, /* first_reading */ true);
-		hpdata_allow_hugify(ps, now);
-	}
-	/*
-	 * Once a hugepage has become eligible for hugification, we don't mark
-	 * it as ineligible just because it stops meeting the criteria (this
-	 * could lead to situations where a hugepage that spends most of its
-	 * time meeting the criteria never quite gets hugified if there are
-	 * intervening deallocations). The idea is that the hugification delay
-	 * will allow them to get purged, resetting their "hugify-allowed" bit.
-	 * If they don't get purged, then the hugification isn't hurting and
-	 * might help. As an exception, we don't hugify hugepages that are now
-	 * empty; it definitely doesn't help there until the hugepage gets
-	 * reused, which is likely not for a while.
-	 */
-	if (hpdata_nactive_get(ps) == 0) {
-		hpdata_disallow_hugify(ps);
-	}
-}
-
-static bool
-hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
-	return to_hugify != NULL || hpa_should_purge(tsdn, shard);
-}
-
-/* Returns whether or not we purged anything. */
-static bool
-hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-
-	hpdata_t *to_purge = psset_pick_purge(&shard->psset);
-	if (to_purge == NULL) {
-		return false;
-	}
-	assert(hpdata_purge_allowed_get(to_purge));
-	assert(!hpdata_changing_state_get(to_purge));
-
-	/*
-	 * Don't let anyone else purge or hugify this page while
-	 * we're purging it (allocations and deallocations are
-	 * OK).
-	 */
-	psset_update_begin(&shard->psset, to_purge);
-	assert(hpdata_alloc_allowed_get(to_purge));
-	hpdata_mid_purge_set(to_purge, true);
-	hpdata_purge_allowed_set(to_purge, false);
-	hpdata_disallow_hugify(to_purge);
-	/*
-	 * Unlike with hugification (where concurrent
-	 * allocations are allowed), concurrent allocation out
-	 * of a hugepage being purged is unsafe; we might hand
-	 * out an extent for an allocation and then purge it
-	 * (clearing out user data).
-	 */
-	hpdata_alloc_allowed_set(to_purge, false);
-	psset_update_end(&shard->psset, to_purge);
-
-	/* Gather all the metadata we'll need during the purge. */
-	bool dehugify = hpdata_huge_get(to_purge);
-	hpdata_purge_state_t purge_state;
-	size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state);
-
-	shard->npending_purge += num_to_purge;
-
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-
-	/* Actually do the purging, now that the lock is dropped. */
-	if (dehugify) {
-		shard->central->hooks.dehugify(hpdata_addr_get(to_purge),
-		    HUGEPAGE);
-	}
-	size_t total_purged = 0;
-	uint64_t purges_this_pass = 0;
-	void *purge_addr;
-	size_t purge_size;
-	while (hpdata_purge_next(to_purge, &purge_state, &purge_addr,
-	    &purge_size)) {
-		total_purged += purge_size;
-		assert(total_purged <= HUGEPAGE);
-		purges_this_pass++;
-		shard->central->hooks.purge(purge_addr, purge_size);
-	}
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	/* The shard updates */
-	shard->npending_purge -= num_to_purge;
-	shard->stats.npurge_passes++;
-	shard->stats.npurges += purges_this_pass;
-	shard->central->hooks.curtime(&shard->last_purge,
-	    /* first_reading */ false);
-	if (dehugify) {
-		shard->stats.ndehugifies++;
-	}
-
-	/* The hpdata updates. */
-	psset_update_begin(&shard->psset, to_purge);
-	if (dehugify) {
-		hpdata_dehugify(to_purge);
-	}
-	hpdata_purge_end(to_purge, &purge_state);
-	hpdata_mid_purge_set(to_purge, false);
-
-	hpdata_alloc_allowed_set(to_purge, true);
-	hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge);
-
-	psset_update_end(&shard->psset, to_purge);
-
-	return true;
-}
-
-/* Returns whether or not we hugified anything. */
-static bool
-hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-
-	if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
-		return false;
-	}
-
-	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
-	if (to_hugify == NULL) {
-		return false;
-	}
-	assert(hpdata_hugify_allowed_get(to_hugify));
-	assert(!hpdata_changing_state_get(to_hugify));
-
-	/* Make sure that it's been hugifiable for long enough. */
-	nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify);
-	uint64_t millis = shard->central->hooks.ms_since(&time_hugify_allowed);
-	if (millis < shard->opts.hugify_delay_ms) {
-		return false;
-	}
-
-	/*
-	 * Don't let anyone else purge or hugify this page while
-	 * we're hugifying it (allocations and deallocations are
-	 * OK).
-	 */
-	psset_update_begin(&shard->psset, to_hugify);
-	hpdata_mid_hugify_set(to_hugify, true);
-	hpdata_purge_allowed_set(to_hugify, false);
-	hpdata_disallow_hugify(to_hugify);
-	assert(hpdata_alloc_allowed_get(to_hugify));
-	psset_update_end(&shard->psset, to_hugify);
-
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-
-	shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE);
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	shard->stats.nhugifies++;
-
-	psset_update_begin(&shard->psset, to_hugify);
-	hpdata_hugify(to_hugify);
-	hpdata_mid_hugify_set(to_hugify, false);
-	hpa_update_purge_hugify_eligibility(tsdn, shard, to_hugify);
-	psset_update_end(&shard->psset, to_hugify);
-
-	return true;
-}
-
-/*
- * Execution of deferred work is forced if it's triggered by an explicit
- * hpa_shard_do_deferred_work() call.
- */
-static void
-hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
-    bool forced) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	if (!forced && shard->opts.deferral_allowed) {
-		return;
-	}
-	/*
-	 * If we're on a background thread, do work so long as there's work to
-	 * be done. Otherwise, bound latency to not be *too* bad by doing at
-	 * most a small fixed number of operations.
-	 */
-	bool hugified = false;
-	bool purged = false;
-	size_t max_ops = (forced ? (size_t)-1 : 16);
-	size_t nops = 0;
-	do {
-		/*
-		 * Always purge before hugifying, to make sure we get some
-		 * ability to hit our quiescence targets.
-		 */
-		purged = false;
-		while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
-			purged = hpa_try_purge(tsdn, shard);
-			if (purged) {
-				nops++;
-			}
-		}
-		hugified = hpa_try_hugify(tsdn, shard);
-		if (hugified) {
-			nops++;
-		}
-		malloc_mutex_assert_owner(tsdn, &shard->mtx);
-		malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	} while ((hugified || purged) && nops < max_ops);
-}
-
-static edata_t *
-hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    bool *oom) {
-	bool err;
-	edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
-	if (edata == NULL) {
-		*oom = true;
-		return NULL;
-	}
-
-	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
-	if (ps == NULL) {
-		edata_cache_fast_put(tsdn, &shard->ecf, edata);
-		return NULL;
-	}
-
-	psset_update_begin(&shard->psset, ps);
-
-	if (hpdata_empty(ps)) {
-		/*
-		 * If the pageslab used to be empty, treat it as though it's
-		 * brand new for fragmentation-avoidance purposes; what we're
-		 * trying to approximate is the age of the allocations *in* that
-		 * pageslab, and the allocations in the new pageslab are
-		 * definitionally the youngest in this hpa shard.
-		 */
-		hpdata_age_set(ps, shard->age_counter++);
-	}
-
-	void *addr = hpdata_reserve_alloc(ps, size);
-	edata_init(edata, shard->ind, addr, size, /* slab */ false,
-	    SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active,
-	    /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA,
-	    EXTENT_NOT_HEAD);
-	edata_ps_set(edata, ps);
-
-	/*
-	 * This could theoretically be moved outside of the critical section,
-	 * but that introduces the potential for a race. Without the lock, the
-	 * (initially nonempty, since this is the reuse pathway) pageslab we
-	 * allocated out of could become otherwise empty while the lock is
-	 * dropped. This would force us to deal with a pageslab eviction down
-	 * the error pathway, which is a pain.
-	 */
-	err = emap_register_boundary(tsdn, shard->emap, edata,
-	    SC_NSIZES, /* slab */ false);
-	if (err) {
-		hpdata_unreserve(ps, edata_addr_get(edata),
-		    edata_size_get(edata));
-		/*
-		 * We should arguably reset dirty state here, but this would
-		 * require some sort of prepare + commit functionality that's a
-		 * little much to deal with for now.
-		 *
-		 * We don't have a do_deferred_work down this pathway, on the
-		 * principle that we didn't *really* affect shard state (we
-		 * tweaked the stats, but our tweaks weren't really accurate).
-		 */
-		psset_update_end(&shard->psset, ps);
-		edata_cache_fast_put(tsdn, &shard->ecf, edata);
-		*oom = true;
-		return NULL;
-	}
-
-	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
-	psset_update_end(&shard->psset, ps);
-	return edata;
-}
-
-static size_t
-hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    bool *oom, size_t nallocs, edata_list_active_t *results,
-    bool *deferred_work_generated) {
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	size_t nsuccess = 0;
-	for (; nsuccess < nallocs; nsuccess++) {
-		edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size,
-		    oom);
-		if (edata == NULL) {
-			break;
-		}
-		edata_list_active_append(results, edata);
-	}
-
-	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
-	*deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard);
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-	return nsuccess;
-}
-
-static size_t
-hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
-    size_t nallocs, edata_list_active_t *results,
-    bool *deferred_work_generated) {
-	assert(size <= shard->opts.slab_max_alloc);
-	bool oom = false;
-
-	size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs, results, deferred_work_generated);
-
-	if (nsuccess == nallocs || oom) {
-		return nsuccess;
-	}
-
-	/*
-	 * We didn't OOM, but weren't able to fill everything requested of us;
-	 * try to grow.
-	 */
-	malloc_mutex_lock(tsdn, &shard->grow_mtx);
-	/*
-	 * Check for grow races; maybe some earlier thread expanded the psset
-	 * in between when we dropped the main mutex and grabbed the grow mutex.
-	 */
-	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results, deferred_work_generated);
-	if (nsuccess == nallocs || oom) {
-		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-		return nsuccess;
-	}
-
-	/*
-	 * Note that we don't hold shard->mtx here (while growing);
-	 * deallocations (and allocations of smaller sizes) may still succeed
-	 * while we're doing this potentially expensive system call.
-	 */
-	hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom);
-	if (ps == NULL) {
-		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-		return nsuccess;
-	}
-
-	/*
-	 * We got the pageslab; allocate from it. This does an unlock followed
-	 * by a lock on the same mutex, and holds the grow mutex while doing
-	 * deferred work, but this is an uncommon path; the simplicity is worth
-	 * it.
-	 */
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	psset_insert(&shard->psset, ps);
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-
-	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
-	    nallocs - nsuccess, results, deferred_work_generated);
-	/*
-	 * Drop grow_mtx before doing deferred work; other threads blocked on it
-	 * should be allowed to proceed while we're working.
-	 */
-	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
-
-	return nsuccess;
-}
-
-static hpa_shard_t *
-hpa_from_pai(pai_t *self) {
-	assert(self->alloc == &hpa_alloc);
-	assert(self->expand == &hpa_expand);
-	assert(self->shrink == &hpa_shrink);
-	assert(self->dalloc == &hpa_dalloc);
-	return (hpa_shard_t *)self;
-}
-
-static size_t
-hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
-    edata_list_active_t *results, bool *deferred_work_generated) {
-	assert(nallocs > 0);
-	assert((size & PAGE_MASK) == 0);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
-	hpa_shard_t *shard = hpa_from_pai(self);
-
-	if (size > shard->opts.slab_max_alloc) {
-		return 0;
-	}
-
-	size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs,
-	    results, deferred_work_generated);
-
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
-
-	/*
-	 * Guard the sanity checks with config_debug because the loop cannot be
-	 * proven non-circular by the compiler, even if everything within the
-	 * loop is optimized away.
-	 */
-	if (config_debug) {
-		edata_t *edata;
-		ql_foreach(edata, &results->head, ql_link_active) {
-			emap_assert_mapped(tsdn, shard->emap, edata);
-			assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
-			assert(edata_state_get(edata) == extent_state_active);
-			assert(edata_arena_ind_get(edata) == shard->ind);
-			assert(edata_szind_get_maybe_invalid(edata) ==
-			    SC_NSIZES);
-			assert(!edata_slab_get(edata));
-			assert(edata_committed_get(edata));
-			assert(edata_base_get(edata) == edata_addr_get(edata));
-			assert(edata_base_get(edata) != NULL);
-		}
-	}
-	return nsuccess;
-}
-
-static edata_t *
-hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
-    bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
-	assert((size & PAGE_MASK) == 0);
-	assert(!guarded);
-	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
-	    WITNESS_RANK_CORE, 0);
-
-	/* We don't handle alignment or zeroing for now. */
-	if (alignment > PAGE || zero) {
-		return NULL;
-	}
-	/*
-	 * An alloc with alignment == PAGE and zero == false is equivalent to a
-	 * batch alloc of 1. Just do that, so we can share code.
-	 */
-	edata_list_active_t results;
-	edata_list_active_init(&results);
-	size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1,
-	    &results, deferred_work_generated);
-	assert(nallocs == 0 || nallocs == 1);
-	edata_t *edata = edata_list_active_first(&results);
-	return edata;
-}
-
-static bool
-hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
-    size_t new_size, bool zero, bool *deferred_work_generated) {
-	/* Expand not yet supported. */
-	return true;
-}
-
-static bool
-hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    size_t old_size, size_t new_size, bool *deferred_work_generated) {
-	/* Shrink not yet supported. */
-	return true;
-}
-
-static void
-hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
-	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
-
-	assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
-	assert(edata_state_get(edata) == extent_state_active);
-	assert(edata_arena_ind_get(edata) == shard->ind);
-	assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES);
-	assert(edata_committed_get(edata));
-	assert(edata_base_get(edata) != NULL);
-
-	/*
-	 * Another thread shouldn't be trying to touch the metadata of an
-	 * allocation being freed. The one exception is a merge attempt from a
-	 * lower-addressed PAC extent; in this case we have a nominal race on
-	 * the edata metadata bits, but in practice the fact that the PAI bits
-	 * are different will prevent any further access. The race is bad, but
-	 * benign in practice, and the long term plan is to track enough state
-	 * in the rtree to prevent these merge attempts in the first place.
-	 */
-	edata_addr_set(edata, edata_base_get(edata));
-	edata_zeroed_set(edata, false);
-	emap_deregister_boundary(tsdn, shard->emap, edata);
-}
-
-static void
-hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-
-	/*
-	 * Release the metadata early, to avoid having to remember to do it
-	 * while we're also doing tricky purging logic. First, we need to grab
-	 * a few bits of metadata from it.
-	 *
-	 * Note that the shard mutex protects ps's metadata too; it wouldn't be
-	 * correct to try to read most information out of it without the lock.
-	 */
-	hpdata_t *ps = edata_ps_get(edata);
-	/* Currently, all edatas come from pageslabs. */
-	assert(ps != NULL);
-	void *unreserve_addr = edata_addr_get(edata);
-	size_t unreserve_size = edata_size_get(edata);
-	edata_cache_fast_put(tsdn, &shard->ecf, edata);
-
-	psset_update_begin(&shard->psset, ps);
-	hpdata_unreserve(ps, unreserve_addr, unreserve_size);
-	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
-	psset_update_end(&shard->psset, ps);
-}
-
-static void
-hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
-    bool *deferred_work_generated) {
-	hpa_shard_t *shard = hpa_from_pai(self);
-
-	edata_t *edata;
-	ql_foreach(edata, &list->head, ql_link_active) {
-		hpa_dalloc_prepare_unlocked(tsdn, shard, edata);
-	}
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	/* Now, remove from the list. */
-	while ((edata = edata_list_active_first(list)) != NULL) {
-		edata_list_active_remove(list, edata);
-		hpa_dalloc_locked(tsdn, shard, edata);
-	}
-	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
-	*deferred_work_generated =
-	    hpa_shard_has_deferred_work(tsdn, shard);
-
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-}
-
-static void
-hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
-    bool *deferred_work_generated) {
-	assert(!edata_guarded_get(edata));
-	/* Just a dalloc_batch of size 1; this lets us share logic. */
-	edata_list_active_t dalloc_list;
-	edata_list_active_init(&dalloc_list);
-	edata_list_active_append(&dalloc_list, edata);
-	hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated);
-}
-
-/*
- * Calculate time until either purging or hugification ought to happen.
- * Called by background threads.
- */
-static uint64_t
-hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
-	hpa_shard_t *shard = hpa_from_pai(self);
-	uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-
-	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
-	if (to_hugify != NULL) {
-		nstime_t time_hugify_allowed =
-		    hpdata_time_hugify_allowed(to_hugify);
-		uint64_t since_hugify_allowed_ms =
-		    shard->central->hooks.ms_since(&time_hugify_allowed);
-		/*
-		 * If not enough time has passed since hugification was allowed,
-		 * sleep for the rest.
-		 */
-		if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) {
-			time_ns = shard->opts.hugify_delay_ms -
-			    since_hugify_allowed_ms;
-			time_ns *= 1000 * 1000;
-		} else {
-			malloc_mutex_unlock(tsdn, &shard->mtx);
-			return BACKGROUND_THREAD_DEFERRED_MIN;
-		}
-	}
-
-	if (hpa_should_purge(tsdn, shard)) {
-		/*
-		 * If we haven't purged before, no need to check interval
-		 * between purges. Simply purge as soon as possible.
-		 */
-		if (shard->stats.npurge_passes == 0) {
-			malloc_mutex_unlock(tsdn, &shard->mtx);
-			return BACKGROUND_THREAD_DEFERRED_MIN;
-		}
-		uint64_t since_last_purge_ms = shard->central->hooks.ms_since(
-		    &shard->last_purge);
-
-		if (since_last_purge_ms < shard->opts.min_purge_interval_ms) {
-			uint64_t until_purge_ns;
-			until_purge_ns = shard->opts.min_purge_interval_ms -
-			    since_last_purge_ms;
-			until_purge_ns *= 1000 * 1000;
-
-			if (until_purge_ns < time_ns) {
-				time_ns = until_purge_ns;
-			}
-		} else {
-			time_ns = BACKGROUND_THREAD_DEFERRED_MIN;
-		}
-	}
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-	return time_ns;
-}
-
-void
-hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	edata_cache_fast_disable(tsdn, &shard->ecf);
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-}
-
-static void
-hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
-	assert(bin_stats->npageslabs == 0);
-	assert(bin_stats->nactive == 0);
-}
-
-static void
-hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) {
-	malloc_mutex_assert_owner(tsdn, &shard->mtx);
-	for (int huge = 0; huge <= 1; huge++) {
-		hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]);
-		for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
-			hpa_shard_assert_stats_empty(
-			    &psset->stats.nonfull_slabs[i][huge]);
-		}
-	}
-}
-
-void
-hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-	/*
-	 * By the time we're here, the arena code should have dalloc'd all the
-	 * active extents, which means we should have eventually evicted
-	 * everything from the psset, so it shouldn't be able to serve even a
-	 * 1-page allocation.
-	 */
-	if (config_debug) {
-		malloc_mutex_lock(tsdn, &shard->mtx);
-		hpa_assert_empty(tsdn, shard, &shard->psset);
-		malloc_mutex_unlock(tsdn, &shard->mtx);
-	}
-	hpdata_t *ps;
-	while ((ps = psset_pick_alloc(&shard->psset, PAGE)) != NULL) {
-		/* There should be no allocations anywhere. */
-		assert(hpdata_empty(ps));
-		psset_remove(&shard->psset, ps);
-		shard->central->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE);
-	}
-}
-
-void
-hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
-    bool deferral_allowed) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	bool deferral_previously_allowed = shard->opts.deferral_allowed;
-	shard->opts.deferral_allowed = deferral_allowed;
-	if (deferral_previously_allowed && !deferral_allowed) {
-		hpa_shard_maybe_do_deferred_work(tsdn, shard,
-		    /* forced */ true);
-	}
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-}
-
-void
-hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_lock(tsdn, &shard->mtx);
-	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true);
-	malloc_mutex_unlock(tsdn, &shard->mtx);
-}
-
-void
-hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_prefork(tsdn, &shard->grow_mtx);
-}
-
-void
-hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_prefork(tsdn, &shard->mtx);
-}
-
-void
-hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx);
-	malloc_mutex_postfork_parent(tsdn, &shard->mtx);
-}
-
-void
-hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
-	hpa_do_consistency_checks(shard);
-
-	malloc_mutex_postfork_child(tsdn, &shard->grow_mtx);
-	malloc_mutex_postfork_child(tsdn, &shard->mtx);
-}
