1#include "virtgpu-utils.h"
  2
  3#include <malloc.h>
  4#include <stdlib.h>
  5
  6#include <cstring>
  7
  8#define NODE_ALLOC_ALIGN 64
  9#define NODE_PTR_MASK    (~((uintptr_t) NODE_ALLOC_ALIGN - 1))
 10#define NODE_LEVEL_MASK  ((uintptr_t) NODE_ALLOC_ALIGN - 1)
 11#define NULL_NODE        0
 12
 13#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align)
 14#define os_free_aligned(_ptr)            free(_ptr)
 15#define p_atomic_cmpxchg(v, old, _new)   __sync_val_compare_and_swap((v), (old), (_new))
 16
 17static inline uint64_t util_logbase2_64(uint64_t n) {
 18#if defined(HAVE___BUILTIN_CLZLL)
 19    return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
 20#else
 21    uint64_t pos = 0ull;
 22    if (n >= 1ull << 32) {
 23        n >>= 32;
 24        pos += 32;
 25    }
 26    if (n >= 1ull << 16) {
 27        n >>= 16;
 28        pos += 16;
 29    }
 30    if (n >= 1ull << 8) {
 31        n >>= 8;
 32        pos += 8;
 33    }
 34    if (n >= 1ull << 4) {
 35        n >>= 4;
 36        pos += 4;
 37    }
 38    if (n >= 1ull << 2) {
 39        n >>= 2;
 40        pos += 2;
 41    }
 42    if (n >= 1ull << 1) {
 43        pos += 1;
 44    }
 45    return pos;
 46#endif
 47}
 48
 49void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) {
 50    memset(arr, 0, sizeof(*arr));
 51    arr->elem_size      = elem_size;
 52    arr->node_size_log2 = util_logbase2_64(node_size);
 53    assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
 54}
 55
 56static inline void * os_malloc_aligned(size_t size, size_t alignment) {
 57    void * ptr;
 58    alignment = (alignment + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
 59    if (posix_memalign(&ptr, alignment, size) != 0) {
 60        return NULL;
 61    }
 62    return ptr;
 63}
 64
 65static inline void * _util_sparse_array_node_data(uintptr_t handle) {
 66    return (void *) (handle & NODE_PTR_MASK);
 67}
 68
 69static inline unsigned _util_sparse_array_node_level(uintptr_t handle) {
 70    return handle & NODE_LEVEL_MASK;
 71}
 72
 73static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) {
 74    if (_util_sparse_array_node_level(node) > 0) {
 75        uintptr_t * children  = (uintptr_t *) _util_sparse_array_node_data(node);
 76        size_t      node_size = 1ull << arr->node_size_log2;
 77        for (size_t i = 0; i < node_size; i++) {
 78            if (children[i]) {
 79                _util_sparse_array_node_finish(arr, children[i]);
 80            }
 81        }
 82    }
 83
 84    os_free_aligned(_util_sparse_array_node_data(node));
 85}
 86
 87static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) {
 88    assert(data != NULL);
 89    assert(((uintptr_t) data & NODE_LEVEL_MASK) == 0);
 90    assert((level & NODE_PTR_MASK) == 0);
 91    return (uintptr_t) data | level;
 92}
 93
 94inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) {
 95    size_t size;
 96    if (level == 0) {
 97        size = arr->elem_size << arr->node_size_log2;
 98    } else {
 99        size = sizeof(uintptr_t) << arr->node_size_log2;
100    }
101
102    void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
103    memset(data, 0, size);
104
105    return _util_sparse_array_node(data, level);
106}
107
/* Atomically install `node` at *node_ptr iff it still holds `cmp_node`.
 * Exactly one thread wins the race; losers free their freshly built node
 * and adopt the winner's, so no allocation is ever leaked. */
static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) {
    uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

    if (prev_node == cmp_node) {
        /* Swap succeeded: our node is now in the tree. */
        return node;
    }

    /* We lost the race.  Free ours and return the node that some other
     * thread already installed. */
    os_free_aligned(_util_sparse_array_node_data(node));
    return prev_node;
}
121
/* Return a pointer to element `idx`, growing the tree as needed.  Storage
 * is zero-initialized on first access (nodes are memset at allocation).
 * Concurrent calls are safe with respect to each other: every node install
 * goes through the compare-and-swap in _util_sparse_array_set_or_free_node(),
 * so racing threads converge on one winner and losers free their copy.
 */
void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) {
    const unsigned node_size_log2 = arr->node_size_log2;
    uintptr_t      root           = p_atomic_read(&arr->root);
    if (unlikely(!root)) {
        /* No tree yet: build a root whose level is just deep enough that
         * `idx` fits under it (each level consumes node_size_log2 bits). */
        unsigned root_level = 0;
        uint64_t idx_iter   = idx >> node_size_log2;
        while (idx_iter) {
            idx_iter >>= node_size_log2;
            root_level++;
        }
        uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
        root               = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root);
    }

    while (1) {
        unsigned root_level = _util_sparse_array_node_level(root);
        uint64_t root_idx   = idx >> (root_level * node_size_log2);
        if (likely(root_idx < (1ull << node_size_log2))) {
            break;
        }

        /* In this case, we have a root but its level is low enough that the
         * requested index is out-of-bounds.  Grow the tree by one level:
         * the new root's child 0 covers the old root's entire index range.
         */
        uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);

        uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root);
        new_root_children[0]          = root;

        /* We only add one at a time instead of the whole tree because it's
         * easier to ensure correctness of both the tree building and the
         * clean-up path.  Because we're only adding one node we never have to
         * worry about trying to free multiple things without freeing the old
         * things.
         */
        root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
    }

    /* Walk down from the root, allocating missing nodes along the path. */
    void *   node_data  = _util_sparse_array_node_data(root);
    unsigned node_level = _util_sparse_array_node_level(root);
    while (node_level > 0) {
        /* Slice out this level's node_size_log2 bits of `idx`. */
        uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1);

        uintptr_t * children = (uintptr_t *) node_data;
        uintptr_t   child    = p_atomic_read(&children[child_idx]);

        if (unlikely(!child)) {
            child = _util_sparse_array_node_alloc(arr, node_level - 1);
            /* On a lost race this returns (and frees ours in favor of) the
             * child another thread already installed. */
            child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child);
        }

        node_data  = _util_sparse_array_node_data(child);
        node_level = _util_sparse_array_node_level(child);
    }

    /* node_data now points at a leaf; index into it by element size. */
    uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
    return (void *) ((char *) node_data + (elem_idx * arr->elem_size));
}