1#include "virtgpu-utils.h"
2
3#include <malloc.h>
4#include <stdlib.h>
5
6#include <cstring>
7
#define NODE_ALLOC_ALIGN 64
#define NODE_PTR_MASK (~((uintptr_t) NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t) NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0

#ifdef _WIN32
/* Memory from _aligned_malloc() must be released with _aligned_free(),
 * never with plain free().
 */
#define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align)
#define os_free_aligned(_ptr) _aligned_free(_ptr)
#else
static inline void * os_malloc_aligned(size_t size, size_t alignment) {
    void * ptr;
    /* posix_memalign() requires the alignment to be a multiple of
     * sizeof(void *).
     */
    alignment = (alignment + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
}
#define os_free_aligned(_ptr) free(_ptr)
#endif

#define p_atomic_cmpxchg(v, old, _new) __sync_val_compare_and_swap((v), (old), (_new))

/* Fallbacks in case virtgpu-utils.h does not already provide these: an
 * acquire load to pair with the CAS above, plus branch-prediction hints.
 */
#ifndef p_atomic_read
#define p_atomic_read(v) __atomic_load_n((v), __ATOMIC_ACQUIRE)
#endif
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

/* Returns floor(log2(n)); util_logbase2_64(0) returns 0. */
static inline uint64_t util_logbase2_64(uint64_t n) {
#if defined(HAVE___BUILTIN_CLZLL)
    return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1));
#else
    uint64_t pos = 0ull;
    if (n >= 1ull << 32) {
        n >>= 32;
        pos += 32;
    }
    if (n >= 1ull << 16) {
        n >>= 16;
        pos += 16;
    }
    if (n >= 1ull << 8) {
        n >>= 8;
        pos += 8;
    }
    if (n >= 1ull << 4) {
        n >>= 4;
        pos += 4;
    }
    if (n >= 1ull << 2) {
        n >>= 2;
        pos += 2;
    }
    if (n >= 1ull << 1) {
        pos += 1;
    }
    return pos;
#endif
}

void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) {
    memset(arr, 0, sizeof(*arr));
    arr->elem_size = elem_size;
    arr->node_size_log2 = util_logbase2_64(node_size);
    assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
}

/* A node handle packs a NODE_ALLOC_ALIGN-aligned data pointer and the node's
 * level in the tree into a single uintptr_t; the level lives in the low bits
 * freed up by the alignment.
 */
static inline void * _util_sparse_array_node_data(uintptr_t handle) {
    return (void *) (handle & NODE_PTR_MASK);
}

static inline unsigned _util_sparse_array_node_level(uintptr_t handle) {
    return handle & NODE_LEVEL_MASK;
}

/* Frees a node's storage, recursing into its children first for interior
 * (level > 0) nodes.
 */
static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) {
    if (_util_sparse_array_node_level(node) > 0) {
        uintptr_t * children = (uintptr_t *) _util_sparse_array_node_data(node);
        size_t node_size = 1ull << arr->node_size_log2;
        for (size_t i = 0; i < node_size; i++) {
            if (children[i]) {
                _util_sparse_array_node_finish(arr, children[i]);
            }
        }
    }

    os_free_aligned(_util_sparse_array_node_data(node));
}
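
/* Public teardown counterpart to util_sparse_array_init(). This is a minimal
 * sketch mirroring Mesa's util_sparse_array_finish(), assuming virtgpu-utils.h
 * declares it; without some such entry point, _util_sparse_array_node_finish()
 * above is never called. Not safe against concurrent util_sparse_array_get().
 */
void util_sparse_array_finish(util_sparse_array * arr) {
    if (arr->root) {
        _util_sparse_array_node_finish(arr, arr->root);
    }
    arr->root = NULL_NODE;
}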

static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) {
    assert(data != NULL);
    assert(((uintptr_t) data & NODE_LEVEL_MASK) == 0);
    assert((level & NODE_PTR_MASK) == 0);
    return (uintptr_t) data | level;
}

static inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) {
    size_t size;
    if (level == 0) {
        /* Leaf nodes hold the elements themselves. */
        size = arr->elem_size << arr->node_size_log2;
    } else {
        /* Interior nodes hold handles to their children. */
        size = sizeof(uintptr_t) << arr->node_size_log2;
    }

    void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
    memset(data, 0, size);

    return _util_sparse_array_node(data, level);
}

static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) {
    uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

    if (prev_node != cmp_node) {
        /* We lost the race. Free this one and return the one that was already
         * allocated.
         */
        os_free_aligned(_util_sparse_array_node_data(node));
        return prev_node;
    } else {
        return node;
    }
}

void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) {
    const unsigned node_size_log2 = arr->node_size_log2;
    uintptr_t root = p_atomic_read(&arr->root);
    if (unlikely(!root)) {
        unsigned root_level = 0;
        uint64_t idx_iter = idx >> node_size_log2;
        while (idx_iter) {
            idx_iter >>= node_size_log2;
            root_level++;
        }
        uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
        root = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root);
    }

    while (1) {
        unsigned root_level = _util_sparse_array_node_level(root);
        uint64_t root_idx = idx >> (root_level * node_size_log2);
        if (likely(root_idx < (1ull << node_size_log2))) {
            break;
        }

        /* In this case, we have a root but its level is low enough that the
         * requested index is out-of-bounds.
         */
        uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);

        uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root);
        new_root_children[0] = root;

        /* We only add one at a time instead of the whole tree because it's
         * easier to ensure correctness of both the tree building and the
         * clean-up path. Because we're only adding one node we never have to
         * worry about trying to free multiple things without freeing the old
         * things.
         */
        root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
    }

    void * node_data = _util_sparse_array_node_data(root);
    unsigned node_level = _util_sparse_array_node_level(root);
    while (node_level > 0) {
        uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1);

        uintptr_t * children = (uintptr_t *) node_data;
        uintptr_t child = p_atomic_read(&children[child_idx]);

        if (unlikely(!child)) {
            child = _util_sparse_array_node_alloc(arr, node_level - 1);
            child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child);
        }

        node_data = _util_sparse_array_node_data(child);
        node_level = _util_sparse_array_node_level(child);
    }

    uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
    return (void *) ((char *) node_data + (elem_idx * arr->elem_size));
}
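
/* Example usage (illustrative only; "struct fence" is a hypothetical element
 * type, not part of this file):
 *
 *     util_sparse_array arr;
 *     util_sparse_array_init(&arr, sizeof(struct fence), 64);
 *
 *     // Elements are created zero-filled on first access and stay at a
 *     // stable address for the lifetime of the array.
 *     struct fence * f = (struct fence *) util_sparse_array_get(&arr, 1234);
 *
 *     util_sparse_array_finish(&arr);
 */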