summaryrefslogtreecommitdiff
path: root/examples/dte/block-iter.c
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2023-11-09 23:19:53 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2023-11-09 23:19:53 +0100
commit1566b6faa8534118c3566188181367cd0868468f (patch)
tree1de8d4b369efb5e592685a31088f798a6b63ffa1 /examples/dte/block-iter.c
parent349991bf6efe473ab9a5cbdae0a8114d72b997e3 (diff)
downloadcrep-1566b6faa8534118c3566188181367cd0868468f.tar.gz
Added partial matching and introduced threads
Diffstat (limited to 'examples/dte/block-iter.c')
-rw-r--r--examples/dte/block-iter.c343
1 files changed, 343 insertions, 0 deletions
diff --git a/examples/dte/block-iter.c b/examples/dte/block-iter.c
new file mode 100644
index 0000000..73fc2ec
--- /dev/null
+++ b/examples/dte/block-iter.c
@@ -0,0 +1,343 @@
1#include <string.h>
2#include "block-iter.h"
3#include "util/debug.h"
4#include "util/utf8.h"
5#include "util/xmalloc.h"
6
7void block_iter_normalize(BlockIter *bi)
8{
9 const Block *blk = bi->blk;
10 if (bi->offset == blk->size && blk->node.next != bi->head) {
11 bi->blk = BLOCK(blk->node.next);
12 bi->offset = 0;
13 }
14}
15
16/*
17 * Move after next newline (beginning of next line or end of file).
18 * Returns number of bytes iterator advanced.
19 */
20size_t block_iter_eat_line(BlockIter *bi)
21{
22 block_iter_normalize(bi);
23 const size_t offset = bi->offset;
24 if (unlikely(offset == bi->blk->size)) {
25 return 0;
26 }
27
28 // There must be at least one newline
29 if (bi->blk->nl == 1) {
30 bi->offset = bi->blk->size;
31 } else {
32 const unsigned char *end;
33 end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset);
34 BUG_ON(!end);
35 bi->offset = (size_t)(end + 1 - bi->blk->data);
36 }
37
38 return bi->offset - offset;
39}
40
41/*
42 * Move to beginning of next line.
43 * If there is no next line, iterator is not advanced.
44 * Returns number of bytes iterator advanced.
45 */
46size_t block_iter_next_line(BlockIter *bi)
47{
48 block_iter_normalize(bi);
49 const size_t offset = bi->offset;
50 if (unlikely(offset == bi->blk->size)) {
51 return 0;
52 }
53
54 // There must be at least one newline
55 size_t new_offset;
56 if (bi->blk->nl == 1) {
57 new_offset = bi->blk->size;
58 } else {
59 const unsigned char *end;
60 end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset);
61 BUG_ON(!end);
62 new_offset = (size_t)(end + 1 - bi->blk->data);
63 }
64 if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) {
65 return 0;
66 }
67
68 bi->offset = new_offset;
69 return bi->offset - offset;
70}
71
72/*
73 * Move to beginning of previous line.
74 * Returns number of bytes moved, which is zero if there's no previous line.
75 */
76size_t block_iter_prev_line(BlockIter *bi)
77{
78 Block *blk = bi->blk;
79 size_t offset = bi->offset;
80 size_t start = offset;
81
82 while (offset && blk->data[offset - 1] != '\n') {
83 offset--;
84 }
85
86 if (!offset) {
87 if (blk->node.prev == bi->head) {
88 return 0;
89 }
90 bi->blk = blk = BLOCK(blk->node.prev);
91 offset = blk->size;
92 start += offset;
93 }
94
95 offset--;
96 while (offset && blk->data[offset - 1] != '\n') {
97 offset--;
98 }
99 bi->offset = offset;
100 return start - offset;
101}
102
103size_t block_iter_get_char(const BlockIter *bi, CodePoint *up)
104{
105 BlockIter tmp = *bi;
106 return block_iter_next_char(&tmp, up);
107}
108
109size_t block_iter_next_char(BlockIter *bi, CodePoint *up)
110{
111 size_t offset = bi->offset;
112 if (unlikely(offset == bi->blk->size)) {
113 if (unlikely(bi->blk->node.next == bi->head)) {
114 return 0;
115 }
116 bi->blk = BLOCK(bi->blk->node.next);
117 bi->offset = offset = 0;
118 }
119
120 // Note: this block can't be empty
121 *up = bi->blk->data[offset];
122 if (likely(*up < 0x80)) {
123 bi->offset++;
124 return 1;
125 }
126
127 *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset);
128 return bi->offset - offset;
129}
130
131size_t block_iter_prev_char(BlockIter *bi, CodePoint *up)
132{
133 size_t offset = bi->offset;
134 if (unlikely(offset == 0)) {
135 if (unlikely(bi->blk->node.prev == bi->head)) {
136 return 0;
137 }
138 bi->blk = BLOCK(bi->blk->node.prev);
139 bi->offset = offset = bi->blk->size;
140 }
141
142 // Note: this block can't be empty
143 *up = bi->blk->data[offset - 1];
144 if (likely(*up < 0x80)) {
145 bi->offset--;
146 return 1;
147 }
148
149 *up = u_prev_char(bi->blk->data, &bi->offset);
150 return offset - bi->offset;
151}
152
153size_t block_iter_next_column(BlockIter *bi)
154{
155 CodePoint u;
156 size_t size = block_iter_next_char(bi, &u);
157 while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) {
158 size += block_iter_next_char(bi, &u);
159 }
160 return size;
161}
162
163size_t block_iter_prev_column(BlockIter *bi)
164{
165 CodePoint u;
166 size_t skip, total = 0;
167 do {
168 skip = block_iter_prev_char(bi, &u);
169 total += skip;
170 } while (skip && u_is_zero_width(u));
171 return total;
172}
173
174size_t block_iter_bol(BlockIter *bi)
175{
176 block_iter_normalize(bi);
177 size_t offset = bi->offset;
178 if (offset == 0 || offset == bi->blk->size) {
179 return 0;
180 }
181
182 if (bi->blk->nl == 1) {
183 offset = 0;
184 } else {
185 while (offset && bi->blk->data[offset - 1] != '\n') {
186 offset--;
187 }
188 }
189
190 const size_t ret = bi->offset - offset;
191 bi->offset = offset;
192 return ret;
193}
194
195size_t block_iter_eol(BlockIter *bi)
196{
197 block_iter_normalize(bi);
198 const Block *blk = bi->blk;
199 const size_t offset = bi->offset;
200 if (unlikely(offset == blk->size)) {
201 // Cursor at end of last block
202 return 0;
203 }
204 if (blk->nl == 1) {
205 bi->offset = blk->size - 1;
206 return bi->offset - offset;
207 }
208 const unsigned char *end = memchr(blk->data + offset, '\n', blk->size - offset);
209 BUG_ON(!end);
210 bi->offset = (size_t)(end - blk->data);
211 return bi->offset - offset;
212}
213
214void block_iter_back_bytes(BlockIter *bi, size_t count)
215{
216 while (count > bi->offset) {
217 count -= bi->offset;
218 bi->blk = BLOCK(bi->blk->node.prev);
219 bi->offset = bi->blk->size;
220 }
221 bi->offset -= count;
222}
223
224void block_iter_skip_bytes(BlockIter *bi, size_t count)
225{
226 size_t avail = bi->blk->size - bi->offset;
227 while (count > avail) {
228 count -= avail;
229 bi->blk = BLOCK(bi->blk->node.next);
230 bi->offset = 0;
231 avail = bi->blk->size;
232 }
233 bi->offset += count;
234}
235
236void block_iter_goto_offset(BlockIter *bi, size_t offset)
237{
238 Block *blk;
239 block_for_each(blk, bi->head) {
240 if (offset <= blk->size) {
241 bi->blk = blk;
242 bi->offset = offset;
243 return;
244 }
245 offset -= blk->size;
246 }
247}
248
249void block_iter_goto_line(BlockIter *bi, size_t line)
250{
251 Block *blk = BLOCK(bi->head->next);
252 size_t nl = 0;
253 while (blk->node.next != bi->head && nl + blk->nl < line) {
254 nl += blk->nl;
255 blk = BLOCK(blk->node.next);
256 }
257
258 bi->blk = blk;
259 bi->offset = 0;
260 while (nl < line) {
261 if (!block_iter_eat_line(bi)) {
262 break;
263 }
264 nl++;
265 }
266}
267
268size_t block_iter_get_offset(const BlockIter *bi)
269{
270 const Block *blk;
271 size_t offset = 0;
272 block_for_each(blk, bi->head) {
273 if (blk == bi->blk) {
274 break;
275 }
276 offset += blk->size;
277 }
278 return offset + bi->offset;
279}
280
281char *block_iter_get_bytes(const BlockIter *bi, size_t len)
282{
283 if (len == 0) {
284 return NULL;
285 }
286
287 const Block *blk = bi->blk;
288 size_t offset = bi->offset;
289 size_t pos = 0;
290 char *buf = xmalloc(len);
291
292 while (pos < len) {
293 const size_t avail = blk->size - offset;
294 size_t count = MIN(len - pos, avail);
295 memcpy(buf + pos, blk->data + offset, count);
296 pos += count;
297 BUG_ON(pos < len && blk->node.next == bi->head);
298 blk = BLOCK(blk->node.next);
299 offset = 0;
300 }
301
302 return buf;
303}
304
305// bi should be at bol
306void fill_line_nl_ref(BlockIter *bi, StringView *line)
307{
308 block_iter_normalize(bi);
309 line->data = bi->blk->data + bi->offset;
310 const size_t max = bi->blk->size - bi->offset;
311 if (unlikely(max == 0)) {
312 // Cursor at end of last block
313 line->length = 0;
314 return;
315 }
316 if (bi->blk->nl == 1) {
317 BUG_ON(line->data[max - 1] != '\n');
318 line->length = max;
319 return;
320 }
321 const unsigned char *nl = memchr(line->data, '\n', max);
322 BUG_ON(!nl);
323 line->length = (size_t)(nl - line->data + 1);
324 BUG_ON(line->length == 0);
325}
326
327void fill_line_ref(BlockIter *bi, StringView *line)
328{
329 fill_line_nl_ref(bi, line);
330 // Trim the newline
331 line->length -= (line->length > 0);
332}
333
334// Set the `line` argument to point to the current line and return
335// the offset of the cursor, relative to the start of the line
336// (zero means cursor is at bol)
337size_t fetch_this_line(const BlockIter *bi, StringView *line)
338{
339 BlockIter tmp = *bi;
340 size_t count = block_iter_bol(&tmp);
341 fill_line_ref(&tmp, line);
342 return count;
343}