1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
|
#include <string.h>
#include "block-iter.h"
#include "util/debug.h"
#include "util/utf8.h"
#include "util/xmalloc.h"
/*
 * If the iterator sits exactly at the end of its block and the next
 * list node is not the list head, move it to offset 0 of the next block.
 * Otherwise the iterator is left unchanged.
 */
void block_iter_normalize(BlockIter *bi)
{
    const Block *cur = bi->blk;

    if (bi->offset != cur->size) {
        // Not at the end of the block; nothing to do
        return;
    }

    if (cur->node.next == bi->head) {
        // No following block to move into
        return;
    }

    bi->offset = 0;
    bi->blk = BLOCK(cur->node.next);
}
/*
 * Advance past the next newline, i.e. to the beginning of the
 * following line or to the end of the file.
 * Returns the number of bytes the iterator advanced (0 if the
 * normalized iterator is already at the end of its block).
 */
size_t block_iter_eat_line(BlockIter *bi)
{
    block_iter_normalize(bi);

    const Block *blk = bi->blk;
    const size_t start = bi->offset;
    if (unlikely(start == blk->size)) {
        return 0;
    }

    // There must be at least one newline
    if (blk->nl != 1) {
        const unsigned char *nl = memchr(blk->data + start, '\n', blk->size - start);
        BUG_ON(!nl);
        // Land on the byte right after the newline
        bi->offset = (size_t)(nl + 1 - blk->data);
        return bi->offset - start;
    }

    // nl == 1 is treated as "the line ends where the block ends",
    // so no search is needed
    bi->offset = blk->size;
    return bi->offset - start;
}
/*
 * Advance to the beginning of the next line.
 * When there is no next line the iterator is not moved (apart from
 * the initial normalization).
 * Returns the number of bytes the iterator advanced.
 */
size_t block_iter_next_line(BlockIter *bi)
{
    block_iter_normalize(bi);

    const Block *blk = bi->blk;
    const size_t start = bi->offset;
    if (unlikely(start == blk->size)) {
        return 0;
    }

    // There must be at least one newline.
    // Default target is the block end (used as-is when nl == 1);
    // otherwise it is the byte right after the next newline.
    size_t target = blk->size;
    if (blk->nl != 1) {
        const unsigned char *nl = memchr(blk->data + start, '\n', blk->size - start);
        BUG_ON(!nl);
        target = (size_t)(nl + 1 - blk->data);
    }

    // Ending up at the end of the last block means no next line exists
    if (target == blk->size && blk->node.next == bi->head) {
        return 0;
    }

    bi->offset = target;
    return bi->offset - start;
}
/*
 * Move to the beginning of the previous line.
 * Returns the number of bytes moved; this is zero (and the iterator
 * is untouched) if there is no previous line.
 */
size_t block_iter_prev_line(BlockIter *bi)
{
    Block *blk = bi->blk;
    size_t pos = bi->offset;
    size_t origin = pos;

    // Scan back to the start of the current line within this block
    for (; pos > 0; pos--) {
        if (blk->data[pos - 1] == '\n') {
            break;
        }
    }

    if (pos == 0) {
        // Current line starts at the block start, so the previous
        // line (if any) lives in the previous block
        if (blk->node.prev == bi->head) {
            return 0;
        }
        blk = BLOCK(blk->node.prev);
        bi->blk = blk;
        pos = blk->size;
        // Distance is now measured from the end of the previous block
        origin += pos;
    }

    // Step over the newline that terminates the previous line, then
    // scan back to where that line begins
    pos--;
    for (; pos > 0; pos--) {
        if (blk->data[pos - 1] == '\n') {
            break;
        }
    }

    bi->offset = pos;
    return origin - pos;
}
/*
 * Read the character at the iterator position without moving the
 * iterator: the read is performed on a local copy of *bi.
 * Stores the character in *up and returns what block_iter_next_char()
 * returns for that copy.
 */
size_t block_iter_get_char(const BlockIter *bi, CodePoint *up)
{
    BlockIter copy = *bi;
    const size_t nbytes = block_iter_next_char(&copy, up);
    return nbytes;
}
/*
 * Read the character at the iterator position into *up and advance
 * the iterator past it, crossing into the next block if needed.
 * Returns the number of bytes advanced, or 0 (leaving *up untouched)
 * when the iterator is at the end of the last block.
 */
size_t block_iter_next_char(BlockIter *bi, CodePoint *up)
{
    size_t start = bi->offset;

    if (unlikely(start == bi->blk->size)) {
        if (unlikely(bi->blk->node.next == bi->head)) {
            return 0;
        }
        bi->blk = BLOCK(bi->blk->node.next);
        start = 0;
        bi->offset = 0;
    }

    Block *blk = bi->blk;

    // Note: this block can't be empty
    *up = blk->data[start];

    if (unlikely(*up >= 0x80)) {
        // Non-ASCII lead byte: u_get_nonascii() decodes the character
        // and advances bi->offset itself
        *up = u_get_nonascii(blk->data, blk->size, &bi->offset);
        return bi->offset - start;
    }

    // Plain ASCII: exactly one byte
    bi->offset++;
    return 1;
}
/*
 * Read the character just before the iterator position into *up and
 * move the iterator back over it, crossing into the previous block if
 * needed.
 * Returns the number of bytes moved, or 0 (leaving *up untouched) when
 * the iterator is at the start of the first block.
 */
size_t block_iter_prev_char(BlockIter *bi, CodePoint *up)
{
    size_t start = bi->offset;

    if (unlikely(start == 0)) {
        if (unlikely(bi->blk->node.prev == bi->head)) {
            return 0;
        }
        bi->blk = BLOCK(bi->blk->node.prev);
        start = bi->blk->size;
        bi->offset = start;
    }

    Block *blk = bi->blk;

    // Note: this block can't be empty
    *up = blk->data[start - 1];

    if (unlikely(*up >= 0x80)) {
        // Non-ASCII byte: u_prev_char() decodes the character and
        // moves bi->offset back itself
        *up = u_prev_char(blk->data, &bi->offset);
        return start - bi->offset;
    }

    // Plain ASCII: exactly one byte
    bi->offset--;
    return 1;
}
/*
 * Advance over one character plus every character directly after it
 * for which u_is_zero_width() is true.
 * Returns the total number of bytes advanced.
 */
size_t block_iter_next_column(BlockIter *bi)
{
    CodePoint cp;
    size_t total = block_iter_next_char(bi, &cp);

    for (;;) {
        // Peek first, so a character that is not zero-width is not consumed
        if (block_iter_get_char(bi, &cp) == 0) {
            break;
        }
        if (!u_is_zero_width(cp)) {
            break;
        }
        total += block_iter_next_char(bi, &cp);
    }

    return total;
}
/*
 * Move backwards over characters until one is consumed for which
 * u_is_zero_width() is false, or until no further character can be
 * read.
 * Returns the total number of bytes moved.
 */
size_t block_iter_prev_column(BlockIter *bi)
{
    size_t total = 0;

    for (;;) {
        CodePoint cp;
        const size_t step = block_iter_prev_char(bi, &cp);
        total += step;
        if (step == 0) {
            // Nothing left before the iterator
            break;
        }
        if (!u_is_zero_width(cp)) {
            break;
        }
    }

    return total;
}
/*
 * Move the (normalized) iterator to the beginning of the current line.
 * Returns the number of bytes moved. Nothing is moved when the
 * iterator is at offset 0 or at the end of its block.
 */
size_t block_iter_bol(BlockIter *bi)
{
    block_iter_normalize(bi);

    const Block *blk = bi->blk;
    const size_t start = bi->offset;
    if (start == 0 || start == blk->size) {
        return 0;
    }

    // With nl == 1 the line is taken to start at the block start;
    // otherwise scan back to just after the preceding newline
    size_t bol = 0;
    if (blk->nl != 1) {
        bol = start;
        while (bol > 0 && blk->data[bol - 1] != '\n') {
            bol--;
        }
    }

    bi->offset = bol;
    return start - bol;
}
/*
 * Move the (normalized) iterator to the end of the current line,
 * i.e. onto the newline byte that terminates it.
 * Returns the number of bytes moved (0 when the iterator is at the
 * end of its block).
 */
size_t block_iter_eol(BlockIter *bi)
{
    block_iter_normalize(bi);

    const Block *blk = bi->blk;
    const size_t start = bi->offset;
    if (unlikely(start == blk->size)) {
        // Cursor at end of last block
        return 0;
    }

    size_t eol;
    if (blk->nl == 1) {
        // The last byte of the block is taken to be the newline
        eol = blk->size - 1;
    } else {
        const unsigned char *nl = memchr(blk->data + start, '\n', blk->size - start);
        BUG_ON(!nl);
        eol = (size_t)(nl - blk->data);
    }

    bi->offset = eol;
    return bi->offset - start;
}
/*
 * Move the iterator back by `count` bytes, walking into previous
 * blocks as needed. No check against the list head is made here, so
 * `count` must not exceed the number of bytes before the iterator.
 */
void block_iter_back_bytes(BlockIter *bi, size_t count)
{
    Block *blk = bi->blk;
    size_t pos = bi->offset;

    // Consume whole block prefixes until the rest fits in one block
    while (count > pos) {
        count -= pos;
        blk = BLOCK(blk->node.prev);
        pos = blk->size;
    }

    bi->blk = blk;
    bi->offset = pos - count;
}
/*
 * Move the iterator forward by `count` bytes, walking into following
 * blocks as needed. No check against the list head is made here, so
 * `count` must not exceed the number of bytes after the iterator.
 */
void block_iter_skip_bytes(BlockIter *bi, size_t count)
{
    Block *blk = bi->blk;
    size_t pos = bi->offset;

    // `avail` is the number of bytes between `pos` and the block end
    for (size_t avail = blk->size - pos; count > avail; avail = blk->size) {
        count -= avail;
        blk = BLOCK(blk->node.next);
        pos = 0;
    }

    bi->blk = blk;
    bi->offset = pos + count;
}
/*
 * Position the iterator at byte `offset`, counted from the start of
 * the first block. The first block whose size is >= the remaining
 * offset is chosen. If `offset` lies beyond all blocks the iterator
 * is left unchanged.
 */
void block_iter_goto_offset(BlockIter *bi, size_t offset)
{
    Block *blk;
    size_t remaining = offset;

    block_for_each(blk, bi->head) {
        if (remaining > blk->size) {
            // Target is past this block; skip all of it
            remaining -= blk->size;
        } else {
            bi->blk = blk;
            bi->offset = remaining;
            return;
        }
    }
}
/*
 * Position the iterator at the beginning of line number `line`, where
 * 0 is the first line. If fewer lines exist, the iterator stops where
 * block_iter_eat_line() can no longer advance.
 */
void block_iter_goto_line(BlockIter *bi, size_t line)
{
    Block *blk = BLOCK(bi->head->next);
    size_t passed = 0;

    // Skip whole blocks while the target line lies beyond them
    for (;;) {
        if (blk->node.next == bi->head) {
            break;
        }
        if (passed + blk->nl >= line) {
            break;
        }
        passed += blk->nl;
        blk = BLOCK(blk->node.next);
    }

    bi->blk = blk;
    bi->offset = 0;

    // Walk the remaining lines one at a time
    for (; passed < line; passed++) {
        if (block_iter_eat_line(bi) == 0) {
            break;
        }
    }
}
size_t block_iter_get_offset(const BlockIter *bi)
{
const Block *blk;
size_t offset = 0;
block_for_each(blk, bi->head) {
if (blk == bi->blk) {
break;
}
offset += blk->size;
}
return offset + bi->offset;
}
/*
 * Copy `len` bytes starting at the iterator position into a buffer
 * obtained from xmalloc(), gathering them across block boundaries.
 * The buffer holds exactly `len` bytes; no terminator is appended.
 * Returns NULL when `len` is 0. The iterator itself is not modified.
 */
char *block_iter_get_bytes(const BlockIter *bi, size_t len)
{
    if (len == 0) {
        return NULL;
    }

    char *buf = xmalloc(len);
    const Block *blk = bi->blk;
    size_t offset = bi->offset;

    // Only the first block is read from a non-zero offset
    for (size_t copied = 0; copied < len; offset = 0) {
        const size_t avail = blk->size - offset;
        const size_t chunk = MIN(len - copied, avail);
        memcpy(buf + copied, blk->data + offset, chunk);
        copied += chunk;
        // Running out of blocks before `len` bytes were copied is a bug
        BUG_ON(copied < len && blk->node.next == bi->head);
        blk = BLOCK(blk->node.next);
    }

    return buf;
}
/*
 * Make `line` reference the bytes from the (normalized) iterator
 * position up to and including the next newline. No data is copied;
 * `line` points into the block. The length is 0 when the iterator is
 * at the end of its block.
 * bi should be at bol.
 */
void fill_line_nl_ref(BlockIter *bi, StringView *line)
{
    block_iter_normalize(bi);

    const Block *blk = bi->blk;
    const size_t avail = blk->size - bi->offset;
    line->data = blk->data + bi->offset;

    if (unlikely(avail == 0)) {
        // Cursor at end of last block
        line->length = 0;
    } else if (blk->nl == 1) {
        // With nl == 1 the rest of the block is the line; its last
        // byte must be the newline
        BUG_ON(line->data[avail - 1] != '\n');
        line->length = avail;
    } else {
        const unsigned char *nl = memchr(line->data, '\n', avail);
        BUG_ON(!nl);
        // +1 so that the newline itself is part of the line
        line->length = (size_t)(nl - line->data + 1);
        BUG_ON(line->length == 0);
    }
}
/*
 * Same as fill_line_nl_ref(), but with the final byte (the newline)
 * dropped from a non-empty result.
 */
void fill_line_ref(BlockIter *bi, StringView *line)
{
    fill_line_nl_ref(bi, line);

    // Trim the newline; an empty line reference stays empty
    if (line->length > 0) {
        line->length--;
    }
}
// Point `line` at the line containing the iterator position (via
// fill_line_ref() on a copy moved to bol) and return how many bytes
// the cursor is past the start of that line (zero means the cursor
// is at bol). The iterator passed in is not modified.
size_t fetch_this_line(const BlockIter *bi, StringView *line)
{
    BlockIter bol = *bi;
    const size_t cursor_offset = block_iter_bol(&bol);
    fill_line_ref(&bol, line);
    return cursor_offset;
}
|