diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2023-11-09 23:19:53 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2023-11-09 23:19:53 +0100 |
| commit | 1566b6faa8534118c3566188181367cd0868468f (patch) | |
| tree | 1de8d4b369efb5e592685a31088f798a6b63ffa1 /examples/dte/block-iter.c | |
| parent | 349991bf6efe473ab9a5cbdae0a8114d72b997e3 (diff) | |
| download | crep-1566b6faa8534118c3566188181367cd0868468f.tar.gz | |
Added partial matching and introduced threads
Diffstat (limited to 'examples/dte/block-iter.c')
| -rw-r--r-- | examples/dte/block-iter.c | 343 |
1 files changed, 343 insertions, 0 deletions
diff --git a/examples/dte/block-iter.c b/examples/dte/block-iter.c new file mode 100644 index 0000000..73fc2ec --- /dev/null +++ b/examples/dte/block-iter.c | |||
| @@ -0,0 +1,343 @@ | |||
| 1 | #include <string.h> | ||
| 2 | #include "block-iter.h" | ||
| 3 | #include "util/debug.h" | ||
| 4 | #include "util/utf8.h" | ||
| 5 | #include "util/xmalloc.h" | ||
| 6 | |||
| 7 | void block_iter_normalize(BlockIter *bi) | ||
| 8 | { | ||
| 9 | const Block *blk = bi->blk; | ||
| 10 | if (bi->offset == blk->size && blk->node.next != bi->head) { | ||
| 11 | bi->blk = BLOCK(blk->node.next); | ||
| 12 | bi->offset = 0; | ||
| 13 | } | ||
| 14 | } | ||
| 15 | |||
| 16 | /* | ||
| 17 | * Move after next newline (beginning of next line or end of file). | ||
| 18 | * Returns number of bytes iterator advanced. | ||
| 19 | */ | ||
| 20 | size_t block_iter_eat_line(BlockIter *bi) | ||
| 21 | { | ||
| 22 | block_iter_normalize(bi); | ||
| 23 | const size_t offset = bi->offset; | ||
| 24 | if (unlikely(offset == bi->blk->size)) { | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | // There must be at least one newline | ||
| 29 | if (bi->blk->nl == 1) { | ||
| 30 | bi->offset = bi->blk->size; | ||
| 31 | } else { | ||
| 32 | const unsigned char *end; | ||
| 33 | end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset); | ||
| 34 | BUG_ON(!end); | ||
| 35 | bi->offset = (size_t)(end + 1 - bi->blk->data); | ||
| 36 | } | ||
| 37 | |||
| 38 | return bi->offset - offset; | ||
| 39 | } | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Move to beginning of next line. | ||
| 43 | * If there is no next line, iterator is not advanced. | ||
| 44 | * Returns number of bytes iterator advanced. | ||
| 45 | */ | ||
| 46 | size_t block_iter_next_line(BlockIter *bi) | ||
| 47 | { | ||
| 48 | block_iter_normalize(bi); | ||
| 49 | const size_t offset = bi->offset; | ||
| 50 | if (unlikely(offset == bi->blk->size)) { | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | // There must be at least one newline | ||
| 55 | size_t new_offset; | ||
| 56 | if (bi->blk->nl == 1) { | ||
| 57 | new_offset = bi->blk->size; | ||
| 58 | } else { | ||
| 59 | const unsigned char *end; | ||
| 60 | end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset); | ||
| 61 | BUG_ON(!end); | ||
| 62 | new_offset = (size_t)(end + 1 - bi->blk->data); | ||
| 63 | } | ||
| 64 | if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) { | ||
| 65 | return 0; | ||
| 66 | } | ||
| 67 | |||
| 68 | bi->offset = new_offset; | ||
| 69 | return bi->offset - offset; | ||
| 70 | } | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Move to beginning of previous line. | ||
| 74 | * Returns number of bytes moved, which is zero if there's no previous line. | ||
| 75 | */ | ||
| 76 | size_t block_iter_prev_line(BlockIter *bi) | ||
| 77 | { | ||
| 78 | Block *blk = bi->blk; | ||
| 79 | size_t offset = bi->offset; | ||
| 80 | size_t start = offset; | ||
| 81 | |||
| 82 | while (offset && blk->data[offset - 1] != '\n') { | ||
| 83 | offset--; | ||
| 84 | } | ||
| 85 | |||
| 86 | if (!offset) { | ||
| 87 | if (blk->node.prev == bi->head) { | ||
| 88 | return 0; | ||
| 89 | } | ||
| 90 | bi->blk = blk = BLOCK(blk->node.prev); | ||
| 91 | offset = blk->size; | ||
| 92 | start += offset; | ||
| 93 | } | ||
| 94 | |||
| 95 | offset--; | ||
| 96 | while (offset && blk->data[offset - 1] != '\n') { | ||
| 97 | offset--; | ||
| 98 | } | ||
| 99 | bi->offset = offset; | ||
| 100 | return start - offset; | ||
| 101 | } | ||
| 102 | |||
| 103 | size_t block_iter_get_char(const BlockIter *bi, CodePoint *up) | ||
| 104 | { | ||
| 105 | BlockIter tmp = *bi; | ||
| 106 | return block_iter_next_char(&tmp, up); | ||
| 107 | } | ||
| 108 | |||
| 109 | size_t block_iter_next_char(BlockIter *bi, CodePoint *up) | ||
| 110 | { | ||
| 111 | size_t offset = bi->offset; | ||
| 112 | if (unlikely(offset == bi->blk->size)) { | ||
| 113 | if (unlikely(bi->blk->node.next == bi->head)) { | ||
| 114 | return 0; | ||
| 115 | } | ||
| 116 | bi->blk = BLOCK(bi->blk->node.next); | ||
| 117 | bi->offset = offset = 0; | ||
| 118 | } | ||
| 119 | |||
| 120 | // Note: this block can't be empty | ||
| 121 | *up = bi->blk->data[offset]; | ||
| 122 | if (likely(*up < 0x80)) { | ||
| 123 | bi->offset++; | ||
| 124 | return 1; | ||
| 125 | } | ||
| 126 | |||
| 127 | *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset); | ||
| 128 | return bi->offset - offset; | ||
| 129 | } | ||
| 130 | |||
| 131 | size_t block_iter_prev_char(BlockIter *bi, CodePoint *up) | ||
| 132 | { | ||
| 133 | size_t offset = bi->offset; | ||
| 134 | if (unlikely(offset == 0)) { | ||
| 135 | if (unlikely(bi->blk->node.prev == bi->head)) { | ||
| 136 | return 0; | ||
| 137 | } | ||
| 138 | bi->blk = BLOCK(bi->blk->node.prev); | ||
| 139 | bi->offset = offset = bi->blk->size; | ||
| 140 | } | ||
| 141 | |||
| 142 | // Note: this block can't be empty | ||
| 143 | *up = bi->blk->data[offset - 1]; | ||
| 144 | if (likely(*up < 0x80)) { | ||
| 145 | bi->offset--; | ||
| 146 | return 1; | ||
| 147 | } | ||
| 148 | |||
| 149 | *up = u_prev_char(bi->blk->data, &bi->offset); | ||
| 150 | return offset - bi->offset; | ||
| 151 | } | ||
| 152 | |||
| 153 | size_t block_iter_next_column(BlockIter *bi) | ||
| 154 | { | ||
| 155 | CodePoint u; | ||
| 156 | size_t size = block_iter_next_char(bi, &u); | ||
| 157 | while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) { | ||
| 158 | size += block_iter_next_char(bi, &u); | ||
| 159 | } | ||
| 160 | return size; | ||
| 161 | } | ||
| 162 | |||
| 163 | size_t block_iter_prev_column(BlockIter *bi) | ||
| 164 | { | ||
| 165 | CodePoint u; | ||
| 166 | size_t skip, total = 0; | ||
| 167 | do { | ||
| 168 | skip = block_iter_prev_char(bi, &u); | ||
| 169 | total += skip; | ||
| 170 | } while (skip && u_is_zero_width(u)); | ||
| 171 | return total; | ||
| 172 | } | ||
| 173 | |||
| 174 | size_t block_iter_bol(BlockIter *bi) | ||
| 175 | { | ||
| 176 | block_iter_normalize(bi); | ||
| 177 | size_t offset = bi->offset; | ||
| 178 | if (offset == 0 || offset == bi->blk->size) { | ||
| 179 | return 0; | ||
| 180 | } | ||
| 181 | |||
| 182 | if (bi->blk->nl == 1) { | ||
| 183 | offset = 0; | ||
| 184 | } else { | ||
| 185 | while (offset && bi->blk->data[offset - 1] != '\n') { | ||
| 186 | offset--; | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | const size_t ret = bi->offset - offset; | ||
| 191 | bi->offset = offset; | ||
| 192 | return ret; | ||
| 193 | } | ||
| 194 | |||
| 195 | size_t block_iter_eol(BlockIter *bi) | ||
| 196 | { | ||
| 197 | block_iter_normalize(bi); | ||
| 198 | const Block *blk = bi->blk; | ||
| 199 | const size_t offset = bi->offset; | ||
| 200 | if (unlikely(offset == blk->size)) { | ||
| 201 | // Cursor at end of last block | ||
| 202 | return 0; | ||
| 203 | } | ||
| 204 | if (blk->nl == 1) { | ||
| 205 | bi->offset = blk->size - 1; | ||
| 206 | return bi->offset - offset; | ||
| 207 | } | ||
| 208 | const unsigned char *end = memchr(blk->data + offset, '\n', blk->size - offset); | ||
| 209 | BUG_ON(!end); | ||
| 210 | bi->offset = (size_t)(end - blk->data); | ||
| 211 | return bi->offset - offset; | ||
| 212 | } | ||
| 213 | |||
| 214 | void block_iter_back_bytes(BlockIter *bi, size_t count) | ||
| 215 | { | ||
| 216 | while (count > bi->offset) { | ||
| 217 | count -= bi->offset; | ||
| 218 | bi->blk = BLOCK(bi->blk->node.prev); | ||
| 219 | bi->offset = bi->blk->size; | ||
| 220 | } | ||
| 221 | bi->offset -= count; | ||
| 222 | } | ||
| 223 | |||
| 224 | void block_iter_skip_bytes(BlockIter *bi, size_t count) | ||
| 225 | { | ||
| 226 | size_t avail = bi->blk->size - bi->offset; | ||
| 227 | while (count > avail) { | ||
| 228 | count -= avail; | ||
| 229 | bi->blk = BLOCK(bi->blk->node.next); | ||
| 230 | bi->offset = 0; | ||
| 231 | avail = bi->blk->size; | ||
| 232 | } | ||
| 233 | bi->offset += count; | ||
| 234 | } | ||
| 235 | |||
| 236 | void block_iter_goto_offset(BlockIter *bi, size_t offset) | ||
| 237 | { | ||
| 238 | Block *blk; | ||
| 239 | block_for_each(blk, bi->head) { | ||
| 240 | if (offset <= blk->size) { | ||
| 241 | bi->blk = blk; | ||
| 242 | bi->offset = offset; | ||
| 243 | return; | ||
| 244 | } | ||
| 245 | offset -= blk->size; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | void block_iter_goto_line(BlockIter *bi, size_t line) | ||
| 250 | { | ||
| 251 | Block *blk = BLOCK(bi->head->next); | ||
| 252 | size_t nl = 0; | ||
| 253 | while (blk->node.next != bi->head && nl + blk->nl < line) { | ||
| 254 | nl += blk->nl; | ||
| 255 | blk = BLOCK(blk->node.next); | ||
| 256 | } | ||
| 257 | |||
| 258 | bi->blk = blk; | ||
| 259 | bi->offset = 0; | ||
| 260 | while (nl < line) { | ||
| 261 | if (!block_iter_eat_line(bi)) { | ||
| 262 | break; | ||
| 263 | } | ||
| 264 | nl++; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | |||
| 268 | size_t block_iter_get_offset(const BlockIter *bi) | ||
| 269 | { | ||
| 270 | const Block *blk; | ||
| 271 | size_t offset = 0; | ||
| 272 | block_for_each(blk, bi->head) { | ||
| 273 | if (blk == bi->blk) { | ||
| 274 | break; | ||
| 275 | } | ||
| 276 | offset += blk->size; | ||
| 277 | } | ||
| 278 | return offset + bi->offset; | ||
| 279 | } | ||
| 280 | |||
| 281 | char *block_iter_get_bytes(const BlockIter *bi, size_t len) | ||
| 282 | { | ||
| 283 | if (len == 0) { | ||
| 284 | return NULL; | ||
| 285 | } | ||
| 286 | |||
| 287 | const Block *blk = bi->blk; | ||
| 288 | size_t offset = bi->offset; | ||
| 289 | size_t pos = 0; | ||
| 290 | char *buf = xmalloc(len); | ||
| 291 | |||
| 292 | while (pos < len) { | ||
| 293 | const size_t avail = blk->size - offset; | ||
| 294 | size_t count = MIN(len - pos, avail); | ||
| 295 | memcpy(buf + pos, blk->data + offset, count); | ||
| 296 | pos += count; | ||
| 297 | BUG_ON(pos < len && blk->node.next == bi->head); | ||
| 298 | blk = BLOCK(blk->node.next); | ||
| 299 | offset = 0; | ||
| 300 | } | ||
| 301 | |||
| 302 | return buf; | ||
| 303 | } | ||
| 304 | |||
| 305 | // bi should be at bol | ||
| 306 | void fill_line_nl_ref(BlockIter *bi, StringView *line) | ||
| 307 | { | ||
| 308 | block_iter_normalize(bi); | ||
| 309 | line->data = bi->blk->data + bi->offset; | ||
| 310 | const size_t max = bi->blk->size - bi->offset; | ||
| 311 | if (unlikely(max == 0)) { | ||
| 312 | // Cursor at end of last block | ||
| 313 | line->length = 0; | ||
| 314 | return; | ||
| 315 | } | ||
| 316 | if (bi->blk->nl == 1) { | ||
| 317 | BUG_ON(line->data[max - 1] != '\n'); | ||
| 318 | line->length = max; | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | const unsigned char *nl = memchr(line->data, '\n', max); | ||
| 322 | BUG_ON(!nl); | ||
| 323 | line->length = (size_t)(nl - line->data + 1); | ||
| 324 | BUG_ON(line->length == 0); | ||
| 325 | } | ||
| 326 | |||
| 327 | void fill_line_ref(BlockIter *bi, StringView *line) | ||
| 328 | { | ||
| 329 | fill_line_nl_ref(bi, line); | ||
| 330 | // Trim the newline | ||
| 331 | line->length -= (line->length > 0); | ||
| 332 | } | ||
| 333 | |||
| 334 | // Set the `line` argument to point to the current line and return | ||
| 335 | // the offset of the cursor, relative to the start of the line | ||
| 336 | // (zero means cursor is at bol) | ||
| 337 | size_t fetch_this_line(const BlockIter *bi, StringView *line) | ||
| 338 | { | ||
| 339 | BlockIter tmp = *bi; | ||
| 340 | size_t count = block_iter_bol(&tmp); | ||
| 341 | fill_line_ref(&tmp, line); | ||
| 342 | return count; | ||
| 343 | } | ||
