Added partial matching and introduced threads

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2023-11-09 23:19:53 +0100
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2023-11-09 23:19:53 +0100
commit: 1566b6faa8534118c3566188181367cd0868468f (patch)
tree: 1de8d4b369efb5e592685a31088f798a6b63ffa1 /examples/dte/block-iter.c
parent: 349991bf6efe473ab9a5cbdae0a8114d72b997e3 (diff)
download: crep-1566b6faa8534118c3566188181367cd0868468f.tar.gz
1 files changed, 343 insertions, 0 deletions
diff --git a/examples/dte/block-iter.c b/examples/dte/block-iter.c
new file mode 100644
index 0000000..73fc2ec
--- /dev/null
+++ b/examples/dte/block-iter.c
@@ -0,0 +1,343 @@
+#include <string.h>
+#include "block-iter.h"
+#include "util/debug.h"
+#include "util/utf8.h"
+#include "util/xmalloc.h"
+void block_iter_normalize(BlockIter *bi)
+{
+    const Block *blk = bi->blk;
+    if (bi->offset == blk->size && blk->node.next != bi->head) {
+        bi->blk = BLOCK(blk->node.next);
+        bi->offset = 0;
+    }
+}
+/*
+ * Move after next newline (beginning of next line or end of file).
+ * Returns number of bytes iterator advanced.
+ */
+size_t block_iter_eat_line(BlockIter *bi)
+{
+    block_iter_normalize(bi);
+    const size_t offset = bi->offset;
+    if (unlikely(offset == bi->blk->size)) {
+        return 0;
+    }
+    // There must be at least one newline
+    if (bi->blk->nl == 1) {
+        bi->offset = bi->blk->size;
+    } else {
+        const unsigned char *end;
+        end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset);
+        BUG_ON(!end);
+        bi->offset = (size_t)(end + 1 - bi->blk->data);
+    }
+    return bi->offset - offset;
+}
+/*
+ * Move to beginning of next line.
+ * If there is no next line, iterator is not advanced.
+ * Returns number of bytes iterator advanced.
+ */
+size_t block_iter_next_line(BlockIter *bi)
+{
+    block_iter_normalize(bi);
+    const size_t offset = bi->offset;
+    if (unlikely(offset == bi->blk->size)) {
+        return 0;
+    }
+    // There must be at least one newline
+    size_t new_offset;
+    if (bi->blk->nl == 1) {
+        new_offset = bi->blk->size;
+    } else {
+        const unsigned char *end;
+        end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset);
+        BUG_ON(!end);
+        new_offset = (size_t)(end + 1 - bi->blk->data);
+    }
+    if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) {
+        return 0;
+    }
+    bi->offset = new_offset;
+    return bi->offset - offset;
+}
+/*
+ * Move to beginning of previous line.
+ * Returns number of bytes moved, which is zero if there's no previous line.
+ */
+size_t block_iter_prev_line(BlockIter *bi)
+{
+    Block *blk = bi->blk;
+    size_t offset = bi->offset;
+    size_t start = offset;
+    while (offset && blk->data[offset - 1] != '\n') {
+        offset--;
+    }
+    if (!offset) {
+        if (blk->node.prev == bi->head) {
+            return 0;
+        }
+        bi->blk = blk = BLOCK(blk->node.prev);
+        offset = blk->size;
+        start += offset;
+    }
+    offset--;
+    while (offset && blk->data[offset - 1] != '\n') {
+        offset--;
+    }
+    bi->offset = offset;
+    return start - offset;
+}
+size_t block_iter_get_char(const BlockIter *bi, CodePoint *up)
+{
+    BlockIter tmp = *bi;
+    return block_iter_next_char(&tmp, up);
+}
+size_t block_iter_next_char(BlockIter *bi, CodePoint *up)
+{
+    size_t offset = bi->offset;
+    if (unlikely(offset == bi->blk->size)) {
+        if (unlikely(bi->blk->node.next == bi->head)) {
+            return 0;
+        }
+        bi->blk = BLOCK(bi->blk->node.next);
+        bi->offset = offset = 0;
+    }
+    // Note: this block can't be empty
+    *up = bi->blk->data[offset];
+    if (likely(*up < 0x80)) {
+        bi->offset++;
+        return 1;
+    }
+    *up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset);
+    return bi->offset - offset;
+}
+size_t block_iter_prev_char(BlockIter *bi, CodePoint *up)
+{
+    size_t offset = bi->offset;
+    if (unlikely(offset == 0)) {
+        if (unlikely(bi->blk->node.prev == bi->head)) {
+            return 0;
+        }
+        bi->blk = BLOCK(bi->blk->node.prev);
+        bi->offset = offset = bi->blk->size;
+    }
+    // Note: this block can't be empty
+    *up = bi->blk->data[offset - 1];
+    if (likely(*up < 0x80)) {
+        bi->offset--;
+        return 1;
+    }
+    *up = u_prev_char(bi->blk->data, &bi->offset);
+    return offset - bi->offset;
+}
+size_t block_iter_next_column(BlockIter *bi)
+{
+    CodePoint u;
+    size_t size = block_iter_next_char(bi, &u);
+    while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) {
+        size += block_iter_next_char(bi, &u);
+    }
+    return size;
+}
+size_t block_iter_prev_column(BlockIter *bi)
+{
+    CodePoint u;
+    size_t skip, total = 0;
+    do {
+        skip = block_iter_prev_char(bi, &u);
+        total += skip;
+    } while (skip && u_is_zero_width(u));
+    return total;
+}
+size_t block_iter_bol(BlockIter *bi)
+{
+    block_iter_normalize(bi);
+    size_t offset = bi->offset;
+    if (offset == 0 || offset == bi->blk->size) {
+        return 0;
+    }
+    if (bi->blk->nl == 1) {
+        offset = 0;
+    } else {
+        while (offset && bi->blk->data[offset - 1] != '\n') {
+            offset--;
+        }
+    }
+    const size_t ret = bi->offset - offset;
+    bi->offset = offset;
+    return ret;
+}
+size_t block_iter_eol(BlockIter *bi)
+{
+    block_iter_normalize(bi);
+    const Block *blk = bi->blk;
+    const size_t offset = bi->offset;
+    if (unlikely(offset == blk->size)) {
+        // Cursor at end of last block
+        return 0;
+    }
+    if (blk->nl == 1) {
+        bi->offset = blk->size - 1;
+        return bi->offset - offset;
+    }
+    const unsigned char *end = memchr(blk->data + offset, '\n', blk->size - offset);
+    BUG_ON(!end);
+    bi->offset = (size_t)(end - blk->data);
+    return bi->offset - offset;
+}
+void block_iter_back_bytes(BlockIter *bi, size_t count)
+{
+    while (count > bi->offset) {
+        count -= bi->offset;
+        bi->blk = BLOCK(bi->blk->node.prev);
+        bi->offset = bi->blk->size;
+    }
+    bi->offset -= count;
+}
+void block_iter_skip_bytes(BlockIter *bi, size_t count)
+{
+    size_t avail = bi->blk->size - bi->offset;
+    while (count > avail) {
+        count -= avail;
+        bi->blk = BLOCK(bi->blk->node.next);
+        bi->offset = 0;
+        avail = bi->blk->size;
+    }
+    bi->offset += count;
+}
+void block_iter_goto_offset(BlockIter *bi, size_t offset)
+{
+    Block *blk;
+    block_for_each(blk, bi->head) {
+        if (offset <= blk->size) {
+            bi->blk = blk;
+            bi->offset = offset;
+            return;
+        }
+        offset -= blk->size;
+    }
+}
+void block_iter_goto_line(BlockIter *bi, size_t line)
+{
+    Block *blk = BLOCK(bi->head->next);
+    size_t nl = 0;
+    while (blk->node.next != bi->head && nl + blk->nl < line) {
+        nl += blk->nl;
+        blk = BLOCK(blk->node.next);
+    }
+    bi->blk = blk;
+    bi->offset = 0;
+    while (nl < line) {
+        if (!block_iter_eat_line(bi)) {
+            break;
+        }
+        nl++;
+    }
+}
+size_t block_iter_get_offset(const BlockIter *bi)
+{
+    const Block *blk;
+    size_t offset = 0;
+    block_for_each(blk, bi->head) {
+        if (blk == bi->blk) {
+            break;
+        }
+        offset += blk->size;
+    }
+    return offset + bi->offset;
+}
+char *block_iter_get_bytes(const BlockIter *bi, size_t len)
+{
+    if (len == 0) {
+        return NULL;
+    }
+    const Block *blk = bi->blk;
+    size_t offset = bi->offset;
+    size_t pos = 0;
+    char *buf = xmalloc(len);
+    while (pos < len) {
+        const size_t avail = blk->size - offset;
+        size_t count = MIN(len - pos, avail);
+        memcpy(buf + pos, blk->data + offset, count);
+        pos += count;
+        BUG_ON(pos < len && blk->node.next == bi->head);
+        blk = BLOCK(blk->node.next);
+        offset = 0;
+    }
+    return buf;
+}
+// bi should be at bol
+void fill_line_nl_ref(BlockIter *bi, StringView *line)
+{
+    block_iter_normalize(bi);
+    line->data = bi->blk->data + bi->offset;
+    const size_t max = bi->blk->size - bi->offset;
+    if (unlikely(max == 0)) {
+        // Cursor at end of last block
+        line->length = 0;
+        return;
+    }
+    if (bi->blk->nl == 1) {
+        BUG_ON(line->data[max - 1] != '\n');
+        line->length = max;
+        return;
+    }
+    const unsigned char *nl = memchr(line->data, '\n', max);
+    BUG_ON(!nl);
+    line->length = (size_t)(nl - line->data + 1);
+    BUG_ON(line->length == 0);
+}
+void fill_line_ref(BlockIter *bi, StringView *line)
+{
+    fill_line_nl_ref(bi, line);
+    // Trim the newline
+    line->length -= (line->length > 0);
+}
+// Set the `line` argument to point to the current line and return
+// the offset of the cursor, relative to the start of the line
+// (zero means cursor is at bol)
+size_t fetch_this_line(const BlockIter *bi, StringView *line)
+{
+    BlockIter tmp = *bi;
+    size_t count = block_iter_bol(&tmp);
+    fill_line_ref(&tmp, line);
+    return count;
+}
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2023-11-09 23:19:53 +0100
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2023-11-09 23:19:53 +0100
commit	1566b6faa8534118c3566188181367cd0868468f (patch)
tree	1de8d4b369efb5e592685a31088f798a6b63ffa1 /examples/dte/block-iter.c
parent	349991bf6efe473ab9a5cbdae0a8114d72b997e3 (diff)
download	crep-1566b6faa8534118c3566188181367cd0868468f.tar.gz

diff --git a/examples/dte/block-iter.c b/examples/dte/block-iter.c new file mode 100644 index 0000000..73fc2ec --- /dev/null +++ b/examples/dte/block-iter.c
@@ -0,0 +1,343 @@
	1	#include <string.h>
	2	#include "block-iter.h"
	3	#include "util/debug.h"
	4	#include "util/utf8.h"
	5	#include "util/xmalloc.h"
	6
	7	void block_iter_normalize(BlockIter *bi)
	8	{
	9	const Block *blk = bi->blk;
	10	if (bi->offset == blk->size && blk->node.next != bi->head) {
	11	bi->blk = BLOCK(blk->node.next);
	12	bi->offset = 0;
	13	}
	14	}
	15
	16	/*
	17	* Move after next newline (beginning of next line or end of file).
	18	* Returns number of bytes iterator advanced.
	19	*/
	20	size_t block_iter_eat_line(BlockIter *bi)
	21	{
	22	block_iter_normalize(bi);
	23	const size_t offset = bi->offset;
	24	if (unlikely(offset == bi->blk->size)) {
	25	return 0;
	26	}
	27
	28	// There must be at least one newline
	29	if (bi->blk->nl == 1) {
	30	bi->offset = bi->blk->size;
	31	} else {
	32	const unsigned char *end;
	33	end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset);
	34	BUG_ON(!end);
	35	bi->offset = (size_t)(end + 1 - bi->blk->data);
	36	}
	37
	38	return bi->offset - offset;
	39	}
	40
	41	/*
	42	* Move to beginning of next line.
	43	* If there is no next line, iterator is not advanced.
	44	* Returns number of bytes iterator advanced.
	45	*/
	46	size_t block_iter_next_line(BlockIter *bi)
	47	{
	48	block_iter_normalize(bi);
	49	const size_t offset = bi->offset;
	50	if (unlikely(offset == bi->blk->size)) {
	51	return 0;
	52	}
	53
	54	// There must be at least one newline
	55	size_t new_offset;
	56	if (bi->blk->nl == 1) {
	57	new_offset = bi->blk->size;
	58	} else {
	59	const unsigned char *end;
	60	end = memchr(bi->blk->data + offset, '\n', bi->blk->size - offset);
	61	BUG_ON(!end);
	62	new_offset = (size_t)(end + 1 - bi->blk->data);
	63	}
	64	if (new_offset == bi->blk->size && bi->blk->node.next == bi->head) {
	65	return 0;
	66	}
	67
	68	bi->offset = new_offset;
	69	return bi->offset - offset;
	70	}
	71
	72	/*
	73	* Move to beginning of previous line.
	74	* Returns number of bytes moved, which is zero if there's no previous line.
	75	*/
	76	size_t block_iter_prev_line(BlockIter *bi)
	77	{
	78	Block *blk = bi->blk;
	79	size_t offset = bi->offset;
	80	size_t start = offset;
	81
	82	while (offset && blk->data[offset - 1] != '\n') {
	83	offset--;
	84	}
	85
	86	if (!offset) {
	87	if (blk->node.prev == bi->head) {
	88	return 0;
	89	}
	90	bi->blk = blk = BLOCK(blk->node.prev);
	91	offset = blk->size;
	92	start += offset;
	93	}
	94
	95	offset--;
	96	while (offset && blk->data[offset - 1] != '\n') {
	97	offset--;
	98	}
	99	bi->offset = offset;
	100	return start - offset;
	101	}
	102
	103	size_t block_iter_get_char(const BlockIter bi, CodePoint up)
	104	{
	105	BlockIter tmp = *bi;
	106	return block_iter_next_char(&tmp, up);
	107	}
	108
	109	size_t block_iter_next_char(BlockIter bi, CodePoint up)
	110	{
	111	size_t offset = bi->offset;
	112	if (unlikely(offset == bi->blk->size)) {
	113	if (unlikely(bi->blk->node.next == bi->head)) {
	114	return 0;
	115	}
	116	bi->blk = BLOCK(bi->blk->node.next);
	117	bi->offset = offset = 0;
	118	}
	119
	120	// Note: this block can't be empty
	121	*up = bi->blk->data[offset];
	122	if (likely(*up < 0x80)) {
	123	bi->offset++;
	124	return 1;
	125	}
	126
	127	*up = u_get_nonascii(bi->blk->data, bi->blk->size, &bi->offset);
	128	return bi->offset - offset;
	129	}
	130
	131	size_t block_iter_prev_char(BlockIter bi, CodePoint up)
	132	{
	133	size_t offset = bi->offset;
	134	if (unlikely(offset == 0)) {
	135	if (unlikely(bi->blk->node.prev == bi->head)) {
	136	return 0;
	137	}
	138	bi->blk = BLOCK(bi->blk->node.prev);
	139	bi->offset = offset = bi->blk->size;
	140	}
	141
	142	// Note: this block can't be empty
	143	*up = bi->blk->data[offset - 1];
	144	if (likely(*up < 0x80)) {
	145	bi->offset--;
	146	return 1;
	147	}
	148
	149	*up = u_prev_char(bi->blk->data, &bi->offset);
	150	return offset - bi->offset;
	151	}
	152
	153	size_t block_iter_next_column(BlockIter *bi)
	154	{
	155	CodePoint u;
	156	size_t size = block_iter_next_char(bi, &u);
	157	while (block_iter_get_char(bi, &u) && u_is_zero_width(u)) {
	158	size += block_iter_next_char(bi, &u);
	159	}
	160	return size;
	161	}
	162
	163	size_t block_iter_prev_column(BlockIter *bi)
	164	{
	165	CodePoint u;
	166	size_t skip, total = 0;
	167	do {
	168	skip = block_iter_prev_char(bi, &u);
	169	total += skip;
	170	} while (skip && u_is_zero_width(u));
	171	return total;
	172	}
	173
	174	size_t block_iter_bol(BlockIter *bi)
	175	{
	176	block_iter_normalize(bi);
	177	size_t offset = bi->offset;
	178	if (offset == 0 \|\| offset == bi->blk->size) {
	179	return 0;
	180	}
	181
	182	if (bi->blk->nl == 1) {
	183	offset = 0;
	184	} else {
	185	while (offset && bi->blk->data[offset - 1] != '\n') {
	186	offset--;
	187	}
	188	}
	189
	190	const size_t ret = bi->offset - offset;
	191	bi->offset = offset;
	192	return ret;
	193	}
	194
	195	size_t block_iter_eol(BlockIter *bi)
	196	{
	197	block_iter_normalize(bi);
	198	const Block *blk = bi->blk;
	199	const size_t offset = bi->offset;
	200	if (unlikely(offset == blk->size)) {
	201	// Cursor at end of last block
	202	return 0;
	203	}
	204	if (blk->nl == 1) {
	205	bi->offset = blk->size - 1;
	206	return bi->offset - offset;
	207	}
	208	const unsigned char *end = memchr(blk->data + offset, '\n', blk->size - offset);
	209	BUG_ON(!end);
	210	bi->offset = (size_t)(end - blk->data);
	211	return bi->offset - offset;
	212	}
	213
	214	void block_iter_back_bytes(BlockIter *bi, size_t count)
	215	{
	216	while (count > bi->offset) {
	217	count -= bi->offset;
	218	bi->blk = BLOCK(bi->blk->node.prev);
	219	bi->offset = bi->blk->size;
	220	}
	221	bi->offset -= count;
	222	}
	223
	224	void block_iter_skip_bytes(BlockIter *bi, size_t count)
	225	{
	226	size_t avail = bi->blk->size - bi->offset;
	227	while (count > avail) {
	228	count -= avail;
	229	bi->blk = BLOCK(bi->blk->node.next);
	230	bi->offset = 0;
	231	avail = bi->blk->size;
	232	}
	233	bi->offset += count;
	234	}
	235
	236	void block_iter_goto_offset(BlockIter *bi, size_t offset)
	237	{
	238	Block *blk;
	239	block_for_each(blk, bi->head) {
	240	if (offset <= blk->size) {
	241	bi->blk = blk;
	242	bi->offset = offset;
	243	return;
	244	}
	245	offset -= blk->size;
	246	}
	247	}
	248
	249	void block_iter_goto_line(BlockIter *bi, size_t line)
	250	{
	251	Block *blk = BLOCK(bi->head->next);
	252	size_t nl = 0;
	253	while (blk->node.next != bi->head && nl + blk->nl < line) {
	254	nl += blk->nl;
	255	blk = BLOCK(blk->node.next);
	256	}
	257
	258	bi->blk = blk;
	259	bi->offset = 0;
	260	while (nl < line) {
	261	if (!block_iter_eat_line(bi)) {
	262	break;
	263	}
	264	nl++;
	265	}
	266	}
	267
	268	size_t block_iter_get_offset(const BlockIter *bi)
	269	{
	270	const Block *blk;
	271	size_t offset = 0;
	272	block_for_each(blk, bi->head) {
	273	if (blk == bi->blk) {
	274	break;
	275	}
	276	offset += blk->size;
	277	}
	278	return offset + bi->offset;
	279	}
	280
	281	char block_iter_get_bytes(const BlockIter bi, size_t len)
	282	{
	283	if (len == 0) {
	284	return NULL;
	285	}
	286
	287	const Block *blk = bi->blk;
	288	size_t offset = bi->offset;
	289	size_t pos = 0;
	290	char *buf = xmalloc(len);
	291
	292	while (pos < len) {
	293	const size_t avail = blk->size - offset;
	294	size_t count = MIN(len - pos, avail);
	295	memcpy(buf + pos, blk->data + offset, count);
	296	pos += count;
	297	BUG_ON(pos < len && blk->node.next == bi->head);
	298	blk = BLOCK(blk->node.next);
	299	offset = 0;
	300	}
	301
	302	return buf;
	303	}
	304
	305	// bi should be at bol
	306	void fill_line_nl_ref(BlockIter bi, StringView line)
	307	{
	308	block_iter_normalize(bi);
	309	line->data = bi->blk->data + bi->offset;
	310	const size_t max = bi->blk->size - bi->offset;
	311	if (unlikely(max == 0)) {
	312	// Cursor at end of last block
	313	line->length = 0;
	314	return;
	315	}
	316	if (bi->blk->nl == 1) {
	317	BUG_ON(line->data[max - 1] != '\n');
	318	line->length = max;
	319	return;
	320	}
	321	const unsigned char *nl = memchr(line->data, '\n', max);
	322	BUG_ON(!nl);
	323	line->length = (size_t)(nl - line->data + 1);
	324	BUG_ON(line->length == 0);
	325	}
	326
	327	void fill_line_ref(BlockIter bi, StringView line)
	328	{
	329	fill_line_nl_ref(bi, line);
	330	// Trim the newline
	331	line->length -= (line->length > 0);
	332	}
	333
	334	// Set the `line` argument to point to the current line and return
	335	// the offset of the cursor, relative to the start of the line
	336	// (zero means cursor is at bol)
	337	size_t fetch_this_line(const BlockIter bi, StringView line)
	338	{
	339	BlockIter tmp = *bi;
	340	size_t count = block_iter_bol(&tmp);
	341	fill_line_ref(&tmp, line);
	342	return count;
	343	}