aboutsummaryrefslogtreecommitdiff
path: root/examples/dte/regexp.c
diff options
context:
space:
mode:
author Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-21 22:52:54 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-21 22:52:54 +0100
commit dcacc00e3750300617ba6e16eb346713f91a783a (patch)
tree 38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/dte/regexp.c
parent 58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff)
download crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz
Remove testing data
Diffstat (limited to 'examples/dte/regexp.c')
-rw-r--r-- examples/dte/regexp.c 151
1 files changed, 0 insertions, 151 deletions
diff --git a/examples/dte/regexp.c b/examples/dte/regexp.c
deleted file mode 100644
index dc4eb0f..0000000
--- a/examples/dte/regexp.c
+++ /dev/null
@@ -1,151 +0,0 @@
1#include <errno.h>
2#include <stdlib.h>
3#include "regexp.h"
4#include "error.h"
5#include "util/debug.h"
6#include "util/hashmap.h"
7#include "util/str-util.h"
8#include "util/xmalloc.h"
9#include "util/xsnprintf.h"
10
// Process-wide table of interned regexps, keyed by pattern string.
// Entries are added by regexp_intern() and the whole table is released
// by free_interned_regexps().
static HashMap interned_regexps;
12
// Translate the regcomp(3)/regexec(3) error code `err` into text with
// regerror(3) and report it through error_msg() in the form
// "<description>: <pattern>". Returns whatever error_msg() returns.
bool regexp_error_msg(const regex_t *re, const char *pattern, int err)
{
    char description[1024];
    regerror(err, re, description, sizeof(description));
    return error_msg("%s: %s", description, pattern);
}
19
// Compile `pattern` into `re` with exactly the given regcomp(3) `flags`.
// Returns true on success; on failure the error is reported via
// regexp_error_msg() and that function's return value is passed through.
bool regexp_compile_internal(regex_t *re, const char *pattern, int flags)
{
    const int err = regcomp(re, pattern, flags);
    return (err == 0) ? true : regexp_error_msg(re, pattern, err);
}
28
// Compile `pattern` into `re` (always with REG_EXTENDED added to `flags`).
// If regcomp(3) fails, the regerror(3) text is handed to fatal_error()
// together with EINVAL.
void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags)
{
    // Note: DEFAULT_REGEX_FLAGS isn't used here because this function
    // is only used for compiling built-in patterns, where we explicitly
    // avoid using "enhanced" features
    const int cflags = flags | REG_EXTENDED;
    const int err = regcomp(re, pattern, cflags);

    if (unlikely(err)) {
        char reason[1024];
        regerror(err, re, reason, sizeof(reason));
        fatal_error(reason, EINVAL);
    }
}
41
// Run `re` against the first `size` bytes of `buf`, storing up to `nmatch`
// match offsets in `pmatch`. Returns true if regexec(3) reports a match.
// `pmatch` must be non-NULL even when no match offsets are wanted.
bool regexp_exec (
    const regex_t *re,
    const char *buf,
    size_t size,
    size_t nmatch,
    regmatch_t *pmatch,
    int flags
) {
    // "If REG_STARTEND is specified, pmatch must point to at least one
    // regmatch_t (even if nmatch is 0 or REG_NOSUB was specified), to
    // hold the input offsets for REG_STARTEND."
    // -- https://man.openbsd.org/regex.3
    BUG_ON(!pmatch);

// ASan's __interceptor_regexec() doesn't support REG_STARTEND
#if defined(REG_STARTEND) && !defined(ASAN_ENABLED) && !defined(MSAN_ENABLED)
    // With REG_STARTEND, pmatch[0] carries the input range [0, size)
    // into regexec(), so `buf` needs no terminator
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = size;
    const int eflags = flags | REG_STARTEND;
    return regexec(re, buf, nmatch, pmatch, eflags) == 0;
#else
    // Buffer must be null-terminated if REG_STARTEND isn't supported
    char *copy = xstrcut(buf, size);
    const int matched = !regexec(re, copy, nmatch, pmatch, flags);
    free(copy);
    return matched;
#endif
}
69
70// Check which word boundary tokens are supported by regcomp(3)
71// (if any) and initialize `rwbt` with them for later use
72bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt)
73{
74 static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE";
75 const regoff_t match_start = 20, match_end = 23;
76 static const RegexpWordBoundaryTokens pairs[] = {
77 {"\\<", "\\>"},
78 {"[[:<:]]", "[[:>:]]"},
79 {"\\b", "\\b"},
80 };
81
82 BUG_ON(ARRAYLEN(text) <= match_end);
83 BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5));
84
85 for (size_t i = 0; i < ARRAYLEN(pairs); i++) {
86 const char *start = pairs[i].start;
87 const char *end = pairs[i].end;
88 char patt[32];
89 xsnprintf(patt, sizeof(patt), "%s(foo)%s", start, end);
90 regex_t re;
91 if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) {
92 continue;
93 }
94 regmatch_t m[2];
95 bool match = !regexec(&re, text, ARRAYLEN(m), m, 0);
96 regfree(&re);
97 if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) {
98 *rwbt = pairs[i];
99 return true;
100 }
101 }
102
103 return false;
104}
105
106void free_cached_regexp(CachedRegexp *cr)
107{
108 regfree(&cr->re);
109 free(cr);
110}
111
112const InternedRegexp *regexp_intern(const char *pattern)
113{
114 if (pattern[0] == '\0') {
115 return NULL;
116 }
117
118 InternedRegexp *ir = hashmap_get(&interned_regexps, pattern);
119 if (ir) {
120 return ir;
121 }
122
123 ir = xnew(InternedRegexp, 1);
124 int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB);
125 if (unlikely(err)) {
126 regexp_error_msg(&ir->re, pattern, err);
127 free(ir);
128 return NULL;
129 }
130
131 ir->str = xstrdup(pattern);
132 return hashmap_insert(&interned_regexps, ir->str, ir);
133}
134
135bool regexp_is_interned(const char *pattern)
136{
137 return !!hashmap_find(&interned_regexps, pattern);
138}
139
140// Note: this does NOT free InternedRegexp::str, because it points at the
141// same string as HashMapEntry::key and is already freed by hashmap_free()
142static void free_interned_regexp(InternedRegexp *ir)
143{
144 regfree(&ir->re);
145 free(ir);
146}
147
148void free_interned_regexps(void)
149{
150 hashmap_free(&interned_regexps, (FreeFunction)free_interned_regexp);
151}