diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
| commit | dcacc00e3750300617ba6e16eb346713f91a783a (patch) | |
| tree | 38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/dte/regexp.c | |
| parent | 58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff) | |
| download | crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz | |
Remove testing data
Diffstat (limited to 'examples/dte/regexp.c')
| -rw-r--r-- | examples/dte/regexp.c | 151 |
1 files changed, 0 insertions, 151 deletions
diff --git a/examples/dte/regexp.c b/examples/dte/regexp.c deleted file mode 100644 index dc4eb0f..0000000 --- a/examples/dte/regexp.c +++ /dev/null | |||
| @@ -1,151 +0,0 @@ | |||
| 1 | #include <errno.h> | ||
| 2 | #include <stdlib.h> | ||
| 3 | #include "regexp.h" | ||
| 4 | #include "error.h" | ||
| 5 | #include "util/debug.h" | ||
| 6 | #include "util/hashmap.h" | ||
| 7 | #include "util/str-util.h" | ||
| 8 | #include "util/xmalloc.h" | ||
| 9 | #include "util/xsnprintf.h" | ||
| 10 | |||
| 11 | static HashMap interned_regexps; | ||
| 12 | |||
// Report a regex failure through error_msg().
//
// The human-readable text for `err` is obtained from regerror(3) (using
// `re` as the associated compiled-regex object) and is emitted in the
// form "<regerror text>: <pattern>". The value returned is whatever
// error_msg() returns, so callers can forward it as their own result.
bool regexp_error_msg(const regex_t *re, const char *pattern, int err)
{
    char description[1024];
    regerror(err, re, description, sizeof description);
    return error_msg("%s: %s", description, pattern);
}
| 19 | |||
// Compile `pattern` into `re` with regcomp(3), passing `flags` through
// unchanged. Returns true on success. On failure the error is reported
// via regexp_error_msg() and that function's return value is handed back.
bool regexp_compile_internal(regex_t *re, const char *pattern, int flags)
{
    const int status = regcomp(re, pattern, flags);
    if (status == 0) {
        return true;
    }
    return regexp_error_msg(re, pattern, status);
}
| 28 | |||
// Compile `pattern` into `re` with regcomp(3), always OR-ing REG_EXTENDED
// into `flags`. If compilation fails, the regerror(3) text is passed to
// fatal_error() together with EINVAL.
void regexp_compile_or_fatal_error(regex_t *re, const char *pattern, int flags)
{
    // Note: DEFAULT_REGEX_FLAGS isn't used here because this function
    // is only used for compiling built-in patterns, where we explicitly
    // avoid using "enhanced" features
    const int cflags = flags | REG_EXTENDED;
    const int status = regcomp(re, pattern, cflags);

    if (unlikely(status)) {
        char description[1024];
        regerror(status, re, description, sizeof description);
        // NOTE(review): fatal_error() presumably does not return -- confirm
        fatal_error(description, EINVAL);
    }
}
| 41 | |||
// Run regexec(3) for `re` over the first `size` bytes of `buf`.
//
// Returns true when regexec() reports a match (i.e. returns 0) and false
// otherwise. `nmatch`, `pmatch` and `flags` are forwarded to regexec().
// `pmatch` must be non-NULL in every case (enforced by BUG_ON below).
bool regexp_exec (
    const regex_t *re,
    const char *buf,
    size_t size,
    size_t nmatch,
    regmatch_t *pmatch,
    int flags
) {
    // "If REG_STARTEND is specified, pmatch must point to at least one
    // regmatch_t (even if nmatch is 0 or REG_NOSUB was specified), to
    // hold the input offsets for REG_STARTEND."
    // -- https://man.openbsd.org/regex.3
    BUG_ON(!pmatch);

    // ASan's __interceptor_regexec() doesn't support REG_STARTEND
#if defined(REG_STARTEND) && !defined(ASAN_ENABLED) && !defined(MSAN_ENABLED)
    // Describe the input range [0, size) via pmatch[0], so that no
    // null-terminated copy of the buffer is needed
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = size;
    const int status = regexec(re, buf, nmatch, pmatch, flags | REG_STARTEND);
    return status == 0;
#else
    // Buffer must be null-terminated if REG_STARTEND isn't supported
    // (xstrcut() presumably returns a terminated heap copy of the first
    // `size` bytes -- confirm in util/xmalloc.h)
    char *copy = xstrcut(buf, size);
    const int status = regexec(re, copy, nmatch, pmatch, flags);
    free(copy);
    return status == 0;
#endif
}
| 69 | |||
| 70 | // Check which word boundary tokens are supported by regcomp(3) | ||
| 71 | // (if any) and initialize `rwbt` with them for later use | ||
| 72 | bool regexp_init_word_boundary_tokens(RegexpWordBoundaryTokens *rwbt) | ||
| 73 | { | ||
| 74 | static const char text[] = "SSfooEE SSfoo fooEE foo SSfooEE"; | ||
| 75 | const regoff_t match_start = 20, match_end = 23; | ||
| 76 | static const RegexpWordBoundaryTokens pairs[] = { | ||
| 77 | {"\\<", "\\>"}, | ||
| 78 | {"[[:<:]]", "[[:>:]]"}, | ||
| 79 | {"\\b", "\\b"}, | ||
| 80 | }; | ||
| 81 | |||
| 82 | BUG_ON(ARRAYLEN(text) <= match_end); | ||
| 83 | BUG_ON(!mem_equal(text + match_start - 1, " foo ", 5)); | ||
| 84 | |||
| 85 | for (size_t i = 0; i < ARRAYLEN(pairs); i++) { | ||
| 86 | const char *start = pairs[i].start; | ||
| 87 | const char *end = pairs[i].end; | ||
| 88 | char patt[32]; | ||
| 89 | xsnprintf(patt, sizeof(patt), "%s(foo)%s", start, end); | ||
| 90 | regex_t re; | ||
| 91 | if (regcomp(&re, patt, DEFAULT_REGEX_FLAGS) != 0) { | ||
| 92 | continue; | ||
| 93 | } | ||
| 94 | regmatch_t m[2]; | ||
| 95 | bool match = !regexec(&re, text, ARRAYLEN(m), m, 0); | ||
| 96 | regfree(&re); | ||
| 97 | if (match && m[0].rm_so == match_start && m[0].rm_eo == match_end) { | ||
| 98 | *rwbt = pairs[i]; | ||
| 99 | return true; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | return false; | ||
| 104 | } | ||
| 105 | |||
| 106 | void free_cached_regexp(CachedRegexp *cr) | ||
| 107 | { | ||
| 108 | regfree(&cr->re); | ||
| 109 | free(cr); | ||
| 110 | } | ||
| 111 | |||
| 112 | const InternedRegexp *regexp_intern(const char *pattern) | ||
| 113 | { | ||
| 114 | if (pattern[0] == '\0') { | ||
| 115 | return NULL; | ||
| 116 | } | ||
| 117 | |||
| 118 | InternedRegexp *ir = hashmap_get(&interned_regexps, pattern); | ||
| 119 | if (ir) { | ||
| 120 | return ir; | ||
| 121 | } | ||
| 122 | |||
| 123 | ir = xnew(InternedRegexp, 1); | ||
| 124 | int err = regcomp(&ir->re, pattern, DEFAULT_REGEX_FLAGS | REG_NEWLINE | REG_NOSUB); | ||
| 125 | if (unlikely(err)) { | ||
| 126 | regexp_error_msg(&ir->re, pattern, err); | ||
| 127 | free(ir); | ||
| 128 | return NULL; | ||
| 129 | } | ||
| 130 | |||
| 131 | ir->str = xstrdup(pattern); | ||
| 132 | return hashmap_insert(&interned_regexps, ir->str, ir); | ||
| 133 | } | ||
| 134 | |||
| 135 | bool regexp_is_interned(const char *pattern) | ||
| 136 | { | ||
| 137 | return !!hashmap_find(&interned_regexps, pattern); | ||
| 138 | } | ||
| 139 | |||
| 140 | // Note: this does NOT free InternedRegexp::str, because it points at the | ||
| 141 | // same string as HashMapEntry::key and is already freed by hashmap_free() | ||
| 142 | static void free_interned_regexp(InternedRegexp *ir) | ||
| 143 | { | ||
| 144 | regfree(&ir->re); | ||
| 145 | free(ir); | ||
| 146 | } | ||
| 147 | |||
| 148 | void free_interned_regexps(void) | ||
| 149 | { | ||
| 150 | hashmap_free(&interned_regexps, (FreeFunction)free_interned_regexp); | ||
| 151 | } | ||
