diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:52:54 +0100 |
| commit | dcacc00e3750300617ba6e16eb346713f91a783a (patch) | |
| tree | 38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/modules/vector-sets/fastjson.c | |
| parent | 58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff) | |
| download | crep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz | |
Remove testing data
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/fastjson.c')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/fastjson.c | 441 |
1 files changed, 0 insertions, 441 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c deleted file mode 100644 index 78926e2..0000000 --- a/examples/redis-unstable/modules/vector-sets/fastjson.c +++ /dev/null | |||
| @@ -1,441 +0,0 @@ | |||
| 1 | /* Ultra‑lightweight top‑level JSON field extractor. | ||
| 2 | * Return the element directly as an expr.c token. | ||
| 3 | * This code is directly included inside expr.c. | ||
| 4 | * | ||
| 5 | * Copyright (c) 2025-Present, Redis Ltd. | ||
| 6 | * All rights reserved. | ||
| 7 | * | ||
| 8 | * Licensed under your choice of the Redis Source Available License 2.0 | ||
| 9 | * (RSALv2) or the Server Side Public License v1 (SSPLv1). | ||
| 10 | * | ||
| 11 | * Originally authored by: Salvatore Sanfilippo. | ||
| 12 | * | ||
| 13 | * ------------------------------------------------------------------ | ||
| 14 | * | ||
| 15 | * DESIGN GOALS: | ||
| 16 | * | ||
| 17 | * 1. Zero heap allocations while seeking the requested key. | ||
| 18 | * 2. A single parse (and therefore a single allocation, if needed) | ||
| 19 | * when the key finally matches. | ||
| 20 | * 3. Same subset‑of‑JSON coverage needed by expr.c: | ||
| 21 | * - Strings (escapes: \" \\ \n \r \t). | ||
| 22 | * - Numbers (double). | ||
| 23 | * - Booleans. | ||
| 24 | * - Null. | ||
| 25 | * - Flat arrays of the above primitives. | ||
| 26 | * | ||
| 27 | * Any other value (nested object, unicode escape, etc.) returns NULL. | ||
| 28 | * Should be very easy to extend it in case in the future we want | ||
| 29 | * more for the FILTER option of VSIM. | ||
| 30 | * 4. No global state, so this file can be #included directly in expr.c. | ||
| 31 | * | ||
| 32 | * The only API expr.c uses directly is: | ||
| 33 | * | ||
| 34 | * exprtoken *jsonExtractField(const char *json, size_t json_len, | ||
| 35 | * const char *field, size_t field_len); | ||
| 36 | * ------------------------------------------------------------------ */ | ||
| 37 | |||
| 38 | #include <ctype.h> | ||
| 39 | #include <string.h> | ||
| 40 | |||
| 41 | // Forward declarations. | ||
| 42 | static int jsonSkipValue(const char **p, const char *end); | ||
| 43 | static exprtoken *jsonParseValueToken(const char **p, const char *end); | ||
| 44 | |||
/* Return 1 when 'c' may appear inside a JSON number literal: a decimal
 * digit, a sign, the decimal point, or the exponent marker. Return 0
 * otherwise.
 *
 * This is a character-class test only; it does not check that a run of
 * such characters forms a well-formed number.
 *
 * Callers pass plain 'char' values, which can be negative where char is
 * signed. Handing a negative value to isdigit() is undefined behavior,
 * so the digit test is done with an explicit range comparison, which is
 * well defined for every int. */
static int jsonIsNumberChar(int c) {
    switch (c) {
    case '-':
    case '+':
    case '.':
    case 'e':
    case 'E':
        return 1;
    default:
        return c >= '0' && c <= '9';
    }
}
| 50 | |||
| 51 | /* ========================== Fast skipping of JSON ========================= | ||
| 52 | * The helpers here are designed to skip values without performing any | ||
| 53 | * allocation. This way, for the use case of this JSON parser, we are able | ||
| 54 | * to easily (and with good speed) skip fields and values we are not | ||
| 55 | * interested in. Then, later in the code, when we find the field we want | ||
| 56 | * to obtain, we finally call the functions that turn a given JSON value | ||
| 57 | * associated to a field into one of our expression tokens. | |
| 58 | * ========================================================================== */ | ||
| 59 | |||
/* Move *p forward over a run of whitespace, as classified by isspace(),
 * stopping at the first non-space byte or at 'end', whichever is first. */
static inline void jsonSkipWhiteSpaces(const char **p, const char *end) {
    const char *cur = *p;
    for (; cur < end; cur++) {
        if (!isspace((unsigned char)*cur)) break;
    }
    *p = cur;
}
| 64 | |||
/* Advance *p past a JSON string literal, including both quotes.
 *
 * On entry *p must point at the opening quote. Escape sequences are not
 * decoded: a backslash just causes the following byte to be skipped, so
 * an escaped quote does not terminate the string.
 *
 * Returns 1 on success, with *p just after the closing quote.
 * Returns 0 if *p is not at a quote (then *p is unchanged) or if the
 * string is unterminated (then *p is left at 'end', never beyond it). */
static int jsonSkipString(const char **p, const char *end) {
    if (*p >= end || **p != '"') return 0;
    (*p)++; /* Skip opening quote. */
    while (*p < end) {
        char c = **p;
        if (c == '\\') {
            /* A backslash needs at least one more byte after it. Stepping
             * by two unconditionally could move *p past 'end', which is
             * out-of-bounds pointer arithmetic. */
            if (end - *p < 2) {
                *p = end;
                return 0;
            }
            (*p) += 2;
            continue;
        }
        (*p)++;
        if (c == '"') return 1; /* Closing quote consumed. */
    }
    return 0; /* Unterminated. */
}
| 82 | |||
/* Skip a whole array or object by counting nesting depth.
 *
 * 'opener' and 'closer' select which bracket pair is tracked ('[' ']' or
 * '{' '}'). On entry *p points at the opener, which is consumed without
 * being checked. String literals are skipped through jsonSkipString() so
 * that brackets inside strings are not counted. Brackets of the other
 * kind are stepped over like any other byte and are not validated.
 *
 * Returns 1 with *p just past the matching closer, or 0 if the input ends
 * first or an embedded string is unterminated. */
static int jsonSkipBracketed(const char **p, const char *end,
                             char opener, char closer) {
    int nesting = 1; /* Accounts for the opener skipped just below. */
    (*p)++;

    while (nesting > 0) {
        if (*p >= end) break; /* Ran out of input before the closer. */

        const char ch = **p;
        if (ch == '"') {
            /* jsonSkipString() leaves *p after the closing quote, so go
             * straight to the next iteration without stepping again. */
            if (jsonSkipString(p, end) == 0) return 0;
            continue;
        }

        if (ch == opener) {
            nesting++;
        } else if (ch == closer) {
            nesting--;
        }
        (*p)++;
    }

    return nesting == 0;
}
| 125 | |||
/* Skip the literal 'lit' ("true", "false", "null", ...) if the input at
 * *p starts with exactly those bytes.
 *
 * Returns 1 and advances *p by strlen(lit) on a match; returns 0 and
 * leaves *p unchanged otherwise. The byte following the literal is not
 * inspected.
 *
 * The bounds check compares lengths instead of computing '*p + l', so no
 * pointer beyond the end of the buffer is ever formed. */
static int jsonSkipLiteral(const char **p, const char *end, const char *lit) {
    size_t litlen = strlen(lit);

    if (*p > end) return 0;
    size_t avail = (size_t)(end - *p);
    if (avail < litlen) return 0;

    if (memcmp(*p, lit, litlen) != 0) return 0;
    *p += litlen;
    return 1;
}
| 134 | |||
/* Skip something that looks like a number: consume the longest run of
 * bytes accepted by jsonIsNumberChar(). The run is not validated as a
 * number, which is enough when the value is only being stepped over.
 *
 * Returns 1 if at least one byte was consumed, 0 otherwise. */
static int jsonSkipNumber(const char **p, const char *end) {
    const char *first = *p;
    const char *cur = first;
    while (cur < end && jsonIsNumberChar(*cur)) cur++;
    *p = cur;
    return cur != first;
}
| 145 | |||
/* Skip one JSON value of any kind, after skipping leading whitespace.
 * The first byte of the value selects the specialized skipper; anything
 * that is not a string, object, array or literal is tried as a number.
 *
 * Returns 1 on success, 0 on error (including running out of input). */
static int jsonSkipValue(const char **p, const char *end) {
    jsonSkipWhiteSpaces(p, end);
    if (*p >= end) return 0;

    const char lead = **p;
    if (lead == '"') return jsonSkipString(p, end);
    if (lead == '{') return jsonSkipBracketed(p, end, '{', '}');
    if (lead == '[') return jsonSkipBracketed(p, end, '[', ']');
    if (lead == 't') return jsonSkipLiteral(p, end, "true");
    if (lead == 'f') return jsonSkipLiteral(p, end, "false");
    if (lead == 'n') return jsonSkipLiteral(p, end, "null");
    return jsonSkipNumber(p, end);
}
| 160 | |||
| 161 | /* =========================== JSON to exprtoken ============================ | ||
| 162 | * The functions below convert a given json value to the equivalent | ||
| 163 | * expression token structure. | ||
| 164 | * ========================================================================== */ | ||
| 165 | |||
| 166 | static exprtoken *jsonParseStringToken(const char **p, const char *end) { | ||
| 167 | if (*p >= end || **p != '"') return NULL; | ||
| 168 | const char *start = ++(*p); | ||
| 169 | int esc = 0; size_t len = 0; int has_esc = 0; | ||
| 170 | const char *q = *p; | ||
| 171 | while (q < end) { | ||
| 172 | if (esc) { esc = 0; q++; len++; has_esc = 1; continue; } | ||
| 173 | if (*q == '\\') { esc = 1; q++; continue; } | ||
| 174 | if (*q == '"') break; | ||
| 175 | q++; len++; | ||
| 176 | } | ||
| 177 | if (q >= end || *q != '"') return NULL; // Unterminated string | ||
| 178 | exprtoken *t = exprNewToken(EXPR_TOKEN_STR); | ||
| 179 | |||
| 180 | if (!has_esc) { | ||
| 181 | // No escapes, we can point directly into the original JSON string. | ||
| 182 | t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL; | ||
| 183 | } else { | ||
| 184 | // Escapes present, need to allocate and copy/process escapes. | ||
| 185 | char *dst = RedisModule_Alloc(len + 1); | ||
| 186 | |||
| 187 | t->str.start = t->str.heapstr = dst; t->str.len = len; | ||
| 188 | const char *r = start; esc = 0; | ||
| 189 | while (r < q) { | ||
| 190 | if (esc) { | ||
| 191 | switch (*r) { | ||
| 192 | // Supported escapes from Goal 3. | ||
| 193 | case 'n': *dst='\n'; break; | ||
| 194 | case 'r': *dst='\r'; break; | ||
| 195 | case 't': *dst='\t'; break; | ||
| 196 | case '\\': *dst='\\'; break; | ||
| 197 | case '"': *dst='\"'; break; | ||
| 198 | // Escapes (like \uXXXX, \b, \f) are not supported for now, | ||
| 199 | // we just copy them verbatim. | ||
| 200 | default: *dst=*r; break; | ||
| 201 | } | ||
| 202 | dst++; esc = 0; r++; continue; | ||
| 203 | } | ||
| 204 | if (*r == '\\') { esc = 1; r++; continue; } | ||
| 205 | *dst++ = *r++; | ||
| 206 | } | ||
| 207 | *dst = '\0'; // Null-terminate the allocated string. | ||
| 208 | } | ||
| 209 | *p = q + 1; // Advance the main pointer past the closing quote. | ||
| 210 | return t; | ||
| 211 | } | ||
| 212 | |||
| 213 | static exprtoken *jsonParseNumberToken(const char **p, const char *end) { | ||
| 214 | // Use a buffer to extract the number literal for parsing with strtod(). | ||
| 215 | char buf[256]; int idx = 0; | ||
| 216 | const char *start = *p; // For strtod partial failures check. | ||
| 217 | |||
| 218 | // Copy potential number characters to buffer. | ||
| 219 | while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) { | ||
| 220 | buf[idx++] = **p; | ||
| 221 | (*p)++; | ||
| 222 | } | ||
| 223 | buf[idx]='\0'; // Null-terminate buffer. | ||
| 224 | |||
| 225 | if (idx==0) return NULL; // No number characters found. | ||
| 226 | |||
| 227 | char *ep; // End pointer for strtod validation. | ||
| 228 | double v = strtod(buf, &ep); | ||
| 229 | |||
| 230 | /* Check if strtod() consumed the entire buffer content. | ||
| 231 | * If not, the number format was invalid. */ | ||
| 232 | if (*ep!='\0') { | ||
| 233 | // strtod() failed; rewind p to the start and return NULL | ||
| 234 | *p = start; | ||
| 235 | return NULL; | ||
| 236 | } | ||
| 237 | |||
| 238 | // If strtod() succeeded, create and return the token.. | ||
| 239 | exprtoken *t = exprNewToken(EXPR_TOKEN_NUM); | ||
| 240 | t->num = v; | ||
| 241 | return t; | ||
| 242 | } | ||
| 243 | |||
| 244 | static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) { | ||
| 245 | size_t l = strlen(lit); | ||
| 246 | |||
| 247 | // Ensure we don't read past 'end'. | ||
| 248 | if ((*p + l) > end) return NULL; | ||
| 249 | |||
| 250 | if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match. | ||
| 251 | |||
| 252 | // Check that the character *after* the literal is a valid JSON delimiter | ||
| 253 | // (whitespace, comma, closing bracket/brace, or end of input) | ||
| 254 | // This prevents matching "trueblabla" as "true". | ||
| 255 | if ((*p + l) < end) { | ||
| 256 | char next_char = *(*p + l); | ||
| 257 | if (!isspace((unsigned char)next_char) && next_char!=',' && | ||
| 258 | next_char!=']' && next_char!='}') { | ||
| 259 | return NULL; // Invalid character following literal. | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | // Literal matched and is correctly terminated. | ||
| 264 | *p += l; | ||
| 265 | exprtoken *t = exprNewToken(type); | ||
| 266 | t->num = num; | ||
| 267 | return t; | ||
| 268 | } | ||
| 269 | |||
| 270 | static exprtoken *jsonParseArrayToken(const char **p, const char *end) { | ||
| 271 | if (*p >= end || **p != '[') return NULL; | ||
| 272 | (*p)++; // Skip '['. | ||
| 273 | jsonSkipWhiteSpaces(p,end); | ||
| 274 | |||
| 275 | exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE); | ||
| 276 | t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0; | ||
| 277 | |||
| 278 | // Handle empty array []. | ||
| 279 | if (*p < end && **p == ']') { | ||
| 280 | (*p)++; // Skip ']'. | ||
| 281 | return t; | ||
| 282 | } | ||
| 283 | |||
| 284 | // Parse array elements. | ||
| 285 | while (1) { | ||
| 286 | exprtoken *ele = jsonParseValueToken(p,end); | ||
| 287 | if (!ele) { | ||
| 288 | exprTokenRelease(t); // Clean up partially built array token. | ||
| 289 | return NULL; | ||
| 290 | } | ||
| 291 | |||
| 292 | // Grow allocated space for elements if needed. | ||
| 293 | if (t->tuple.len == alloc) { | ||
| 294 | size_t newsize = alloc ? alloc * 2 : 4; | ||
| 295 | // Check for potential overflow if newsize becomes huge. | ||
| 296 | if (newsize < alloc) { | ||
| 297 | exprTokenRelease(ele); | ||
| 298 | exprTokenRelease(t); | ||
| 299 | return NULL; | ||
| 300 | } | ||
| 301 | exprtoken **newele = RedisModule_Realloc(t->tuple.ele, | ||
| 302 | sizeof(exprtoken*)*newsize); | ||
| 303 | t->tuple.ele = newele; | ||
| 304 | alloc = newsize; | ||
| 305 | } | ||
| 306 | t->tuple.ele[t->tuple.len++] = ele; // Add element. | ||
| 307 | |||
| 308 | jsonSkipWhiteSpaces(p,end); | ||
| 309 | if (*p>=end) { | ||
| 310 | // Unterminated array. Note that this check is crucial because | ||
| 311 | // previous value parsed may seek 'p' to 'end'. | ||
| 312 | exprTokenRelease(t); | ||
| 313 | return NULL; | ||
| 314 | } | ||
| 315 | |||
| 316 | // Check for comma (more elements) or closing bracket. | ||
| 317 | if (**p == ',') { | ||
| 318 | (*p)++; // Skip ',' | ||
| 319 | jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element | ||
| 320 | continue; // Parse next element | ||
| 321 | } else if (**p == ']') { | ||
| 322 | (*p)++; // Skip ']' | ||
| 323 | return t; // End of array | ||
| 324 | } else { | ||
| 325 | // Unexpected character (not ',' or ']') | ||
| 326 | exprTokenRelease(t); | ||
| 327 | return NULL; | ||
| 328 | } | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | /* Turn a JSON value into an expr token. */ | ||
| 333 | static exprtoken *jsonParseValueToken(const char **p, const char *end) { | ||
| 334 | jsonSkipWhiteSpaces(p,end); | ||
| 335 | if (*p >= end) return NULL; | ||
| 336 | |||
| 337 | switch (**p) { | ||
| 338 | case '"': return jsonParseStringToken(p,end); | ||
| 339 | case '[': return jsonParseArrayToken(p,end); | ||
| 340 | case '{': return NULL; // No nested elements support for now. | ||
| 341 | case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1); | ||
| 342 | case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0); | ||
| 343 | case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0); | ||
| 344 | default: | ||
| 345 | // Check if it starts like a number. | ||
| 346 | if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') { | ||
| 347 | return jsonParseNumberToken(p,end); | ||
| 348 | } | ||
| 349 | // Anything else is an unsupported type or malformed JSON. | ||
| 350 | return NULL; | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | /* ============================== Fast key seeking ========================== */ | ||
| 355 | |||
/* Locate the value of a top-level field inside a JSON object.
 *
 * 'json'..'end' delimit the document, which must be an object. 'field'
 * and 'flen' give the key to look for. Keys are compared byte-for-byte
 * against the raw text between the quotes, so a key written with escape
 * sequences will not match its unescaped form.
 *
 * Returns a pointer to the first byte of the matching value, or NULL if
 * the field is absent or the document is malformed. Values of other
 * fields are skipped, not parsed, and nothing is allocated here. */
static const char *jsonSeekField(const char *json, const char *end,
                                 const char *field, size_t flen) {
    const char *cur = json;

    jsonSkipWhiteSpaces(&cur, end);
    if (cur >= end || *cur != '{') return NULL; /* Not an object. */
    cur++;

    for (;;) {
        jsonSkipWhiteSpaces(&cur, end);
        if (cur >= end) return NULL; /* Input ended inside the object. */

        /* Anything but a quote ends the search: '}' means the field is
         * not there, any other byte is not a valid key. */
        if (*cur != '"') return NULL;

        /* Delimit the key with jsonSkipString(), then compare its raw
         * content (without the quotes) against the requested name. */
        const char *name = cur + 1;
        const char *after_key = cur;
        if (!jsonSkipString(&after_key, end)) return NULL;

        size_t name_len = (size_t)((after_key - 1) - name);
        int is_target = name_len == flen && memcmp(name, field, flen) == 0;
        cur = after_key;

        /* A ':' and then a value must follow the key. */
        jsonSkipWhiteSpaces(&cur, end);
        if (cur >= end || *cur != ':') return NULL;
        cur++;

        jsonSkipWhiteSpaces(&cur, end);
        if (cur >= end) return NULL;

        if (is_target) return cur; /* First byte of the wanted value. */

        /* Not our field: step over its value. */
        if (!jsonSkipValue(&cur, end)) return NULL;

        /* Only a ',' keeps the search going. A '}' closes the object
         * without a match and any other byte is a syntax error. */
        jsonSkipWhiteSpaces(&cur, end);
        if (cur >= end || *cur != ',') return NULL;
        cur++;
    }
}
| 428 | |||
| 429 | /* This is the only real API that this file conceptually exports (it is | ||
| 430 | * inlined, actually). */ | ||
| 431 | exprtoken *jsonExtractField(const char *json, size_t json_len, | ||
| 432 | const char *field, size_t field_len) | ||
| 433 | { | ||
| 434 | const char *end = json + json_len; | ||
| 435 | const char *valptr = jsonSeekField(json,end,field,field_len); | ||
| 436 | if (!valptr) return NULL; | ||
| 437 | |||
| 438 | /* Key found, valptr points to the start of the value. | ||
| 439 | * Convert it into an expression token object. */ | ||
| 440 | return jsonParseValueToken(&valptr,end); | ||
| 441 | } | ||
