diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-01-21 22:40:55 +0100 |
| commit | 5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch) | |
| tree | 1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/fastjson.c | |
| parent | c7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff) | |
| download | crep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz | |
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/fastjson.c')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/fastjson.c | 441 |
1 files changed, 441 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c new file mode 100644 index 0000000..78926e2 --- /dev/null +++ b/examples/redis-unstable/modules/vector-sets/fastjson.c @@ -0,0 +1,441 @@ +/* Ultra‑lightweight top‑level JSON field extractor. + * Return the element directly as an expr.c token. + * This code is directly included inside expr.c. + * + * Copyright (c) 2025-Present, Redis Ltd. + * All rights reserved. + * + * Licensed under your choice of the Redis Source Available License 2.0 + * (RSALv2) or the Server Side Public License v1 (SSPLv1). + * + * Originally authored by: Salvatore Sanfilippo. + * + * ------------------------------------------------------------------ + * + * DESIGN GOALS: + * + * 1. Zero heap allocations while seeking the requested key. + * 2. A single parse (and therefore a single allocation, if needed) + * when the key finally matches. + * 3. Same subset‑of‑JSON coverage needed by expr.c: + * - Strings (escapes: \" \\ \n \r \t). + * - Numbers (double). + * - Booleans. + * - Null. + * - Flat arrays of the above primitives. + * + * Any other value (nested object, unicode escape, etc.) returns NULL. + * Should be very easy to extend it in case in the future we want + * more for the FILTER option of VSIM. + * 4. No global state, so this file can be #included directly in expr.c. + * + * The only API expr.c uses directly is: + * + * exprtoken *jsonExtractField(const char *json, size_t json_len, + * const char *field, size_t field_len); + * ------------------------------------------------------------------ */ + +#include <ctype.h> +#include <string.h> + +// Forward declarations. +static int jsonSkipValue(const char **p, const char *end); +static exprtoken *jsonParseValueToken(const char **p, const char *end); + +/* Similar to ctype.h isdigit() but covers the whole JSON number charset, + * including exp form. */ +static int jsonIsNumberChar(int c) { + return isdigit(c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E'; +} + +/* ========================== Fast skipping of JSON ========================= + * The helpers here are designed to skip values without performing any + * allocation. This way, for the use case of this JSON parser, we are able + * to easily (and with good speed) skip fields and values we are not + * interested in. Then, later in the code, when we find the field we want + * to obtain, we finally call the functions that turn a given JSON value + * associated to a field into our of our expressions token. + * ========================================================================== */ + +/* Advance *p consuming all the spaces. */ +static inline void jsonSkipWhiteSpaces(const char **p, const char *end) { + while (*p < end && isspace((unsigned char)**p)) (*p)++; +} + +/* Advance *p past a JSON string. Returns 1 on success, 0 on error. */ +static int jsonSkipString(const char **p, const char *end) { + if (*p >= end || **p != '"') return 0; + (*p)++; /* Skip opening quote. */ + while (*p < end) { + if (**p == '\\') { + (*p) += 2; + continue; + } + if (**p == '"') { + (*p)++; /* Skip closing quote. */ + return 1; + } + (*p)++; + } + return 0; /* unterminated */ +} + +/* Skip an array or object generically using depth counter. + * Opener and closer tells the function how the aggregated + * data type starts/stops, basically [] or {}. */ +static int jsonSkipBracketed(const char **p, const char *end, + char opener, char closer) { + int depth = 1; + (*p)++; /* Skip opener. */ + + /* Loop until we reach the end of the input or find the matching + * closer (depth becomes 0). */ + while (*p < end && depth > 0) { + char c = **p; + + if (c == '"') { + // Found a string, delegate skipping to jsonSkipString(). + if (!jsonSkipString(p, end)) { + return 0; // String skipping failed (e.g., unterminated) + } + /* jsonSkipString() advances *p past the closing quote. + * Continue the loop to process the character *after* the string. */ + continue; + } + + /* If it's not a string, check if it affects the depth for the + * specific brackets we are currently tracking. */ + if (c == opener) { + depth++; + } else if (c == closer) { + depth--; + } + + /* Always advance the pointer for any non-string character. + * This handles commas, colons, whitespace, numbers, literals, + * and even nested brackets of a *different* type than the + * one we are currently skipping (e.g. skipping a { inside []). */ + (*p)++; + } + + /* Return 1 (true) if we successfully found the matching closer, + * otherwise there is a parse error and we return 0. */ + return depth == 0; +} + +/* Skip a single JSON literal (true, null, ...) starting at *p. + * Returns 1 on success, 0 on failure. */ +static int jsonSkipLiteral(const char **p, const char *end, const char *lit) { + size_t l = strlen(lit); + if (*p + l > end) return 0; + if (strncmp(*p, lit, l) == 0) { *p += l; return 1; } + return 0; +} + +/* Skip number, don't check that number format is correct, just consume + * number-alike characters. + * + * Note: More robust number skipping might check validity, + * but for skipping, just consuming plausible characters is enough. */ +static int jsonSkipNumber(const char **p, const char *end) { + const char *num_start = *p; + while (*p < end && jsonIsNumberChar(**p)) (*p)++; + return *p > num_start; // Any progress made? Otherwise no number found. +} + +/* Skip any JSON value. 1 = success, 0 = error. */ +static int jsonSkipValue(const char **p, const char *end) { + jsonSkipWhiteSpaces(p, end); + if (*p >= end) return 0; + switch (**p) { + case '"': return jsonSkipString(p, end); + case '{': return jsonSkipBracketed(p, end, '{', '}'); + case '[': return jsonSkipBracketed(p, end, '[', ']'); + case 't': return jsonSkipLiteral(p, end, "true"); + case 'f': return jsonSkipLiteral(p, end, "false"); + case 'n': return jsonSkipLiteral(p, end, "null"); + default: return jsonSkipNumber(p, end); + } +} + +/* =========================== JSON to exprtoken ============================ + * The functions below convert a given json value to the equivalent + * expression token structure. + * ========================================================================== */ + +static exprtoken *jsonParseStringToken(const char **p, const char *end) { + if (*p >= end || **p != '"') return NULL; + const char *start = ++(*p); + int esc = 0; size_t len = 0; int has_esc = 0; + const char *q = *p; + while (q < end) { + if (esc) { esc = 0; q++; len++; has_esc = 1; continue; } + if (*q == '\\') { esc = 1; q++; continue; } + if (*q == '"') break; + q++; len++; + } + if (q >= end || *q != '"') return NULL; // Unterminated string + exprtoken *t = exprNewToken(EXPR_TOKEN_STR); + + if (!has_esc) { + // No escapes, we can point directly into the original JSON string. + t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL; + } else { + // Escapes present, need to allocate and copy/process escapes. + char *dst = RedisModule_Alloc(len + 1); + + t->str.start = t->str.heapstr = dst; t->str.len = len; + const char *r = start; esc = 0; + while (r < q) { + if (esc) { + switch (*r) { + // Supported escapes from Goal 3. + case 'n': *dst='\n'; break; + case 'r': *dst='\r'; break; + case 't': *dst='\t'; break; + case '\\': *dst='\\'; break; + case '"': *dst='\"'; break; + // Escapes (like \uXXXX, \b, \f) are not supported for now, + // we just copy them verbatim. + default: *dst=*r; break; + } + dst++; esc = 0; r++; continue; + } + if (*r == '\\') { esc = 1; r++; continue; } + *dst++ = *r++; + } + *dst = '\0'; // Null-terminate the allocated string. + } + *p = q + 1; // Advance the main pointer past the closing quote. + return t; +} + +static exprtoken *jsonParseNumberToken(const char **p, const char *end) { + // Use a buffer to extract the number literal for parsing with strtod(). + char buf[256]; int idx = 0; + const char *start = *p; // For strtod partial failures check. + + // Copy potential number characters to buffer. + while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) { + buf[idx++] = **p; + (*p)++; + } + buf[idx]='\0'; // Null-terminate buffer. + + if (idx==0) return NULL; // No number characters found. + + char *ep; // End pointer for strtod validation. + double v = strtod(buf, &ep); + + /* Check if strtod() consumed the entire buffer content. + * If not, the number format was invalid. */ + if (*ep!='\0') { + // strtod() failed; rewind p to the start and return NULL + *p = start; + return NULL; + } + + // If strtod() succeeded, create and return the token.. + exprtoken *t = exprNewToken(EXPR_TOKEN_NUM); + t->num = v; + return t; +} + +static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) { + size_t l = strlen(lit); + + // Ensure we don't read past 'end'. + if ((*p + l) > end) return NULL; + + if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match. + + // Check that the character *after* the literal is a valid JSON delimiter + // (whitespace, comma, closing bracket/brace, or end of input) + // This prevents matching "trueblabla" as "true". + if ((*p + l) < end) { + char next_char = *(*p + l); + if (!isspace((unsigned char)next_char) && next_char!=',' && + next_char!=']' && next_char!='}') { + return NULL; // Invalid character following literal. + } + } + + // Literal matched and is correctly terminated. + *p += l; + exprtoken *t = exprNewToken(type); + t->num = num; + return t; +} + +static exprtoken *jsonParseArrayToken(const char **p, const char *end) { + if (*p >= end || **p != '[') return NULL; + (*p)++; // Skip '['. + jsonSkipWhiteSpaces(p,end); + + exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE); + t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0; + + // Handle empty array []. + if (*p < end && **p == ']') { + (*p)++; // Skip ']'. + return t; + } + + // Parse array elements. + while (1) { + exprtoken *ele = jsonParseValueToken(p,end); + if (!ele) { + exprTokenRelease(t); // Clean up partially built array token. + return NULL; + } + + // Grow allocated space for elements if needed. + if (t->tuple.len == alloc) { + size_t newsize = alloc ? alloc * 2 : 4; + // Check for potential overflow if newsize becomes huge. + if (newsize < alloc) { + exprTokenRelease(ele); + exprTokenRelease(t); + return NULL; + } + exprtoken **newele = RedisModule_Realloc(t->tuple.ele, + sizeof(exprtoken*)*newsize); + t->tuple.ele = newele; + alloc = newsize; + } + t->tuple.ele[t->tuple.len++] = ele; // Add element. + + jsonSkipWhiteSpaces(p,end); + if (*p>=end) { + // Unterminated array. Note that this check is crucial because + // previous value parsed may seek 'p' to 'end'. + exprTokenRelease(t); + return NULL; + } + + // Check for comma (more elements) or closing bracket. + if (**p == ',') { + (*p)++; // Skip ',' + jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element + continue; // Parse next element + } else if (**p == ']') { + (*p)++; // Skip ']' + return t; // End of array + } else { + // Unexpected character (not ',' or ']') + exprTokenRelease(t); + return NULL; + } + } +} + +/* Turn a JSON value into an expr token. */ +static exprtoken *jsonParseValueToken(const char **p, const char *end) { + jsonSkipWhiteSpaces(p,end); + if (*p >= end) return NULL; + + switch (**p) { + case '"': return jsonParseStringToken(p,end); + case '[': return jsonParseArrayToken(p,end); + case '{': return NULL; // No nested elements support for now. + case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1); + case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0); + case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0); + default: + // Check if it starts like a number. + if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') { + return jsonParseNumberToken(p,end); + } + // Anything else is an unsupported type or malformed JSON. + return NULL; + } +} + +/* ============================== Fast key seeking ========================== */ + +/* Finds the start of the value for a given field key within a JSON object. + * Returns pointer to the first char of the value, or NULL if not found/error. + * This function does not perform any allocation and is optimized to seek + * the specified *toplevel* filed as fast as possible. */ +static const char *jsonSeekField(const char *json, const char *end, + const char *field, size_t flen) { + const char *p = json; + jsonSkipWhiteSpaces(&p,end); + if (p >= end || *p != '{') return NULL; // Must start with '{'. + p++; // skip '{'. + + while (1) { + jsonSkipWhiteSpaces(&p,end); + if (p >= end) return NULL; // Reached end within object. + + if (*p == '}') return NULL; // End of object, field not found. + + // Expecting a key (string). + if (*p != '"') return NULL; // Key must be a string. + + // --- Key Matching using jsonSkipString --- + const char *key_start = p + 1; // Start of key content. + const char *key_end_p = p; // Will later contain the end. + + // Use jsonSkipString() to find the end. + if (!jsonSkipString(&key_end_p, end)) { + // Unterminated / invalid key string. + return NULL; + } + + // Calculate the length of the key's content. + size_t klen = (key_end_p - 1) - key_start; + + /* Perform the comparison using the raw key content. + * WARNING: This uses memcmp(), so we don't handle escaped chars + * within the key matching against unescaped chars in 'field'. */ + int match = klen == flen && !memcmp(key_start, field, flen); + + // Update the main pointer 'p' to be after the key string. + p = key_end_p; + + // Now we expect to find a ":" followed by a value. + jsonSkipWhiteSpaces(&p,end); + if (p>=end || *p!=':') return NULL; // Expect ':' after key + p++; // Skip ':'. + + // Seek value. + jsonSkipWhiteSpaces(&p,end); + if (p>=end) return NULL; // Expect value after ':' + + if (match) { + // Found the matching key, p now points to the start of the value. + return p; + } else { + // Key didn't match, skip the corresponding value. + if (!jsonSkipValue(&p,end)) return NULL; // Syntax error. + } + + + // Look for comma or a closing brace. + jsonSkipWhiteSpaces(&p,end); + if (p>=end) return NULL; // Reached end after value. + + if (*p == ',') { + p++; // Skip comma, continue loop to find next key. + continue; + } else if (*p == '}') { + return NULL; // Reached end of object, field not found. + } + return NULL; // Malformed JSON (unexpected char after value). + } +} + +/* This is the only real API that this file conceptually exports (it is + * inlined, actually). */ +exprtoken *jsonExtractField(const char *json, size_t json_len, + const char *field, size_t field_len) +{ + const char *end = json + json_len; + const char *valptr = jsonSeekField(json,end,field,field_len); + if (!valptr) return NULL; + + /* Key found, valptr points to the start of the value. + * Convert it into an expression token object. */ + return jsonParseValueToken(&valptr,end); +} |
