diff options
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/fastjson.c')
| -rw-r--r-- | examples/redis-unstable/modules/vector-sets/fastjson.c | 441 |
1 files changed, 0 insertions, 441 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c deleted file mode 100644 index 78926e2..0000000 --- a/examples/redis-unstable/modules/vector-sets/fastjson.c +++ /dev/null @@ -1,441 +0,0 @@ -/* Ultra‑lightweight top‑level JSON field extractor. - * Return the element directly as an expr.c token. - * This code is directly included inside expr.c. - * - * Copyright (c) 2025-Present, Redis Ltd. - * All rights reserved. - * - * Licensed under your choice of the Redis Source Available License 2.0 - * (RSALv2) or the Server Side Public License v1 (SSPLv1). - * - * Originally authored by: Salvatore Sanfilippo. - * - * ------------------------------------------------------------------ - * - * DESIGN GOALS: - * - * 1. Zero heap allocations while seeking the requested key. - * 2. A single parse (and therefore a single allocation, if needed) - * when the key finally matches. - * 3. Same subset‑of‑JSON coverage needed by expr.c: - * - Strings (escapes: \" \\ \n \r \t). - * - Numbers (double). - * - Booleans. - * - Null. - * - Flat arrays of the above primitives. - * - * Any other value (nested object, unicode escape, etc.) returns NULL. - * Should be very easy to extend it in case in the future we want - * more for the FILTER option of VSIM. - * 4. No global state, so this file can be #included directly in expr.c. - * - * The only API expr.c uses directly is: - * - * exprtoken *jsonExtractField(const char *json, size_t json_len, - * const char *field, size_t field_len); - * ------------------------------------------------------------------ */ - -#include <ctype.h> -#include <string.h> - -// Forward declarations. -static int jsonSkipValue(const char **p, const char *end); -static exprtoken *jsonParseValueToken(const char **p, const char *end); - -/* Similar to ctype.h isdigit() but covers the whole JSON number charset, - * including exp form. */ -static int jsonIsNumberChar(int c) { - return isdigit(c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E'; -} - -/* ========================== Fast skipping of JSON ========================= - * The helpers here are designed to skip values without performing any - * allocation. This way, for the use case of this JSON parser, we are able - * to easily (and with good speed) skip fields and values we are not - * interested in. Then, later in the code, when we find the field we want - * to obtain, we finally call the functions that turn a given JSON value - * associated to a field into our of our expressions token. - * ========================================================================== */ - -/* Advance *p consuming all the spaces. */ -static inline void jsonSkipWhiteSpaces(const char **p, const char *end) { - while (*p < end && isspace((unsigned char)**p)) (*p)++; -} - -/* Advance *p past a JSON string. Returns 1 on success, 0 on error. */ -static int jsonSkipString(const char **p, const char *end) { - if (*p >= end || **p != '"') return 0; - (*p)++; /* Skip opening quote. */ - while (*p < end) { - if (**p == '\\') { - (*p) += 2; - continue; - } - if (**p == '"') { - (*p)++; /* Skip closing quote. */ - return 1; - } - (*p)++; - } - return 0; /* unterminated */ -} - -/* Skip an array or object generically using depth counter. - * Opener and closer tells the function how the aggregated - * data type starts/stops, basically [] or {}. */ -static int jsonSkipBracketed(const char **p, const char *end, - char opener, char closer) { - int depth = 1; - (*p)++; /* Skip opener. */ - - /* Loop until we reach the end of the input or find the matching - * closer (depth becomes 0). */ - while (*p < end && depth > 0) { - char c = **p; - - if (c == '"') { - // Found a string, delegate skipping to jsonSkipString(). - if (!jsonSkipString(p, end)) { - return 0; // String skipping failed (e.g., unterminated) - } - /* jsonSkipString() advances *p past the closing quote. - * Continue the loop to process the character *after* the string. */ - continue; - } - - /* If it's not a string, check if it affects the depth for the - * specific brackets we are currently tracking. */ - if (c == opener) { - depth++; - } else if (c == closer) { - depth--; - } - - /* Always advance the pointer for any non-string character. - * This handles commas, colons, whitespace, numbers, literals, - * and even nested brackets of a *different* type than the - * one we are currently skipping (e.g. skipping a { inside []). */ - (*p)++; - } - - /* Return 1 (true) if we successfully found the matching closer, - * otherwise there is a parse error and we return 0. */ - return depth == 0; -} - -/* Skip a single JSON literal (true, null, ...) starting at *p. - * Returns 1 on success, 0 on failure. */ -static int jsonSkipLiteral(const char **p, const char *end, const char *lit) { - size_t l = strlen(lit); - if (*p + l > end) return 0; - if (strncmp(*p, lit, l) == 0) { *p += l; return 1; } - return 0; -} - -/* Skip number, don't check that number format is correct, just consume - * number-alike characters. - * - * Note: More robust number skipping might check validity, - * but for skipping, just consuming plausible characters is enough. */ -static int jsonSkipNumber(const char **p, const char *end) { - const char *num_start = *p; - while (*p < end && jsonIsNumberChar(**p)) (*p)++; - return *p > num_start; // Any progress made? Otherwise no number found. -} - -/* Skip any JSON value. 1 = success, 0 = error. */ -static int jsonSkipValue(const char **p, const char *end) { - jsonSkipWhiteSpaces(p, end); - if (*p >= end) return 0; - switch (**p) { - case '"': return jsonSkipString(p, end); - case '{': return jsonSkipBracketed(p, end, '{', '}'); - case '[': return jsonSkipBracketed(p, end, '[', ']'); - case 't': return jsonSkipLiteral(p, end, "true"); - case 'f': return jsonSkipLiteral(p, end, "false"); - case 'n': return jsonSkipLiteral(p, end, "null"); - default: return jsonSkipNumber(p, end); - } -} - -/* =========================== JSON to exprtoken ============================ - * The functions below convert a given json value to the equivalent - * expression token structure. - * ========================================================================== */ - -static exprtoken *jsonParseStringToken(const char **p, const char *end) { - if (*p >= end || **p != '"') return NULL; - const char *start = ++(*p); - int esc = 0; size_t len = 0; int has_esc = 0; - const char *q = *p; - while (q < end) { - if (esc) { esc = 0; q++; len++; has_esc = 1; continue; } - if (*q == '\\') { esc = 1; q++; continue; } - if (*q == '"') break; - q++; len++; - } - if (q >= end || *q != '"') return NULL; // Unterminated string - exprtoken *t = exprNewToken(EXPR_TOKEN_STR); - - if (!has_esc) { - // No escapes, we can point directly into the original JSON string. - t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL; - } else { - // Escapes present, need to allocate and copy/process escapes. - char *dst = RedisModule_Alloc(len + 1); - - t->str.start = t->str.heapstr = dst; t->str.len = len; - const char *r = start; esc = 0; - while (r < q) { - if (esc) { - switch (*r) { - // Supported escapes from Goal 3. - case 'n': *dst='\n'; break; - case 'r': *dst='\r'; break; - case 't': *dst='\t'; break; - case '\\': *dst='\\'; break; - case '"': *dst='\"'; break; - // Escapes (like \uXXXX, \b, \f) are not supported for now, - // we just copy them verbatim. - default: *dst=*r; break; - } - dst++; esc = 0; r++; continue; - } - if (*r == '\\') { esc = 1; r++; continue; } - *dst++ = *r++; - } - *dst = '\0'; // Null-terminate the allocated string. - } - *p = q + 1; // Advance the main pointer past the closing quote. - return t; -} - -static exprtoken *jsonParseNumberToken(const char **p, const char *end) { - // Use a buffer to extract the number literal for parsing with strtod(). - char buf[256]; int idx = 0; - const char *start = *p; // For strtod partial failures check. - - // Copy potential number characters to buffer. - while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) { - buf[idx++] = **p; - (*p)++; - } - buf[idx]='\0'; // Null-terminate buffer. - - if (idx==0) return NULL; // No number characters found. - - char *ep; // End pointer for strtod validation. - double v = strtod(buf, &ep); - - /* Check if strtod() consumed the entire buffer content. - * If not, the number format was invalid. */ - if (*ep!='\0') { - // strtod() failed; rewind p to the start and return NULL - *p = start; - return NULL; - } - - // If strtod() succeeded, create and return the token.. - exprtoken *t = exprNewToken(EXPR_TOKEN_NUM); - t->num = v; - return t; -} - -static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) { - size_t l = strlen(lit); - - // Ensure we don't read past 'end'. - if ((*p + l) > end) return NULL; - - if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match. - - // Check that the character *after* the literal is a valid JSON delimiter - // (whitespace, comma, closing bracket/brace, or end of input) - // This prevents matching "trueblabla" as "true". - if ((*p + l) < end) { - char next_char = *(*p + l); - if (!isspace((unsigned char)next_char) && next_char!=',' && - next_char!=']' && next_char!='}') { - return NULL; // Invalid character following literal. - } - } - - // Literal matched and is correctly terminated. - *p += l; - exprtoken *t = exprNewToken(type); - t->num = num; - return t; -} - -static exprtoken *jsonParseArrayToken(const char **p, const char *end) { - if (*p >= end || **p != '[') return NULL; - (*p)++; // Skip '['. - jsonSkipWhiteSpaces(p,end); - - exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE); - t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0; - - // Handle empty array []. - if (*p < end && **p == ']') { - (*p)++; // Skip ']'. - return t; - } - - // Parse array elements. - while (1) { - exprtoken *ele = jsonParseValueToken(p,end); - if (!ele) { - exprTokenRelease(t); // Clean up partially built array token. - return NULL; - } - - // Grow allocated space for elements if needed. - if (t->tuple.len == alloc) { - size_t newsize = alloc ? alloc * 2 : 4; - // Check for potential overflow if newsize becomes huge. - if (newsize < alloc) { - exprTokenRelease(ele); - exprTokenRelease(t); - return NULL; - } - exprtoken **newele = RedisModule_Realloc(t->tuple.ele, - sizeof(exprtoken*)*newsize); - t->tuple.ele = newele; - alloc = newsize; - } - t->tuple.ele[t->tuple.len++] = ele; // Add element. - - jsonSkipWhiteSpaces(p,end); - if (*p>=end) { - // Unterminated array. Note that this check is crucial because - // previous value parsed may seek 'p' to 'end'. - exprTokenRelease(t); - return NULL; - } - - // Check for comma (more elements) or closing bracket. - if (**p == ',') { - (*p)++; // Skip ',' - jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element - continue; // Parse next element - } else if (**p == ']') { - (*p)++; // Skip ']' - return t; // End of array - } else { - // Unexpected character (not ',' or ']') - exprTokenRelease(t); - return NULL; - } - } -} - -/* Turn a JSON value into an expr token. */ -static exprtoken *jsonParseValueToken(const char **p, const char *end) { - jsonSkipWhiteSpaces(p,end); - if (*p >= end) return NULL; - - switch (**p) { - case '"': return jsonParseStringToken(p,end); - case '[': return jsonParseArrayToken(p,end); - case '{': return NULL; // No nested elements support for now. - case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1); - case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0); - case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0); - default: - // Check if it starts like a number. - if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') { - return jsonParseNumberToken(p,end); - } - // Anything else is an unsupported type or malformed JSON. - return NULL; - } -} - -/* ============================== Fast key seeking ========================== */ - -/* Finds the start of the value for a given field key within a JSON object. - * Returns pointer to the first char of the value, or NULL if not found/error. - * This function does not perform any allocation and is optimized to seek - * the specified *toplevel* filed as fast as possible. */ -static const char *jsonSeekField(const char *json, const char *end, - const char *field, size_t flen) { - const char *p = json; - jsonSkipWhiteSpaces(&p,end); - if (p >= end || *p != '{') return NULL; // Must start with '{'. - p++; // skip '{'. - - while (1) { - jsonSkipWhiteSpaces(&p,end); - if (p >= end) return NULL; // Reached end within object. - - if (*p == '}') return NULL; // End of object, field not found. - - // Expecting a key (string). - if (*p != '"') return NULL; // Key must be a string. - - // --- Key Matching using jsonSkipString --- - const char *key_start = p + 1; // Start of key content. - const char *key_end_p = p; // Will later contain the end. - - // Use jsonSkipString() to find the end. - if (!jsonSkipString(&key_end_p, end)) { - // Unterminated / invalid key string. - return NULL; - } - - // Calculate the length of the key's content. - size_t klen = (key_end_p - 1) - key_start; - - /* Perform the comparison using the raw key content. - * WARNING: This uses memcmp(), so we don't handle escaped chars - * within the key matching against unescaped chars in 'field'. */ - int match = klen == flen && !memcmp(key_start, field, flen); - - // Update the main pointer 'p' to be after the key string. - p = key_end_p; - - // Now we expect to find a ":" followed by a value. - jsonSkipWhiteSpaces(&p,end); - if (p>=end || *p!=':') return NULL; // Expect ':' after key - p++; // Skip ':'. - - // Seek value. - jsonSkipWhiteSpaces(&p,end); - if (p>=end) return NULL; // Expect value after ':' - - if (match) { - // Found the matching key, p now points to the start of the value. - return p; - } else { - // Key didn't match, skip the corresponding value. - if (!jsonSkipValue(&p,end)) return NULL; // Syntax error. - } - - - // Look for comma or a closing brace. - jsonSkipWhiteSpaces(&p,end); - if (p>=end) return NULL; // Reached end after value. - - if (*p == ',') { - p++; // Skip comma, continue loop to find next key. - continue; - } else if (*p == '}') { - return NULL; // Reached end of object, field not found. - } - return NULL; // Malformed JSON (unexpected char after value). - } -} - -/* This is the only real API that this file conceptually exports (it is - * inlined, actually). */ -exprtoken *jsonExtractField(const char *json, size_t json_len, - const char *field, size_t field_len) -{ - const char *end = json + json_len; - const char *valptr = jsonSeekField(json,end,field,field_len); - if (!valptr) return NULL; - - /* Key found, valptr points to the start of the value. - * Convert it into an expression token object. */ - return jsonParseValueToken(&valptr,end); -} |
