summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/fastjson.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/fastjson.c')
-rw-r--r--examples/redis-unstable/modules/vector-sets/fastjson.c441
1 files changed, 0 insertions, 441 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c
deleted file mode 100644
index 78926e2..0000000
--- a/examples/redis-unstable/modules/vector-sets/fastjson.c
+++ /dev/null
@@ -1,441 +0,0 @@
-/* Ultra‑lightweight top‑level JSON field extractor.
- * Return the element directly as an expr.c token.
- * This code is directly included inside expr.c.
- *
- * Copyright (c) 2025-Present, Redis Ltd.
- * All rights reserved.
- *
- * Licensed under your choice of the Redis Source Available License 2.0
- * (RSALv2) or the Server Side Public License v1 (SSPLv1).
- *
- * Originally authored by: Salvatore Sanfilippo.
- *
- * ------------------------------------------------------------------
- *
- * DESIGN GOALS:
- *
- * 1. Zero heap allocations while seeking the requested key.
- * 2. A single parse (and therefore a single allocation, if needed)
- * when the key finally matches.
- * 3. Same subset‑of‑JSON coverage needed by expr.c:
- * - Strings (escapes: \" \\ \n \r \t).
- * - Numbers (double).
- * - Booleans.
- * - Null.
- * - Flat arrays of the above primitives.
- *
- * Any other value (nested object, unicode escape, etc.) returns NULL.
- * Should be very easy to extend it in case in the future we want
- * more for the FILTER option of VSIM.
- * 4. No global state, so this file can be #included directly in expr.c.
- *
- * The only API expr.c uses directly is:
- *
- * exprtoken *jsonExtractField(const char *json, size_t json_len,
- * const char *field, size_t field_len);
- * ------------------------------------------------------------------ */
-
-#include <ctype.h>
-#include <string.h>
-
-// Forward declarations.
-static int jsonSkipValue(const char **p, const char *end);
-static exprtoken *jsonParseValueToken(const char **p, const char *end);
-
-/* Similar to ctype.h isdigit() but covers the whole JSON number charset,
- * including exp form. */
-static int jsonIsNumberChar(int c) {
- return isdigit(c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E';
-}
-
-/* ========================== Fast skipping of JSON =========================
- * The helpers here are designed to skip values without performing any
- * allocation. This way, for the use case of this JSON parser, we are able
- * to easily (and with good speed) skip fields and values we are not
- * interested in. Then, later in the code, when we find the field we want
- * to obtain, we finally call the functions that turn a given JSON value
- * associated to a field into our of our expressions token.
- * ========================================================================== */
-
-/* Advance *p consuming all the spaces. */
-static inline void jsonSkipWhiteSpaces(const char **p, const char *end) {
- while (*p < end && isspace((unsigned char)**p)) (*p)++;
-}
-
-/* Advance *p past a JSON string. Returns 1 on success, 0 on error. */
-static int jsonSkipString(const char **p, const char *end) {
- if (*p >= end || **p != '"') return 0;
- (*p)++; /* Skip opening quote. */
- while (*p < end) {
- if (**p == '\\') {
- (*p) += 2;
- continue;
- }
- if (**p == '"') {
- (*p)++; /* Skip closing quote. */
- return 1;
- }
- (*p)++;
- }
- return 0; /* unterminated */
-}
-
-/* Skip an array or object generically using depth counter.
- * Opener and closer tells the function how the aggregated
- * data type starts/stops, basically [] or {}. */
-static int jsonSkipBracketed(const char **p, const char *end,
- char opener, char closer) {
- int depth = 1;
- (*p)++; /* Skip opener. */
-
- /* Loop until we reach the end of the input or find the matching
- * closer (depth becomes 0). */
- while (*p < end && depth > 0) {
- char c = **p;
-
- if (c == '"') {
- // Found a string, delegate skipping to jsonSkipString().
- if (!jsonSkipString(p, end)) {
- return 0; // String skipping failed (e.g., unterminated)
- }
- /* jsonSkipString() advances *p past the closing quote.
- * Continue the loop to process the character *after* the string. */
- continue;
- }
-
- /* If it's not a string, check if it affects the depth for the
- * specific brackets we are currently tracking. */
- if (c == opener) {
- depth++;
- } else if (c == closer) {
- depth--;
- }
-
- /* Always advance the pointer for any non-string character.
- * This handles commas, colons, whitespace, numbers, literals,
- * and even nested brackets of a *different* type than the
- * one we are currently skipping (e.g. skipping a { inside []). */
- (*p)++;
- }
-
- /* Return 1 (true) if we successfully found the matching closer,
- * otherwise there is a parse error and we return 0. */
- return depth == 0;
-}
-
-/* Skip a single JSON literal (true, null, ...) starting at *p.
- * Returns 1 on success, 0 on failure. */
-static int jsonSkipLiteral(const char **p, const char *end, const char *lit) {
- size_t l = strlen(lit);
- if (*p + l > end) return 0;
- if (strncmp(*p, lit, l) == 0) { *p += l; return 1; }
- return 0;
-}
-
-/* Skip number, don't check that number format is correct, just consume
- * number-alike characters.
- *
- * Note: More robust number skipping might check validity,
- * but for skipping, just consuming plausible characters is enough. */
-static int jsonSkipNumber(const char **p, const char *end) {
- const char *num_start = *p;
- while (*p < end && jsonIsNumberChar(**p)) (*p)++;
- return *p > num_start; // Any progress made? Otherwise no number found.
-}
-
-/* Skip any JSON value. 1 = success, 0 = error. */
-static int jsonSkipValue(const char **p, const char *end) {
- jsonSkipWhiteSpaces(p, end);
- if (*p >= end) return 0;
- switch (**p) {
- case '"': return jsonSkipString(p, end);
- case '{': return jsonSkipBracketed(p, end, '{', '}');
- case '[': return jsonSkipBracketed(p, end, '[', ']');
- case 't': return jsonSkipLiteral(p, end, "true");
- case 'f': return jsonSkipLiteral(p, end, "false");
- case 'n': return jsonSkipLiteral(p, end, "null");
- default: return jsonSkipNumber(p, end);
- }
-}
-
-/* =========================== JSON to exprtoken ============================
- * The functions below convert a given json value to the equivalent
- * expression token structure.
- * ========================================================================== */
-
-static exprtoken *jsonParseStringToken(const char **p, const char *end) {
- if (*p >= end || **p != '"') return NULL;
- const char *start = ++(*p);
- int esc = 0; size_t len = 0; int has_esc = 0;
- const char *q = *p;
- while (q < end) {
- if (esc) { esc = 0; q++; len++; has_esc = 1; continue; }
- if (*q == '\\') { esc = 1; q++; continue; }
- if (*q == '"') break;
- q++; len++;
- }
- if (q >= end || *q != '"') return NULL; // Unterminated string
- exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
-
- if (!has_esc) {
- // No escapes, we can point directly into the original JSON string.
- t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL;
- } else {
- // Escapes present, need to allocate and copy/process escapes.
- char *dst = RedisModule_Alloc(len + 1);
-
- t->str.start = t->str.heapstr = dst; t->str.len = len;
- const char *r = start; esc = 0;
- while (r < q) {
- if (esc) {
- switch (*r) {
- // Supported escapes from Goal 3.
- case 'n': *dst='\n'; break;
- case 'r': *dst='\r'; break;
- case 't': *dst='\t'; break;
- case '\\': *dst='\\'; break;
- case '"': *dst='\"'; break;
- // Escapes (like \uXXXX, \b, \f) are not supported for now,
- // we just copy them verbatim.
- default: *dst=*r; break;
- }
- dst++; esc = 0; r++; continue;
- }
- if (*r == '\\') { esc = 1; r++; continue; }
- *dst++ = *r++;
- }
- *dst = '\0'; // Null-terminate the allocated string.
- }
- *p = q + 1; // Advance the main pointer past the closing quote.
- return t;
-}
-
-static exprtoken *jsonParseNumberToken(const char **p, const char *end) {
- // Use a buffer to extract the number literal for parsing with strtod().
- char buf[256]; int idx = 0;
- const char *start = *p; // For strtod partial failures check.
-
- // Copy potential number characters to buffer.
- while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) {
- buf[idx++] = **p;
- (*p)++;
- }
- buf[idx]='\0'; // Null-terminate buffer.
-
- if (idx==0) return NULL; // No number characters found.
-
- char *ep; // End pointer for strtod validation.
- double v = strtod(buf, &ep);
-
- /* Check if strtod() consumed the entire buffer content.
- * If not, the number format was invalid. */
- if (*ep!='\0') {
- // strtod() failed; rewind p to the start and return NULL
- *p = start;
- return NULL;
- }
-
- // If strtod() succeeded, create and return the token..
- exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
- t->num = v;
- return t;
-}
-
-static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) {
- size_t l = strlen(lit);
-
- // Ensure we don't read past 'end'.
- if ((*p + l) > end) return NULL;
-
- if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match.
-
- // Check that the character *after* the literal is a valid JSON delimiter
- // (whitespace, comma, closing bracket/brace, or end of input)
- // This prevents matching "trueblabla" as "true".
- if ((*p + l) < end) {
- char next_char = *(*p + l);
- if (!isspace((unsigned char)next_char) && next_char!=',' &&
- next_char!=']' && next_char!='}') {
- return NULL; // Invalid character following literal.
- }
- }
-
- // Literal matched and is correctly terminated.
- *p += l;
- exprtoken *t = exprNewToken(type);
- t->num = num;
- return t;
-}
-
-static exprtoken *jsonParseArrayToken(const char **p, const char *end) {
- if (*p >= end || **p != '[') return NULL;
- (*p)++; // Skip '['.
- jsonSkipWhiteSpaces(p,end);
-
- exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
- t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0;
-
- // Handle empty array [].
- if (*p < end && **p == ']') {
- (*p)++; // Skip ']'.
- return t;
- }
-
- // Parse array elements.
- while (1) {
- exprtoken *ele = jsonParseValueToken(p,end);
- if (!ele) {
- exprTokenRelease(t); // Clean up partially built array token.
- return NULL;
- }
-
- // Grow allocated space for elements if needed.
- if (t->tuple.len == alloc) {
- size_t newsize = alloc ? alloc * 2 : 4;
- // Check for potential overflow if newsize becomes huge.
- if (newsize < alloc) {
- exprTokenRelease(ele);
- exprTokenRelease(t);
- return NULL;
- }
- exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
- sizeof(exprtoken*)*newsize);
- t->tuple.ele = newele;
- alloc = newsize;
- }
- t->tuple.ele[t->tuple.len++] = ele; // Add element.
-
- jsonSkipWhiteSpaces(p,end);
- if (*p>=end) {
- // Unterminated array. Note that this check is crucial because
- // previous value parsed may seek 'p' to 'end'.
- exprTokenRelease(t);
- return NULL;
- }
-
- // Check for comma (more elements) or closing bracket.
- if (**p == ',') {
- (*p)++; // Skip ','
- jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element
- continue; // Parse next element
- } else if (**p == ']') {
- (*p)++; // Skip ']'
- return t; // End of array
- } else {
- // Unexpected character (not ',' or ']')
- exprTokenRelease(t);
- return NULL;
- }
- }
-}
-
-/* Turn a JSON value into an expr token. */
-static exprtoken *jsonParseValueToken(const char **p, const char *end) {
- jsonSkipWhiteSpaces(p,end);
- if (*p >= end) return NULL;
-
- switch (**p) {
- case '"': return jsonParseStringToken(p,end);
- case '[': return jsonParseArrayToken(p,end);
- case '{': return NULL; // No nested elements support for now.
- case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1);
- case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0);
- case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0);
- default:
- // Check if it starts like a number.
- if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') {
- return jsonParseNumberToken(p,end);
- }
- // Anything else is an unsupported type or malformed JSON.
- return NULL;
- }
-}
-
-/* ============================== Fast key seeking ========================== */
-
-/* Finds the start of the value for a given field key within a JSON object.
- * Returns pointer to the first char of the value, or NULL if not found/error.
- * This function does not perform any allocation and is optimized to seek
- * the specified *toplevel* filed as fast as possible. */
-static const char *jsonSeekField(const char *json, const char *end,
- const char *field, size_t flen) {
- const char *p = json;
- jsonSkipWhiteSpaces(&p,end);
- if (p >= end || *p != '{') return NULL; // Must start with '{'.
- p++; // skip '{'.
-
- while (1) {
- jsonSkipWhiteSpaces(&p,end);
- if (p >= end) return NULL; // Reached end within object.
-
- if (*p == '}') return NULL; // End of object, field not found.
-
- // Expecting a key (string).
- if (*p != '"') return NULL; // Key must be a string.
-
- // --- Key Matching using jsonSkipString ---
- const char *key_start = p + 1; // Start of key content.
- const char *key_end_p = p; // Will later contain the end.
-
- // Use jsonSkipString() to find the end.
- if (!jsonSkipString(&key_end_p, end)) {
- // Unterminated / invalid key string.
- return NULL;
- }
-
- // Calculate the length of the key's content.
- size_t klen = (key_end_p - 1) - key_start;
-
- /* Perform the comparison using the raw key content.
- * WARNING: This uses memcmp(), so we don't handle escaped chars
- * within the key matching against unescaped chars in 'field'. */
- int match = klen == flen && !memcmp(key_start, field, flen);
-
- // Update the main pointer 'p' to be after the key string.
- p = key_end_p;
-
- // Now we expect to find a ":" followed by a value.
- jsonSkipWhiteSpaces(&p,end);
- if (p>=end || *p!=':') return NULL; // Expect ':' after key
- p++; // Skip ':'.
-
- // Seek value.
- jsonSkipWhiteSpaces(&p,end);
- if (p>=end) return NULL; // Expect value after ':'
-
- if (match) {
- // Found the matching key, p now points to the start of the value.
- return p;
- } else {
- // Key didn't match, skip the corresponding value.
- if (!jsonSkipValue(&p,end)) return NULL; // Syntax error.
- }
-
-
- // Look for comma or a closing brace.
- jsonSkipWhiteSpaces(&p,end);
- if (p>=end) return NULL; // Reached end after value.
-
- if (*p == ',') {
- p++; // Skip comma, continue loop to find next key.
- continue;
- } else if (*p == '}') {
- return NULL; // Reached end of object, field not found.
- }
- return NULL; // Malformed JSON (unexpected char after value).
- }
-}
-
-/* This is the only real API that this file conceptually exports (it is
- * inlined, actually). */
-exprtoken *jsonExtractField(const char *json, size_t json_len,
- const char *field, size_t field_len)
-{
- const char *end = json + json_len;
- const char *valptr = jsonSeekField(json,end,field,field_len);
- if (!valptr) return NULL;
-
- /* Key found, valptr points to the start of the value.
- * Convert it into an expression token object. */
- return jsonParseValueToken(&valptr,end);
-}