summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/fastjson.c
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:40:55 +0100
commit5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch)
tree1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/fastjson.c
parentc7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff)
downloadcrep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz
Add Redis source code for testing
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/fastjson.c')
-rw-r--r--examples/redis-unstable/modules/vector-sets/fastjson.c441
1 files changed, 441 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c
new file mode 100644
index 0000000..78926e2
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/fastjson.c
@@ -0,0 +1,441 @@
+/* Ultra‑lightweight top‑level JSON field extractor.
+ * Return the element directly as an expr.c token.
+ * This code is directly included inside expr.c.
+ *
+ * Copyright (c) 2025-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2) or the Server Side Public License v1 (SSPLv1).
+ *
+ * Originally authored by: Salvatore Sanfilippo.
+ *
+ * ------------------------------------------------------------------
+ *
+ * DESIGN GOALS:
+ *
+ * 1. Zero heap allocations while seeking the requested key.
+ * 2. A single parse (and therefore a single allocation, if needed)
+ * when the key finally matches.
+ * 3. Same subset‑of‑JSON coverage needed by expr.c:
+ * - Strings (escapes: \" \\ \n \r \t).
+ * - Numbers (double).
+ * - Booleans.
+ * - Null.
+ * - Flat arrays of the above primitives.
+ *
+ * Any other value (nested object, unicode escape, etc.) returns NULL.
+ * Should be very easy to extend it in case in the future we want
+ * more for the FILTER option of VSIM.
+ * 4. No global state, so this file can be #included directly in expr.c.
+ *
+ * The only API expr.c uses directly is:
+ *
+ * exprtoken *jsonExtractField(const char *json, size_t json_len,
+ * const char *field, size_t field_len);
+ * ------------------------------------------------------------------ */
+
+#include <ctype.h>
+#include <string.h>
+
+// Forward declarations.
+static int jsonSkipValue(const char **p, const char *end);
+static exprtoken *jsonParseValueToken(const char **p, const char *end);
+
+/* Similar to ctype.h isdigit() but covers the whole JSON number charset,
+ * including exp form. */
+static int jsonIsNumberChar(int c) {
+ return isdigit(c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E';
+}
+
+/* ========================== Fast skipping of JSON =========================
+ * The helpers here are designed to skip values without performing any
+ * allocation. This way, for the use case of this JSON parser, we are able
+ * to easily (and with good speed) skip fields and values we are not
+ * interested in. Then, later in the code, when we find the field we want
+ * to obtain, we finally call the functions that turn a given JSON value
+ * associated to a field into our of our expressions token.
+ * ========================================================================== */
+
+/* Advance *p consuming all the spaces. */
+static inline void jsonSkipWhiteSpaces(const char **p, const char *end) {
+ while (*p < end && isspace((unsigned char)**p)) (*p)++;
+}
+
+/* Advance *p past a JSON string. Returns 1 on success, 0 on error. */
+static int jsonSkipString(const char **p, const char *end) {
+ if (*p >= end || **p != '"') return 0;
+ (*p)++; /* Skip opening quote. */
+ while (*p < end) {
+ if (**p == '\\') {
+ (*p) += 2;
+ continue;
+ }
+ if (**p == '"') {
+ (*p)++; /* Skip closing quote. */
+ return 1;
+ }
+ (*p)++;
+ }
+ return 0; /* unterminated */
+}
+
+/* Skip an array or object generically using depth counter.
+ * Opener and closer tells the function how the aggregated
+ * data type starts/stops, basically [] or {}. */
+static int jsonSkipBracketed(const char **p, const char *end,
+ char opener, char closer) {
+ int depth = 1;
+ (*p)++; /* Skip opener. */
+
+ /* Loop until we reach the end of the input or find the matching
+ * closer (depth becomes 0). */
+ while (*p < end && depth > 0) {
+ char c = **p;
+
+ if (c == '"') {
+ // Found a string, delegate skipping to jsonSkipString().
+ if (!jsonSkipString(p, end)) {
+ return 0; // String skipping failed (e.g., unterminated)
+ }
+ /* jsonSkipString() advances *p past the closing quote.
+ * Continue the loop to process the character *after* the string. */
+ continue;
+ }
+
+ /* If it's not a string, check if it affects the depth for the
+ * specific brackets we are currently tracking. */
+ if (c == opener) {
+ depth++;
+ } else if (c == closer) {
+ depth--;
+ }
+
+ /* Always advance the pointer for any non-string character.
+ * This handles commas, colons, whitespace, numbers, literals,
+ * and even nested brackets of a *different* type than the
+ * one we are currently skipping (e.g. skipping a { inside []). */
+ (*p)++;
+ }
+
+ /* Return 1 (true) if we successfully found the matching closer,
+ * otherwise there is a parse error and we return 0. */
+ return depth == 0;
+}
+
+/* Skip a single JSON literal (true, null, ...) starting at *p.
+ * Returns 1 on success, 0 on failure. */
+static int jsonSkipLiteral(const char **p, const char *end, const char *lit) {
+ size_t l = strlen(lit);
+ if (*p + l > end) return 0;
+ if (strncmp(*p, lit, l) == 0) { *p += l; return 1; }
+ return 0;
+}
+
+/* Skip number, don't check that number format is correct, just consume
+ * number-alike characters.
+ *
+ * Note: More robust number skipping might check validity,
+ * but for skipping, just consuming plausible characters is enough. */
+static int jsonSkipNumber(const char **p, const char *end) {
+ const char *num_start = *p;
+ while (*p < end && jsonIsNumberChar(**p)) (*p)++;
+ return *p > num_start; // Any progress made? Otherwise no number found.
+}
+
+/* Skip any JSON value. 1 = success, 0 = error. */
+static int jsonSkipValue(const char **p, const char *end) {
+ jsonSkipWhiteSpaces(p, end);
+ if (*p >= end) return 0;
+ switch (**p) {
+ case '"': return jsonSkipString(p, end);
+ case '{': return jsonSkipBracketed(p, end, '{', '}');
+ case '[': return jsonSkipBracketed(p, end, '[', ']');
+ case 't': return jsonSkipLiteral(p, end, "true");
+ case 'f': return jsonSkipLiteral(p, end, "false");
+ case 'n': return jsonSkipLiteral(p, end, "null");
+ default: return jsonSkipNumber(p, end);
+ }
+}
+
+/* =========================== JSON to exprtoken ============================
+ * The functions below convert a given json value to the equivalent
+ * expression token structure.
+ * ========================================================================== */
+
+static exprtoken *jsonParseStringToken(const char **p, const char *end) {
+ if (*p >= end || **p != '"') return NULL;
+ const char *start = ++(*p);
+ int esc = 0; size_t len = 0; int has_esc = 0;
+ const char *q = *p;
+ while (q < end) {
+ if (esc) { esc = 0; q++; len++; has_esc = 1; continue; }
+ if (*q == '\\') { esc = 1; q++; continue; }
+ if (*q == '"') break;
+ q++; len++;
+ }
+ if (q >= end || *q != '"') return NULL; // Unterminated string
+ exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
+
+ if (!has_esc) {
+ // No escapes, we can point directly into the original JSON string.
+ t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL;
+ } else {
+ // Escapes present, need to allocate and copy/process escapes.
+ char *dst = RedisModule_Alloc(len + 1);
+
+ t->str.start = t->str.heapstr = dst; t->str.len = len;
+ const char *r = start; esc = 0;
+ while (r < q) {
+ if (esc) {
+ switch (*r) {
+ // Supported escapes from Goal 3.
+ case 'n': *dst='\n'; break;
+ case 'r': *dst='\r'; break;
+ case 't': *dst='\t'; break;
+ case '\\': *dst='\\'; break;
+ case '"': *dst='\"'; break;
+ // Escapes (like \uXXXX, \b, \f) are not supported for now,
+ // we just copy them verbatim.
+ default: *dst=*r; break;
+ }
+ dst++; esc = 0; r++; continue;
+ }
+ if (*r == '\\') { esc = 1; r++; continue; }
+ *dst++ = *r++;
+ }
+ *dst = '\0'; // Null-terminate the allocated string.
+ }
+ *p = q + 1; // Advance the main pointer past the closing quote.
+ return t;
+}
+
+static exprtoken *jsonParseNumberToken(const char **p, const char *end) {
+ // Use a buffer to extract the number literal for parsing with strtod().
+ char buf[256]; int idx = 0;
+ const char *start = *p; // For strtod partial failures check.
+
+ // Copy potential number characters to buffer.
+ while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) {
+ buf[idx++] = **p;
+ (*p)++;
+ }
+ buf[idx]='\0'; // Null-terminate buffer.
+
+ if (idx==0) return NULL; // No number characters found.
+
+ char *ep; // End pointer for strtod validation.
+ double v = strtod(buf, &ep);
+
+ /* Check if strtod() consumed the entire buffer content.
+ * If not, the number format was invalid. */
+ if (*ep!='\0') {
+ // strtod() failed; rewind p to the start and return NULL
+ *p = start;
+ return NULL;
+ }
+
+ // If strtod() succeeded, create and return the token..
+ exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
+ t->num = v;
+ return t;
+}
+
+static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) {
+ size_t l = strlen(lit);
+
+ // Ensure we don't read past 'end'.
+ if ((*p + l) > end) return NULL;
+
+ if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match.
+
+ // Check that the character *after* the literal is a valid JSON delimiter
+ // (whitespace, comma, closing bracket/brace, or end of input)
+ // This prevents matching "trueblabla" as "true".
+ if ((*p + l) < end) {
+ char next_char = *(*p + l);
+ if (!isspace((unsigned char)next_char) && next_char!=',' &&
+ next_char!=']' && next_char!='}') {
+ return NULL; // Invalid character following literal.
+ }
+ }
+
+ // Literal matched and is correctly terminated.
+ *p += l;
+ exprtoken *t = exprNewToken(type);
+ t->num = num;
+ return t;
+}
+
+static exprtoken *jsonParseArrayToken(const char **p, const char *end) {
+ if (*p >= end || **p != '[') return NULL;
+ (*p)++; // Skip '['.
+ jsonSkipWhiteSpaces(p,end);
+
+ exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
+ t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0;
+
+ // Handle empty array [].
+ if (*p < end && **p == ']') {
+ (*p)++; // Skip ']'.
+ return t;
+ }
+
+ // Parse array elements.
+ while (1) {
+ exprtoken *ele = jsonParseValueToken(p,end);
+ if (!ele) {
+ exprTokenRelease(t); // Clean up partially built array token.
+ return NULL;
+ }
+
+ // Grow allocated space for elements if needed.
+ if (t->tuple.len == alloc) {
+ size_t newsize = alloc ? alloc * 2 : 4;
+ // Check for potential overflow if newsize becomes huge.
+ if (newsize < alloc) {
+ exprTokenRelease(ele);
+ exprTokenRelease(t);
+ return NULL;
+ }
+ exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
+ sizeof(exprtoken*)*newsize);
+ t->tuple.ele = newele;
+ alloc = newsize;
+ }
+ t->tuple.ele[t->tuple.len++] = ele; // Add element.
+
+ jsonSkipWhiteSpaces(p,end);
+ if (*p>=end) {
+ // Unterminated array. Note that this check is crucial because
+ // previous value parsed may seek 'p' to 'end'.
+ exprTokenRelease(t);
+ return NULL;
+ }
+
+ // Check for comma (more elements) or closing bracket.
+ if (**p == ',') {
+ (*p)++; // Skip ','
+ jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element
+ continue; // Parse next element
+ } else if (**p == ']') {
+ (*p)++; // Skip ']'
+ return t; // End of array
+ } else {
+ // Unexpected character (not ',' or ']')
+ exprTokenRelease(t);
+ return NULL;
+ }
+ }
+}
+
+/* Turn a JSON value into an expr token. */
+static exprtoken *jsonParseValueToken(const char **p, const char *end) {
+ jsonSkipWhiteSpaces(p,end);
+ if (*p >= end) return NULL;
+
+ switch (**p) {
+ case '"': return jsonParseStringToken(p,end);
+ case '[': return jsonParseArrayToken(p,end);
+ case '{': return NULL; // No nested elements support for now.
+ case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1);
+ case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0);
+ case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0);
+ default:
+ // Check if it starts like a number.
+ if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') {
+ return jsonParseNumberToken(p,end);
+ }
+ // Anything else is an unsupported type or malformed JSON.
+ return NULL;
+ }
+}
+
+/* ============================== Fast key seeking ========================== */
+
+/* Finds the start of the value for a given field key within a JSON object.
+ * Returns pointer to the first char of the value, or NULL if not found/error.
+ * This function does not perform any allocation and is optimized to seek
+ * the specified *toplevel* filed as fast as possible. */
+static const char *jsonSeekField(const char *json, const char *end,
+ const char *field, size_t flen) {
+ const char *p = json;
+ jsonSkipWhiteSpaces(&p,end);
+ if (p >= end || *p != '{') return NULL; // Must start with '{'.
+ p++; // skip '{'.
+
+ while (1) {
+ jsonSkipWhiteSpaces(&p,end);
+ if (p >= end) return NULL; // Reached end within object.
+
+ if (*p == '}') return NULL; // End of object, field not found.
+
+ // Expecting a key (string).
+ if (*p != '"') return NULL; // Key must be a string.
+
+ // --- Key Matching using jsonSkipString ---
+ const char *key_start = p + 1; // Start of key content.
+ const char *key_end_p = p; // Will later contain the end.
+
+ // Use jsonSkipString() to find the end.
+ if (!jsonSkipString(&key_end_p, end)) {
+ // Unterminated / invalid key string.
+ return NULL;
+ }
+
+ // Calculate the length of the key's content.
+ size_t klen = (key_end_p - 1) - key_start;
+
+ /* Perform the comparison using the raw key content.
+ * WARNING: This uses memcmp(), so we don't handle escaped chars
+ * within the key matching against unescaped chars in 'field'. */
+ int match = klen == flen && !memcmp(key_start, field, flen);
+
+ // Update the main pointer 'p' to be after the key string.
+ p = key_end_p;
+
+ // Now we expect to find a ":" followed by a value.
+ jsonSkipWhiteSpaces(&p,end);
+ if (p>=end || *p!=':') return NULL; // Expect ':' after key
+ p++; // Skip ':'.
+
+ // Seek value.
+ jsonSkipWhiteSpaces(&p,end);
+ if (p>=end) return NULL; // Expect value after ':'
+
+ if (match) {
+ // Found the matching key, p now points to the start of the value.
+ return p;
+ } else {
+ // Key didn't match, skip the corresponding value.
+ if (!jsonSkipValue(&p,end)) return NULL; // Syntax error.
+ }
+
+
+ // Look for comma or a closing brace.
+ jsonSkipWhiteSpaces(&p,end);
+ if (p>=end) return NULL; // Reached end after value.
+
+ if (*p == ',') {
+ p++; // Skip comma, continue loop to find next key.
+ continue;
+ } else if (*p == '}') {
+ return NULL; // Reached end of object, field not found.
+ }
+ return NULL; // Malformed JSON (unexpected char after value).
+ }
+}
+
+/* This is the only real API that this file conceptually exports (it is
+ * inlined, actually). */
+exprtoken *jsonExtractField(const char *json, size_t json_len,
+ const char *field, size_t field_len)
+{
+ const char *end = json + json_len;
+ const char *valptr = jsonSeekField(json,end,field,field_len);
+ if (!valptr) return NULL;
+
+ /* Key found, valptr points to the start of the value.
+ * Convert it into an expression token object. */
+ return jsonParseValueToken(&valptr,end);
+}