Add Redis source code for testing

author: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-21 22:40:55 +0100
committer: Mitja Felicijan <mitja.felicijan@gmail.com> 2026-01-21 22:40:55 +0100
commit: 5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch)
tree: 1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/fastjson.c
parent: c7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff)
download: crep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz
1 files changed, 441 insertions, 0 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c
new file mode 100644
index 0000000..78926e2
--- /dev/null
+++ b/examples/redis-unstable/modules/vector-sets/fastjson.c
@@ -0,0 +1,441 @@
+/* Ultra‑lightweight top‑level JSON field extractor.
+ * Return the element directly as an expr.c token.
+ * This code is directly included inside expr.c.
+ *
+ * Copyright (c) 2025-Present, Redis Ltd.
+ * All rights reserved.
+ *
+ * Licensed under your choice of the Redis Source Available License 2.0
+ * (RSALv2) or the Server Side Public License v1 (SSPLv1).
+ *
+ * Originally authored by: Salvatore Sanfilippo.
+ *
+ * ------------------------------------------------------------------
+ *
+ * DESIGN GOALS:
+ *
+ * 1. Zero heap allocations while seeking the requested key.
+ * 2. A single parse (and therefore a single allocation, if needed)
+ *    when the key finally matches.
+ * 3. Same subset‑of‑JSON coverage needed by expr.c:
+ * - Strings (escapes: \" \\ \n \r \t).
+ * - Numbers (double).
+ * - Booleans.
+ * - Null.
+ * - Flat arrays of the above primitives.
+ *
+ * Any other value (e.g. a nested object) returns NULL; unknown escapes are copied verbatim.
+ * Should be very easy to extend it in case in the future we want
+ * more for the FILTER option of VSIM.
+ * 4. No global state, so this file can be #included directly in expr.c.
+ *
+ * The only API expr.c uses directly is:
+ *
+ * exprtoken *jsonExtractField(const char *json, size_t json_len,
+ * const char *field, size_t field_len);
+ * ------------------------------------------------------------------ */
+
+#include <ctype.h>
+#include <string.h>
+
+// Forward declarations (both functions are defined later in this file).
+static int jsonSkipValue(const char **p, const char *end);
+static exprtoken *jsonParseValueToken(const char **p, const char *end); // Called by jsonParseArrayToken().
+
+/* Like isdigit() but accepts the whole JSON number charset (sign, dot, exp).
+ * 'c' is narrowed to unsigned char, so raw (possibly negative) chars are safe. */
+static int jsonIsNumberChar(int c) {
+    return isdigit((unsigned char)c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E';
+}
+
+/* ========================== Fast skipping of JSON =========================
+ * The helpers here are designed to skip values without performing any
+ * allocation. This way, for the use case of this JSON parser, we are able
+ * to easily (and with good speed) skip fields and values we are not
+ * interested in. Then, later in the code, when we find the field we want
+ * to obtain, we finally call the functions that turn a given JSON value
+ * associated to a field into one of our expression tokens.
+ * ========================================================================== */
+
+/* Move *p to the first non-whitespace byte at or after it (or to 'end'). */
+static inline void jsonSkipWhiteSpaces(const char **p, const char *end) {
+    for (; *p < end; (*p)++) if (!isspace((unsigned char)**p)) return;
+}
+
+/* Advance *p past a JSON string. Returns 1 on success, 0 on error
+ * (no opening quote, unterminated string, or a dangling backslash).
+ * On error *p is left somewhere inside the input, never beyond 'end'. */
+static int jsonSkipString(const char **p, const char *end) {
+    if (*p >= end || **p != '"') return 0;
+    (*p)++; /* Skip opening quote. */
+    while (*p < end) {
+        char c = *(*p)++; /* Consume one byte. */
+        if (c == '"') return 1; /* It was the closing quote. */
+        if (c != '\\') continue;
+        /* Escape sequence: consume the escaped byte as well, unless the
+         * backslash is the last byte of the input. */
+        if (*p >= end) return 0;
+        (*p)++;
+    }
+    return 0; /* unterminated */
+}
+
+/* Skip an array or object. On entry *p is at 'opener'; returns 1 with *p just
+ * past the matching 'closer', or 0 if 'end' is reached first or a string is
+ * malformed. Only the given opener/closer pair ([] or {}) is depth-counted. */
+static int jsonSkipBracketed(const char **p, const char *end,
+                             char opener, char closer) {
+    int depth = 1; /* The opener at *p is already accounted for. */
+    (*p)++; /* Skip opener. */
+
+    /* Loop until we reach the end of the input or find the matching
+     * closer (depth becomes 0). */
+    while (*p < end && depth > 0) {
+        char c = **p;
+
+        if (c == '"') {
+            // Strings are skipped whole, so brackets inside them are ignored.
+            if (!jsonSkipString(p, end)) {
+                return 0; // String skipping failed (e.g., unterminated)
+            }
+            /* jsonSkipString() advances *p past the closing quote.
+             * Continue the loop to process the character *after* the string. */
+            continue;
+        }
+
+        /* If it's not a string, check if it affects the depth for the
+         * specific brackets we are currently tracking. */
+        if (c == opener) {
+            depth++;
+        } else if (c == closer) {
+            depth--;
+        }
+
+        /* Always advance the pointer for any non-string character.
+         * This handles commas, colons, whitespace, numbers, literals,
+         * and even nested brackets of a *different* type than the one we
+         * are skipping (e.g. a { inside []): their balance is not checked. */
+        (*p)++;
+    }
+
+    /* Return 1 (true) if we successfully found the matching closer,
+     * otherwise there is a parse error and we return 0. */
+    return depth == 0;
+}
+
+/* If the bytes at *p spell exactly 'lit' (e.g. "true", "null"), step *p
+ * over them and return 1; otherwise leave *p alone and return 0. */
+static int jsonSkipLiteral(const char **p, const char *end, const char *lit) {
+    const size_t litlen = strlen(lit);
+    if (*p + litlen > end || strncmp(*p, lit, litlen) != 0) return 0;
+    *p += litlen; /* Matched: consume the literal. */
+    return 1;
+}
+
+/* Consume the longest run of number-like bytes (see jsonIsNumberChar())
+ * starting at *p. The run is not validated as a real number: to skip a
+ * value it is enough to know where it stops.
+ * Returns 1 if at least one byte was consumed, 0 otherwise. */
+static int jsonSkipNumber(const char **p, const char *end) {
+    size_t n = 0; /* Number-like bytes seen so far. */
+    while (*p + n < end && jsonIsNumberChar((*p)[n])) n++;
+    *p += n;
+    return n > 0;
+}
+
+/* Skip one JSON value of any type, dispatching on its first
+ * non-whitespace byte. Returns 1 on success, 0 on error. */
+static int jsonSkipValue(const char **p, const char *end) {
+    jsonSkipWhiteSpaces(p, end);
+    if (*p >= end) return 0; /* Nothing left to skip. */
+    const char first = **p;
+    if (first == '"') return jsonSkipString(p, end);
+    if (first == '{') return jsonSkipBracketed(p, end, '{', '}');
+    if (first == '[') return jsonSkipBracketed(p, end, '[', ']');
+    if (first == 't') return jsonSkipLiteral(p, end, "true");
+    if (first == 'f') return jsonSkipLiteral(p, end, "false");
+    if (first == 'n') return jsonSkipLiteral(p, end, "null");
+    return jsonSkipNumber(p, end); /* Anything else must be a number. */
+}
+
+/* =========================== JSON to exprtoken ============================
+ * The functions below convert a given json value to the equivalent
+ * expression token structure.
+ * ========================================================================== */
+
+static exprtoken *jsonParseStringToken(const char **p, const char *end) { // String at *p -> EXPR_TOKEN_STR, or NULL.
+    if (*p >= end || **p != '"') return NULL;
+    const char *start = ++(*p); // Skip the opening quote: 'start' is the first content byte.
+    int esc = 0; size_t len = 0; int has_esc = 0; // 'len' counts output bytes (backslashes excluded).
+    const char *q = *p; // First pass: find the closing quote and measure the result.
+    while (q < end) {
+        if (esc) { esc = 0; q++; len++; has_esc = 1; continue; }
+        if (*q == '\\') { esc = 1; q++; continue; }
+        if (*q == '"') break;
+        q++; len++;
+    }
+    if (q >= end || *q != '"') return NULL; // Unterminated string
+    exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
+
+    if (!has_esc) {
+        // No escapes: point directly into the input JSON buffer, nothing is copied.
+        t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL;
+    } else {
+        // Escapes present, need to allocate and copy/process escapes.
+        char *dst = RedisModule_Alloc(len + 1); // +1 for the terminating NUL.
+
+        t->str.start = t->str.heapstr = dst; t->str.len = len;
+        const char *r = start; esc = 0;
+        while (r < q) {
+            if (esc) {
+                switch (*r) {
+                // Supported escapes from Goal 3.
+                case 'n': *dst='\n'; break;
+                case 'r': *dst='\r'; break;
+                case 't': *dst='\t'; break;
+                case '\\': *dst='\\'; break;
+                case '"': *dst='\"'; break;
+                // Other escapes (like \uXXXX, \b, \f) are not decoded: the byte
+                // after the backslash is copied as-is and the backslash dropped.
+                default: *dst=*r; break;
+                }
+                dst++; esc = 0; r++; continue;
+            }
+            if (*r == '\\') { esc = 1; r++; continue; }
+            *dst++ = *r++;
+        }
+        *dst = '\0'; // Null-terminate the allocated string.
+    }
+    *p = q + 1; // Advance the main pointer past the closing quote.
+    return t;
+}
+
+/* Parse the JSON number at *p into an EXPR_TOKEN_NUM token. Returns NULL,
+ * with *p left unchanged, if there is no number, if the literal does not
+ * fit the local buffer, or if strtod() does not accept all of it. */
+static exprtoken *jsonParseNumberToken(const char **p, const char *end) {
+    char buf[256]; size_t idx = 0; // NUL-terminated copy for strtod().
+    const char *start = *p;        // Where to rewind on failure.
+
+    // Copy the run of number-like bytes. The unsigned char cast keeps bytes
+    // >= 0x80 from reaching isdigit() as negative values.
+    while (*p < end && idx < sizeof(buf)-1 && jsonIsNumberChar((unsigned char)**p)) {
+        buf[idx++] = *(*p)++;
+    }
+    buf[idx] = '\0';
+
+    // A full buffer with number-like bytes still pending means the literal
+    // was cut short: reject it instead of returning the value of a prefix.
+    int truncated = *p < end && jsonIsNumberChar((unsigned char)**p);
+    if (idx == 0 || truncated) {
+        *p = start;
+        return NULL;
+    }
+    // strtod() must consume the whole buffer, or the format is invalid.
+    char *ep;
+    double v = strtod(buf, &ep);
+    if (*ep != '\0') { *p = start; return NULL; }
+
+    exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
+    t->num = v;
+    return t;
+}
+
+/* Match the literal 'lit' at *p and turn it into a token of the given
+ * 'type' whose numeric value is 'num'. Returns NULL, leaving *p where it
+ * was, when the text differs or is directly followed by a byte that is
+ * not a JSON delimiter. */
+static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) {
+    const size_t litlen = strlen(lit);
+    const char *after = *p + litlen; // First byte past the literal.
+
+    // The literal must fit in the remaining input and match byte by byte.
+    if (after > end || strncmp(*p, lit, litlen) != 0) return NULL;
+
+    // Unless the input ends right here, require whitespace, ',', ']' or '}'
+    // next, so that e.g. "trueblabla" is not accepted as "true".
+    if (after < end) {
+        const char c = *after;
+        int delim = isspace((unsigned char)c) || c == ',' || c == ']' || c == '}';
+        if (!delim) return NULL;
+    }
+
+    // Literal accepted: consume it and build the token.
+    *p = after;
+    exprtoken *tok = exprNewToken(type);
+    tok->num = num;
+    return tok;
+}
+
+static exprtoken *jsonParseArrayToken(const char **p, const char *end) { // Array at *p -> EXPR_TOKEN_TUPLE, or NULL.
+    if (*p >= end || **p != '[') return NULL;
+    (*p)++; // Skip '['.
+    jsonSkipWhiteSpaces(p,end);
+
+    exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
+    t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0; // alloc: slots allocated in tuple.ele.
+
+    // Handle empty array [].
+    if (*p < end && **p == ']') {
+        (*p)++; // Skip ']'.
+        return t;
+    }
+
+    // Parse array elements, one value per iteration.
+    while (1) {
+        exprtoken *ele = jsonParseValueToken(p,end);
+        if (!ele) {
+            exprTokenRelease(t); // Clean up partially built array token.
+            return NULL;
+        }
+
+        // Grow allocated space for elements if needed (4 slots, then doubling).
+        if (t->tuple.len == alloc) {
+            size_t newsize = alloc ? alloc * 2 : 4;
+            // Check for potential overflow if newsize becomes huge.
+            if (newsize < alloc) {
+                exprTokenRelease(ele);
+                exprTokenRelease(t);
+                return NULL;
+            }
+            exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
+                                           sizeof(exprtoken*)*newsize);
+            t->tuple.ele = newele; // NOTE(review): result is not NULL-checked; presumably the allocator aborts on OOM - confirm.
+            alloc = newsize;
+        }
+        t->tuple.ele[t->tuple.len++] = ele; // Add element.
+
+        jsonSkipWhiteSpaces(p,end);
+        if (*p>=end) {
+            // Unterminated array. Note that this check is crucial because
+            // previous value parsed may seek 'p' to 'end'.
+            exprTokenRelease(t);
+            return NULL;
+        }
+
+        // Check for comma (more elements) or closing bracket.
+        if (**p == ',') {
+            (*p)++; // Skip ','
+            jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element
+            continue; // Parse next element (so a trailing "," before "]" fails there)
+        } else if (**p == ']') {
+            (*p)++; // Skip ']'
+            return t; // End of array
+        } else {
+            // Unexpected character (not ',' or ']')
+            exprTokenRelease(t);
+            return NULL;
+        }
+    }
+}
+
+/* Turn the JSON value at *p into an expr token, choosing the parser from
+ * the value's first non-whitespace byte. Returns NULL on any error. */
+static exprtoken *jsonParseValueToken(const char **p, const char *end) {
+    jsonSkipWhiteSpaces(p,end);
+    if (*p >= end) return NULL; // Nothing to parse.
+
+    const char first = **p;
+    if (first == '"') return jsonParseStringToken(p,end);
+    if (first == '[') return jsonParseArrayToken(p,end);
+    if (first == '{') return NULL; // Nested objects are not supported.
+
+    // Booleans become the numbers 1 and 0, null has its own token type.
+    if (first == 't') return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1);
+    if (first == 'f') return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0);
+    if (first == 'n') return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0);
+
+    // A number may begin with a digit or a sign.
+    int numeric = isdigit((unsigned char)first) || first == '-' || first == '+';
+    // Anything else is an unsupported type or malformed JSON.
+    return numeric ? jsonParseNumberToken(p,end) : NULL;
+}
+
+/* ============================== Fast key seeking ========================== */
+
+/* Finds the start of the value for a given field key within a JSON object.
+ * Returns pointer to the first char of the value, or NULL if not found/error.
+ * This function does not perform any allocation and is optimized to seek
+ * the specified *toplevel* field as fast as possible. */
+static const char *jsonSeekField(const char *json, const char *end,
+                                 const char *field, size_t flen) {
+    const char *p = json;
+    jsonSkipWhiteSpaces(&p,end);
+    if (p >= end || *p != '{') return NULL; // Must start with '{'.
+    p++; // skip '{'.
+
+    while (1) {
+        jsonSkipWhiteSpaces(&p,end);
+        if (p >= end) return NULL; // Reached end within object.
+
+        if (*p == '}') return NULL; // End of object, field not found.
+
+        // Expecting a key (string).
+        if (*p != '"') return NULL; // Key must be a string.
+
+        // --- Key Matching using jsonSkipString ---
+        const char *key_start = p + 1; // Start of key content.
+        const char *key_end_p = p;     // Will later contain the end.
+
+        // Use jsonSkipString() to find the end.
+        if (!jsonSkipString(&key_end_p, end)) {
+            // Unterminated / invalid key string.
+            return NULL;
+        }
+
+        // Key length: key_end_p is one past the closing quote, hence the -1.
+        size_t klen = (key_end_p - 1) - key_start;
+
+        /* Perform the comparison using the raw key content.
+         * WARNING: This uses memcmp(), so we don't handle escaped chars
+         * within the key matching against unescaped chars in 'field'. */
+        int match = klen == flen && !memcmp(key_start, field, flen);
+
+        // Update the main pointer 'p' to be after the key string.
+        p = key_end_p;
+
+        // Now we expect to find a ":" followed by a value.
+        jsonSkipWhiteSpaces(&p,end);
+        if (p>=end || *p!=':') return NULL; // Expect ':' after key
+        p++; // Skip ':'.
+
+        // Seek value.
+        jsonSkipWhiteSpaces(&p,end);
+        if (p>=end) return NULL; // Expect value after ':'
+
+        if (match) {
+            // Found the matching key, p now points to the start of the value.
+            return p;
+        } else {
+            // Key didn't match, skip the corresponding value.
+            if (!jsonSkipValue(&p,end)) return NULL; // Syntax error.
+        }
+
+
+        // Look for comma or a closing brace.
+        jsonSkipWhiteSpaces(&p,end);
+        if (p>=end) return NULL; // Reached end after value.
+
+        if (*p == ',') {
+            p++; // Skip comma, continue loop to find next key.
+            continue;
+        } else if (*p == '}') {
+            return NULL; // Reached end of object, field not found.
+        }
+        return NULL; // Malformed JSON (unexpected char after value).
+    }
+}
+
+/* Public entry point: look up the top-level key 'field' (field_len bytes)
+ * in the JSON object 'json' (json_len bytes) and return its value as an
+ * expr token, or NULL if the key is missing or the value can't be parsed. */
+exprtoken *jsonExtractField(const char *json, size_t json_len,
+                            const char *field, size_t field_len)
+{
+    const char *limit = json + json_len; // One past the last input byte.
+
+    // Locate the value first (no allocation), then parse just that value.
+    const char *value = jsonSeekField(json,limit,field,field_len);
+    exprtoken *tok = value ? jsonParseValueToken(&value,limit) : NULL;
+    return tok;
+}
author	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-01-21 22:40:55 +0100
committer	Mitja Felicijan <mitja.felicijan@gmail.com>	2026-01-21 22:40:55 +0100
commit	5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda (patch)
tree	1acdfa5220cd13b7be43a2a01368e80d306473ca /examples/redis-unstable/modules/vector-sets/fastjson.c
parent	c7ab12bba64d9c20ccd79b132dac475f7bc3923e (diff)
download	crep-5d8dfe892a2ea89f706ee140c3bdcfd89fe03fda.tar.gz