summaryrefslogtreecommitdiff
path: root/examples/redis-unstable/modules/vector-sets/fastjson.c
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:52:54 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-01-21 22:52:54 +0100
commitdcacc00e3750300617ba6e16eb346713f91a783a (patch)
tree38e2d4fb5ed9d119711d4295c6eda4b014af73fd /examples/redis-unstable/modules/vector-sets/fastjson.c
parent58dac10aeb8f5a041c46bddbeaf4c7966a99b998 (diff)
downloadcrep-dcacc00e3750300617ba6e16eb346713f91a783a.tar.gz
Remove testing data
Diffstat (limited to 'examples/redis-unstable/modules/vector-sets/fastjson.c')
-rw-r--r--examples/redis-unstable/modules/vector-sets/fastjson.c441
1 files changed, 0 insertions, 441 deletions
diff --git a/examples/redis-unstable/modules/vector-sets/fastjson.c b/examples/redis-unstable/modules/vector-sets/fastjson.c
deleted file mode 100644
index 78926e2..0000000
--- a/examples/redis-unstable/modules/vector-sets/fastjson.c
+++ /dev/null
@@ -1,441 +0,0 @@
1/* Ultra‑lightweight top‑level JSON field extractor.
2 * Return the element directly as an expr.c token.
3 * This code is directly included inside expr.c.
4 *
5 * Copyright (c) 2025-Present, Redis Ltd.
6 * All rights reserved.
7 *
8 * Licensed under your choice of the Redis Source Available License 2.0
9 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
10 *
11 * Originally authored by: Salvatore Sanfilippo.
12 *
13 * ------------------------------------------------------------------
14 *
15 * DESIGN GOALS:
16 *
17 * 1. Zero heap allocations while seeking the requested key.
18 * 2. A single parse (and therefore a single allocation, if needed)
19 * when the key finally matches.
20 * 3. Same subset‑of‑JSON coverage needed by expr.c:
21 * - Strings (escapes: \" \\ \n \r \t).
22 * - Numbers (double).
23 * - Booleans.
24 * - Null.
25 * - Flat arrays of the above primitives.
26 *
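 * Any other value (nested object, etc.) returns NULL. Note that
 * unsupported string escapes (\uXXXX, \b, \f) do not cause a failure:
 * the character following the backslash is copied verbatim.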
28 * Should be very easy to extend it in case in the future we want
29 * more for the FILTER option of VSIM.
30 * 4. No global state, so this file can be #included directly in expr.c.
31 *
32 * The only API expr.c uses directly is:
33 *
34 * exprtoken *jsonExtractField(const char *json, size_t json_len,
35 * const char *field, size_t field_len);
36 * ------------------------------------------------------------------ */
37
38#include <ctype.h>
39#include <string.h>
40
41// Forward declarations.
42static int jsonSkipValue(const char **p, const char *end);
43static exprtoken *jsonParseValueToken(const char **p, const char *end);
44
/* Return non-zero if 'c' belongs to the character set that may appear in a
 * JSON number literal: decimal digits, sign, decimal point and the exponent
 * marker ('e' / 'E').
 *
 * Callers pass plain 'char' values, which may be negative when 'char' is
 * signed. Handing a negative value other than EOF to isdigit() is undefined
 * behavior, so the argument is narrowed to unsigned char before the call. */
static int jsonIsNumberChar(int c) {
    return isdigit((unsigned char)c) ||
           c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E';
}
50
51/* ========================== Fast skipping of JSON =========================
52 * The helpers here are designed to skip values without performing any
53 * allocation. This way, for the use case of this JSON parser, we are able
54 * to easily (and with good speed) skip fields and values we are not
55 * interested in. Then, later in the code, when we find the field we want
 * to obtain, we finally call the functions that turn a given JSON value
 * associated to a field into one of our expression tokens.
58 * ========================================================================== */
59
/* Move *p forward to the first byte that is not whitespace (as defined by
 * isspace()), stopping at 'end' if the input runs out first. */
static inline void jsonSkipWhiteSpaces(const char **p, const char *end) {
    const char *cur = *p;

    while (cur < end) {
        if (!isspace((unsigned char)*cur)) break;
        cur++;
    }
    *p = cur;
}
64
/* Advance *p past a JSON string.
 *
 * On entry *p must point at the opening double quote. On success *p is left
 * on the byte right after the closing quote and 1 is returned. If *p does
 * not point at a quote, 0 is returned and *p is untouched. If the string is
 * not terminated before 'end', 0 is returned and *p is left at 'end'.
 *
 * A backslash makes the following byte part of the string regardless of its
 * value; the escape is not validated here. */
static int jsonSkipString(const char **p, const char *end) {
    if (*p >= end || **p != '"') return 0;
    (*p)++; /* Skip opening quote. */

    while (*p < end) {
        char c = **p;

        if (c == '\\') {
            /* An escape needs two bytes. If the backslash is the last byte
             * of the input, stop at 'end' instead of stepping beyond it:
             * a pointer past one-past-the-end is undefined behavior. */
            if (end - *p < 2) {
                *p = end;
                return 0;
            }
            (*p) += 2;
            continue;
        }

        (*p)++;
        if (c == '"') return 1; /* That was the closing quote. */
    }
    return 0; /* Unterminated. */
}
82
/* Skip a whole array or object by counting nesting depth.
 *
 * 'opener' and 'closer' select the bracket pair to track ("[" and "]", or
 * "{" and "}"). The byte at *p is consumed unconditionally as the opener,
 * so the caller must have verified it. Only the selected pair affects the
 * depth: brackets of the other kind are stepped over like any other byte.
 * Strings are skipped with jsonSkipString(), so brackets appearing inside
 * string values are never counted.
 *
 * Returns 1 with *p just past the matching closer, or 0 if the input ends
 * (or a string is unterminated) before the matching closer is found. */
static int jsonSkipBracketed(const char **p, const char *end,
                             char opener, char closer) {
    int depth;

    (*p)++; /* Step over the opener: we are now one level deep. */

    for (depth = 1; depth > 0 && *p < end; ) {
        char c = **p;

        if (c == '"') {
            /* jsonSkipString() moves *p past the closing quote by itself,
             * so go straight to the next iteration without advancing. */
            if (!jsonSkipString(p, end)) return 0;
            continue;
        }

        if (c == opener) {
            depth++;
        } else if (c == closer) {
            depth--;
        }

        /* Every non-string byte, bracket or not, is consumed here. */
        (*p)++;
    }

    /* Depth is back to zero only if the matching closer was seen. */
    return depth == 0;
}
125
/* Skip the JSON literal 'lit' (true, false, null, ...) if the input at *p
 * starts with it.
 *
 * Returns 1 and advances *p by strlen(lit) on a match. Returns 0 and leaves
 * *p untouched if fewer than strlen(lit) bytes remain before 'end' or the
 * bytes differ. The byte following the literal is not inspected. */
static int jsonSkipLiteral(const char **p, const char *end, const char *lit) {
    size_t l = strlen(lit);

    /* Compare against the number of bytes left rather than computing
     * '*p + l': that pointer would lie beyond 'end' exactly in the case we
     * want to reject, and forming it is undefined behavior. */
    if (*p > end || (size_t)(end - *p) < l) return 0;

    /* The input is length-delimited, not NUL terminated: use memcmp(). */
    if (memcmp(*p, lit, l) != 0) return 0;

    *p += l;
    return 1;
}
134
/* Skip a number by consuming every consecutive byte accepted by
 * jsonIsNumberChar(). The numeric format itself is not validated: when we
 * only need to step over a value, swallowing number-looking characters is
 * sufficient.
 *
 * Returns 1 if at least one byte was consumed, 0 otherwise (in which case
 * *p is unchanged). */
static int jsonSkipNumber(const char **p, const char *end) {
    const char *cur = *p;
    size_t consumed = 0;

    for (; cur < end && jsonIsNumberChar(*cur); cur++) consumed++;

    *p = cur;
    return consumed > 0;
}
145
/* Skip one JSON value of any kind, after skipping leading whitespace.
 * The first byte of the value selects the skipping routine; anything that
 * is not a string, object, array or one of the literals is handed to
 * jsonSkipNumber(). Returns 1 on success, 0 on error or empty input. */
static int jsonSkipValue(const char **p, const char *end) {
    jsonSkipWhiteSpaces(p, end);
    if (*p >= end) return 0;

    const char first = **p;

    if (first == '"') return jsonSkipString(p, end);
    if (first == '{') return jsonSkipBracketed(p, end, '{', '}');
    if (first == '[') return jsonSkipBracketed(p, end, '[', ']');
    if (first == 't') return jsonSkipLiteral(p, end, "true");
    if (first == 'f') return jsonSkipLiteral(p, end, "false");
    if (first == 'n') return jsonSkipLiteral(p, end, "null");
    return jsonSkipNumber(p, end);
}
160
161/* =========================== JSON to exprtoken ============================
162 * The functions below convert a given json value to the equivalent
163 * expression token structure.
164 * ========================================================================== */
165
166static exprtoken *jsonParseStringToken(const char **p, const char *end) {
167 if (*p >= end || **p != '"') return NULL;
168 const char *start = ++(*p);
169 int esc = 0; size_t len = 0; int has_esc = 0;
170 const char *q = *p;
171 while (q < end) {
172 if (esc) { esc = 0; q++; len++; has_esc = 1; continue; }
173 if (*q == '\\') { esc = 1; q++; continue; }
174 if (*q == '"') break;
175 q++; len++;
176 }
177 if (q >= end || *q != '"') return NULL; // Unterminated string
178 exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
179
180 if (!has_esc) {
181 // No escapes, we can point directly into the original JSON string.
182 t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL;
183 } else {
184 // Escapes present, need to allocate and copy/process escapes.
185 char *dst = RedisModule_Alloc(len + 1);
186
187 t->str.start = t->str.heapstr = dst; t->str.len = len;
188 const char *r = start; esc = 0;
189 while (r < q) {
190 if (esc) {
191 switch (*r) {
192 // Supported escapes from Goal 3.
193 case 'n': *dst='\n'; break;
194 case 'r': *dst='\r'; break;
195 case 't': *dst='\t'; break;
196 case '\\': *dst='\\'; break;
197 case '"': *dst='\"'; break;
198 // Escapes (like \uXXXX, \b, \f) are not supported for now,
199 // we just copy them verbatim.
200 default: *dst=*r; break;
201 }
202 dst++; esc = 0; r++; continue;
203 }
204 if (*r == '\\') { esc = 1; r++; continue; }
205 *dst++ = *r++;
206 }
207 *dst = '\0'; // Null-terminate the allocated string.
208 }
209 *p = q + 1; // Advance the main pointer past the closing quote.
210 return t;
211}
212
213static exprtoken *jsonParseNumberToken(const char **p, const char *end) {
214 // Use a buffer to extract the number literal for parsing with strtod().
215 char buf[256]; int idx = 0;
216 const char *start = *p; // For strtod partial failures check.
217
218 // Copy potential number characters to buffer.
219 while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) {
220 buf[idx++] = **p;
221 (*p)++;
222 }
223 buf[idx]='\0'; // Null-terminate buffer.
224
225 if (idx==0) return NULL; // No number characters found.
226
227 char *ep; // End pointer for strtod validation.
228 double v = strtod(buf, &ep);
229
230 /* Check if strtod() consumed the entire buffer content.
231 * If not, the number format was invalid. */
232 if (*ep!='\0') {
233 // strtod() failed; rewind p to the start and return NULL
234 *p = start;
235 return NULL;
236 }
237
238 // If strtod() succeeded, create and return the token..
239 exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
240 t->num = v;
241 return t;
242}
243
244static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) {
245 size_t l = strlen(lit);
246
247 // Ensure we don't read past 'end'.
248 if ((*p + l) > end) return NULL;
249
250 if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match.
251
252 // Check that the character *after* the literal is a valid JSON delimiter
253 // (whitespace, comma, closing bracket/brace, or end of input)
254 // This prevents matching "trueblabla" as "true".
255 if ((*p + l) < end) {
256 char next_char = *(*p + l);
257 if (!isspace((unsigned char)next_char) && next_char!=',' &&
258 next_char!=']' && next_char!='}') {
259 return NULL; // Invalid character following literal.
260 }
261 }
262
263 // Literal matched and is correctly terminated.
264 *p += l;
265 exprtoken *t = exprNewToken(type);
266 t->num = num;
267 return t;
268}
269
270static exprtoken *jsonParseArrayToken(const char **p, const char *end) {
271 if (*p >= end || **p != '[') return NULL;
272 (*p)++; // Skip '['.
273 jsonSkipWhiteSpaces(p,end);
274
275 exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
276 t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0;
277
278 // Handle empty array [].
279 if (*p < end && **p == ']') {
280 (*p)++; // Skip ']'.
281 return t;
282 }
283
284 // Parse array elements.
285 while (1) {
286 exprtoken *ele = jsonParseValueToken(p,end);
287 if (!ele) {
288 exprTokenRelease(t); // Clean up partially built array token.
289 return NULL;
290 }
291
292 // Grow allocated space for elements if needed.
293 if (t->tuple.len == alloc) {
294 size_t newsize = alloc ? alloc * 2 : 4;
295 // Check for potential overflow if newsize becomes huge.
296 if (newsize < alloc) {
297 exprTokenRelease(ele);
298 exprTokenRelease(t);
299 return NULL;
300 }
301 exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
302 sizeof(exprtoken*)*newsize);
303 t->tuple.ele = newele;
304 alloc = newsize;
305 }
306 t->tuple.ele[t->tuple.len++] = ele; // Add element.
307
308 jsonSkipWhiteSpaces(p,end);
309 if (*p>=end) {
310 // Unterminated array. Note that this check is crucial because
311 // previous value parsed may seek 'p' to 'end'.
312 exprTokenRelease(t);
313 return NULL;
314 }
315
316 // Check for comma (more elements) or closing bracket.
317 if (**p == ',') {
318 (*p)++; // Skip ','
319 jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element
320 continue; // Parse next element
321 } else if (**p == ']') {
322 (*p)++; // Skip ']'
323 return t; // End of array
324 } else {
325 // Unexpected character (not ',' or ']')
326 exprTokenRelease(t);
327 return NULL;
328 }
329 }
330}
331
332/* Turn a JSON value into an expr token. */
333static exprtoken *jsonParseValueToken(const char **p, const char *end) {
334 jsonSkipWhiteSpaces(p,end);
335 if (*p >= end) return NULL;
336
337 switch (**p) {
338 case '"': return jsonParseStringToken(p,end);
339 case '[': return jsonParseArrayToken(p,end);
340 case '{': return NULL; // No nested elements support for now.
341 case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1);
342 case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0);
343 case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0);
344 default:
345 // Check if it starts like a number.
346 if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') {
347 return jsonParseNumberToken(p,end);
348 }
349 // Anything else is an unsupported type or malformed JSON.
350 return NULL;
351 }
352}
353
354/* ============================== Fast key seeking ========================== */
355
/* Locate the value associated with a *top level* field of a JSON object.
 *
 * Returns a pointer to the first byte of the value, or NULL if the field
 * is not present or the JSON is malformed. No allocation is performed:
 * values of non matching keys are stepped over with jsonSkipValue().
 *
 * Keys are compared byte by byte with memcmp() against 'field', in their
 * raw (still escaped) form: a key containing escape sequences will not
 * match its unescaped spelling. */
static const char *jsonSeekField(const char *json, const char *end,
                                 const char *field, size_t flen) {
    const char *cur = json;

    /* The document must be an object. */
    jsonSkipWhiteSpaces(&cur,end);
    if (cur >= end || *cur != '{') return NULL;
    cur++; /* Skip '{'. */

    for (;;) {
        /* Here we expect a key, which must be a string. Anything else,
         * including the '}' closing the object, means the field we are
         * looking for is not there. */
        jsonSkipWhiteSpaces(&cur,end);
        if (cur >= end) return NULL;
        if (*cur != '"') return NULL;

        /* Let jsonSkipString() find where the key ends: after the call
         * 'cur' is one byte past the closing quote. */
        const char *key = cur + 1;
        if (!jsonSkipString(&cur,end)) return NULL; /* Unterminated key. */
        size_t klen = (cur - 1) - key;
        int found = (klen == flen) && memcmp(key, field, flen) == 0;

        /* A ':' must separate the key from its value. */
        jsonSkipWhiteSpaces(&cur,end);
        if (cur >= end || *cur != ':') return NULL;
        cur++; /* Skip ':'. */

        /* There must be a value after the ':'. */
        jsonSkipWhiteSpaces(&cur,end);
        if (cur >= end) return NULL;

        /* On a match, 'cur' is exactly the start of the wanted value. */
        if (found) return cur;

        /* Otherwise step over the value of this uninteresting key. */
        if (!jsonSkipValue(&cur,end)) return NULL; /* Syntax error. */

        /* Only a ',' lets us continue with the next key. A '}' means the
         * object ended without a match, anything else is malformed JSON. */
        jsonSkipWhiteSpaces(&cur,end);
        if (cur >= end || *cur != ',') return NULL;
        cur++; /* Skip ','. */
    }
}
428
429/* This is the only real API that this file conceptually exports (it is
430 * inlined, actually). */
431exprtoken *jsonExtractField(const char *json, size_t json_len,
432 const char *field, size_t field_len)
433{
434 const char *end = json + json_len;
435 const char *valptr = jsonSeekField(json,end,field,field_len);
436 if (!valptr) return NULL;
437
438 /* Key found, valptr points to the start of the value.
439 * Convert it into an expression token object. */
440 return jsonParseValueToken(&valptr,end);
441}