1#include "common.h"
  2#include "json-partial.h"
  3#include <exception>
  4#include <iostream>
  5#include <stdexcept>
  6
  7template <class T> static void assert_equals(const T & expected, const T & actual) {
  8  if (expected != actual) {
  9      std::cerr << "Expected: " << expected << std::endl;
 10      std::cerr << "Actual: " << actual << std::endl;
 11      std::cerr << std::flush;
 12      throw std::runtime_error("Test failed");
 13  }
 14}
 15
 16static void test_json_healing() {
 17  auto parse = [](const std::string & str) {
 18      std::cerr << "# Parsing: " << str << '\n';
 19      std::string::const_iterator it = str.begin();
 20      const auto end = str.end();
 21      common_json out;
 22      std::string healing_marker = "$llama.cpp.json$";
 23      if (common_json_parse(it, end, healing_marker, out)) {
 24          auto dump = out.json.dump();
 25          std::cerr << "Parsed: " << dump << '\n';
 26          std::cerr << "Magic: " << out.healing_marker.json_dump_marker << '\n';
 27          std::string result;
 28          if (!out.healing_marker.json_dump_marker.empty()) {
 29              auto i = dump.find(out.healing_marker.json_dump_marker);
 30              if (i == std::string::npos) {
 31                  throw std::runtime_error("Failed to find magic in dump " + dump + " (magic: " + out.healing_marker.json_dump_marker + ")");
 32              }
 33              result = dump.substr(0, i);
 34          } else {
 35            result = dump;
 36          }
 37          std::cerr << "Result: " << result << '\n';
 38          if (string_starts_with(str, result)) {
 39            std::cerr << "Failure!\n";
 40          }
 41        //   return dump;
 42      } else {
 43        throw std::runtime_error("Failed to parse: " + str);
 44      }
 45
 46  };
 47  auto parse_all = [&](const std::string & str) {
 48      for (size_t i = 1; i < str.size(); i++) {
 49          parse(str.substr(0, i));
 50      }
 51  };
 52  parse_all("{\"a\": \"b\"}");
 53  parse_all("{\"hey\": 1, \"ho\\\"ha\": [1]}");
 54
 55  parse_all("[{\"a\": \"b\"}]");
 56
 57  auto test = [&](const std::vector<std::string> & inputs, const std::string & expected, const std::string & expected_marker) {
 58      for (const auto & input : inputs) {
 59        common_json out;
 60        assert_equals(true, common_json_parse(input, "$foo", out));
 61        assert_equals<std::string>(expected, out.json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true));
 62        assert_equals<std::string>(expected_marker, out.healing_marker.json_dump_marker);
 63      }
 64  };
 65  // No healing needed:
 66  test(
 67    {
 68      R"([{"a":"b"}, "y"])",
 69    },
 70    R"([{"a":"b"},"y"])",
 71    ""
 72  );
 73  // Partial literals can't be healed:
 74  test(
 75    {
 76      R"([1)",
 77      R"([tru)",
 78      R"([n)",
 79      R"([nul)",
 80      R"([23.2)",
 81    },
 82    R"(["$foo"])",
 83    R"("$foo)"
 84  );
 85  test(
 86    {
 87      R"({"a": 1)",
 88      R"({"a": tru)",
 89      R"({"a": n)",
 90      R"({"a": nul)",
 91      R"({"a": 23.2)",
 92    },
 93    R"({"a":"$foo"})",
 94    R"("$foo)"
 95  );
 96  test(
 97    {
 98      R"({)",
 99    },
100    R"({"$foo":1})",
101    R"("$foo)"
102  );
103  test(
104    {
105      R"([)",
106    },
107    R"(["$foo"])",
108    R"("$foo)"
109  );
110  // Healing right after a full literal
111  test(
112    {
113      R"(1 )",
114    },
115    R"(1)",
116    ""
117  );
118  test(
119    {
120      R"(true)",
121      R"(true )",
122    },
123    R"(true)",
124    ""
125  );
126  test(
127    {
128      R"(null)",
129      R"(null )",
130    },
131    R"(null)",
132    ""
133  );
134  test(
135    {
136      R"([1 )",
137    },
138    R"([1,"$foo"])",
139    R"(,"$foo)"
140  );
141  test(
142    {
143      R"([{})",
144      R"([{} )",
145    },
146    R"([{},"$foo"])",
147    R"(,"$foo)"
148  );
149  test(
150    {
151      R"([true)",
152    },
153    // TODO: detect the true/false/null literal was complete
154    R"(["$foo"])",
155    R"("$foo)"
156  );
157  test(
158    {
159      R"([true )",
160    },
161    R"([true,"$foo"])",
162    R"(,"$foo)"
163  );
164  test(
165    {
166      R"([true,)",
167    },
168    R"([true,"$foo"])",
169    R"("$foo)"
170  );
171  // Test nesting
172  test(
173    {
174      R"([{"a": [{"b": [{)",
175    },
176    R"([{"a":[{"b":[{"$foo":1}]}]}])",
177    R"("$foo)"
178  );
179  test(
180    {
181      R"([{"a": [{"b": [)",
182    },
183    R"([{"a":[{"b":["$foo"]}]}])",
184    R"("$foo)"
185  );
186
187  test(
188    {
189      R"([{"a": "b"})",
190      R"([{"a": "b"} )",
191    },
192    R"([{"a":"b"},"$foo"])",
193    R"(,"$foo)"
194  );
195  test(
196    {
197      R"([{"a": "b"},)",
198      R"([{"a": "b"}, )",
199    },
200    R"([{"a":"b"},"$foo"])",
201    R"("$foo)"
202  );
203  test(
204    {
205      R"({ "code)",
206    },
207    R"({"code$foo":1})",
208    R"($foo)"
209  );
210  test(
211    {
212      R"({ "code\)",
213    },
214    R"({"code\\$foo":1})",
215    R"(\$foo)"
216  );
217  test(
218    {
219      R"({ "code")",
220    },
221    R"({"code":"$foo"})",
222    R"(:"$foo)"
223  );
224  test(
225    {
226      R"({ "key")",
227    },
228    R"({"key":"$foo"})",
229    R"(:"$foo)"
230  );
231  // Test unicode escape sequences
232  test(
233    {
234      R"({"a":"\u)",
235    },
236    R"({"a":"\u0000$foo"})",
237    R"(0000$foo)"
238  );
239  test(
240    {
241      R"({"a":"\u00)",
242    },
243    R"({"a":"\u0000$foo"})",
244    R"(00$foo)"
245  );
246  test(
247    {
248      R"({"a":"\ud300)",
249    },
250    R"({"a":"\ud300$foo"})",
251    R"($foo)"
252  );
253  test(
254    {
255      R"({"a":"\ud800)",
256    },
257    R"({"a":"\ud800\udc00$foo"})",
258    R"(\udc00$foo)"
259  );
260  test(
261    {
262      R"({"a":"\ud800\)",
263    },
264    R"({"a":"\ud800\udc00$foo"})",
265    R"(udc00$foo)"
266  );
267  test(
268    {
269      R"({"a":"\ud800\u)",
270    },
271    R"({"a":"\ud800\udc00$foo"})",
272    R"(dc00$foo)"
273  );
274  test(
275    {
276      R"({"a":"\ud800\udc00)",
277    },
278    R"({"a":"\ud800\udc00$foo"})",
279    R"($foo)"
280  );
281}
282
283int main() {
284    test_json_healing();
285    std::cerr << "All tests passed.\n";
286    return 0;
287}