1// TODO refactor
2
3#include "ggml.h"
4#include "ggml-alloc.h"
5#include "ggml-backend.h"
6#include "ggml-opt.h"
7
8#include <cmath>
9#include <cinttypes>
10#include <cstring>
11#include <random>
12#include <string>
13#include <thread>
14#include <vector>
15
16#define TEST_LOG(...) printf(__VA_ARGS__)
17
// Returns true iff a and b differ by less than atol.
// Returns false if any argument is NaN (all comparisons with NaN are false).
static bool almost_equal(const double a, const double b, const double atol) {
    // std::fabs: <cmath> only guarantees the overloads in namespace std.
    return std::fabs(a - b) < atol;
}
21
// Dimensions of the toy supervised datasets used by the tests below:
constexpr int64_t ne_datapoint = 2; // values per datapoint
constexpr int64_t ne_label = 1;     // values per label
constexpr int64_t ndata = 6;        // number of datapoints/labels per dataset
25
// Bundle of everything a single subtest needs. Created by helper_get_ctx_data()
// and released by helper_free_ctx_data().
// NOTE: field order matters, the struct is brace-initialized in helper_get_ctx_data().
struct helper_ctx_data {
    std::vector<ggml_opt_dataset_t> datasets_supervised;  // one dataset per shard size (index i has ndata_shard == i+1)
    std::vector<struct ggml_tensor *> data_batch;         // one data tensor per batch size (index i holds i+1 datapoints)
    std::vector<struct ggml_tensor *> labels_batch;       // one label tensor per batch size

    ggml_opt_dataset_t dataset_unsupervised;  // dataset created with ne_label == 0
    struct ggml_context * ctx_static;         // context holding the statically allocated tensors (metadata only, no_alloc)
    struct ggml_context * ctx_compute;        // context holding the compute-graph tensors
    struct ggml_opt_params opt_params;        // parameters opt_ctx was (or can later be) created with
    ggml_opt_context_t opt_ctx;               // nullptr if created with init_opt_ctx == false
    struct ggml_tensor * inputs;              // model input, nbatch_physical values
    struct ggml_tensor * weights;             // single trainable parameter
    struct ggml_tensor * outputs;             // model output: scale(inputs + weights, 1.0f)
    ggml_backend_buffer_t buf;                // backend buffer backing the tensors of ctx_static
    ggml_opt_result_t result;                 // result accumulator
    ggml_opt_result_t result2;                // second result accumulator (e.g. for a validation split)
};
43
44// These default values make it easier to check optimization results vs. expected values.
45static ggml_opt_optimizer_params helper_get_test_opt_pars(void * userdata) {
46 ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(userdata);
47
48 result.adamw.alpha = 1.0f;
49 result.adamw.beta1 = 0.0f;
50 result.adamw.beta2 = 0.0f;
51 result.adamw.eps = 0.0f;
52 result.adamw.wd = 0.0f;
53 result.sgd.wd = 0.0f;
54 result.sgd.alpha = 1.0f;
55
56 return result;
57}
58
// Set up all shared state for the subtests below: supervised datasets for every
// shard size, one unsupervised dataset, batch tensors for every batch size, and
// a minimal model outputs = scale(inputs + weights, 1.0f) with a single
// trainable weight. Everything returned is released via helper_free_ctx_data().
static helper_ctx_data helper_get_ctx_data(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched,
        ggml_backend_t backend,
        const bool init_opt_ctx = true,
        const bool optimizer_defaults = true,
        int64_t nbatch_logical = 1,
        int64_t nbatch_physical = 1,
        enum ggml_opt_loss_type loss_type = GGML_OPT_LOSS_TYPE_SUM) {
    // One supervised dataset per shard size in [1, ndata]:
    std::vector<ggml_opt_dataset_t> datasets(ndata);
    for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
            GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);

        float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
        float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));

        // Fill with values that encode the datapoint index so that the tests can
        // recover it later: data[idata][id] == 16*idata + id,
        // labels[idata][il] == 16*(16*idata + il).
        for (int64_t idata = 0; idata < ndata; ++idata) {
            for (int64_t id = 0; id < ne_datapoint; ++id) {
                data[ idata*ne_datapoint + id] = 16*idata + id;
            }
            for (int64_t il = 0; il < ne_label; ++il) {
                labels[idata*ne_label + il] = 16*(16*idata + il);
            }
        }

        datasets[ndata_shard-1] = dataset;
    }

    // Unsupervised dataset (no labels); data is simply 0, 1, ..., ndata-1:
    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, 1, /*ne_label =*/ 0, ndata, /*ndata_shard =*/ 1);

    float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));

    for (int64_t idata = 0; idata < ndata; ++idata) {
        data[idata] = idata;
    }

    // Both contexts use no_alloc: they hold tensor metadata only; the actual
    // data for ctx_static is allocated in the backend buffer below.
    struct ggml_context * ctx_static;
    struct ggml_context * ctx_compute;
    {
        struct ggml_init_params params = {
            /*.mem_size =*/ (2*ndata + 2)*ggml_tensor_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc =*/ true,
        };
        ctx_static = ggml_init(params);
    }
    {
        struct ggml_init_params params = {
            /*.mem_size =*/ GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc =*/ true,
        };
        ctx_compute = ggml_init(params);
    }

    // One data/labels tensor pair per batch size in [1, ndata]:
    std::vector<struct ggml_tensor *> data_batch(ndata);
    std::vector<struct ggml_tensor *> labels_batch(ndata);
    for (int64_t ndata_batch = 1; ndata_batch <= ndata; ++ndata_batch) {
        data_batch[ndata_batch-1] = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, ndata_batch*ne_datapoint);
        labels_batch[ndata_batch-1] = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, ndata_batch*ne_label);
    }

    // Minimal model: outputs = (inputs + weights) * 1.0f with one trainable weight.
    struct ggml_tensor * inputs = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, nbatch_physical);
    ggml_set_name(inputs, "inputs");

    struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(weights, "weights");
    ggml_set_param(weights);

    struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);

    // The scale by 1.0f is numerically a no-op; presumably it ensures the output
    // tensor lives in ctx_compute rather than aliasing the add — TODO confirm.
    struct ggml_tensor * outputs = ggml_scale(ctx_compute, intermediary, 1.0f);
    ggml_set_name(outputs, "outputs");

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
    const float w0 = float(ndata)/2; // initial weight value (3.0f for ndata == 6)
    ggml_backend_tensor_set(weights, &w0, 0, sizeof(float));

    // A logical batch is accumulated over opt_period physical batches:
    GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
    const int32_t opt_period = nbatch_logical / nbatch_physical;

    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
    opt_params.ctx_compute = ctx_compute;
    opt_params.inputs = inputs;
    opt_params.outputs = outputs;
    opt_params.opt_period = opt_period;
    opt_params.optimizer = optim;
    if (!optimizer_defaults) {
        // Use the deterministic test parameters (lr == 1, no momenta/decay):
        opt_params.get_opt_pars = helper_get_test_opt_pars;
    }
    GGML_ASSERT(opt_params.get_opt_pars);
    ggml_opt_context_t opt_ctx = init_opt_ctx ? ggml_opt_init(opt_params) : nullptr;
    GGML_ASSERT(!opt_ctx || ggml_opt_context_optimizer_type(opt_ctx) == opt_params.optimizer);

    ggml_opt_result_t result = ggml_opt_result_init();
    ggml_opt_result_t result2 = ggml_opt_result_init();

    // Order must match the field order of helper_ctx_data:
    return {datasets, data_batch, labels_batch, dataset_unsupervised, ctx_static, ctx_compute, opt_params, opt_ctx, inputs, weights, outputs, buf, result, result2};
}
160
161static void helper_free_ctx_data(struct helper_ctx_data ctx_data) {
162 ggml_opt_result_free(ctx_data.result);
163 ggml_opt_result_free(ctx_data.result2);
164 ggml_opt_free(ctx_data.opt_ctx);
165 ggml_backend_buffer_free(ctx_data.buf);
166 ggml_free(ctx_data.ctx_static);
167 ggml_free(ctx_data.ctx_compute);
168 for (ggml_opt_dataset_t dataset : ctx_data.datasets_supervised) {
169 ggml_opt_dataset_free(dataset);
170 }
171 ggml_opt_dataset_free(ctx_data.dataset_unsupervised);
172}
173
// Print a green "OK" or red "FAIL" verdict for a single subtest.
static void print_ok(bool subtest_ok) {
    if (subtest_ok) {
        printf("\033[1;32mOK\033[0m\n");
    } else {
        printf("\033[1;31mFAIL\033[0m\n");
    }
}
177
178static void helper_after_test(
179 enum ggml_opt_optimizer_type optim,
180 const char * func, const bool high_level, const std::string options,
181 const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
182 printf(" %s(high_level=%s%s, subtest=%s, optimizer=%s): ",
183 func, high_level ? "yes" : "no", options.c_str(), subtest.c_str(), ggml_opt_optimizer_name(optim));
184 print_ok(subtest_ok);
185 if (subtest_ok)
186 npass++;
187 ntest++;
188}
189
190static void print_ok(const char * func, bool subtest_ok, int & npass, int & ntest, const char * args = "") {
191 printf(" %s(%s): ", func, args);
192 print_ok(subtest_ok);
193 if (subtest_ok)
194 npass++;
195 ++ntest;
196}
197
// Check that ggml_opt_dataset_get_batch() returns the expected data/labels for
// every valid combination of shard size and batch size, optionally after
// shuffling. Returns {npass, ntest}.
static std::pair<int, int> test_dataset(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool shuffle) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend);

    for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
        ggml_opt_dataset_t dataset = cd.datasets_supervised[ndata_shard-1];

        if (shuffle) {
            // -1 presumably means "shuffle the whole dataset" — TODO confirm against ggml-opt.h.
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }

        // Batches can only be fetched when the batch size is a multiple of the shard size:
        for (int64_t ndata_batch = 1; ndata_batch <= ndata; ++ndata_batch) {
            if (ndata_batch % ndata_shard != 0) {
                continue;
            }
            bool subtest_ok = true;

            struct ggml_tensor * data_batch = cd.data_batch[ndata_batch-1];
            struct ggml_tensor * labels_batch = cd.labels_batch[ndata_batch-1];

            std::vector<float> data(ggml_nelements( data_batch));
            std::vector<float> labels(ggml_nelements(labels_batch));

            std::vector<int64_t> idata_shuffled;
            const int64_t nbatches = ndata / ndata_batch;
            for (int64_t ibatch = 0; ibatch < nbatches; ++ibatch) {
                ggml_opt_dataset_get_batch(dataset, data_batch, labels_batch, ibatch);

                ggml_backend_tensor_get( data_batch, data.data(), 0, ggml_nbytes( data_batch));
                ggml_backend_tensor_get(labels_batch, labels.data(), 0, ggml_nbytes(labels_batch));

                for (int64_t idata_batch = 0; idata_batch < ndata_batch; ++idata_batch) {
                    const int64_t idata = ibatch*ndata_batch + idata_batch;
                    // The data values encode the original datapoint index (see helper_get_ctx_data):
                    const int64_t idata_found = data[idata_batch*ne_datapoint] / 16;
                    // Without shuffling datapoints must come back in their original order:
                    subtest_ok = subtest_ok && (shuffle || idata_found == idata);
                    idata_shuffled.push_back(idata_found);

                    // All values of the datapoint/label must be consistent with the recovered index:
                    for (int64_t id = 0; id < ne_datapoint; ++id) {
                        if (data[ idata_batch*ne_datapoint + id] != 16*idata_found + id) {
                            subtest_ok = false;
                        }
                    }
                    for (int64_t il = 0; il < ne_label; ++il) {
                        if (labels[idata_batch*ne_label + il] != 16*(16*idata_found + il)) {
                            subtest_ok = false;
                        }
                    }
                }
            }

            if (!shuffle || ndata % ndata_batch == 0) {
                // Shuffling must be a permutation: every index in range appears exactly once.
                const int ndata_max = (ndata / ndata_batch) * ndata_batch;

                for (int64_t idata = 0; subtest_ok && idata < ndata_max; ++idata) {
                    int ninstances = 0;
                    for (int64_t id : idata_shuffled) {
                        ninstances += id == idata;
                    }
                    if (ninstances != 1) {
                        subtest_ok = false;
                    }
                }
            }

            printf(" %s(shuffle=%s, ndata_shard=%" PRId64 ", ndata_batch=%" PRId64 "): ",
                __func__, shuffle ? "yes" : "no", ndata_shard, ndata_batch);
            if (subtest_ok) {
                printf("\033[1;32mOK\033[0m\n");
                npass++;
            } else {
                printf("\033[1;31mFAIL\033[0m\n");
            }
            ntest++;
        }
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
282
// Check that gradients accumulate correctly across evaluations:
// nbatch_logical is set absurdly high so that no optimizer step is ever
// applied and the gradient accumulator keeps growing. After idata+1 backward
// evaluations the test expects the accumulated gradient of the single weight
// to equal idata+1.
static std::pair<int, int> test_grad(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false,
        /*nbatch_logical =*/ 999999, /*nbatch_physical =*/ 1);

    // Pre-fill with NAN so that unwritten slots are detected by the checks below:
    std::vector<float> grad_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        grad_history[idata] = NAN;
    }

    for (int idata = 0; idata < ndata; ++idata) {
        const float idataf = idata;
        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
        // leaked
        ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
        ggml_opt_eval(cd.opt_ctx, cd.result);
        // Read back the gradient accumulator for the weight after each eval:
        ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
    }

    {
        bool subtest_ok = true;
        for (int idata = 0; idata < ndata; ++idata) {
            if (grad_history[idata] != idata + 1) {
                subtest_ok = false;
            }
        }
        printf(" %s(): ", __func__);
        if (subtest_ok) {
            printf("\033[1;32mOK\033[0m\n");
            npass++;
        } else {
            printf("\033[1;31mFAIL\033[0m\n");
        }
        ntest++;
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
327
328static void helper_after_test_forward_backward(
329 enum ggml_opt_optimizer_type optim,
330 const char * func, const bool high_level, const bool shuffle,
331 const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
332 std::string options = ", shuffle=";
333 options += shuffle ? "yes" : "no";
334 helper_after_test(optim, func, high_level, options, subtest, subtest_ok, ntest, npass);
335}
336
// Check forward-only and forward+backward evaluation of the minimal model
// outputs = inputs + weights (see helper_get_ctx_data), both via the
// high-level ggml_opt_epoch() and via manual alloc/eval calls.
static std::pair<int, int> test_forward_backward(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool high_level, const bool shuffle) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false);
    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);

    // Pre-fill with NAN so that unwritten slots are detectable:
    std::vector<float> loss_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    // Before any evaluation the result accumulator must be empty:
    {
        int64_t ndata; // shadows the global ndata
        ggml_opt_result_ndata(cd.result, &ndata);
        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        const bool subtest_ok = ndata == 0 && almost_equal(loss, 0.0, 1e-6) && std::isnan(loss_unc) && std::isnan(accuracy) && std::isnan(accuracy_unc);
        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "results_initial", subtest_ok, ntest, npass);
    }

    // Phase 1: forward passes only — the weight must stay unchanged.
    if (high_level) {
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;
        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }
        // idata_split == 0: the whole epoch goes into the evaluation result — TODO confirm.
        ggml_opt_epoch(cd.opt_ctx, dataset, nullptr, cd.result, 0, nullptr, nullptr);
    } else {
        for (int idata = 0; idata < ndata; ++idata) {
            const float idataf = idata;
            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
            ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
            ggml_opt_eval(cd.opt_ctx, cd.result);
            ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
        }
    }

    {
        float weights;
        ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
        // Still the initial value ndata/2 == 3:
        const bool subtest_ok = almost_equal(weights, ndata/2, 1e-10);
        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "weights_after_forward", subtest_ok, ntest, npass);
    }
    {
        constexpr double atol = 1e-10;

        int64_t ndata; // shadows the global ndata
        ggml_opt_result_ndata(cd.result, &ndata);
        bool subtest_ok = ndata == 6;

        // With weights == 3 the per-datapoint losses are 3, 4, ..., 8:
        // sum == 33, uncertainty == sqrt(3.5).
        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        subtest_ok = subtest_ok && almost_equal(loss, 33.0, atol) && almost_equal(loss_unc, sqrt(3.5), atol);

        // Accuracy is expected to be undefined (NaN) for this setup:
        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "results_after_forward", subtest_ok, ntest, npass);
    }

    // Run some backward passes with stale inputs, then restore the weight and
    // reset the result so the next phase starts from a clean state:
    float w0;
    ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
    for (int i = 0; i < 10; ++i) {
        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
        // leaked.
        ggml_opt_eval(cd.opt_ctx, cd.result);
    }
    ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));

    ggml_opt_reset(cd.opt_ctx, /*optimizer =*/ false);
    ggml_opt_result_reset(cd.result);

    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    // Phase 2: forward + backward passes, one optimizer step per datapoint.
    if (high_level) {
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;
        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }
        ggml_opt_epoch(cd.opt_ctx, dataset, cd.result, nullptr, ndata, nullptr, nullptr);
    } else {
        for (int idata = 0; idata < ndata; ++idata) {
            const float idataf = idata;
            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
            ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
            ggml_opt_eval(cd.opt_ctx, cd.result);
            ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
        }
    }

    {
        float weights;
        ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
        // With the test optimizer parameters (lr == 1) the test expects the weight
        // to end at 3 - 6 == -3 == -ndata * 0.5 after ndata steps:
        const bool subtest_ok = almost_equal(weights, -ndata * 0.5, 1e-10);
        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "weights_after_forward_backward", subtest_ok, ntest, npass);
    }
    {
        int64_t ndata; // shadows the global ndata
        ggml_opt_result_ndata(cd.result, &ndata);
        bool subtest_ok = ndata == 6;

        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        // The loss uncertainty is only deterministic without shuffling:
        subtest_ok = subtest_ok && almost_equal(loss, 18.0, 1e-10) && (shuffle || loss_unc == 0.0);

        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "result_after_forward_backward", subtest_ok, ntest, npass);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
466
// Check that one epoch run manually via ggml_opt_epoch() yields a bit-identical
// weight to one call of the high-level ggml_opt_fit() convenience function.
static std::pair<int, int> test_epoch_vs_fit(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    float weights_epoch;
    float weights_fit;

    {
        // Manual epoch with an explicitly created optimization context:
        struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true);
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;

        ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        ggml_opt_epoch(cd.opt_ctx, dataset, cd.result, nullptr, ndata, nullptr, nullptr);
        // leaked.

        ggml_backend_tensor_get(cd.weights, &weights_epoch, 0, ggml_nbytes(cd.weights));
        helper_free_ctx_data(cd);
    }
    {
        // High-level fit; no opt context is created here (init_opt_ctx == false),
        // ggml_opt_fit manages it internally.
        struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ false);
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;

        // Trailing args presumably: nepoch=1, nbatch_logical=1, val_split=0.0f, silent=true — confirm against ggml-opt.h.
        ggml_opt_fit(backend_sched, cd.ctx_compute, cd.inputs, cd.outputs, dataset, GGML_OPT_LOSS_TYPE_SUM,
            optim, ggml_opt_get_default_optimizer_params, 1, 1, 0.0f, true);

        ggml_backend_tensor_get(cd.weights, &weights_fit, 0, ggml_nbytes(cd.weights));
        helper_free_ctx_data(cd);
    }

    // Both code paths are expected to produce exactly the same weight:
    const bool subtest_ok = weights_epoch == weights_fit;

    print_ok(__func__, subtest_ok, npass, ntest);

    return std::make_pair(npass, ntest);
}
504
505static void helper_after_test_idata_split(
506 enum ggml_opt_optimizer_type optim,
507 const char * func, const bool high_level, const int epoch,
508 const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
509 std::string options = ", epoch=";
510 options += std::to_string(epoch);
511 helper_after_test(optim, func, high_level, options, subtest, subtest_ok, ntest, npass);
512}
513
// Check optimization with a train/validation split: the first 2/3 of the
// datapoints are used for training (backward passes accumulated into
// cd.result), the remaining 1/3 for evaluation only (forward passes into
// cd.result2). The exact weight/loss expectations are only checked for AdamW;
// they rely on the deterministic test optimizer parameters.
static std::pair<int, int> test_idata_split(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool high_level) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false);
    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
    const int idata_split = ndata * 2/3; // 4 of the 6 datapoints are used for training

    std::vector<float> loss_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    bool const adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
    for (int epoch = 1; epoch <= 4; ++epoch) {
        if (high_level) {
            ggml_opt_epoch(cd.opt_ctx, cd.dataset_unsupervised, cd.result, cd.result2, idata_split, nullptr, nullptr);
        } else {
            // Training split (with backward passes):
            int idata = 0;
            for (; idata < idata_split; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
                ggml_opt_eval(cd.opt_ctx, cd.result);
                ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
            }
            // Validation split (forward only):
            for (; idata < ndata; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
                ggml_opt_eval(cd.opt_ctx, cd.result2);
                ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
            }
        }

        if (adamw) {
            // The test expects the weight to decrease by idata_split per epoch:
            float weights;
            ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
            const bool subtest_ok = almost_equal(weights, ndata/2 - epoch*idata_split, 1e-10);
            helper_after_test_idata_split(optim, __func__, high_level, epoch, "weights", subtest_ok, ntest, npass);
        }
        if (adamw) {
            constexpr double atol = 1e-10;

            // Training result: exactly idata_split datapoints accumulated.
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result, &ndata_result);
            bool subtest_ok = ndata_result == idata_split;

            double loss;
            double loss_unc;
            ggml_opt_result_loss(cd.result, &loss, &loss_unc);
            subtest_ok = subtest_ok && almost_equal(loss, 28.0 - epoch*16.0, atol) && almost_equal(loss_unc, 0.0, atol);

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_idata_split(optim, __func__, high_level, epoch, "results_backward", subtest_ok, ntest, npass);
        }
        if (adamw) {
            constexpr double atol = 1e-10;

            // Validation result: the remaining ndata - idata_split datapoints.
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result2, &ndata_result);
            bool subtest_ok = ndata_result == ndata - idata_split;

            double loss;
            double loss_unc;
            ggml_opt_result_loss(cd.result2, &loss, &loss_unc);
            subtest_ok = subtest_ok && almost_equal(loss, 15.0 - epoch*8, atol) && almost_equal(loss_unc, sqrt(0.5), atol);

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result2, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_idata_split(optim, __func__, high_level, epoch, "results_forward", subtest_ok, ntest, npass);
        }

        // Results are reset between epochs, the weight carries over:
        ggml_opt_result_reset(cd.result);
        ggml_opt_result_reset(cd.result2);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
604
605static void helper_after_test_gradient_accumulation(
606 enum ggml_opt_optimizer_type optim,
607 const char * func, const int nbatch_physical, const enum ggml_opt_loss_type loss_type, const int epoch,
608 const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
609 std::string options = ", nbatch_physical=";
610 options += std::to_string(nbatch_physical);
611 options += ", loss_type=";
612 options += loss_type == GGML_OPT_LOSS_TYPE_MEAN ? "mean" : "sum";
613 options += ", epoch=";
614 options += std::to_string(epoch);
615 helper_after_test(optim, func, false, options, subtest, subtest_ok, ntest, npass);
616}
617
// Check gradient accumulation over a logical batch of 6 datapoints processed
// in physical batches of 1 or 2, for both sum and mean loss.
// NOTE(review): the entire epoch loop is gated on AdamW below, so for SGD this
// function currently runs no subtests at all — confirm this is intended.
static std::pair<int, int> test_gradient_accumulation(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const int32_t nbatch_physical, const enum ggml_opt_loss_type loss_type) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(
        optim,
        backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);

    // Pre-fill with NAN so that unwritten slots are detectable:
    std::vector<float> grad_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        grad_history[idata] = NAN;
    }

    bool const adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
    if (adamw)
    for (int epoch = 1; epoch <= 4; ++epoch) {
        if (nbatch_physical == 1) {
            // One datapoint per eval; the gradient accumulator is read back after each eval.
            for (int idata = 0; idata < ndata; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
                ggml_opt_eval(cd.opt_ctx, cd.result);
                ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
            }
        } else if (nbatch_physical == 2) {
            // Two datapoints per eval; only one accumulator read per eval, the
            // even slots are set to 0 as placeholders.
            for (int idata = 0; idata < ndata; idata += 2) {
                const float idataf[2] = {float(idata + 0), float(idata + 1)};
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
                ggml_opt_eval(cd.opt_ctx, cd.result);

                grad_history[idata + 0] = 0.0f;
                ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
            }
        } else {
            GGML_ASSERT(false);
        }

        {
            // Expected accumulated gradients after each read-back; for mean loss
            // every value is scaled by 1/ndata.
            GGML_ASSERT(ndata == 6);
            constexpr double atol = 1e-6;
            bool subtest_ok = true;
            if (loss_type == GGML_OPT_LOSS_TYPE_SUM) {
                if (nbatch_physical == 1) {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 3.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 5.0, atol);
                } else {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 0.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 0.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 0.0, atol);
                }
                subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
            } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
                if (nbatch_physical == 1) {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 3.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 5.0/ndata, atol);
                } else {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 0.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 0.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 0.0/ndata, atol);
                }
                subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
            } else {
                GGML_ASSERT(false);
            }
            helper_after_test_gradient_accumulation(optim, __func__, nbatch_physical, loss_type, epoch, "grads", subtest_ok, ntest, npass);
        }
        bool const adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW; // shadows the outer adamw (always true here)
        if (adamw) {
            // One optimizer step per logical batch, i.e. one per epoch:
            constexpr double atol = 1e-6;
            float weights;
            ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
            const bool subtest_ok = almost_equal(weights, (ndata/2) - epoch, atol);
            helper_after_test_gradient_accumulation(optim, __func__, nbatch_physical, loss_type, epoch, "weights", subtest_ok, ntest, npass);
        }
        {
            constexpr double atol = 1e-6;
            // ndata as reported by the result counts physical batches:
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result, &ndata_result);
            bool subtest_ok = almost_equal(ndata_result, ndata/nbatch_physical, atol);

            double loss;
            ggml_opt_result_loss(cd.result, &loss, /*loss_unc =*/ nullptr);
            if (loss_type == GGML_OPT_LOSS_TYPE_SUM) {
                subtest_ok = subtest_ok && almost_equal(loss, (39.0 - epoch*6.0), atol);
            } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
                subtest_ok = subtest_ok && almost_equal(loss, (39.0 - epoch*6.0) / ndata, atol);
            } else {
                GGML_ASSERT(false);
            }

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_gradient_accumulation(optim, __func__, nbatch_physical, loss_type, epoch, "results", subtest_ok, ntest, npass);
        }

        ggml_opt_result_reset(cd.result);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}
732
// Base learning rate for the SGD regression test (decayed per epoch in helper_get_regression_opt_pars).
float constexpr g_sgd_lr = 1e-4f;

// SGD needs far more epochs than AdamW (100) to converge on the regression test.
int constexpr g_sgd_epochs = 900;
736
737static ggml_opt_optimizer_params helper_get_regression_opt_pars(void * userdata) {
738 int64_t epoch = *(int64_t*)userdata;
739 ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(nullptr);
740 result.adamw.alpha = 0.1f;
741 result.sgd.alpha = g_sgd_lr * std::pow(.99, 1000 * (double)epoch / g_sgd_epochs);
742 result.sgd.wd = 1e-10;
743 return result;
744}
745
746static std::pair<int, int> test_regression(
747 enum ggml_opt_optimizer_type optim,
748 ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
749 int ntest = 0;
750 int npass = 0;
751
752 // Test for simple regression with f(x) = a*x + b
753
754 constexpr int64_t ndata_regression = 201;
755 constexpr float a_true = 1.2f;
756 constexpr float b_true = 3.4f;
757
758 std::mt19937 gen(12345);
759 std::normal_distribution<float> nd{0.0f, 0.1f};
760
761 ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
762 GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);
763
764 float * data = ggml_get_data_f32(ggml_opt_dataset_data( dataset));
765 float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));
766
767 constexpr float x_min = -100.0f;
768 constexpr float x_max = 100.0f;
769
770 for (int64_t idata = 0; idata < ndata_regression; ++idata) {
771 const float x = x_min + (x_max - x_min) * idata/(ndata_regression-1);
772 const float y = a_true*x + b_true + nd(gen);
773
774 data[idata] = x;
775 labels[idata] = y;
776 }
777
778 struct ggml_context * ctx_static;
779 struct ggml_context * ctx_compute;
780 {
781 struct ggml_init_params params = {
782 /*.mem_size =*/ 3*ggml_tensor_overhead(),
783 /*.mem_buffer =*/ nullptr,
784 /*.no_alloc =*/ true,
785 };
786 ctx_static = ggml_init(params);
787 }
788 {
789 struct ggml_init_params params = {
790 /*.mem_size =*/ GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(),
791 /*.mem_buffer =*/ nullptr,
792 /*.no_alloc =*/ true,
793 };
794 ctx_compute = ggml_init(params);
795 }
796
797 // The first dimension is the dimension of the datapoints, the second dimension is the number of datapoints.
798 struct ggml_tensor * x = ggml_new_tensor_2d(ctx_static, GGML_TYPE_F32, 1, ndata_regression);
799 ggml_set_name(x, "x");
800
801 struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
802 ggml_set_name(a, "a");
803 ggml_set_param(a);
804
805 struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
806 ggml_set_name(b, "b");
807 ggml_set_param(b);
808
809 struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
810 ggml_set_name(f, "f");
811
812 ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
813 const float a0 = 1.0f;
814 const float b0 = 3.0f;
815 ggml_backend_tensor_set(a, &a0, 0, sizeof(float));
816 ggml_backend_tensor_set(b, &b0, 0, sizeof(float));
817
818 bool const adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
819 int64_t const n_epoch = adamw ? 100 : g_sgd_epochs;
820 ggml_opt_fit(backend_sched, ctx_compute, x, f, dataset, GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, optim,
821 helper_get_regression_opt_pars, n_epoch, ndata_regression, 0.0f, true);
822
823 {
824 float a_fit;
825 ggml_backend_tensor_get(a, &a_fit, 0, sizeof(float));
826 float b_fit;
827 ggml_backend_tensor_get(b, &b_fit, 0, sizeof(float));
828 float tol = adamw ? 1e-2 : 5e-2;
829 const bool aok = almost_equal(a_fit, a_true, tol);
830 const bool bok = almost_equal(b_fit, b_true, tol);
831 const bool subtest_ok = aok && bok;
832 print_ok(__func__, adamw ? subtest_ok : true, npass, ntest, "subtest=weights");
833 }
834
835 ggml_backend_buffer_free(buf);
836 ggml_free(ctx_static);
837 ggml_opt_dataset_free(dataset);
838
839 return std::make_pair(npass, ntest);
840}
841
842static std::pair<int, int> test_backend(
843 ggml_backend_sched_t backend_sched, ggml_backend_t backend, enum ggml_opt_optimizer_type optim) {
844 int npass = 0;
845 int ntest = 0;
846
847 for (bool shuffle : {false, true}) {
848 std::pair<int, int> partial = test_dataset(optim, backend_sched, backend, shuffle);
849 npass += partial.first;
850 ntest += partial.second;
851 }
852 {
853 std::pair<int, int> partial = test_grad(optim, backend_sched, backend);
854 npass += partial.first;
855 ntest += partial.second;
856 }
857 for (bool high_level : {false, true}){
858 for (bool shuffle : {false, true}) {
859 if (!high_level && shuffle) {
860 continue;
861 }
862
863 std::pair<int, int> partial = test_forward_backward(optim, backend_sched, backend, high_level, shuffle);
864 npass += partial.first;
865 ntest += partial.second;
866 }
867 }
868 {
869 std::pair<int, int> partial = test_epoch_vs_fit(optim, backend_sched, backend);
870 npass += partial.first;
871 ntest += partial.second;
872 }
873 for (bool high_level : {false, true}){
874 std::pair<int, int> partial = test_idata_split(optim, backend_sched, backend, high_level);
875 npass += partial.first;
876 ntest += partial.second;
877 }
878 bool const adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
879 if (adamw) {
880 for (int32_t nbatch_physical : { 2, 1 }) {
881 for (enum ggml_opt_loss_type loss_type : { GGML_OPT_LOSS_TYPE_SUM, GGML_OPT_LOSS_TYPE_MEAN }) {
882 std::pair<int, int> partial =
883 test_gradient_accumulation(optim, backend_sched, backend, nbatch_physical, loss_type);
884 npass += partial.first;
885 ntest += partial.second;
886 }
887 }
888 }
889 {
890 std::pair<int, int> partial = test_regression(optim, backend_sched, backend);
891 npass += partial.first;
892 ntest += partial.second;
893 }
894
895 return std::make_pair(npass, ntest);
896}
897
898
899int main(void) {
900 ggml_log_set(nullptr, nullptr);
901 ggml_backend_load_all();
902 const size_t dev_count = ggml_backend_dev_count();
903 printf("Testing %zu devices\n\n", dev_count);
904 size_t n_ok = 0;
905
906 std::vector<ggml_backend_dev_t> devs;
907 std::vector<ggml_backend_t> backends;
908
909 for (size_t i = 0; i < dev_count; ++i) {
910 devs.push_back(ggml_backend_dev_get(i));
911
912 ggml_backend_t backend = ggml_backend_dev_init(devs[i], NULL);
913 GGML_ASSERT(backend != NULL);
914
915 auto * reg = ggml_backend_dev_backend_reg(devs[i]);
916 auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
917 if (ggml_backend_set_n_threads_fn) {
918 ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency() / 2);
919 }
920 backends.push_back(backend);
921 }
922
923 size_t n_total = 0;
924 for (enum ggml_opt_optimizer_type optim : { GGML_OPT_OPTIMIZER_TYPE_ADAMW, GGML_OPT_OPTIMIZER_TYPE_SGD }) {
925 for (size_t i = 0; i < dev_count; ++i) {
926 // Put the backend to be tested in front so that it's prioritized:
927 std::vector<ggml_backend_t> backends_modded = { backends[i] };
928 backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());
929
930 ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
931 backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);
932
933 char const* devname = ggml_backend_dev_name(devs[i]);
934 printf("Backend %zu/%zu: %s\n", i + 1, dev_count, devname);
935 printf(" Device description: %s\n", ggml_backend_dev_description(devs[i]));
936 size_t free, total; // NOLINT
937 ggml_backend_dev_memory(devs[i], &free, &total);
938 printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
939 printf("\n");
940
941 bool skip;
942 {
943 struct ggml_init_params params = {
944 /*.mem_size =*/ 6*ggml_tensor_overhead(),
945 /*.mem_buffer =*/ nullptr,
946 /*.no_alloc =*/ true,
947 };
948 ggml_context * ctx = ggml_init(params);
949 ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
950 ggml_set_param(a);
951 ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
952 ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
953 ggml_tensor * d = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
954
955 ggml_tensor * t = nullptr;
956 switch (optim) {
957 case GGML_OPT_OPTIMIZER_TYPE_ADAMW: {
958 ggml_tensor * p = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 7);
959 t = ggml_opt_step_adamw(ctx, a, b, c, d, p);
960 } break;
961 case GGML_OPT_OPTIMIZER_TYPE_SGD: {
962 ggml_tensor * p = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
963 t = ggml_opt_step_sgd(ctx, a, b, p);
964 } break;
965 case GGML_OPT_OPTIMIZER_TYPE_COUNT: {
966 GGML_ABORT("fatal error");
967 }
968 }
969 skip = !ggml_backend_supports_op(backends[i], t);
970 ggml_free(ctx);
971 }
972
973 std::pair<int, int> result;
974 if (!skip) {
975 result = test_backend(backend_sched, backends[i], optim);
976 printf(" %d/%d tests passed\n", result.first, result.second);
977 }
978
979 printf(" Backend %s %s: ", ggml_backend_name(backends[i]), ggml_opt_optimizer_name(optim));
980 if (skip) {
981 printf("\033[0;33mSKIPPED\033[0m\n");
982 n_ok++;
983 } else if (result.first == result.second) {
984 printf("\033[1;32mOK\033[0m\n");
985 n_ok++;
986 } else {
987 printf("\033[1;31mFAIL\033[0m\n");
988 }
989 ++n_total;
990 printf("\n");
991 ggml_backend_sched_free(backend_sched);
992 }
993 }
994
995 for (ggml_backend_t backend : backends) {
996 ggml_backend_free(backend);
997 }
998
999 printf("%zu/%zu backend*optimizer passed\n", n_ok, n_total);
1000 bool ok = n_ok == n_total;
1001 print_ok(ok);
1002 return ok ? 0 : 1;
1003}