// TODO refactor

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-opt.h"

#include <cmath>
#include <cinttypes>
#include <cstring>
#include <random>
#include <string>
#include <thread>
#include <vector>

#define TEST_LOG(...)       printf(__VA_ARGS__)

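// Absolute-tolerance comparison used by all checks below.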
static bool almost_equal(const double a, const double b, const double atol) {
    return std::fabs(a - b) < atol;
}

constexpr int64_t ne_datapoint = 2;
constexpr int64_t ne_label     = 1;
constexpr int64_t ndata        = 6;

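// Bundles the state shared between subtests: supervised datasets (one per
// shard size), per-batch-size data/label tensors, the unsupervised dataset,
// the ggml contexts and optimizer context, and two result accumulators.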
struct helper_ctx_data {
    std::vector<ggml_opt_dataset_t>   datasets_supervised;
    std::vector<struct ggml_tensor *> data_batch;
    std::vector<struct ggml_tensor *> labels_batch;

    ggml_opt_dataset_t       dataset_unsupervised;
    struct ggml_context    * ctx_static;
    struct ggml_context    * ctx_compute;
    struct ggml_opt_params   opt_params;
    ggml_opt_context_t       opt_ctx;
    struct ggml_tensor     * inputs;
    struct ggml_tensor     * weights;
    struct ggml_tensor     * outputs;
    ggml_backend_buffer_t    buf;
    ggml_opt_result_t        result;
    ggml_opt_result_t        result2;
};

// These default values make it easier to check optimization results vs. expected values.
static ggml_opt_optimizer_params helper_get_test_opt_pars(void * userdata) {
    ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(userdata);

    result.adamw.alpha = 1.0f;
    result.adamw.beta1 = 0.0f;
    result.adamw.beta2 = 0.0f;
    result.adamw.eps   = 0.0f;
    result.adamw.wd    = 0.0f;
    result.sgd.wd      = 0.0f;
    result.sgd.alpha   = 1.0f;

    return result;
}

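// Builds the shared test fixture: supervised datasets for every shard size,
// one unsupervised dataset, and the toy model outputs = (inputs + weights),
// with the scalar weight initialized to ndata/2 so that the expected results
// below have simple closed forms.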
static helper_ctx_data helper_get_ctx_data(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t    backend_sched,
        ggml_backend_t          backend,
        const bool              init_opt_ctx       = true,
        const bool              optimizer_defaults = true,
        int64_t                 nbatch_logical     = 1,
        int64_t                 nbatch_physical    = 1,
        enum ggml_opt_loss_type loss_type          = GGML_OPT_LOSS_TYPE_SUM) {
    std::vector<ggml_opt_dataset_t> datasets(ndata);
    for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
        ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
            GGML_TYPE_F32, GGML_TYPE_F32, ne_datapoint, ne_label, ndata, ndata_shard);

        float * data   = ggml_get_data_f32(ggml_opt_dataset_data(  dataset));
        float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));

        for (int64_t idata = 0; idata < ndata; ++idata) {
            for (int64_t id = 0; id < ne_datapoint; ++id) {
                data[  idata*ne_datapoint + id] =     16*idata + id;
            }
            for (int64_t il = 0; il < ne_label;     ++il) {
                labels[idata*ne_label     + il] = 16*(16*idata + il);
            }
        }

        datasets[ndata_shard-1] = dataset;
    }

    ggml_opt_dataset_t dataset_unsupervised = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, 1, 0, ndata, /*ndata_shard =*/ 1);

    float * data = ggml_get_data_f32(ggml_opt_dataset_data(dataset_unsupervised));

    for (int64_t idata = 0; idata < ndata; ++idata) {
        data[idata] = idata;
    }

    struct ggml_context * ctx_static;
    struct ggml_context * ctx_compute;
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ (2*ndata + 2)*ggml_tensor_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_static = ggml_init(params);
    }
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_compute = ggml_init(params);
    }

    std::vector<struct ggml_tensor *>   data_batch(ndata);
    std::vector<struct ggml_tensor *> labels_batch(ndata);
    for (int64_t ndata_batch = 1; ndata_batch <= ndata; ++ndata_batch) {
        data_batch[ndata_batch-1]   = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, ndata_batch*ne_datapoint);
        labels_batch[ndata_batch-1] = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, ndata_batch*ne_label);
    }

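    // The model: outputs = scale(inputs + weights, 1.0f). The scale is a no-op
    // that gives the graph an intermediary node.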
    struct ggml_tensor * inputs = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, nbatch_physical);
    ggml_set_name(inputs, "inputs");

    struct ggml_tensor * weights = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(weights, "weights");
    ggml_set_param(weights);

    struct ggml_tensor * intermediary = ggml_add(ctx_compute, inputs, weights);

    struct ggml_tensor * outputs = ggml_scale(ctx_compute, intermediary, 1.0f);
    ggml_set_name(outputs, "outputs");

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
    const float w0 = float(ndata)/2;
    ggml_backend_tensor_set(weights, &w0, 0, sizeof(float));

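    // With nbatch_logical > nbatch_physical the optimizer step is only applied
    // every opt_period evaluations, i.e. gradients are accumulated in between.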
    GGML_ASSERT(nbatch_logical % nbatch_physical == 0);
    const int32_t opt_period = nbatch_logical / nbatch_physical;

    struct ggml_opt_params opt_params = ggml_opt_default_params(backend_sched, loss_type);
    opt_params.ctx_compute = ctx_compute;
    opt_params.inputs      = inputs;
    opt_params.outputs     = outputs;
    opt_params.opt_period  = opt_period;
    opt_params.optimizer   = optim;
    if (!optimizer_defaults) {
        opt_params.get_opt_pars = helper_get_test_opt_pars;
    }
    GGML_ASSERT(opt_params.get_opt_pars);
    ggml_opt_context_t opt_ctx = init_opt_ctx ? ggml_opt_init(opt_params) : nullptr;
    GGML_ASSERT(!opt_ctx || ggml_opt_context_optimizer_type(opt_ctx) == opt_params.optimizer);

    ggml_opt_result_t result  = ggml_opt_result_init();
    ggml_opt_result_t result2 = ggml_opt_result_init();

    return {datasets, data_batch, labels_batch, dataset_unsupervised, ctx_static, ctx_compute, opt_params, opt_ctx, inputs, weights, outputs, buf, result, result2};
}

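// Frees everything created by helper_get_ctx_data (opt_ctx may be a nullptr).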
static void helper_free_ctx_data(struct helper_ctx_data ctx_data) {
    ggml_opt_result_free(ctx_data.result);
    ggml_opt_result_free(ctx_data.result2);
    ggml_opt_free(ctx_data.opt_ctx);
    ggml_backend_buffer_free(ctx_data.buf);
    ggml_free(ctx_data.ctx_static);
    ggml_free(ctx_data.ctx_compute);
    for (ggml_opt_dataset_t dataset : ctx_data.datasets_supervised) {
        ggml_opt_dataset_free(dataset);
    }
    ggml_opt_dataset_free(ctx_data.dataset_unsupervised);
}

static void print_ok(bool subtest_ok) {
    printf(subtest_ok ? "\033[1;32mOK\033[0m\n" : "\033[1;31mFAIL\033[0m\n");
}

static void helper_after_test(
        enum ggml_opt_optimizer_type optim,
        const char * func, const bool high_level, const std::string options,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    printf("  %s(high_level=%s%s, subtest=%s, optimizer=%s): ",
           func, high_level ? "yes" : "no", options.c_str(), subtest.c_str(), ggml_opt_optimizer_name(optim));
    print_ok(subtest_ok);
    if (subtest_ok) {
        npass++;
    }
    ntest++;
}

static void print_ok(const char * func, bool subtest_ok, int & npass, int & ntest, const char * args = "") {
    printf("  %s(%s): ", func, args);
    print_ok(subtest_ok);
    if (subtest_ok) {
        npass++;
    }
    ++ntest;
}

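// Checks that ggml_opt_dataset_get_batch returns the expected data and labels
// for every compatible (shard size, batch size) combination, and that a
// shuffled dataset is a permutation in which each datapoint occurs once.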
static std::pair<int, int> test_dataset(
    enum ggml_opt_optimizer_type optim,
    ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool shuffle) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend);

    for (int64_t ndata_shard = 1; ndata_shard <= ndata; ++ndata_shard) {
        ggml_opt_dataset_t dataset = cd.datasets_supervised[ndata_shard-1];

        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }

        for (int64_t ndata_batch = 1; ndata_batch <= ndata; ++ndata_batch) {
            if (ndata_batch % ndata_shard != 0) {
                continue;
            }
            bool subtest_ok = true;

            struct ggml_tensor *   data_batch =   cd.data_batch[ndata_batch-1];
            struct ggml_tensor * labels_batch = cd.labels_batch[ndata_batch-1];

            std::vector<float>   data(ggml_nelements(  data_batch));
            std::vector<float> labels(ggml_nelements(labels_batch));

            std::vector<int64_t> idata_shuffled;
            const int64_t nbatches = ndata / ndata_batch;
            for (int64_t ibatch = 0; ibatch < nbatches; ++ibatch) {
                ggml_opt_dataset_get_batch(dataset, data_batch, labels_batch, ibatch);

                ggml_backend_tensor_get(  data_batch,   data.data(), 0, ggml_nbytes(  data_batch));
                ggml_backend_tensor_get(labels_batch, labels.data(), 0, ggml_nbytes(labels_batch));

                for (int64_t idata_batch = 0; idata_batch < ndata_batch; ++idata_batch) {
                    const int64_t idata = ibatch*ndata_batch + idata_batch;
                    const int64_t idata_found = data[idata_batch*ne_datapoint] / 16;
                    subtest_ok = subtest_ok && (shuffle || idata_found == idata);
                    idata_shuffled.push_back(idata_found);

                    for (int64_t id = 0; id < ne_datapoint; ++id) {
                        if (data[  idata_batch*ne_datapoint + id] != 16*idata_found + id) {
                            subtest_ok = false;
                        }
                    }
                    for (int64_t il = 0; il < ne_label;     ++il) {
                        if (labels[idata_batch*ne_label     + il] != 16*(16*idata_found + il)) {
                            subtest_ok = false;
                        }
                    }
                }
            }

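            // If the batches tile the dataset exactly, each index in
            // [0, ndata_max) must occur exactly once across all batches.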
            if (!shuffle || ndata % ndata_batch == 0) {
                const int ndata_max = (ndata / ndata_batch) * ndata_batch;

                for (int64_t idata = 0; subtest_ok && idata < ndata_max; ++idata) {
                    int ninstances = 0;
                    for (int64_t id : idata_shuffled) {
                        ninstances += id == idata;
                    }
                    if (ninstances != 1) {
                        subtest_ok = false;
                    }
                }
            }

            printf("  %s(shuffle=%s, ndata_shard=%" PRId64 ", ndata_batch=%" PRId64 "): ",
                   __func__, shuffle ? "yes" : "no", ndata_shard, ndata_batch);
            if (subtest_ok) {
                printf("\033[1;32mOK\033[0m\n");
                npass++;
            } else {
                printf("\033[1;31mFAIL\033[0m\n");
            }
            ntest++;
        }
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

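// With nbatch_logical = 999999 no optimizer step is ever applied, so the
// gradient accumulator keeps growing: each evaluation of loss = sum(x + w)
// contributes d(loss)/dw = 1, giving an accumulated gradient of idata + 1.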
static std::pair<int, int> test_grad(
    enum ggml_opt_optimizer_type optim,
    ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false,
        /*nbatch_logical =*/ 999999, /*nbatch_physical =*/ 1);

    std::vector<float> grad_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        grad_history[idata] = NAN;
    }

    for (int idata = 0; idata < ndata; ++idata) {
        const float idataf = idata;
        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
        ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
        ggml_opt_eval(cd.opt_ctx, cd.result);
        ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, sizeof(float));
    }

    {
        bool subtest_ok = true;
        for (int idata = 0; idata < ndata; ++idata) {
            if (grad_history[idata] != idata + 1) {
                subtest_ok = false;
            }
        }
        printf("  %s(): ", __func__);
        if (subtest_ok) {
            printf("\033[1;32mOK\033[0m\n");
            npass++;
        } else {
            printf("\033[1;31mFAIL\033[0m\n");
        }
        ntest++;
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

static void helper_after_test_forward_backward(
        enum ggml_opt_optimizer_type optim,
        const char * func, const bool high_level, const bool shuffle,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    std::string options = ", shuffle=";
    options += shuffle ? "yes" : "no";
    helper_after_test(optim, func, high_level, options, subtest, subtest_ok, ntest, npass);
}

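// Runs a forward-only pass followed by a forward+backward pass over the
// unsupervised dataset, either via ggml_opt_epoch (high_level) or via manual
// alloc/eval loops, and checks the resulting weights and loss statistics.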
static std::pair<int, int> test_forward_backward(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool high_level, const bool shuffle) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false);
    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);

    std::vector<float> loss_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    {
        int64_t ndata;
        ggml_opt_result_ndata(cd.result, &ndata);
        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        const bool subtest_ok = ndata == 0 && almost_equal(loss, 0.0, 1e-6) && std::isnan(loss_unc) && std::isnan(accuracy) && std::isnan(accuracy_unc);
        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "results_initial", subtest_ok, ntest, npass);
    }

    if (high_level) {
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;
        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }
        ggml_opt_epoch(cd.opt_ctx, dataset, nullptr, cd.result, 0, nullptr, nullptr);
    } else {
        for (int idata = 0; idata < ndata; ++idata) {
            const float idataf = idata;
            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
            ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
            ggml_opt_eval(cd.opt_ctx, cd.result);
            ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
        }
    }

    {
        float weights;
        ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
        const bool subtest_ok = almost_equal(weights, ndata/2, 1e-10);
        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "weights_after_forward", subtest_ok, ntest, npass);
    }
    {
        constexpr double atol = 1e-10;

        int64_t ndata;
        ggml_opt_result_ndata(cd.result, &ndata);
        bool subtest_ok = ndata == 6;

        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        subtest_ok = subtest_ok && almost_equal(loss, 33.0, atol) && almost_equal(loss_unc, sqrt(3.5), atol);

        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "results_after_forward", subtest_ok, ntest, npass);
    }

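    // Save the weights, apply 10 optimizer steps, then restore the weights and
    // reset the optimizer state so the next pass starts from a clean slate.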
    float w0;
    ggml_backend_tensor_get(cd.weights, &w0, 0, sizeof(float));
    for (int i = 0; i < 10; ++i) {
        ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
        ggml_opt_eval(cd.opt_ctx, cd.result);
    }
    ggml_backend_tensor_set(cd.weights, &w0, 0, sizeof(float));

    ggml_opt_reset(cd.opt_ctx, /*optimizer =*/ false);
    ggml_opt_result_reset(cd.result);

    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    if (high_level) {
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;
        if (shuffle) {
            ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        }
        ggml_opt_epoch(cd.opt_ctx, dataset, cd.result, nullptr, ndata, nullptr, nullptr);
    } else {
        for (int idata = 0; idata < ndata; ++idata) {
            const float idataf = idata;
            ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
            ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
            ggml_opt_eval(cd.opt_ctx, cd.result);
            ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
        }
    }

    {
        float weights;
        ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
        const bool subtest_ok = almost_equal(weights, -ndata * 0.5, 1e-10);
        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "weights_after_forward_backward", subtest_ok, ntest, npass);
    }
    {
        int64_t ndata;
        ggml_opt_result_ndata(cd.result, &ndata);
        bool subtest_ok = ndata == 6;

        double loss;
        double loss_unc;
        ggml_opt_result_loss(cd.result, &loss, &loss_unc);
        subtest_ok = subtest_ok && almost_equal(loss, 18.0, 1e-10) && (shuffle || loss_unc == 0.0);

        double accuracy;
        double accuracy_unc;
        ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
        subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

        helper_after_test_forward_backward(optim, __func__, high_level, shuffle, "result_after_forward_backward", subtest_ok, ntest, npass);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

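// ggml_opt_epoch and the higher-level convenience wrapper ggml_opt_fit should
// produce identical weights when given the same dataset and default parameters.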
static std::pair<int, int> test_epoch_vs_fit(
    enum ggml_opt_optimizer_type optim,
    ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    float weights_epoch;
    float weights_fit;

    {
        struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true);
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;

        ggml_opt_dataset_shuffle(cd.opt_ctx, dataset, -1);
        ggml_opt_epoch(cd.opt_ctx, dataset, cd.result, nullptr, ndata, nullptr, nullptr);

        ggml_backend_tensor_get(cd.weights, &weights_epoch, 0, ggml_nbytes(cd.weights));
        helper_free_ctx_data(cd);
    }
    {
        struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ false);
        ggml_opt_dataset_t dataset = cd.dataset_unsupervised;

        ggml_opt_fit(backend_sched, cd.ctx_compute, cd.inputs, cd.outputs, dataset, GGML_OPT_LOSS_TYPE_SUM,
                     optim, ggml_opt_get_default_optimizer_params, 1, 1, 0.0f, true);

        ggml_backend_tensor_get(cd.weights, &weights_fit, 0, ggml_nbytes(cd.weights));
        helper_free_ctx_data(cd);
    }

    const bool subtest_ok = weights_epoch == weights_fit;

    print_ok(__func__, subtest_ok, npass, ntest);

    return std::make_pair(npass, ntest);
}

static void helper_after_test_idata_split(
        enum ggml_opt_optimizer_type optim,
        const char * func, const bool high_level, const int epoch,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    std::string options = ", epoch=";
    options += std::to_string(epoch);
    helper_after_test(optim, func, high_level, options, subtest, subtest_ok, ntest, npass);
}

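// Trains on the first 2/3 of the data and evaluates on the remainder. The
// weight/loss checks only have simple closed-form expectations for AdamW with
// the test parameters (each step changes the weight by exactly -1).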
static std::pair<int, int> test_idata_split(
    enum ggml_opt_optimizer_type optim,
    ggml_backend_sched_t backend_sched, ggml_backend_t backend, const bool high_level) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(optim, backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false);
    struct ggml_tensor * loss = ggml_opt_loss(cd.opt_ctx);
    const int idata_split = ndata * 2/3;

    std::vector<float> loss_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        loss_history[idata] = NAN;
    }

    const bool adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
    for (int epoch = 1; epoch <= 4; ++epoch) {
        if (high_level) {
            ggml_opt_epoch(cd.opt_ctx, cd.dataset_unsupervised, cd.result, cd.result2, idata_split, nullptr, nullptr);
        } else {
            int idata = 0;
            for (; idata < idata_split; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
                ggml_opt_eval(cd.opt_ctx, cd.result);
                ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
            }
            for (; idata < ndata; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ false);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, ggml_nbytes(cd.inputs));
                ggml_opt_eval(cd.opt_ctx, cd.result2);
                ggml_backend_tensor_get(loss, loss_history.data() + idata, 0, sizeof(float));
            }
        }

        if (adamw) {
            float weights;
            ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
            const bool subtest_ok = almost_equal(weights, ndata/2 - epoch*idata_split, 1e-10);
            helper_after_test_idata_split(optim, __func__, high_level, epoch, "weights", subtest_ok, ntest, npass);
        }
        if (adamw) {
            constexpr double atol = 1e-10;

            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result, &ndata_result);
            bool subtest_ok = ndata_result == idata_split;

            double loss;
            double loss_unc;
            ggml_opt_result_loss(cd.result, &loss, &loss_unc);
            subtest_ok = subtest_ok && almost_equal(loss, 28.0 - epoch*16.0, atol) && almost_equal(loss_unc, 0.0, atol);

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_idata_split(optim, __func__, high_level, epoch, "results_backward", subtest_ok, ntest, npass);
        }
        if (adamw) {
            constexpr double atol = 1e-10;

            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result2, &ndata_result);
            bool subtest_ok = ndata_result == ndata - idata_split;

            double loss;
            double loss_unc;
            ggml_opt_result_loss(cd.result2, &loss, &loss_unc);
            subtest_ok = subtest_ok && almost_equal(loss, 15.0 - epoch*8, atol) && almost_equal(loss_unc, sqrt(0.5), atol);

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result2, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_idata_split(optim, __func__, high_level, epoch, "results_forward", subtest_ok, ntest, npass);
        }

        ggml_opt_result_reset(cd.result);
        ggml_opt_result_reset(cd.result2);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

static void helper_after_test_gradient_accumulation(
        enum ggml_opt_optimizer_type optim,
        const char * func, const int nbatch_physical, const enum ggml_opt_loss_type loss_type, const int epoch,
        const std::string subtest, const bool subtest_ok, int & ntest, int & npass) {
    std::string options = ", nbatch_physical=";
    options += std::to_string(nbatch_physical);
    options += ", loss_type=";
    options += loss_type == GGML_OPT_LOSS_TYPE_MEAN ? "mean" : "sum";
    options += ", epoch=";
    options += std::to_string(epoch);
    helper_after_test(optim, func, false, options, subtest, subtest_ok, ntest, npass);
}

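// Accumulates gradients over a logical batch of 6 datapoints and checks the
// accumulator after each physical batch against the expected partial sums,
// for SUM and MEAN loss and physical batch sizes 1 and 2 (AdamW only).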
static std::pair<int, int> test_gradient_accumulation(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend, const int32_t nbatch_physical, const enum ggml_opt_loss_type loss_type) {
    int ntest = 0;
    int npass = 0;

    struct helper_ctx_data cd = helper_get_ctx_data(
        optim,
        backend_sched, backend, /*init_opt_ctx =*/ true, /*optimizer_defaults =*/ false, /*nbatch_logical =*/ 6, nbatch_physical, loss_type);

    std::vector<float> grad_history(ndata);
    for (int64_t idata = 0; idata < ndata; ++idata) {
        grad_history[idata] = NAN;
    }

    const bool adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
    if (adamw)
    for (int epoch = 1; epoch <= 4; ++epoch) {
        if (nbatch_physical == 1) {
            for (int idata = 0; idata < ndata; ++idata) {
                const float idataf = idata;
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, &idataf, 0, 1*sizeof(float));
                ggml_opt_eval(cd.opt_ctx, cd.result);
                ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata, 0, 1*sizeof(float));
            }
        } else if (nbatch_physical == 2) {
            for (int idata = 0; idata < ndata; idata += 2) {
                const float idataf[2] = {float(idata + 0), float(idata + 1)};
                ggml_opt_alloc(cd.opt_ctx, /*backward =*/ true);
                ggml_backend_tensor_set(cd.inputs, idataf, 0, 2*sizeof(float));
                ggml_opt_eval(cd.opt_ctx, cd.result);

                grad_history[idata + 0] = 0.0f;
                ggml_backend_tensor_get(ggml_opt_grad_acc(cd.opt_ctx, cd.weights), grad_history.data() + idata + 1, 0, 1*sizeof(float));
            }
        } else {
            GGML_ASSERT(false);
        }

        {
            GGML_ASSERT(ndata == 6);
            constexpr double atol = 1e-6;
            bool subtest_ok = true;
            if (loss_type == GGML_OPT_LOSS_TYPE_SUM) {
                if (nbatch_physical == 1) {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 3.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 5.0, atol);
                } else {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 0.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 0.0, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 0.0, atol);
                }
                subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0, atol);
            } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
                if (nbatch_physical == 1) {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 1.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 3.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 5.0/ndata, atol);
                } else {
                    subtest_ok = subtest_ok && almost_equal(grad_history[0], 0.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[2], 0.0/ndata, atol);
                    subtest_ok = subtest_ok && almost_equal(grad_history[4], 0.0/ndata, atol);
                }
                subtest_ok = subtest_ok && almost_equal(grad_history[1], 2.0/ndata, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[3], 4.0/ndata, atol);
                subtest_ok = subtest_ok && almost_equal(grad_history[5], 6.0/ndata, atol);
            } else {
                GGML_ASSERT(false);
            }
            helper_after_test_gradient_accumulation(optim, __func__, nbatch_physical, loss_type, epoch, "grads", subtest_ok, ntest, npass);
        }
        if (adamw) {
            constexpr double atol = 1e-6;
            float weights;
            ggml_backend_tensor_get(cd.weights, &weights, 0, sizeof(float));
            const bool subtest_ok = almost_equal(weights, (ndata/2) - epoch, atol);
            helper_after_test_gradient_accumulation(optim, __func__, nbatch_physical, loss_type, epoch, "weights", subtest_ok, ntest, npass);
        }
        {
            constexpr double atol = 1e-6;
            int64_t ndata_result;
            ggml_opt_result_ndata(cd.result, &ndata_result);
            bool subtest_ok = almost_equal(ndata_result, ndata/nbatch_physical, atol);

            double loss;
            ggml_opt_result_loss(cd.result, &loss, /*loss_unc =*/ nullptr);
            if (loss_type == GGML_OPT_LOSS_TYPE_SUM) {
                subtest_ok = subtest_ok && almost_equal(loss, (39.0 - epoch*6.0), atol);
            } else if (loss_type == GGML_OPT_LOSS_TYPE_MEAN) {
                subtest_ok = subtest_ok && almost_equal(loss, (39.0 - epoch*6.0) / ndata, atol);
            } else {
                GGML_ASSERT(false);
            }

            double accuracy;
            double accuracy_unc;
            ggml_opt_result_accuracy(cd.result, &accuracy, &accuracy_unc);
            subtest_ok = subtest_ok && std::isnan(accuracy) && std::isnan(accuracy_unc);

            helper_after_test_gradient_accumulation(optim, __func__, nbatch_physical, loss_type, epoch, "results", subtest_ok, ntest, npass);
        }

        ggml_opt_result_reset(cd.result);
    }

    helper_free_ctx_data(cd);

    return std::make_pair(npass, ntest);
}

constexpr float g_sgd_lr = 1e-4f;

constexpr int g_sgd_epochs = 900;

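// Optimizer parameters for the regression test: a fixed learning rate for
// AdamW and an exponentially decaying one for SGD (userdata points at the
// current epoch).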
static ggml_opt_optimizer_params helper_get_regression_opt_pars(void * userdata) {
    const int64_t epoch = *(int64_t *) userdata;
    ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(nullptr);
    result.adamw.alpha = 0.1f;
    result.sgd.alpha   = g_sgd_lr * std::pow(0.99, 1000.0 * epoch / g_sgd_epochs);
    result.sgd.wd      = 1e-10f;
    return result;
}

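// Fits f(x) = a*x + b to noisy synthetic data via ggml_opt_fit and compares
// the recovered parameters against the ground truth. The check is only
// enforced for AdamW; the SGD run is executed but always counted as passing.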
static std::pair<int, int> test_regression(
        enum ggml_opt_optimizer_type optim,
        ggml_backend_sched_t backend_sched, ggml_backend_t backend) {
    int ntest = 0;
    int npass = 0;

    // Test for simple regression with f(x) = a*x + b

    constexpr int64_t ndata_regression = 201;
    constexpr float a_true = 1.2f;
    constexpr float b_true = 3.4f;

    std::mt19937 gen(12345);
    std::normal_distribution<float> nd{0.0f, 0.1f};

    ggml_opt_dataset_t dataset = ggml_opt_dataset_init(
        GGML_TYPE_F32, GGML_TYPE_F32, 1, 1, ndata_regression, ndata_regression);

    float * data   = ggml_get_data_f32(ggml_opt_dataset_data(  dataset));
    float * labels = ggml_get_data_f32(ggml_opt_dataset_labels(dataset));

    constexpr float x_min = -100.0f;
    constexpr float x_max =  100.0f;

    for (int64_t idata = 0; idata < ndata_regression; ++idata) {
        const float x = x_min + (x_max - x_min) * idata/(ndata_regression-1);
        const float y = a_true*x + b_true + nd(gen);

        data[idata]   = x;
        labels[idata] = y;
    }

    struct ggml_context * ctx_static;
    struct ggml_context * ctx_compute;
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 3*ggml_tensor_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_static = ggml_init(params);
    }
    {
        struct ggml_init_params params = {
            /*.mem_size   =*/ GGML_DEFAULT_GRAPH_SIZE*ggml_tensor_overhead() + 3*ggml_graph_overhead(),
            /*.mem_buffer =*/ nullptr,
            /*.no_alloc   =*/ true,
        };
        ctx_compute = ggml_init(params);
    }

    // The first dimension is the dimension of the datapoints, the second dimension is the number of datapoints.
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx_static, GGML_TYPE_F32, 1, ndata_regression);
    ggml_set_name(x, "x");

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(a, "a");
    ggml_set_param(a);

    struct ggml_tensor * b = ggml_new_tensor_1d(ctx_static, GGML_TYPE_F32, 1);
    ggml_set_name(b, "b");
    ggml_set_param(b);

    struct ggml_tensor * f = ggml_add(ctx_compute, ggml_mul(ctx_compute, x, a), b);
    ggml_set_name(f, "f");

    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx_static, backend);
    const float a0 = 1.0f;
    const float b0 = 3.0f;
    ggml_backend_tensor_set(a, &a0, 0, sizeof(float));
    ggml_backend_tensor_set(b, &b0, 0, sizeof(float));

    const bool    adamw   = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
    const int64_t n_epoch = adamw ? 100 : g_sgd_epochs;
    ggml_opt_fit(backend_sched, ctx_compute, x, f, dataset, GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR, optim,
                 helper_get_regression_opt_pars, n_epoch, ndata_regression, 0.0f, true);

    {
        float a_fit;
        ggml_backend_tensor_get(a, &a_fit, 0, sizeof(float));
        float b_fit;
        ggml_backend_tensor_get(b, &b_fit, 0, sizeof(float));
        const float tol = adamw ? 1e-2f : 5e-2f;
        const bool aok = almost_equal(a_fit, a_true, tol);
        const bool bok = almost_equal(b_fit, b_true, tol);
        const bool subtest_ok = aok && bok;
        print_ok(__func__, adamw ? subtest_ok : true, npass, ntest, "subtest=weights");
    }

    ggml_backend_buffer_free(buf);
    ggml_free(ctx_static);
    ggml_opt_dataset_free(dataset);

    return std::make_pair(npass, ntest);
}

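// Runs all subtests for one backend/optimizer combination and accumulates the
// (npass, ntest) totals.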
static std::pair<int, int> test_backend(
    ggml_backend_sched_t backend_sched, ggml_backend_t backend, enum ggml_opt_optimizer_type optim) {
    int npass = 0;
    int ntest = 0;

    for (bool shuffle : {false, true}) {
        std::pair<int, int> partial = test_dataset(optim, backend_sched, backend, shuffle);
        npass += partial.first;
        ntest += partial.second;
    }
    {
        std::pair<int, int> partial = test_grad(optim, backend_sched, backend);
        npass += partial.first;
        ntest += partial.second;
    }
    for (bool high_level : {false, true}) {
        for (bool shuffle : {false, true}) {
            if (!high_level && shuffle) {
                continue;
            }

            std::pair<int, int> partial = test_forward_backward(optim, backend_sched, backend, high_level, shuffle);
            npass += partial.first;
            ntest += partial.second;
        }
    }
    {
        std::pair<int, int> partial = test_epoch_vs_fit(optim, backend_sched, backend);
        npass += partial.first;
        ntest += partial.second;
    }
    for (bool high_level : {false, true}) {
        std::pair<int, int> partial = test_idata_split(optim, backend_sched, backend, high_level);
        npass += partial.first;
        ntest += partial.second;
    }
    const bool adamw = optim == GGML_OPT_OPTIMIZER_TYPE_ADAMW;
    if (adamw) {
        for (int32_t nbatch_physical : { 2, 1 }) {
            for (enum ggml_opt_loss_type loss_type : { GGML_OPT_LOSS_TYPE_SUM, GGML_OPT_LOSS_TYPE_MEAN }) {
                std::pair<int, int> partial =
                    test_gradient_accumulation(optim, backend_sched, backend, nbatch_physical, loss_type);
                npass += partial.first;
                ntest += partial.second;
            }
        }
    }
    {
        std::pair<int, int> partial = test_regression(optim, backend_sched, backend);
        npass += partial.first;
        ntest += partial.second;
    }

    return std::make_pair(npass, ntest);
}

int main(void) {
    ggml_log_set(nullptr, nullptr);
    ggml_backend_load_all();
    const size_t dev_count = ggml_backend_dev_count();
    printf("Testing %zu devices\n\n", dev_count);
    size_t n_ok = 0;

    std::vector<ggml_backend_dev_t> devs;
    std::vector<ggml_backend_t>     backends;

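    // Initialize one backend per device; for backends that expose a
    // ggml_backend_set_n_threads proc address, use half the hardware threads.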
    for (size_t i = 0; i < dev_count; ++i) {
        devs.push_back(ggml_backend_dev_get(i));

        ggml_backend_t backend = ggml_backend_dev_init(devs[i], NULL);
        GGML_ASSERT(backend != NULL);

        auto * reg = ggml_backend_dev_backend_reg(devs[i]);
        auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
        if (ggml_backend_set_n_threads_fn) {
            ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency() / 2);
        }
        backends.push_back(backend);
    }

    size_t n_total = 0;
    for (enum ggml_opt_optimizer_type optim : { GGML_OPT_OPTIMIZER_TYPE_ADAMW, GGML_OPT_OPTIMIZER_TYPE_SGD }) {
        for (size_t i = 0; i < dev_count; ++i) {
            // Put the backend to be tested in front so that it's prioritized:
            std::vector<ggml_backend_t> backends_modded = { backends[i] };
            backends_modded.insert(backends_modded.end(), backends.begin(), backends.end());

            ggml_backend_sched_t backend_sched = ggml_backend_sched_new(
                backends_modded.data(), nullptr, backends_modded.size(), GGML_DEFAULT_GRAPH_SIZE, false, true);

            char const * devname = ggml_backend_dev_name(devs[i]);
            printf("Backend %zu/%zu: %s\n", i + 1, dev_count, devname);
            printf("  Device description: %s\n", ggml_backend_dev_description(devs[i]));
            size_t free, total;  // NOLINT
            ggml_backend_dev_memory(devs[i], &free, &total);
            printf("  Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
            printf("\n");

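            // Build a minimal graph containing the optimizer-step op and probe
            // whether the backend supports it; if not, skip this combination.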
            bool skip;
            {
                struct ggml_init_params params = {
                    /*.mem_size   =*/ 6*ggml_tensor_overhead(),
                    /*.mem_buffer =*/ nullptr,
                    /*.no_alloc   =*/ true,
                };
                ggml_context * ctx = ggml_init(params);
                ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
                ggml_set_param(a);
                ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
                ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
                ggml_tensor * d = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);

                ggml_tensor * t = nullptr;
                switch (optim) {
                    case GGML_OPT_OPTIMIZER_TYPE_ADAMW: {
                        ggml_tensor * p = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 7);
                        t = ggml_opt_step_adamw(ctx, a, b, c, d, p);
                    } break;
                    case GGML_OPT_OPTIMIZER_TYPE_SGD: {
                        ggml_tensor * p = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
                        t = ggml_opt_step_sgd(ctx, a, b, p);
                    } break;
                    case GGML_OPT_OPTIMIZER_TYPE_COUNT: {
                        GGML_ABORT("fatal error");
                    }
                }
                skip = !ggml_backend_supports_op(backends[i], t);
                ggml_free(ctx);
            }

            std::pair<int, int> result;
            if (!skip) {
                result = test_backend(backend_sched, backends[i], optim);
                printf("  %d/%d tests passed\n", result.first, result.second);
            }

            printf("  Backend %s %s: ", ggml_backend_name(backends[i]), ggml_opt_optimizer_name(optim));
            if (skip) {
                printf("\033[0;33mSKIPPED\033[0m\n");
                n_ok++;
            } else if (result.first == result.second) {
                printf("\033[1;32mOK\033[0m\n");
                n_ok++;
            } else {
                printf("\033[1;31mFAIL\033[0m\n");
            }
            ++n_total;
            printf("\n");
            ggml_backend_sched_free(backend_sched);
        }
    }

    for (ggml_backend_t backend : backends) {
        ggml_backend_free(backend);
    }

    printf("%zu/%zu backend*optimizer passed\n", n_ok, n_total);
    const bool ok = n_ok == n_total;
    print_ok(ok);
    return ok ? 0 : 1;
}