1#include "llama.h"
2
3#include "arg.h"
4#include "common.h"
5#include "log.h"
6
7#include <chrono>
8#include <cinttypes>
9#include <thread>
10
11using namespace std::chrono_literals;
12
13#if defined(_MSC_VER)
14#pragma warning(disable: 4244 4267) // possible loss of data
15#endif
16
17int main(int argc, char ** argv) {
18 common_params params;
19
20 if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
21 return 1;
22 }
23
24 common_init();
25 llama_backend_init();
26 llama_numa_init(params.numa);
27 auto mparams = common_model_params_to_llama(params);
28 auto cparams = common_context_params_to_llama(params);
29 const llama_params_fit_status status = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
30 params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target.data(), params.fit_params_min_ctx,
31 params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
32 if (status != LLAMA_PARAMS_FIT_STATUS_SUCCESS) {
33 LOG_ERR("%s: failed to fit CLI arguments to free memory, exiting...\n", __func__);
34 exit(1);
35 }
36
37 LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
38 common_log_flush(common_log_main());
39 printf("-c %" PRIu32 " -ngl %" PRIi32, cparams.n_ctx, mparams.n_gpu_layers);
40
41 size_t nd = llama_max_devices();
42 while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
43 nd--;
44 }
45 if (nd > 1) {
46 for (size_t id = 0; id < nd; id++) {
47 if (id == 0) {
48 printf(" -ts ");
49 }
50 printf("%s%" PRIu32, id > 0 ? "," : "", uint32_t(mparams.tensor_split[id]));
51 }
52 }
53
54 const size_t ntbo = llama_max_tensor_buft_overrides();
55 bool any_tbo = false;
56 for (size_t itbo = 0; itbo < ntbo && mparams.tensor_buft_overrides[itbo].pattern != nullptr; itbo++) {
57 if (itbo == 0) {
58 printf(" -ot \"");
59 }
60 printf("%s%s=%s", itbo > 0 ? "," : "", mparams.tensor_buft_overrides[itbo].pattern, ggml_backend_buft_name(mparams.tensor_buft_overrides[itbo].buft));
61 any_tbo = true;
62 }
63 printf("%s\n", any_tbo ? "\"" : "");
64
65 return 0;
66}