1#include "debug.h"
2
3#include "log.h"
4
5#include <cmath>
6#include <string>
7
8static std::string common_ggml_ne_string(const ggml_tensor * t) {
9 std::string str;
10 for (int i = 0; i < GGML_MAX_DIMS; ++i) {
11 str += std::to_string(t->ne[i]);
12 if (i + 1 < GGML_MAX_DIMS) {
13 str += ", ";
14 }
15 }
16 return str;
17}
18
19static float common_ggml_get_float_value(const uint8_t * data,
20 ggml_type type,
21 const size_t * nb,
22 size_t i0,
23 size_t i1,
24 size_t i2,
25 size_t i3) {
26 size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
27 float v;
28 if (type == GGML_TYPE_F16) {
29 v = ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]);
30 } else if (type == GGML_TYPE_F32) {
31 v = *(const float *) &data[i];
32 } else if (type == GGML_TYPE_I64) {
33 v = (float) *(const int64_t *) &data[i];
34 } else if (type == GGML_TYPE_I32) {
35 v = (float) *(const int32_t *) &data[i];
36 } else if (type == GGML_TYPE_I16) {
37 v = (float) *(const int16_t *) &data[i];
38 } else if (type == GGML_TYPE_I8) {
39 v = (float) *(const int8_t *) &data[i];
40 } else if (type == GGML_TYPE_BF16) {
41 v = ggml_bf16_to_fp32(*(const ggml_bf16_t *) &data[i]);
42 } else {
43 GGML_ABORT("fatal error");
44 }
45 return v;
46}
47
48#define INDENT " "
49
// Pretty-print a tensor's contents to the log in a numpy-like nested-bracket
// layout, showing at most `n` leading and `n` trailing entries per dimension
// (the elided middle is rendered as "...").  The sum over *all* elements
// (not just the printed ones) is logged after each outermost slice and doubles
// as a cheap NaN detector.
//
// data - host-accessible raw tensor bytes
// type - element type (must be one handled by common_ggml_get_float_value)
// ne   - element counts per dimension (GGML_MAX_DIMS entries)
// nb   - per-dimension strides in bytes
// n    - number of elements to keep at each edge of every dimension (> 0)
//
// When the `abort` template parameter is true, the process exits if the sum
// is NaN.
template <bool abort>
void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
    GGML_ASSERT(n > 0);
    // First pass: accumulate the sum over every element of the tensor.
    float sum = 0;
    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
                    const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
                    sum += v;
                }
            }
        }
    }
    // Second pass: print, eliding the middle of any dimension longer than 2*n
    // by jumping the loop index from n straight to ne[d] - n.
    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
        LOG(INDENT "[\n");
        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
            if (i2 == n && ne[2] > 2 * n) {
                LOG(INDENT INDENT "..., \n");
                i2 = ne[2] - n;
            }
            LOG(INDENT INDENT "[\n");
            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
                if (i1 == n && ne[1] > 2 * n) {
                    LOG(INDENT INDENT INDENT "..., \n");
                    i1 = ne[1] - n;
                }
                LOG(INDENT INDENT INDENT "[");
                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
                    if (i0 == n && ne[0] > 2 * n) {
                        LOG(" ..., ");
                        i0 = ne[0] - n;
                    }
                    const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
                    LOG("%12.4f", v);
                    if (i0 < ne[0] - 1) {
                        LOG(", ");
                    }
                }
                LOG(" ],\n");
            }
            LOG(INDENT INDENT "],\n");
        }
        LOG(INDENT "]\n");
        // NOTE: this is the whole-tensor sum, repeated once per outermost slice.
        LOG(INDENT "sum = %f\n", sum);
    }

    if constexpr (abort) {
        if (std::isnan(sum)) {
            // NOTE(review): exits with status 0 despite being an error path —
            // confirm whether a nonzero status was intended.
            LOG("encountered NaN - aborting\n");
            exit(0);
        }
    }
}
104
105/**
106 * GGML operations callback during the graph execution.
107 *
108 * @param t current tensor
109 * @param ask when ask is true, the scheduler wants to know if we are interested in data from this tensor
110 * if we return true, a follow-up call will be made with ask=false in which we can do the actual collection.
111 * see ggml_backend_sched_eval_callback
112 * @param user_data user data to pass at each call back
113 * @return true to receive data or continue the graph, false otherwise
114 */
115template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
116 auto * cb_data = (base_callback_data *) user_data;
117
118 const struct ggml_tensor * src0 = t->src[0];
119 const struct ggml_tensor * src1 = t->src[1];
120
121 if (ask) {
122 return true; // Always retrieve data
123 }
124
125 bool matches_filter = cb_data->tensor_filters.empty();
126
127 if (!matches_filter) {
128 for (const auto & filter : cb_data->tensor_filters) {
129 if (std::regex_search(t->name, filter)) {
130 matches_filter = true;
131 break;
132 }
133 }
134 }
135
136 char src1_str[128] = { 0 };
137 if (src1) {
138 snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, common_ggml_ne_string(src1).c_str());
139 }
140
141 if (matches_filter) {
142 LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type),
143 ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "",
144 common_ggml_ne_string(t).c_str());
145 }
146
147 const bool is_host = ggml_backend_buffer_is_host(t->buffer);
148
149 if (!is_host) {
150 auto n_bytes = ggml_nbytes(t);
151 cb_data->data.resize(n_bytes);
152 ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes);
153 }
154
155 if (!ggml_is_quantized(t->type) && matches_filter) {
156 uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
157 common_debug_print_tensor<abort_on_nan>(data, t->type, t->ne, t->nb, 3);
158 }
159
160 return true;
161}
162
// Explicit template instantiations so the definitions above can live in this
// translation unit while other TUs link against the <false>/<true> variants.
template bool common_debug_cb_eval<false>(ggml_tensor *, bool, void *);
template bool common_debug_cb_eval<true>(ggml_tensor *, bool, void *);
template void common_debug_print_tensor<false>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
template void common_debug_print_tensor<true>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);