1#include "debug.h"
  2
  3#include "log.h"
  4
  5#include <cmath>
  6#include <string>
  7
  8static std::string common_ggml_ne_string(const ggml_tensor * t) {
  9    std::string str;
 10    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
 11        str += std::to_string(t->ne[i]);
 12        if (i + 1 < GGML_MAX_DIMS) {
 13            str += ", ";
 14        }
 15    }
 16    return str;
 17}
 18
 19static float common_ggml_get_float_value(const uint8_t * data,
 20                           ggml_type       type,
 21                           const size_t *  nb,
 22                           size_t          i0,
 23                           size_t          i1,
 24                           size_t          i2,
 25                           size_t          i3) {
 26    size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
 27    float  v;
 28    if (type == GGML_TYPE_F16) {
 29        v = ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]);
 30    } else if (type == GGML_TYPE_F32) {
 31        v = *(const float *) &data[i];
 32    } else if (type == GGML_TYPE_I64) {
 33        v = (float) *(const int64_t *) &data[i];
 34    } else if (type == GGML_TYPE_I32) {
 35        v = (float) *(const int32_t *) &data[i];
 36    } else if (type == GGML_TYPE_I16) {
 37        v = (float) *(const int16_t *) &data[i];
 38    } else if (type == GGML_TYPE_I8) {
 39        v = (float) *(const int8_t *) &data[i];
 40    } else if (type == GGML_TYPE_BF16) {
 41        v = ggml_bf16_to_fp32(*(const ggml_bf16_t *) &data[i]);
 42    } else {
 43        GGML_ABORT("fatal error");
 44    }
 45    return v;
 46}
 47
 48#define INDENT "    "
 49
 50template <bool abort>
 51void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
 52    GGML_ASSERT(n > 0);
 53    float sum = 0;
 54    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
 55        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
 56            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
 57                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
 58                    const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
 59                    sum += v;
 60                }
 61            }
 62        }
 63    }
 64    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
 65        LOG(INDENT "[\n");
 66        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
 67            if (i2 == n && ne[2] > 2 * n) {
 68                LOG(INDENT INDENT "..., \n");
 69                i2 = ne[2] - n;
 70            }
 71            LOG(INDENT INDENT "[\n");
 72            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
 73                if (i1 == n && ne[1] > 2 * n) {
 74                    LOG(INDENT INDENT INDENT "..., \n");
 75                    i1 = ne[1] - n;
 76                }
 77                LOG(INDENT INDENT INDENT "[");
 78                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
 79                    if (i0 == n && ne[0] > 2 * n) {
 80                        LOG("   ..., ");
 81                        i0 = ne[0] - n;
 82                    }
 83                    const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
 84                    LOG("%12.4f", v);
 85                    if (i0 < ne[0] - 1) {
 86                        LOG(", ");
 87                    }
 88                }
 89                LOG("  ],\n");
 90            }
 91            LOG(INDENT INDENT "],\n");
 92        }
 93        LOG(INDENT "]\n");
 94        LOG(INDENT "sum = %f\n", sum);
 95    }
 96
 97    if constexpr (abort) {
 98        if (std::isnan(sum)) {
 99            LOG("encountered NaN - aborting\n");
100            exit(0);
101        }
102    }
103}
104
105/**
106 * GGML operations callback during the graph execution.
107 *
108 * @param t current tensor
109 * @param ask when ask is true, the scheduler wants to know if we are interested in data from this tensor
110 *            if we return true, a follow-up call will be made with ask=false in which we can do the actual collection.
111 *            see ggml_backend_sched_eval_callback
112 * @param user_data user data to pass at each call back
113 * @return true to receive data or continue the graph, false otherwise
114 */
115template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
116    auto * cb_data = (base_callback_data *) user_data;
117
118    const struct ggml_tensor * src0 = t->src[0];
119    const struct ggml_tensor * src1 = t->src[1];
120
121    if (ask) {
122        return true;  // Always retrieve data
123    }
124
125    bool matches_filter = cb_data->tensor_filters.empty();
126
127    if (!matches_filter) {
128        for (const auto & filter : cb_data->tensor_filters) {
129            if (std::regex_search(t->name, filter)) {
130                matches_filter = true;
131                break;
132            }
133        }
134    }
135
136    char src1_str[128] = { 0 };
137    if (src1) {
138        snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, common_ggml_ne_string(src1).c_str());
139    }
140
141    if (matches_filter) {
142        LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type),
143            ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "",
144            common_ggml_ne_string(t).c_str());
145    }
146
147    const bool is_host = ggml_backend_buffer_is_host(t->buffer);
148
149    if (!is_host) {
150        auto n_bytes = ggml_nbytes(t);
151        cb_data->data.resize(n_bytes);
152        ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes);
153    }
154
155    if (!ggml_is_quantized(t->type) && matches_filter) {
156        uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
157        common_debug_print_tensor<abort_on_nan>(data, t->type, t->ne, t->nb, 3);
158    }
159
160    return true;
161}
162
// Explicit template instantiations: the template definitions live in this
// translation unit, so both abort-on-NaN variants (<true>/<false>) must be
// instantiated here for callers that only see the declarations.
template bool common_debug_cb_eval<false>(ggml_tensor *, bool, void *);
template bool common_debug_cb_eval<true>(ggml_tensor *, bool, void *);
template void common_debug_print_tensor<false>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
template void common_debug_print_tensor<true>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);