From b333b06772c89d96aacb5490d6a219fba7c09cc6 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Thu, 12 Feb 2026 20:57:17 +0100 Subject: Engage! --- llama.cpp/tests/test-double-float.cpp | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 llama.cpp/tests/test-double-float.cpp (limited to 'llama.cpp/tests/test-double-float.cpp') diff --git a/llama.cpp/tests/test-double-float.cpp b/llama.cpp/tests/test-double-float.cpp new file mode 100644 index 0000000..6aac473 --- /dev/null +++ b/llama.cpp/tests/test-double-float.cpp @@ -0,0 +1,57 @@ +// These tests may take a long time! +// They are to prove that conversion from double to float of various functions in ggml.c doesn't affect the result. +// This is done by checking all finite (non-NaN, non-infinite) floats. + +#undef NDEBUG +#include +#if !defined(__riscv) && !defined(__s390__) && !defined(__ARM_NEON) +#include +#endif +#include +#include +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdouble-promotion" + +// ggml.c::quantize_row_q4_0_ref +inline static uint8_t round_orig(float v0) { return ((int8_t) (round(v0))) + 8; } + +// ggml.c::ggml_silu_f32 +inline static float silu_orig(float x) { + return x/(1.0 + exp(-x)); +} + +#pragma GCC diagnostic pop + +// ggml.c::quantize_row_q4_0_ref +inline static uint8_t round_float(float v0) { return (int8_t)roundf(v0) + 8; } + +// ggml.c::ggml_silu_f32 +inline static float silu_float(float x) { + return x/(1.0f + expf(-x)); +} + +int main(void) { + uint32_t x = UINT32_MAX; + do { + float f; + memcpy(&f, &x, sizeof(x)); + assert(!std::isfinite(f) || (round_orig(f) == round_float(f))); + } while (x--); + +#ifdef __F16C__ + // GELU and SILU implementations are used with a FP16 lookup table. + // The original and float-only results are not equal for all inputs after converting to FP16. + // GELU is an approximation anyway (tanh), not tested here. + // For SILU, verify that the results are at least the closest floating point numbers, if the FP16 values don't match. + for (x = 0; x <= UINT16_MAX; x++) { + float f = _cvtsh_ss(x); + const float so = silu_orig(f); + const float sf = silu_float(f); + assert( (_cvtss_sh(so, 0) == _cvtss_sh(sf, 0)) + || (nextafterf(so, sf) == sf) + || (nextafterf(sf, so) == so)); + } +#endif +} -- cgit v1.2.3