diff options
Diffstat (limited to 'llama.cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h b/llama.cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h new file mode 100644 index 0000000..b7b5867 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-hexagon/htp/hex-fastdiv.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | #ifndef HEX_FASTDIV_H | ||
| 2 | #define HEX_FASTDIV_H | ||
| 3 | |||
| 4 | // See https://gmplib.org/~tege/divcnst-pldi94.pdf figure 4.1. | ||
| 5 | // Precompute mp (m' in the paper) and L such that division | ||
| 6 | // can be computed using a multiply (high 32b of 64b result) | ||
| 7 | // and a shift: | ||
| 8 | // | ||
| 9 | // n/d = (mulhi(n, mp) + n) >> L; | ||
| 10 | struct fastdiv_values { | ||
| 11 | uint32_t mp; | ||
| 12 | uint32_t l; | ||
| 13 | }; | ||
| 14 | |||
| 15 | static inline struct fastdiv_values init_fastdiv_values(uint32_t d) { | ||
| 16 | struct fastdiv_values result = { 0, 0 }; | ||
| 17 | // compute L = ceil(log2(d)); | ||
| 18 | while (result.l < 32 && ((uint32_t) 1 << result.l) < d) { | ||
| 19 | ++(result.l); | ||
| 20 | } | ||
| 21 | |||
| 22 | result.mp = (uint32_t) (((uint64_t) 1 << 32) * (((uint64_t) 1 << result.l) - d) / d + 1); | ||
| 23 | return result; | ||
| 24 | } | ||
| 25 | |||
| 26 | static inline uint32_t fastdiv(uint32_t n, const struct fastdiv_values * vals) { | ||
| 27 | // Compute high 32 bits of n * mp | ||
| 28 | const uint32_t hi = (uint32_t) (((uint64_t) n * vals->mp) >> 32); // mulhi(n, mp) | ||
| 29 | // add n, apply bit shift | ||
| 30 | return (hi + n) >> vals->l; | ||
| 31 | } | ||
| 32 | |||
| 33 | static inline uint32_t fastmodulo(uint32_t n, uint32_t d, const struct fastdiv_values * vals) { | ||
| 34 | return n - fastdiv(n, vals) * d; | ||
| 35 | } | ||
| 36 | |||
| 37 | #endif /* HEX_FASTDIV_H */ | ||
