diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h | 100 |
1 file changed, 100 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h b/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h new file mode 100644 index 0000000..6a1bfde --- /dev/null +++ b/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | #ifndef HVX_FLOOR_H | ||
| 2 | #define HVX_FLOOR_H | ||
| 3 | |||
| 4 | #include <stdbool.h> | ||
| 5 | #include <stdint.h> | ||
| 6 | |||
| 7 | #include "hvx-base.h" | ||
| 8 | |||
// IEEE-754 single-precision (binary32) bit-field constants used by the
// helpers below: 1 sign bit, 8 exponent bits (bias 127), 23 mantissa
// bits with an implicit leading 1.
#define IEEE_VSF_EXPLEN (8)          // exponent field width, in bits
#define IEEE_VSF_EXPBIAS (127)       // exponent bias
#define IEEE_VSF_EXPMASK (0xFF)      // exponent mask (after shifting right by MANTLEN)
#define IEEE_VSF_MANTLEN (23)        // mantissa field width, in bits
#define IEEE_VSF_MANTMASK (0x7FFFFF) // mantissa bits mask
#define IEEE_VSF_MIMPMASK (0x800000) // position of the implicit leading-1 bit
| 15 | |||
// Per-lane truncation toward zero of a vector of IEEE-754 f32 values.
//
// Each 32-bit lane of `in_vec` is decoded manually: the biased exponent is
// extracted, the mantissa is recombined with its implicit leading 1, and the
// result is arithmetic-right-shifted so only the integer part remains.
//
// NOTE(review): the output lanes appear to be signed 32-bit *integers*
// (shifted mantissa with sign applied), not f32 bit patterns — confirm
// against callers.
// NOTE(review): no special handling is visible for Inf/NaN inputs, or for
// exponents large enough to make the shift amount negative (|x| >= 2^23) —
// presumably callers guarantee inputs in a safe range; verify.
//
// The expressions `in_vec >> n`, `&=`, `-=` and unary `-` on HVX_Vector rely
// on the Hexagon compiler's vector operator extensions, not standard C.
static inline HVX_Vector hvx_vec_truncate_f32(HVX_Vector in_vec) {
    HVX_Vector mask_mant_v = Q6_V_vsplat_R(IEEE_VSF_MANTMASK);
    HVX_Vector mask_impl_v = Q6_V_vsplat_R(IEEE_VSF_MIMPMASK);
    HVX_Vector const_zero_v = Q6_V_vzero();

    // lanes whose sign bit is set (x < 0), via signed word compare against 0
    HVX_VectorPred q_negative = Q6_Q_vcmp_gt_VwVw(const_zero_v, in_vec);

    // extract the unbiased exponent: (bits >> 23) & 0xFF, minus bias 127
    HVX_Vector expval_v = in_vec >> IEEE_VSF_MANTLEN;
    expval_v &= IEEE_VSF_EXPMASK;
    expval_v -= IEEE_VSF_EXPBIAS;

    // negative exp == fractional value
    HVX_VectorPred q_negexp = Q6_Q_vcmp_gt_VwVw(const_zero_v, expval_v);

    HVX_Vector rshift_v = IEEE_VSF_MANTLEN - expval_v; // fractional bits - exp shift

    HVX_Vector mant_v = in_vec & mask_mant_v;               // obtain mantissa
    HVX_Vector vout = Q6_Vw_vadd_VwVw(mant_v, mask_impl_v); // add implicit 1.0

    vout = Q6_Vw_vasr_VwVw(vout, rshift_v);             // shift to obtain truncated integer
    vout = Q6_V_vmux_QVV(q_negexp, const_zero_v, vout); // expval<0 -> 0 (|x| < 1 truncates to 0)

    // two's-complement negate, selected below only for originally-negative lanes
    HVX_Vector neg_vout = -vout;

    vout = Q6_V_vmux_QVV(q_negative, neg_vout, vout); // handle negatives

    return (vout);
}
| 44 | |||
// Per-lane floor() of a vector of IEEE-754 f32 values; the result lanes stay
// in f32 format (fraction bits of the encoding are cleared in place, and the
// literal f32 constant -1.0f / 0xbf800000 is substituted where needed).
//
// Strategy, per lane:
//   * |x| < 1 (unbiased exponent < 0): floor is 0.0 for x > 0, -1.0 for x < 0.
//   * exponent < 23: the value may carry fraction bits; mask them off. For
//     negative non-integral values, first add 1 ULP-of-the-integer-part so
//     clearing the fraction rounds toward -inf rather than toward zero.
//   * exponent >= 23, or no fraction bits set: x is already integral; pass
//     it through unchanged.
//
// NOTE(review): Inf/NaN lanes (exponent field 0xFF) fall into the
// "already integral" pass-through path here — presumably acceptable for
// Inf, but NaN payloads pass through unchanged too; confirm callers.
//
// The expressions `>>`, `&=`, `-=`, `>>=` on HVX_Vector rely on the Hexagon
// compiler's vector operator extensions, not standard C.
static inline HVX_Vector hvx_vec_floor_f32(HVX_Vector in_vec) {
    HVX_Vector mask_mant_v = Q6_V_vsplat_R(IEEE_VSF_MANTMASK);
    HVX_Vector mask_impl_v = Q6_V_vsplat_R(IEEE_VSF_MIMPMASK);
    HVX_Vector const_mnlen_v = Q6_V_vsplat_R(IEEE_VSF_MANTLEN);
    HVX_Vector const_zero_v = Q6_V_vzero();
    HVX_Vector const_negone_v = Q6_V_vsplat_R(0xbf800000); // -1 IEEE vsf

    // lanes with the sign bit set (x < 0), via signed word compare against 0
    HVX_VectorPred q_negative = Q6_Q_vcmp_gt_VwVw(const_zero_v, in_vec);

    // extract the unbiased exponent: (bits >> 23) & 0xFF, minus bias 127
    HVX_Vector expval_v = in_vec >> IEEE_VSF_MANTLEN;
    expval_v &= IEEE_VSF_EXPMASK;
    expval_v -= IEEE_VSF_EXPBIAS;

    // classify lanes: exp<0 (|x|<1), exp<23 (fraction bits may exist),
    // and the sign-split refinements of the exp<0 case
    HVX_VectorPred q_negexp = Q6_Q_vcmp_gt_VwVw(const_zero_v, expval_v);
    HVX_VectorPred q_expltmn = Q6_Q_vcmp_gt_VwVw(const_mnlen_v, expval_v);
    HVX_VectorPred q_negexp_pos = Q6_Q_vcmp_gtand_QVwVw(q_negexp, in_vec, const_zero_v);
    HVX_VectorPred q_negexp_neg = Q6_Q_vcmp_gtand_QVwVw(q_negexp, const_zero_v, in_vec);

    // if expval < 0 (q_negexp) // <0, floor is 0
    //   if vin > 0
    //     floor = 0
    //   if vin < 0
    //     floor = -1
    // if expval < mant_len (q_expltmn) // >0, but fraction may exist
    //   get sign (q_negative)
    //   mask >> expval // fraction bits to mask off
    //   vout = ~(mask) // apply mask to remove fraction
    //   if (qneg) // negative floor is one less (more, sign bit for neg)
    //     vout += ((impl_mask) >> expval)
    //   if (mask && vin)
    //     vout = vin
    // else // already an integer
    //   ; // no change

    // compute floor
    mask_mant_v >>= expval_v;                          // fraction bits for this exponent
    HVX_Vector neg_addin_v = mask_impl_v >> expval_v;  // +1 integer step, in-representation
    HVX_Vector vout_neg_addin = Q6_Vw_vadd_VwVw(in_vec, neg_addin_v);
    HVX_Vector vout = Q6_V_vmux_QVV(q_negative, vout_neg_addin, in_vec);

    HVX_Vector mask_chk_v = Q6_V_vand_VV(in_vec, mask_mant_v); // chk if bits set
    HVX_VectorPred q_integral = Q6_Q_vcmp_eq_VwVw(const_zero_v, mask_chk_v);

    HVX_Vector not_mask_v = Q6_V_vnot_V(mask_mant_v); // frac bits to clear
    HVX_Vector vfrfloor_v = Q6_V_vand_VV(vout, not_mask_v); // clear frac bits

    // compose the result; later muxes take precedence over earlier ones,
    // so the exp<0 special cases override the general fraction-clearing path
    vout = in_vec;
    vout = Q6_V_vmux_QVV(q_expltmn, vfrfloor_v, vout);      // expval<mant
    vout = Q6_V_vmux_QVV(q_integral, in_vec, vout);         // integral values
    vout = Q6_V_vmux_QVV(q_negexp_pos, const_zero_v, vout); // expval<0 x>0 -> 0
    vout = Q6_V_vmux_QVV(q_negexp_neg, const_negone_v, vout); // expval<0 x<0 -> -1

    return vout;
}
| 99 | |||
| 100 | #endif /* HVX_FLOOR_H */ | ||
