author    Mitja Felicijan <mitja.felicijan@gmail.com>  2026-02-12 20:57:17 +0100
committer Mitja Felicijan <mitja.felicijan@gmail.com>  2026-02-12 20:57:17 +0100
commit    b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree      211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h')
-rw-r--r--  llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h | 100
1 file changed, 100 insertions(+), 0 deletions(-)
diff --git a/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h b/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h
new file mode 100644
index 0000000..6a1bfde
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-hexagon/htp/hvx-floor.h
@@ -0,0 +1,100 @@
#ifndef HVX_FLOOR_H
#define HVX_FLOOR_H

#include <stdbool.h>
#include <stdint.h>

#include "hvx-base.h"

#define IEEE_VSF_EXPLEN   (8)
#define IEEE_VSF_EXPBIAS  (127)
#define IEEE_VSF_EXPMASK  (0xFF)
#define IEEE_VSF_MANTLEN  (23)
#define IEEE_VSF_MANTMASK (0x7FFFFF)
#define IEEE_VSF_MIMPMASK (0x800000)

static inline HVX_Vector hvx_vec_truncate_f32(HVX_Vector in_vec) {
    HVX_Vector mask_mant_v  = Q6_V_vsplat_R(IEEE_VSF_MANTMASK);
    HVX_Vector mask_impl_v  = Q6_V_vsplat_R(IEEE_VSF_MIMPMASK);
    HVX_Vector const_zero_v = Q6_V_vzero();

    HVX_VectorPred q_negative = Q6_Q_vcmp_gt_VwVw(const_zero_v, in_vec);

    HVX_Vector expval_v = in_vec >> IEEE_VSF_MANTLEN;
    expval_v &= IEEE_VSF_EXPMASK;
    expval_v -= IEEE_VSF_EXPBIAS;

    // negative unbiased exponent -> |x| < 1.0, a purely fractional value
    HVX_VectorPred q_negexp = Q6_Q_vcmp_gt_VwVw(const_zero_v, expval_v);

    HVX_Vector rshift_v = IEEE_VSF_MANTLEN - expval_v; // fraction bits to shift out

    HVX_Vector mant_v = in_vec & mask_mant_v;                 // extract mantissa
    HVX_Vector vout   = Q6_Vw_vadd_VwVw(mant_v, mask_impl_v); // add implicit 1.0

    vout = Q6_Vw_vasr_VwVw(vout, rshift_v);             // shift to obtain truncated integer
    vout = Q6_V_vmux_QVV(q_negexp, const_zero_v, vout); // expval < 0 -> 0

    HVX_Vector neg_vout = -vout;

    vout = Q6_V_vmux_QVV(q_negative, neg_vout, vout); // handle negatives

    return vout;
}
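
// Worked example (illustrative, not part of the original patch): a lane
// holding 5.75f is 0x40B80000, so
//   expval      = ((0x40B80000 >> 23) & 0xFF) - 127 = 2
//   mant | impl = 0x380000 | 0x800000 = 0xB80000
//   rshift      = 23 - 2 = 21, and 0xB80000 >> 21 = 5 = trunc(5.75)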

static inline HVX_Vector hvx_vec_floor_f32(HVX_Vector in_vec) {
    HVX_Vector mask_mant_v    = Q6_V_vsplat_R(IEEE_VSF_MANTMASK);
    HVX_Vector mask_impl_v    = Q6_V_vsplat_R(IEEE_VSF_MIMPMASK);
    HVX_Vector const_mnlen_v  = Q6_V_vsplat_R(IEEE_VSF_MANTLEN);
    HVX_Vector const_zero_v   = Q6_V_vzero();
    HVX_Vector const_negone_v = Q6_V_vsplat_R(0xbf800000); // -1.0f in IEEE binary32

    HVX_VectorPred q_negative = Q6_Q_vcmp_gt_VwVw(const_zero_v, in_vec);

    HVX_Vector expval_v = in_vec >> IEEE_VSF_MANTLEN;
    expval_v &= IEEE_VSF_EXPMASK;
    expval_v -= IEEE_VSF_EXPBIAS;

    HVX_VectorPred q_negexp     = Q6_Q_vcmp_gt_VwVw(const_zero_v, expval_v);
    HVX_VectorPred q_expltmn    = Q6_Q_vcmp_gt_VwVw(const_mnlen_v, expval_v);
    HVX_VectorPred q_negexp_pos = Q6_Q_vcmp_gtand_QVwVw(q_negexp, in_vec, const_zero_v);
    HVX_VectorPred q_negexp_neg = Q6_Q_vcmp_gtand_QVwVw(q_negexp, const_zero_v, in_vec);

    // if expval < 0 (q_negexp)               // |x| < 1.0, no integral bits
    //     if vin > 0: floor = 0
    //     if vin < 0: floor = -1
    // else if expval < mant_len (q_expltmn)  // fraction bits may be set
    //     mask = mant_mask >> expval         // fraction bits to mask off
    //     if negative (q_negative)           // floor of a negative non-integer
    //         vin += impl_mask >> expval     //   is one integer step lower
    //     vout = vin & ~mask                 // clear the fraction bits
    //     if (vin & mask) == 0 (q_integral)  // no fraction bits were set:
    //         vout = vin                     //   already an integer, no change
    // else                                   // expval >= mant_len:
    //     vout = vin                         //   already an integer, no change

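    // Worked example (illustrative, not part of the original patch): a lane
    // holding -2.5f is 0xC0200000, so expval = 1 and
    //   mask       = 0x7FFFFF >> 1 = 0x3FFFFF    (in_vec & mask != 0: fractional)
    //   neg addin  : 0xC0200000 + (0x800000 >> 1) = 0xC0600000  (-3.5f)
    //   clear frac : 0xC0600000 & ~0x3FFFFF      = 0xC0400000  (-3.0f = floor(-2.5f))
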
    // compute floor
    mask_mant_v >>= expval_v;
    HVX_Vector neg_addin_v    = mask_impl_v >> expval_v;  // one integer step at this scale
    HVX_Vector vout_neg_addin = Q6_Vw_vadd_VwVw(in_vec, neg_addin_v);
    HVX_Vector vout           = Q6_V_vmux_QVV(q_negative, vout_neg_addin, in_vec);

    HVX_Vector mask_chk_v     = Q6_V_vand_VV(in_vec, mask_mant_v); // check if fraction bits are set
    HVX_VectorPred q_integral = Q6_Q_vcmp_eq_VwVw(const_zero_v, mask_chk_v);

    HVX_Vector not_mask_v = Q6_V_vnot_V(mask_mant_v);    // fraction bits to clear
    HVX_Vector vfrfloor_v = Q6_V_vand_VV(vout, not_mask_v); // clear the fraction bits

    vout = in_vec;
    vout = Q6_V_vmux_QVV(q_expltmn, vfrfloor_v, vout);        // expval < mant_len
    vout = Q6_V_vmux_QVV(q_integral, in_vec, vout);           // integral values pass through
    vout = Q6_V_vmux_QVV(q_negexp_pos, const_zero_v, vout);   // expval < 0, x > 0 -> 0
    vout = Q6_V_vmux_QVV(q_negexp_neg, const_negone_v, vout); // expval < 0, x < 0 -> -1

    return vout;
}

#endif /* HVX_FLOOR_H */
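
For readers who want to cross-check the lane-wise bit manipulation, here is a
minimal scalar sketch of the same floor algorithm in plain C, assuming the
IEEE-754 binary32 layout that the header's masks encode. scalar_floor_f32 is
an illustrative name and is not part of this patch; treat it as a reference
model rather than the implementation (edge cases such as -0.0f, inf, and NaN
are glossed over).

#include <stdint.h>
#include <string.h>

/* Hypothetical scalar reference for one 32-bit lane of hvx_vec_floor_f32(). */
static float scalar_floor_f32(float x) {
    uint32_t bits;
    memcpy(&bits, &x, sizeof(bits));          /* bit-cast without aliasing UB */

    int32_t expval = (int32_t)((bits >> 23) & 0xFF) - 127;

    if (expval < 0) {                         /* |x| < 1.0: no integral bits   */
        return (x < 0.0f) ? -1.0f : 0.0f;
    }
    if (expval >= 23) {                       /* no fraction bits (inf/NaN too) */
        return x;
    }

    uint32_t frac_mask = 0x7FFFFFu >> expval; /* fraction bits to mask off     */
    if ((bits & frac_mask) == 0) {            /* already an integer            */
        return x;
    }
    if (x < 0.0f) {                           /* negative non-integer: go one  */
        bits += 0x800000u >> expval;          /* step lower (neg_addin_v path) */
    }
    bits &= ~frac_mask;                       /* clear the fraction bits       */

    memcpy(&x, &bits, sizeof(x));
    return x;
}

For example, scalar_floor_f32(-2.5f) takes the negative branch traced in the
comments above and returns -3.0f, matching what each lane of
hvx_vec_floor_f32() produces.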