1#ifndef HVX_SCALE_H
2#define HVX_SCALE_H
3
4#include <assert.h>
5#include <stddef.h>
6#include <stdint.h>
7
8#include "hvx-base.h"
9
// Shared loop body for the hvx_scale_f32_* variants: multiplies n floats read
// from src by scale and writes the products to dst.
//
// This macro is not hygienic. It must expand in a scope that already defines:
//   dst   - byte pointer to the output floats
//   src   - byte pointer to the input floats (only read)
//   n     - element count; it is mixed with uint32_t arithmetic below, so
//           callers are expected to pass n >= 0
//   scale - float multiplier, splatted across one vector
//
// Macro arguments:
//   dst_type  - vector type used to store whole vectors through dst
//   src_type  - vector type used to load vectors through src
//   vec_store - invoked as vec_store(addr, nbytes, vec) for the partial tail
//
// NOTE(review): the tail still loads one whole vector from src although only
// nloe elements remain; confirm callers keep those trailing bytes readable.
#define hvx_scale_f32_loop_body(dst_type, src_type, vec_store) \
    do { \
        dst_type * restrict vdst = (dst_type *) dst; \
        /* src is only read: keep its const qualifier instead of casting it away */ \
        const src_type * restrict vsrc = (const src_type *) src; \
        \
        HVX_Vector vs = hvx_vec_splat_f32(scale); \
        \
        const uint32_t elem_size = sizeof(float); \
        const uint32_t epv  = 128 / elem_size; /* elements per 128-byte vector */ \
        const uint32_t nvec = n / epv;         /* number of whole vectors */ \
        const uint32_t nloe = n % epv;         /* leftover elements after the whole vectors */ \
        \
        uint32_t i = 0; \
        \
        _Pragma("unroll(4)") \
        for (; i < nvec; ++i) { \
            HVX_Vector v = Q6_Vqf32_vmpy_VsfVsf(vsrc[i], vs); \
            vdst[i] = Q6_Vsf_equals_Vqf32(v); \
        } \
        if (nloe) { \
            /* partial tail: compute a full vector, store only nloe * elem_size bytes */ \
            HVX_Vector v = Q6_Vqf32_vmpy_VsfVsf(vsrc[i], vs); \
            vec_store((void *) &vdst[i], nloe * elem_size, Q6_Vsf_equals_Vqf32(v)); \
        } \
    } while(0)
34
35static inline void hvx_scale_f32_aa(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale) {
36 assert((size_t) dst % 128 == 0);
37 assert((size_t) src % 128 == 0);
38 hvx_scale_f32_loop_body(HVX_Vector, HVX_Vector, hvx_vec_store_a);
39}
40
41static inline void hvx_scale_f32_au(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale) {
42 assert((size_t) dst % 128 == 0);
43 hvx_scale_f32_loop_body(HVX_Vector, HVX_UVector, hvx_vec_store_a);
44}
45
46static inline void hvx_scale_f32_ua(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale) {
47 assert((size_t) src % 128 == 0);
48 hvx_scale_f32_loop_body(HVX_UVector, HVX_Vector, hvx_vec_store_u);
49}
50
51static inline void hvx_scale_f32_uu(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale) {
52 hvx_scale_f32_loop_body(HVX_UVector, HVX_UVector, hvx_vec_store_u);
53}
54
// Entry point for dst[k] = src[k] * scale over n floats.
// Inspects the low 7 address bits of each buffer and forwards to the
// _aa/_au/_ua/_uu variant matching the 128-byte alignment of dst (first
// letter) and src (second letter).
static inline void hvx_scale_f32(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale) {
    const int dst_aligned = ((size_t) dst & 127) == 0;
    const int src_aligned = ((size_t) src & 127) == 0;

    if (dst_aligned && src_aligned) {
        hvx_scale_f32_aa(dst, src, n, scale);
    } else if (dst_aligned) {
        hvx_scale_f32_au(dst, src, n, scale);
    } else if (src_aligned) {
        hvx_scale_f32_ua(dst, src, n, scale);
    } else {
        hvx_scale_f32_uu(dst, src, n, scale);
    }
}
70
// Shared loop body for the hvx_scale_offset_f32_* variants: for n floats,
// computes src[k] * scale + offset and writes the result to dst.
//
// This macro is not hygienic. It must expand in a scope that already defines:
//   dst    - byte pointer to the output floats
//   src    - byte pointer to the input floats (only read)
//   n      - element count; it is mixed with uint32_t arithmetic below, so
//            callers are expected to pass n >= 0
//   scale  - float multiplier, splatted across one vector
//   offset - float addend, splatted across one vector
//
// Macro arguments:
//   dst_type  - vector type used to store whole vectors through dst
//   src_type  - vector type used to load vectors through src
//   vec_store - invoked as vec_store(addr, nbytes, vec) for the partial tail
//
// NOTE(review): the tail still loads one whole vector from src although only
// nloe elements remain; confirm callers keep those trailing bytes readable.
#define hvx_scale_offset_f32_loop_body(dst_type, src_type, vec_store) \
    do { \
        dst_type * restrict vdst = (dst_type *) dst; \
        /* src is only read: keep its const qualifier instead of casting it away */ \
        const src_type * restrict vsrc = (const src_type *) src; \
        \
        HVX_Vector vs = hvx_vec_splat_f32(scale); \
        HVX_Vector vo = hvx_vec_splat_f32(offset); \
        \
        const uint32_t elem_size = sizeof(float); \
        const uint32_t epv  = 128 / elem_size; /* elements per 128-byte vector */ \
        const uint32_t nvec = n / epv;         /* number of whole vectors */ \
        const uint32_t nloe = n % epv;         /* leftover elements after the whole vectors */ \
        \
        uint32_t i = 0; \
        \
        _Pragma("unroll(4)") \
        for (; i < nvec; ++i) { \
            HVX_Vector v = Q6_Vqf32_vadd_Vqf32Vsf(Q6_Vqf32_vmpy_VsfVsf(vsrc[i], vs), vo); \
            vdst[i] = Q6_Vsf_equals_Vqf32(v); \
        } \
        if (nloe) { \
            /* partial tail: compute a full vector, store only nloe * elem_size bytes */ \
            HVX_Vector v = Q6_Vqf32_vadd_Vqf32Vsf(Q6_Vqf32_vmpy_VsfVsf(vsrc[i], vs), vo); \
            vec_store((void *) &vdst[i], nloe * elem_size, Q6_Vsf_equals_Vqf32(v)); \
        } \
    } while(0)
96
97static inline void hvx_scale_offset_f32_aa(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale, const float offset) {
98 assert((size_t) dst % 128 == 0);
99 assert((size_t) src % 128 == 0);
100 hvx_scale_offset_f32_loop_body(HVX_Vector, HVX_Vector, hvx_vec_store_a);
101}
102
103static inline void hvx_scale_offset_f32_au(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale, const float offset) {
104 assert((size_t) dst % 128 == 0);
105 hvx_scale_offset_f32_loop_body(HVX_Vector, HVX_UVector, hvx_vec_store_a);
106}
107
108static inline void hvx_scale_offset_f32_ua(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale, const float offset) {
109 assert((size_t) src % 128 == 0);
110 hvx_scale_offset_f32_loop_body(HVX_UVector, HVX_Vector, hvx_vec_store_u);
111}
112
113static inline void hvx_scale_offset_f32_uu(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale, const float offset) {
114 hvx_scale_offset_f32_loop_body(HVX_UVector, HVX_UVector, hvx_vec_store_u);
115}
116
// Entry point for dst[k] = src[k] * scale + offset over n floats.
// Inspects the low 7 address bits of each buffer and forwards to the
// _aa/_au/_ua/_uu variant matching the 128-byte alignment of dst (first
// letter) and src (second letter).
static inline void hvx_scale_offset_f32(uint8_t * restrict dst, const uint8_t * restrict src, const int n, const float scale, const float offset) {
    const int dst_aligned = ((size_t) dst & 127) == 0;
    const int src_aligned = ((size_t) src & 127) == 0;

    if (dst_aligned && src_aligned) {
        hvx_scale_offset_f32_aa(dst, src, n, scale, offset);
    } else if (dst_aligned) {
        hvx_scale_offset_f32_au(dst, src, n, scale, offset);
    } else if (src_aligned) {
        hvx_scale_offset_f32_ua(dst, src, n, scale, offset);
    } else {
        hvx_scale_offset_f32_uu(dst, src, n, scale, offset);
    }
}
132
133#endif // HVX_SCALE_H