1#ifndef HTP_MSG_H
2#define HTP_MSG_H
3
4#include <assert.h>
5
// ggml-common.h must be included prior to this header
7
// Bit mask that enables the individual stages of the Ops.
// Used for debugging and profiling.
enum {
    HTP_OPMASK_QUEUE    = 0x1,  // bit 0: enable queueing (i.e. calls into the DSP)
    HTP_OPMASK_QUANTIZE = 0x2,  // bit 1: enable quantize
    HTP_OPMASK_COMPUTE  = 0x4,  // bit 2: enable compute
};
15
// Op flags (independent bits, may be OR-ed together)
enum {
    HTP_OPFLAGS_SKIP_QUANTIZE = 0x1,  // bit 0: skip dynamic quantization (reuse quantized tensors)
    HTP_OPFLAGS_SKIP_COMPUTE  = 0x2,  // bit 1: skip actual computation (used for profiling)
    HTP_OPFLAGS_EARLY_WAKEUP  = 0x4,  // bit 2: send early wakeup notification
};
22
// Status codes (see the HTP_STATUS_... note on htp_general_rsp.status).
// Numbering starts at 1, so a zeroed status field matches none of these.
enum htp_status {
    HTP_STATUS_OK             = 1,  // success
    HTP_STATUS_INTERNAL_ERR   = 2,  // internal error
    HTP_STATUS_NO_SUPPORT     = 3,  // not supported
    HTP_STATUS_INVAL_PARAMS   = 4,  // invalid parameters
    HTP_STATUS_VTCM_TOO_SMALL = 5,  // VTCM too small
};
30
// Tensor data types understood by the HTP side.
//
// The values must match the ggml_type.
// They are duplicated here because the full ggml.h can't be included in the
// htp build; static_asserts in the cpp code keep the two in sync.
enum htp_data_type {
    HTP_TYPE_F32   = 0,
    HTP_TYPE_F16   = 1,
    HTP_TYPE_Q4_0  = 2,
    HTP_TYPE_Q8_0  = 8,
    HTP_TYPE_I32   = 26,
    HTP_TYPE_I64   = 27,
    HTP_TYPE_MXFP4 = 39,

    // Implicitly 40 (largest value + 1); the values above are sparse, so this
    // is an upper bound on type values, not the number of enumerators.
    HTP_TYPE_COUNT
};
44
// Op identifiers.
// Do not reorder the first 4 (their values are used as an index).
enum htp_op {
    // Element-wise binary ops: fixed values, used as an index
    HTP_OP_MUL = 0,
    HTP_OP_ADD = 1,
    HTP_OP_SUB = 2,
    HTP_OP_DIV = 3,

    // Remaining ops are numbered sequentially from 4
    HTP_OP_MUL_MAT,
    HTP_OP_MUL_MAT_ID,
    HTP_OP_RMS_NORM,
    HTP_OP_UNARY_SILU,
    HTP_OP_UNARY_GELU,
    HTP_OP_GLU_SWIGLU,
    HTP_OP_GLU_SWIGLU_OAI,
    HTP_OP_GLU_GEGLU,
    HTP_OP_SOFTMAX,
    HTP_OP_ADD_ID,
    HTP_OP_ROPE,
    HTP_OP_FLASH_ATTN_EXT,
    HTP_OP_SET_ROWS,
    HTP_OP_GET_ROWS,
    HTP_OP_SCALE,
    HTP_OP_CPY,
    HTP_OP_ARGSORT,
    HTP_OP_SQR,
    HTP_OP_SQRT,
    HTP_OP_SUM_ROWS,

    // Last enumerator; one past the last real op
    INVALID
};
73
74static inline size_t htp_t_block_size(uint32_t t) {
75 switch (t) {
76 case HTP_TYPE_F32:
77 return 1;
78 case HTP_TYPE_F16:
79 return 1;
80 case HTP_TYPE_Q4_0:
81 return QK4_0;
82 case HTP_TYPE_Q8_0:
83 return QK8_0;
84 case HTP_TYPE_MXFP4:
85 return QK_MXFP4;
86 default:
87 assert(0 && "unsupported HTP data type");
88 }
89 return 0;
90}
91
92static inline size_t htp_type_nbytes(uint32_t t) {
93 switch (t) {
94 case HTP_TYPE_F32:
95 return 4;
96 case HTP_TYPE_F16:
97 return 2;
98 case HTP_TYPE_Q4_0:
99 return sizeof(block_q4_0);
100 case HTP_TYPE_Q8_0:
101 return sizeof(block_q8_0);
102 case HTP_TYPE_MXFP4:
103 return sizeof(block_mxfp4);
104 default:
105 assert(0 && "unsupported HTP data type");
106 }
107 return 0;
108}
109
// Internal (repacked) types; each group size below is 256.
// Q4_0:  4x Q4_0 blocks packed with next 4x Q4_0 blocks (size in bytes 128)
#define QK_Q4_0x4x2  256
// Q8_0:  4x Q8_0 blocks concat with next 4x Q8_0 blocks
#define QK_Q8_0x4x2  256
// MXFP4: 4x MXFP4 blocks concat with next 4x MXFP4 blocks
#define QK_MXFP4x4x2 256
114
// Number of dimensions carried by every tensor descriptor.
#define HTP_MAX_DIMS 4

// Tensor descriptor embedded in the request messages.
struct htp_tensor {
    // Buffer offset in the messages, and data pointer on the NSP
    uint32_t data;
    // Data type
    uint32_t type;
    // Number of elements
    uint32_t ne[HTP_MAX_DIMS];
    // Stride in bytes (see ggml.h ggml_tensor)
    uint32_t nb[HTP_MAX_DIMS];
};
123
124#define HTP_MAX_OP_PARAMS 64
125
126struct htp_general_req {
127 uint32_t op; // GGML/HTP Op
128 int32_t op_params[HTP_MAX_OP_PARAMS / sizeof(int32_t)];
129 // Params for the op, e.g. epsilon of RMS norm
130 uint32_t flags; // Request flags
131
132 struct htp_tensor src0; // Input0 tensor
133 struct htp_tensor src1; // Input1 tensor
134 struct htp_tensor src2; // Input2 tensor
135 struct htp_tensor src3; // Input3 tensor
136 struct htp_tensor src4; // Input4 tensor
137 struct htp_tensor dst; // Output tensor
138
139 // should be multiple of 64 bytes (cacheline)
140};
141
// General response message; padded to exactly 64 bytes.
struct htp_general_rsp {
    // GGML/HTP Op
    uint32_t op;
    // HTP_STATUS_...
    uint32_t status;
    // Number of usec per request
    uint32_t prof_usecs;
    // Number of cycles per request
    uint32_t prof_cycles;
    // Number of instruction packets per request
    uint32_t prof_pkts;
    // Pad to 64 bytes: 64 minus the five 32-bit fields above (= 44)
    uint8_t  unused[64 - 5 * sizeof(uint32_t)];
};
150
// Maximum message size: the size of a general request.
#define HTP_MAX_MESSAGE_SIZE   (sizeof(struct htp_general_req))
// Upper bound on packet buffers (presumably per packet - confirm against users).
#define HTP_MAX_PACKET_BUFFERS 8
153
154#endif /* HTP_MSG_H */