1#ifndef HTP_DMA_H
2#define HTP_DMA_H
3
4#include <HAP_farf.h>
5#include <hexagon_types.h>
6#include <stdbool.h>
7#include <stdint.h>
8
9#ifdef __cplusplus
10extern "C" {
11#endif
12
// Destination/source address pair describing one DMA transfer.
// Field order matters: the pair is also built with a positional initialiser.
typedef struct {
    void *       dst;  // where the transfer writes
    const void * src;  // where the transfer reads from (never written through this pointer)
} dma_ptr;
17
18typedef struct {
19 hexagon_udma_descriptor_type1_t * desc; // descriptor pointers
20 hexagon_udma_descriptor_type1_t * tail; // tail pointer
21 dma_ptr * dptr; // dst/src pointers
22 uint32_t push_idx;
23 uint32_t pop_idx;
24 uint32_t capacity;
25 uint32_t idx_mask;
26} dma_queue;
27
28dma_queue * dma_queue_create(size_t capacity);
29void dma_queue_delete(dma_queue * q);
30void dma_queue_flush(dma_queue * q);
31
// TODO: technically we don't need these and could use Q6_dmstart/wait/etc instead,
// but those do not always seem to compile properly.
34static inline void dmstart(void * next) {
35 asm volatile(" release(%0):at" : : "r"(next));
36 asm volatile(" dmstart(%0)" : : "r"(next));
37}
38
39static inline void dmlink(void * cur, void * next) {
40 asm volatile(" release(%0):at" : : "r"(next));
41 asm volatile(" dmlink(%0, %1)" : : "r"(cur), "r"(next));
42}
43
44static inline unsigned int dmpoll(void) {
45 unsigned int ret = 0;
46 asm volatile(" %0 = dmpoll" : "=r"(ret) : : "memory");
47 return ret;
48}
49
50static inline unsigned int dmwait(void) {
51 unsigned int ret = 0;
52 asm volatile(" %0 = dmwait" : "=r"(ret) : : "memory");
53 return ret;
54}
55
56static inline dma_ptr dma_make_ptr(void *dst, const void *src)
57{
58 dma_ptr p = { dst, src };
59 return p;
60}
61
62static inline bool dma_queue_push(dma_queue * q,
63 dma_ptr dptr,
64 size_t dst_row_size,
65 size_t src_row_size,
66 size_t width, // width in bytes. number of bytes to transfer per row
67 size_t nrows) {
68 if (((q->push_idx + 1) & q->idx_mask) == q->pop_idx) {
69 FARF(ERROR, "dma-push: queue full\n");
70 return false;
71 }
72
73 hexagon_udma_descriptor_type1_t * desc = &q->desc[q->push_idx];
74
75 desc->next = NULL;
76 desc->length = 0;
77 desc->desctype = HEXAGON_UDMA_DESC_DESCTYPE_TYPE1;
78 desc->dstbypass = 1;
79 desc->srcbypass = 1;
80#if __HVX_ARCH__ >= 73
81 desc->dstbypass = 1;
82 desc->srcbypass = 1;
83#else
84 desc->dstbypass = 0;
85 desc->srcbypass = 1;
86#endif
87 desc->order = 0;
88 desc->dstate = HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE;
89 desc->src = (void *) dptr.src;
90 desc->dst = (void *) dptr.dst;
91 desc->allocation = 0;
92 desc->padding = 0;
93 desc->roiwidth = width;
94 desc->roiheight = nrows;
95 desc->srcstride = src_row_size;
96 desc->dststride = dst_row_size;
97 desc->srcwidthoffset = 0;
98 desc->dstwidthoffset = 0;
99
100 q->dptr[q->push_idx] = dptr;
101
102 dmlink(q->tail, desc);
103 q->tail = desc;
104
105 // FARF(ERROR, "dma-push: i %u len %u dst %p src %p\n", q->push_idx, len, dst, src);
106 q->push_idx = (q->push_idx + 1) & q->idx_mask;
107 return true;
108}
109
110static inline bool dma_queue_push_ddr_to_vtcm(dma_queue * q,
111 dma_ptr dptr,
112 size_t dst_row_size,
113 size_t src_row_size,
114 size_t nrows) {
115 return dma_queue_push(q, dptr, dst_row_size, src_row_size, src_row_size, nrows);
116}
117
118
119static inline bool dma_queue_push_vtcm_to_ddr(dma_queue * q,
120 dma_ptr dptr,
121 size_t dst_row_size,
122 size_t src_row_size,
123 size_t nrows) {
124 return dma_queue_push(q, dptr, dst_row_size, src_row_size, dst_row_size, nrows);
125}
126
127static inline dma_ptr dma_queue_pop(dma_queue * q) {
128 dma_ptr dptr = { NULL };
129
130 if (q->push_idx == q->pop_idx) {
131 return dptr;
132 }
133
134 hexagon_udma_descriptor_type1_t * desc = &q->desc[q->pop_idx];
135
136 // Wait for desc to complete
137 while (1) {
138 dmpoll();
139 if (desc->dstate == HEXAGON_UDMA_DESC_DSTATE_COMPLETE) {
140 break;
141 }
142 // FARF(ERROR, "dma-pop: waiting for DMA : %u\n", q->pop_idx);
143 }
144
145 dptr = q->dptr[q->pop_idx];
146
147 // FARF(ERROR, "dma-pop: i %u dst %p\n", q->pop_idx, dst);
148 q->pop_idx = (q->pop_idx + 1) & q->idx_mask;
149 return dptr;
150}
151
152#ifdef __cplusplus
153} // extern "C"
154#endif
155
156#endif /* HTP_DMA_H */