1#ifndef HTP_DMA_H
  2#define HTP_DMA_H
  3
  4#include <HAP_farf.h>
  5#include <hexagon_types.h>
  6#include <stdbool.h>
  7#include <stdint.h>
  8
  9#ifdef __cplusplus
 10extern "C" {
 11#endif
 12
// Destination/source address pair for one DMA transfer.
// dma_queue_push copies these into the descriptor's dst/src fields and
// dma_queue_pop hands the same pair back once the transfer has completed.
typedef struct {
    void *dst;        // address the transfer writes to
    const void *src;  // address the transfer reads from
} dma_ptr;
 17
// State of a DMA descriptor queue.
// push_idx and pop_idx index into desc[] and dptr[]; both are advanced with
// "(idx + 1) & idx_mask" by dma_queue_push / dma_queue_pop.
typedef struct {
    hexagon_udma_descriptor_type1_t * desc;  // descriptor array, one entry per slot
    hexagon_udma_descriptor_type1_t * tail;  // last descriptor linked; the next push is linked after it
    dma_ptr                         * dptr;  // dst/src pair recorded per slot, returned by dma_queue_pop
    uint32_t                          push_idx;  // slot the next push fills
    uint32_t                          pop_idx;   // slot the next pop waits on
    uint32_t                          capacity;  // not referenced in this header; presumably the slot count — confirm in dma_queue_create
    uint32_t                          idx_mask;  // AND-mask used to wrap indices; presumably capacity - 1 — confirm in dma_queue_create
} dma_queue;
 27
// Queue lifecycle functions; their definitions are not part of this header.
// NOTE(review): indices are wrapped with idx_mask, so capacity presumably has
// to be a power of two — confirm against the implementation.
dma_queue * dma_queue_create(size_t capacity);
void        dma_queue_delete(dma_queue * q);
void        dma_queue_flush(dma_queue * q);
 31
// TODO: technically we don't need these and could use Q6_dmstart/wait/etc instead,
// but those do not always seem to compile properly.
 34static inline void dmstart(void * next) {
 35    asm volatile(" release(%0):at" : : "r"(next));
 36    asm volatile(" dmstart(%0)" : : "r"(next));
 37}
 38
 39static inline void dmlink(void * cur, void * next) {
 40    asm volatile(" release(%0):at" : : "r"(next));
 41    asm volatile(" dmlink(%0, %1)" : : "r"(cur), "r"(next));
 42}
 43
 44static inline unsigned int dmpoll(void) {
 45    unsigned int ret = 0;
 46    asm volatile(" %0 = dmpoll" : "=r"(ret) : : "memory");
 47    return ret;
 48}
 49
 50static inline unsigned int dmwait(void) {
 51    unsigned int ret = 0;
 52    asm volatile(" %0 = dmwait" : "=r"(ret) : : "memory");
 53    return ret;
 54}
 55
 56static inline dma_ptr dma_make_ptr(void *dst, const void *src)
 57{
 58    dma_ptr p = { dst, src };
 59    return p;
 60}
 61
 62static inline bool dma_queue_push(dma_queue * q,
 63                                  dma_ptr     dptr,
 64                                  size_t      dst_row_size,
 65                                  size_t      src_row_size,
 66                                  size_t      width, // width in bytes. number of bytes to transfer per row
 67                                  size_t      nrows) {
 68    if (((q->push_idx + 1) & q->idx_mask) == q->pop_idx) {
 69        FARF(ERROR, "dma-push: queue full\n");
 70        return false;
 71    }
 72
 73    hexagon_udma_descriptor_type1_t * desc = &q->desc[q->push_idx];
 74
 75    desc->next           = NULL;
 76    desc->length         = 0;
 77    desc->desctype       = HEXAGON_UDMA_DESC_DESCTYPE_TYPE1;
 78    desc->dstbypass      = 1;
 79    desc->srcbypass      = 1;
 80#if __HVX_ARCH__ >= 73
 81    desc->dstbypass      = 1;
 82    desc->srcbypass      = 1;
 83#else
 84    desc->dstbypass      = 0;
 85    desc->srcbypass      = 1;
 86#endif
 87    desc->order          = 0;
 88    desc->dstate         = HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE;
 89    desc->src            = (void *) dptr.src;
 90    desc->dst            = (void *) dptr.dst;
 91    desc->allocation     = 0;
 92    desc->padding        = 0;
 93    desc->roiwidth       = width;
 94    desc->roiheight      = nrows;
 95    desc->srcstride      = src_row_size;
 96    desc->dststride      = dst_row_size;
 97    desc->srcwidthoffset = 0;
 98    desc->dstwidthoffset = 0;
 99
100    q->dptr[q->push_idx] = dptr;
101
102    dmlink(q->tail, desc);
103    q->tail = desc;
104
105    // FARF(ERROR, "dma-push: i %u len %u dst %p src %p\n", q->push_idx, len, dst, src);
106    q->push_idx = (q->push_idx + 1) & q->idx_mask;
107    return true;
108}
109
110static inline bool dma_queue_push_ddr_to_vtcm(dma_queue * q,
111                                              dma_ptr     dptr,
112                                              size_t      dst_row_size,
113                                              size_t      src_row_size,
114                                              size_t      nrows) {
115    return dma_queue_push(q, dptr, dst_row_size, src_row_size, src_row_size, nrows);
116}
117
118
119static inline bool dma_queue_push_vtcm_to_ddr(dma_queue * q,
120                                              dma_ptr     dptr,
121                                              size_t      dst_row_size,
122                                              size_t      src_row_size,
123                                              size_t      nrows) {
124    return dma_queue_push(q, dptr, dst_row_size, src_row_size, dst_row_size, nrows);
125}
126
127static inline dma_ptr dma_queue_pop(dma_queue * q) {
128    dma_ptr dptr  = { NULL };
129
130    if (q->push_idx == q->pop_idx) {
131        return dptr;
132    }
133
134    hexagon_udma_descriptor_type1_t * desc = &q->desc[q->pop_idx];
135
136    // Wait for desc to complete
137    while (1) {
138        dmpoll();
139        if (desc->dstate == HEXAGON_UDMA_DESC_DSTATE_COMPLETE) {
140            break;
141        }
142        // FARF(ERROR, "dma-pop: waiting for DMA : %u\n", q->pop_idx);
143    }
144
145    dptr = q->dptr[q->pop_idx];
146
147    // FARF(ERROR, "dma-pop: i %u dst %p\n", q->pop_idx, dst);
148    q->pop_idx = (q->pop_idx + 1) & q->idx_mask;
149    return dptr;
150}
151
152#ifdef __cplusplus
153}  // extern "C"
154#endif
155
156#endif /* HTP_DMA_H */