summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h')
-rw-r--r--llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h156
1 files changed, 156 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h b/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h
new file mode 100644
index 0000000..d1ddb0e
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-hexagon/htp/hex-dma.h
@@ -0,0 +1,156 @@
+#ifndef HTP_DMA_H
+#define HTP_DMA_H
+
+#include <HAP_farf.h>
+#include <hexagon_types.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Destination/source address pair describing one DMA transfer.
+// Field order matters: dma_make_ptr() initializes this struct positionally as { dst, src }.
+typedef struct {
+ void *dst; // address the transfer writes to (copied into the descriptor's dst field on push)
+ const void *src; // address the transfer reads from (copied into the descriptor's src field on push)
+} dma_ptr;
+
+// Ring of DMA descriptors plus the dst/src pair recorded for each slot.
+// push_idx == pop_idx means the ring is empty; dma_queue_push() refuses to
+// advance push_idx onto pop_idx, so one slot always stays unused.
+typedef struct {
+ hexagon_udma_descriptor_type1_t * desc; // descriptor array, indexed by push_idx / pop_idx
+ hexagon_udma_descriptor_type1_t * tail; // most recently linked descriptor; passed as `cur` to dmlink() on the next push
+ dma_ptr * dptr; // dst/src pair saved per slot on push and handed back by dma_queue_pop()
+ uint32_t push_idx; // slot the next push will fill
+ uint32_t pop_idx; // slot the next pop will wait on
+ uint32_t capacity; // not read by the inline helpers in this header; presumably the slot count -- confirm in dma_queue_create()
+ uint32_t idx_mask; // ANDed with idx + 1 to wrap indices; presumably capacity - 1 with a power-of-two capacity -- TODO confirm
+} dma_queue;
+
+// Queue lifecycle entry points; the definitions live outside this header.
+// NOTE(review): the descriptions below are inferred from the names only -- confirm against the implementation.
+// presumably allocates a queue with room for `capacity` descriptors
+dma_queue * dma_queue_create(size_t capacity);
+// presumably releases a queue obtained from dma_queue_create()
+void dma_queue_delete(dma_queue * q);
+// presumably drains/discards the transfers still pending in `q`
+void dma_queue_flush(dma_queue * q);
+
+// TODO: technically we don't need these and could use Q6_dmstart/wait/etc instead
+// but those do not always seem to compile properly.
+// Issues `release(next):at` followed by the `dmstart(next)` instruction.
+// Presumably this hands the descriptor at `next` to the DMA engine -- confirm
+// the exact semantics against the Hexagon programmer's reference.
+//
+// Both statements carry a "memory" clobber: `asm volatile` alone does not stop
+// the compiler from moving ordinary stores across the statement, and the
+// descriptor contents must be written out before the instruction executes.
+// This matches dmpoll()/dmwait(), which already declare the clobber.
+static inline void dmstart(void * next) {
+    asm volatile(" release(%0):at" : : "r"(next) : "memory");
+    asm volatile(" dmstart(%0)" : : "r"(next) : "memory");
+}
+
+// Issues `release(next):at` followed by the `dmlink(cur, next)` instruction.
+// dma_queue_push() calls this with the queue tail as `cur` and the freshly
+// filled descriptor as `next`; presumably the instruction chains `next` after
+// `cur` -- confirm against the Hexagon programmer's reference.
+//
+// The "memory" clobber keeps the compiler from sinking the caller's descriptor
+// stores below the asm statements; `asm volatile` by itself gives no such
+// guarantee.
+static inline void dmlink(void * cur, void * next) {
+    asm volatile(" release(%0):at" : : "r"(next) : "memory");
+    asm volatile(" dmlink(%0, %1)" : : "r"(cur), "r"(next) : "memory");
+}
+
+// Executes the `dmpoll` instruction and returns the value it leaves in the
+// destination register. The "memory" clobber makes the compiler re-read memory
+// afterwards, which dma_queue_pop() relies on when it re-checks the
+// descriptor state.
+// NOTE(review): the meaning of the returned word is not visible here.
+static inline unsigned int dmpoll(void) {
+    unsigned int status = 0;
+
+    asm volatile(" %0 = dmpoll" : "=r"(status) : : "memory");
+
+    return status;
+}
+
+// Executes the `dmwait` instruction and returns the value it leaves in the
+// destination register. Declared with a "memory" clobber so memory is re-read
+// after the instruction.
+// NOTE(review): presumably blocks until outstanding DMA work finishes --
+// confirm against the Hexagon programmer's reference.
+static inline unsigned int dmwait(void) {
+    unsigned int status = 0;
+
+    asm volatile(" %0 = dmwait" : "=r"(status) : : "memory");
+
+    return status;
+}
+
+// Bundles a destination and a source address into a dma_ptr value.
+static inline dma_ptr dma_make_ptr(void *dst, const void *src)
+{
+    dma_ptr pair;
+
+    pair.dst = dst;
+    pair.src = src;
+
+    return pair;
+}
+
+// Queues one 2D transfer: fills the descriptor at push_idx, records the
+// dst/src pair for the matching dma_queue_pop(), links the descriptor after
+// the current tail with dmlink(), and advances push_idx.
+//
+//   q            - queue to push onto
+//   dptr         - destination/source addresses of the transfer
+//   dst_row_size - written to the descriptor's dststride field
+//   src_row_size - written to the descriptor's srcstride field
+//   width        - bytes transferred per row (descriptor roiwidth)
+//   nrows        - number of rows (descriptor roiheight)
+//
+// Returns false (after logging) when the ring is full, true otherwise.
+//
+// NOTE(review): the size_t arguments are stored into descriptor fields without
+// range checks; confirm the field widths in the descriptor definition.
+static inline bool dma_queue_push(dma_queue * q,
+                                  dma_ptr     dptr,
+                                  size_t      dst_row_size,
+                                  size_t      src_row_size,
+                                  size_t      width,  // width in bytes. number of bytes to transfer per row
+                                  size_t      nrows) {
+    const uint32_t slot      = q->push_idx;
+    const uint32_t next_slot = (slot + 1) & q->idx_mask;
+
+    // One slot is always left unused so that push_idx == pop_idx means "empty".
+    if (next_slot == q->pop_idx) {
+        FARF(ERROR, "dma-push: queue full\n");
+        return false;
+    }
+
+    hexagon_udma_descriptor_type1_t * desc = &q->desc[slot];
+
+    desc->next     = NULL;
+    desc->length   = 0;
+    desc->desctype = HEXAGON_UDMA_DESC_DESCTYPE_TYPE1;
+    // Source bypass is enabled on every architecture; destination bypass only
+    // from HVX arch 73 onwards.
+    desc->srcbypass = 1;
+#if __HVX_ARCH__ >= 73
+    desc->dstbypass = 1;
+#else
+    desc->dstbypass = 0;
+#endif
+    desc->order          = 0;
+    desc->dstate         = HEXAGON_UDMA_DESC_DSTATE_INCOMPLETE;
+    desc->src            = (void *) dptr.src;
+    desc->dst            = (void *) dptr.dst;
+    desc->allocation     = 0;
+    desc->padding        = 0;
+    desc->roiwidth       = width;
+    desc->roiheight      = nrows;
+    desc->srcstride      = src_row_size;
+    desc->dststride      = dst_row_size;
+    desc->srcwidthoffset = 0;
+    desc->dstwidthoffset = 0;
+
+    // Remember the pointers so dma_queue_pop() can return them for this slot.
+    q->dptr[slot] = dptr;
+
+    // The descriptor must be fully written before it is linked behind the tail.
+    dmlink(q->tail, desc);
+    q->tail = desc;
+
+    q->push_idx = next_slot;
+    return true;
+}
+
+// Convenience wrapper around dma_queue_push() that transfers src_row_size
+// bytes per row, i.e. the per-row width is taken from the source row size.
+static inline bool dma_queue_push_ddr_to_vtcm(dma_queue * q,
+                                              dma_ptr     dptr,
+                                              size_t      dst_row_size,
+                                              size_t      src_row_size,
+                                              size_t      nrows) {
+    const size_t bytes_per_row = src_row_size;
+
+    return dma_queue_push(q, dptr, dst_row_size, src_row_size, bytes_per_row, nrows);
+}
+
+
+// Convenience wrapper around dma_queue_push() that transfers dst_row_size
+// bytes per row, i.e. the per-row width is taken from the destination row size.
+static inline bool dma_queue_push_vtcm_to_ddr(dma_queue * q,
+                                              dma_ptr     dptr,
+                                              size_t      dst_row_size,
+                                              size_t      src_row_size,
+                                              size_t      nrows) {
+    const size_t bytes_per_row = dst_row_size;
+
+    return dma_queue_push(q, dptr, dst_row_size, src_row_size, bytes_per_row, nrows);
+}
+
+// Retires the oldest queued transfer and returns the dst/src pair that was
+// pushed with it. Returns a pair with both pointers NULL when the queue is
+// empty. Otherwise spins, calling dmpoll() on every iteration, until the
+// descriptor at pop_idx reports the COMPLETE state.
+static inline dma_ptr dma_queue_pop(dma_queue * q) {
+    const uint32_t slot  = q->pop_idx;
+    dma_ptr        empty = { NULL, NULL };
+
+    if (slot == q->push_idx) {
+        return empty;
+    }
+
+    hexagon_udma_descriptor_type1_t * pending = &q->desc[slot];
+
+    // dmpoll() carries a "memory" clobber, so dstate is re-read from memory on
+    // each pass even though the field is accessed through a plain pointer.
+    do {
+        dmpoll();
+    } while (pending->dstate != HEXAGON_UDMA_DESC_DSTATE_COMPLETE);
+
+    dma_ptr finished = q->dptr[slot];
+
+    q->pop_idx = (slot + 1) & q->idx_mask;
+    return finished;
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* HTP_DMA_H */