summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-zdnn/utils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp/ggml/src/ggml-zdnn/utils.cpp')
-rw-r--r--llama.cpp/ggml/src/ggml-zdnn/utils.cpp79
1 files changed, 79 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-zdnn/utils.cpp b/llama.cpp/ggml/src/ggml-zdnn/utils.cpp
new file mode 100644
index 0000000..2977cb0
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-zdnn/utils.cpp
@@ -0,0 +1,79 @@
+#include "ggml.h"
+#include "utils.hpp"
+
+zdnn_data_types ggml_zdnn_type_mapping(ggml_type type) {
+ switch (type) {
+ case GGML_TYPE_F32:
+ return FP32;
+ case GGML_TYPE_F16:
+ return FP16;
+ case GGML_TYPE_BF16:
+ return BFLOAT;
+ case GGML_TYPE_Q8_0:
+ return INT8;
+ case GGML_TYPE_I8:
+ return INT8;
+ case GGML_TYPE_I32:
+ return INT32;
+ default:
+ GGML_ABORT("%s: fatal: unable to determine zTensor data type",
+ __func__);
+ break;
+ }
+}
+
+void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
+ zdnn_tensor_desc & tfm_desc,
+ zdnn_ztensor & ztensor,
+ const ggml_tensor * src,
+ const int64_t * ne,
+ const zdnn_data_layouts layout) {
+ zdnn_init_pre_transformed_desc(
+ layout,
+ ggml_zdnn_type_mapping(src->type),
+ &pre_tfm_desc,
+ ne[3], ne[2], ne[1], ne[0]
+ );
+
+ ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc, &tfm_desc));
+ ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc, &tfm_desc, &ztensor));
+}
+
+void ggml_zdnn_load_tensor(zdnn_ztensor & ztensor, void * buffer) {
+ ZDNN_CHECK(zdnn_transform_ztensor(&ztensor, buffer));
+}
+
+void ggml_zdnn_init_tensor(ggml_backend_zdnn_buffer * buffer, const ggml_tensor * tensor) {
+ switch (tensor->op) {
+ case GGML_OP_MUL_MAT:
+ {
+ zdnn_init_pre_transformed_desc(
+ ZDNN_2D,
+ ggml_zdnn_type_mapping(tensor->type),
+ &buffer->pre_tfm_desc,
+ tensor->ne[1], tensor->ne[0]
+ );
+ } break;
+
+ default:
+ {
+ // For 4D tensors, GGML uses NCHW layout. However, because zDNN
+ // automatically transforms everything to NHWC, we will use it
+ // directly to avoid the performance penalty changing the
+ // layout and reshaping the tensor.
+ zdnn_init_pre_transformed_desc(
+ ZDNN_NHWC,
+ ggml_zdnn_type_mapping(tensor->type),
+ &buffer->pre_tfm_desc,
+ tensor->ne[3], tensor->ne[2], tensor->ne[1], tensor->ne[0]
+ );
+
+ // TODO: Consider adding a ggml check.
+ // TODO: If tensor = 4D, use ZDNN_NCHW by default.
+ // TODO: If tensor = 2D, use ZDNN_NHWC by default.
+ } break;
+ }
+
+ ZDNN_CHECK(zdnn_generate_transformed_desc(&buffer->pre_tfm_desc, &buffer->tfm_desc));
+ ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&buffer->pre_tfm_desc, &buffer->tfm_desc, &buffer->ztensor));
+}