/*
 * Copyright (c) 2023-2026 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "acl_tensor.h"

#include <algorithm>
#include <cstring>

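// Map a ggml data type to the matching ACL data type. Quantized types map to
// the integer type of their packed payload (Q4_0 -> ACL_INT4, Q8_0 ->
// ACL_INT8); types without an ACL counterpart map to ACL_DT_UNDEFINED.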
aclDataType ggml_cann_type_mapping(ggml_type type) {
    switch (type) {
        case GGML_TYPE_F32:
            return ACL_FLOAT;
        case GGML_TYPE_F16:
            return ACL_FLOAT16;
        case GGML_TYPE_BF16:
            return ACL_BF16;
        case GGML_TYPE_I8:
            return ACL_INT8;
        case GGML_TYPE_I16:
            return ACL_INT16;
        case GGML_TYPE_I32:
            return ACL_INT32;
        case GGML_TYPE_Q4_0:
            return ACL_INT4;
        case GGML_TYPE_Q8_0:
            return ACL_INT8;
        case GGML_TYPE_I64:
            return ACL_INT64;
        default:
            return ACL_DT_UNDEFINED;
    }
}

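// Wrap a ggml tensor in an aclTensor. If ne is null, the tensor's own shape
// and strides are used; otherwise the caller supplies a (possibly
// broadcast-expanded) view of `dims` dimensions. ggml orders dimensions from
// innermost to outermost while ACL expects the reverse, hence the reversal
// before aclCreateTensor is called.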
acl_tensor_ptr ggml_cann_create_tensor(const ggml_tensor * tensor,
                                       int64_t * ne,
                                       size_t * nb,
                                       int64_t dims,
                                       aclFormat format,
                                       size_t offset) {
    // If the tensor is broadcast, up to GGML_MAX_DIMS additional dimensions
    // may be added, so the arrays reserve twice the space.
    int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];

    if (ne == nullptr) {
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            acl_ne[i] = tensor->ne[i];
            // ACL strides are counted in elements; ggml strides (nb) are in bytes.
            acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
        }
    } else {
        // With bcast: use the caller-provided shape and strides.
        for (int i = 0; i < dims; i++) {
            acl_ne[i] = ne[i];
            acl_stride[i] = nb[i] / ggml_element_size(tensor);
        }
    }

    int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
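    // Storage length of the view, in elements: the highest reachable index is
    // sum((ne[i] - 1) * stride[i]), so the span is that plus one, shifted by
    // the element offset of the view into the underlying buffer.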
    int64_t acl_storage_len = 1;
    for (int i = 0; i < final_dims; i++) {
        acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
    }
    size_t elem_offset = offset / ggml_element_size(tensor);
    acl_storage_len += elem_offset;

    // Reverse ne and stride.
    std::reverse(acl_ne, acl_ne + final_dims);
    std::reverse(acl_stride, acl_stride + final_dims);

    aclTensor * raw = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride, elem_offset,
                                      format, &acl_storage_len, 1, tensor->data);

    return acl_tensor_ptr(raw);
}

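// Thin RAII wrapper around aclCreateIntArray.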
acl_int_array_ptr ggml_cann_create_int_array(const int64_t * value, uint64_t size) {
    aclIntArray * raw = aclCreateIntArray(value, size);
    return acl_int_array_ptr(raw);
}

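// Thin RAII wrapper around aclCreateScalar.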
acl_scalar_ptr ggml_cann_create_scalar(void * value, aclDataType dataType) {
    aclScalar * raw = aclCreateScalar(value, dataType);
    return acl_scalar_ptr(raw);
}

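// A broadcast is needed when some dimension of t1 differs from t0 and is not
// 1 (the usual numpy-style broadcast rule).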
bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) {
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
            return true;
        }
    }
    return false;
}

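// Compute broadcast-compatible shapes and strides for src0/src1 and return
// the resulting number of dimensions. Every dimension where src0 is a
// multiple of src1 is split in two so aclnn operators can broadcast it.
// For example (hypothetical shapes, in ggml's innermost-first dim order):
//   src0 ne = [4, 6, 1, 1], src1 ne = [4, 2, 1, 1]
//   => bcast_src0_ne = [4, 2, 3, 1, 1], bcast_src1_ne = [4, 2, 1, 1, 1]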
int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
                                  const ggml_tensor * src1,
                                  int64_t * bcast_src0_ne,
                                  int64_t * bcast_src1_ne,
                                  size_t * bcast_src0_nb,
                                  size_t * bcast_src1_nb) {
    GGML_ASSERT(ggml_can_repeat(src1, src0));
    int bcast_dim_cnt = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = src0->ne[i] / src1->ne[i];
        bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
        bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
        bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
        bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
        bcast_dim_cnt++;
        if (nr != 1) {
            // Need to add an extra dim.
            bcast_src0_ne[bcast_dim_cnt] = nr;
            bcast_src1_ne[bcast_dim_cnt] = 1;
            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * bcast_src0_ne[bcast_dim_cnt - 1];
            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * bcast_src1_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}

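// Broadcast-shape computation specialized for mul_mat: the first two dims
// are the matrix dims and are copied verbatim; only batch dims (i >= 2)
// where input is a multiple of weight are split, with the repeat count
// inserted before the weight's dim so the batched-matmul broadcast rule is
// satisfied. For example (hypothetical shapes, innermost-first dim order):
//   input ne = [k, m, 4, 1], weight ne = [k, n, 2, 1]
//   => bcast_input_ne  = [k, m, 2, 2, 1]
//      bcast_weight_ne = [k, n, 1, 2, 1]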
int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
                                         const int64_t * weight_ne,
                                         const int64_t * dst_ne,
                                         const size_t * input_nb,
                                         const size_t * weight_nb,
                                         const size_t * dst_nb,
                                         int64_t * bcast_input_ne,
                                         int64_t * bcast_weight_ne,
                                         int64_t * bcast_dst_ne,
                                         size_t * bcast_input_nb,
                                         size_t * bcast_weight_nb,
                                         size_t * bcast_dst_nb) {
    // input and dst should have the same shape, except for the first two dims.
    GGML_ASSERT(input_ne[2] == dst_ne[2]);
    GGML_ASSERT(input_ne[3] == dst_ne[3]);

    int bcast_dim_cnt = 0;

    // For mul_mat, an extra dimension is inserted before each dimension along
    // which the weight needs to be expanded, to satisfy the broadcast rule of
    // matrix multiplication.
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        int64_t nr = input_ne[i] / weight_ne[i];
        // Do not broadcast the first two dimensions: only the batch dimensions
        // support broadcasting. Just copy them.
        if (i < 2 || nr == 1) {
            bcast_input_ne[bcast_dim_cnt] = input_ne[i];
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];

            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_dim_cnt++;
        } else {
            // Need to add an extra dim.
            bcast_input_ne[bcast_dim_cnt] = nr;
            bcast_dst_ne[bcast_dim_cnt] = nr;
            bcast_weight_ne[bcast_dim_cnt] = 1;
            bcast_input_nb[bcast_dim_cnt] = input_nb[i];
            bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dim_cnt++;

            bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
            bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] * bcast_input_ne[bcast_dim_cnt - 1];
            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] * bcast_dst_ne[bcast_dim_cnt - 1];
            bcast_weight_nb[bcast_dim_cnt] = bcast_weight_nb[bcast_dim_cnt - 1] * bcast_weight_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}