1/*
  2 * Copyright (c) 2023-2026 The ggml authors
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 * of this software and associated documentation files (the "Software"), to
  6 * deal in the Software without restriction, including without limitation the
  7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8 * sell copies of the Software, and to permit persons to whom the Software is
  9 * furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice shall be included in
 12 * all copies or substantial portions of the Software.
 13 *
 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 20 * IN THE SOFTWARE.
 21 */
 22
 23#include "acl_tensor.h"
 24
 25#include <algorithm>
 26#include <cstring>
 27
 28aclDataType ggml_cann_type_mapping(ggml_type type) {
 29    switch (type) {
 30        case GGML_TYPE_F32:
 31            return ACL_FLOAT;
 32        case GGML_TYPE_F16:
 33            return ACL_FLOAT16;
 34        case GGML_TYPE_BF16:
 35            return ACL_BF16;
 36        case GGML_TYPE_I8:
 37            return ACL_INT8;
 38        case GGML_TYPE_I16:
 39            return ACL_INT16;
 40        case GGML_TYPE_I32:
 41            return ACL_INT32;
 42        case GGML_TYPE_Q4_0:
 43            return ACL_INT4;
 44        case GGML_TYPE_Q8_0:
 45            return ACL_INT8;
 46        case GGML_TYPE_I64:
 47            return ACL_INT64;
 48        default:
 49            return ACL_DT_UNDEFINED;
 50    }
 51}
 52
/**
 * Create an owning aclTensor view over the data of a ggml tensor.
 *
 * @param tensor  Source ggml tensor supplying data pointer, element size and
 *                (when ne == nullptr) shape/strides.
 * @param ne      Optional override shape (used for broadcasting); when
 *                nullptr, the tensor's own ne/nb are used.
 * @param nb      Optional override strides in BYTES, paired with ne.
 * @param dims    Number of valid entries in ne/nb; ignored (treated as
 *                GGML_MAX_DIMS) when ne == nullptr and dims == 0.
 * @param format  ACL memory format passed through to aclCreateTensor.
 * @param offset  Byte offset of the view into tensor->data; must be a
 *                multiple of the element size.
 * @return        Smart-pointer wrapper owning the created aclTensor.
 */
acl_tensor_ptr ggml_cann_create_tensor(const ggml_tensor * tensor,
                                       int64_t *           ne,
                                       size_t *            nb,
                                       int64_t             dims,
                                       aclFormat           format,
                                       size_t              offset) {
    // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
    // added.
    int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];

    if (ne == nullptr) {
        // No override: mirror the ggml tensor's own shape and strides.
        for (int i = 0; i < GGML_MAX_DIMS; i++) {
            acl_ne[i]     = tensor->ne[i];
            // The step size of acl is in elements.
            acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
        }
    } else {
        // With bcast
        // NOTE(review): assumes dims > 0 whenever ne != nullptr; a caller
        // passing ne with dims == 0 would leave acl_ne uninitialized below.
        for (int i = 0; i < dims; i++) {
            acl_ne[i]     = ne[i];
            acl_stride[i] = nb[i] / ggml_element_size(tensor);
        }
    }

    // Number of elements spanned by the view: 1 + sum((ne[i]-1) * stride[i]),
    // i.e. the element index one past the farthest-reachable element.
    int64_t final_dims      = (dims == 0 ? GGML_MAX_DIMS : dims);
    int64_t acl_storage_len = 1;
    for (int i = 0; i < final_dims; i++) {
        acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
    }
    // Convert the byte offset to an element offset and extend the storage
    // length so the underlying 1-D storage covers the offset view as well.
    size_t elem_offset = offset / ggml_element_size(tensor);
    acl_storage_len += elem_offset;

    // Reverse ne and stride.
    // ggml orders dimensions lowest-stride first; ACL expects the opposite
    // order, so flip both arrays in place before handing them to ACL.
    std::reverse(acl_ne, acl_ne + final_dims);
    std::reverse(acl_stride, acl_stride + final_dims);

    // Storage is described as a single flat dimension of acl_storage_len
    // elements starting at tensor->data, with the view shifted by elem_offset.
    aclTensor * raw = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride, elem_offset,
                                      format, &acl_storage_len, 1, tensor->data);

    return acl_tensor_ptr(raw);
}
 94
 95acl_int_array_ptr ggml_cann_create_int_array(const int64_t * value, uint64_t size) {
 96    aclIntArray * raw = aclCreateIntArray(value, size);
 97    return acl_int_array_ptr(raw);
 98}
 99
100acl_scalar_ptr ggml_cann_create_scalar(void * value, aclDataType dataType) {
101    aclScalar * raw = aclCreateScalar(value, dataType);
102    return acl_scalar_ptr(raw);
103}
104
105bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) {
106    for (int i = 0; i < GGML_MAX_DIMS; i++) {
107        if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
108            return true;
109        }
110    }
111    return false;
112}
113
/**
 * Compute broadcast-compatible shapes/strides for src0 and src1.
 *
 * For each ggml dimension where src0 is an exact multiple (nr) of src1, an
 * extra dimension of size nr (src0) / 1 (src1) is inserted so that both
 * tensors end up with element-wise-compatible shapes. Output arrays must
 * have room for up to GGML_MAX_DIMS * 2 entries.
 *
 * @param src0           Larger (repeated-into) tensor.
 * @param src1           Smaller tensor; must satisfy ggml_can_repeat(src1, src0).
 * @param bcast_src0_ne  Out: broadcast shape of src0.
 * @param bcast_src1_ne  Out: broadcast shape of src1.
 * @param bcast_src0_nb  Out: broadcast strides (bytes) of src0.
 * @param bcast_src1_nb  Out: broadcast strides (bytes) of src1.
 * @return               Number of dimensions written to each output array.
 */
int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
                                  const ggml_tensor * src1,
                                  int64_t *           bcast_src0_ne,
                                  int64_t *           bcast_src1_ne,
                                  size_t *            bcast_src0_nb,
                                  size_t *            bcast_src1_nb) {
    GGML_ASSERT(ggml_can_repeat(src1, src0));
    int bcast_dim_cnt = 0;
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        // nr = repeat factor of src0 over src1 along this dimension.
        int64_t nr                   = src0->ne[i] / src1->ne[i];
        // src0->ne[i] / nr equals src1->ne[i] when the sizes divide evenly
        // (guaranteed by the ggml_can_repeat assertion above).
        bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
        bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
        bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
        bcast_src1_nb[bcast_dim_cnt] = src1->nb[i];
        bcast_dim_cnt++;
        if (nr != 1) {
            // Need to add an extra dim.
            // The new dim's stride is derived from the dim just emitted
            // (contiguous within the split), and src1 gets size 1 so it
            // broadcasts over src0's nr repeats.
            bcast_src0_ne[bcast_dim_cnt] = nr;
            bcast_src1_ne[bcast_dim_cnt] = 1;
            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * bcast_src0_ne[bcast_dim_cnt - 1];
            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * bcast_src1_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}
140
/**
 * Compute broadcast shapes/strides for matrix multiplication.
 *
 * Like ggml_cann_get_bcast_shape, but specialized for mul_mat: the first two
 * dimensions (the matrix dimensions) are never split, and when a batch
 * dimension of input is an nr-fold repeat of weight, the extra size-nr
 * dimension is inserted BEFORE the original dimension (dst follows input).
 * Output arrays must hold up to GGML_MAX_DIMS * 2 entries.
 *
 * @param input_ne/input_nb    Shape and byte strides of the input tensor.
 * @param weight_ne/weight_nb  Shape and byte strides of the weight tensor.
 * @param dst_ne/dst_nb        Shape and byte strides of the destination.
 * @param bcast_*_ne/bcast_*_nb Out: broadcast shapes and strides.
 * @return                     Number of dimensions written to each output.
 */
int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
                                         const int64_t * weight_ne,
                                         const int64_t * dst_ne,
                                         const size_t *  input_nb,
                                         const size_t *  weight_nb,
                                         const size_t *  dst_nb,
                                         int64_t *       bcast_input_ne,
                                         int64_t *       bcast_weight_ne,
                                         int64_t *       bcast_dst_ne,
                                         size_t *        bcast_input_nb,
                                         size_t *        bcast_weight_nb,
                                         size_t *        bcast_dst_nb) {
    // input and dst shoule in same shape, except first two dims.
    GGML_ASSERT(input_ne[2] == dst_ne[2]);
    GGML_ASSERT(input_ne[3] == dst_ne[3]);

    int bcast_dim_cnt = 0;

    // For mul_mat, a dimension needs to be added before the dimension that
    // weight needs to be expanded to satisfy the bcast rule of matrix
    // multiplication.
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        // nr = repeat factor of input over weight along this dimension.
        int64_t nr = input_ne[i] / weight_ne[i];
        // Do not use bcast in the first two dimensions because we only support
        // the bcast batch dimension. Just copy them.
        if (i < 2 || nr == 1) {
            bcast_input_ne[bcast_dim_cnt]  = input_ne[i];
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_dst_ne[bcast_dim_cnt]    = dst_ne[i];

            bcast_input_nb[bcast_dim_cnt]  = input_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dst_nb[bcast_dim_cnt]    = dst_nb[i];
            bcast_dim_cnt++;
        } else {
            // Need to add an extra dim.
            // First emit the repeat dimension (nr for input/dst, 1 for
            // weight) carrying the original byte stride of dim i.
            bcast_input_ne[bcast_dim_cnt]  = nr;
            bcast_dst_ne[bcast_dim_cnt]    = nr;
            bcast_weight_ne[bcast_dim_cnt] = 1;
            bcast_input_nb[bcast_dim_cnt]  = input_nb[i];
            bcast_dst_nb[bcast_dim_cnt]    = dst_nb[i];
            bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
            bcast_dim_cnt++;

            // Then emit the remaining part of dim i (original size / nr);
            // its stride is the previous dim's stride scaled by that dim's
            // size, keeping the split contiguous.
            bcast_input_ne[bcast_dim_cnt]  = input_ne[i] / nr;
            bcast_dst_ne[bcast_dim_cnt]    = dst_ne[i] / nr;
            bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
            bcast_input_nb[bcast_dim_cnt]  = bcast_input_nb[bcast_dim_cnt - 1] * bcast_input_ne[bcast_dim_cnt - 1];
            bcast_dst_nb[bcast_dim_cnt]    = bcast_dst_nb[bcast_dim_cnt - 1] * bcast_dst_ne[bcast_dim_cnt - 1];
            bcast_weight_nb[bcast_dim_cnt] = bcast_weight_nb[bcast_dim_cnt - 1] * bcast_weight_ne[bcast_dim_cnt - 1];
            bcast_dim_cnt++;
        }
    }
    return bcast_dim_cnt;
}