summaryrefslogtreecommitdiff
path: root/llama.cpp/ggml/src/ggml-cann/common.h
diff options
context:
space:
mode:
authorMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
committerMitja Felicijan <mitja.felicijan@gmail.com>2026-02-12 20:57:17 +0100
commitb333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cann/common.h
downloadllmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cann/common.h')
-rw-r--r--llama.cpp/ggml/src/ggml-cann/common.h641
1 files changed, 641 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cann/common.h b/llama.cpp/ggml/src/ggml-cann/common.h
new file mode 100644
index 0000000..0120f0d
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-cann/common.h
@@ -0,0 +1,641 @@
+/*
+ * Copyright (c) 2023-2026 The ggml authors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef CANN_COMMON_H
+#define CANN_COMMON_H
+
+#include "../ggml-impl.h"
+#include "../include/ggml-cann.h"
+#include "../include/ggml.h"
+
+#include <acl/acl.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <functional>
+#include <iostream>
+#include <list>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string>
+#include <thread>
+#include <vector>
+
+#define MATRIX_ROW_PADDING 512  // NOTE(review): presumably a row-padding granularity for matrix buffers; not used in this header, unit to be confirmed.
+#define GGML_CANN_MAX_STREAMS 8  // Length of the `streams` array in ggml_backend_cann_context, i.e. the highest valid stream index plus one.
+
+/**
+ * @brief Handles CANN-related errors by printing an error message and
+ * terminating the program.
+ * @param stmt The statement that caused the error.
+ * @param func The function in which the error occurred.
+ * @param file The file in which the error occurred.
+ * @param line The line number at which the error occurred.
+ * @param msg The error message.
+ * @note Declared [[noreturn]]: control never comes back to the caller. Only
+ * the declaration lives in this header; the definition is elsewhere.
+ */
+[[noreturn]] void ggml_cann_error(const char * stmt, const char * func, const char * file, int line, const char * msg);
+
+/**
+ * @brief Checks the result of a CANN function call and invokes the error
+ * handler if the call fails.
+ *
+ * The expansion is a single `do { ... } while (0)` statement WITHOUT a
+ * trailing semicolon: the semicolon written at the call site terminates it.
+ * This keeps the macro usable as the body of an unbraced `if`/`else`
+ * (`if (c) ACL_CHECK(x); else ...`), which a semicolon baked into the macro
+ * would break.
+ *
+ * @param stmt The CANN function call to check. It is evaluated exactly once
+ * and its result is stored in an `int`.
+ * @param success The success code that indicates the call was successful.
+ * @param error_fn The function to call (without arguments) to retrieve the
+ * error message; it is only called when the check fails.
+ */
+#define ACL_CHECK_GEN(stmt, success, error_fn)                                \
+    do {                                                                      \
+        int err_code = (stmt);                                                \
+        if (err_code != (success)) {                                          \
+            ggml_cann_error(#stmt, __func__, __FILE__, __LINE__, error_fn()); \
+        }                                                                     \
+    } while (0)
+
+/**
+ * @brief Checks a CANN call against success code 0, taking the error message
+ * from aclGetRecentErrMsg() on failure.
+ * @param stmt The CANN function call to check.
+ */
+#define ACL_CHECK(stmt) ACL_CHECK_GEN(stmt, 0, aclGetRecentErrMsg)
+
+/**
+ * @brief Contains information about CANN devices.
+ *
+ * Every field carries a default member initializer, so an object created
+ * without an explicit initializer starts out zeroed instead of holding
+ * indeterminate values.
+ */
+struct ggml_cann_device_info {
+    /**
+     * @brief Number of CANN devices available.
+     */
+    int32_t device_count = 0;
+
+    /**
+     * @brief Information about a single CANN device.
+     */
+    struct cann_device_info {
+        int    cc              = 0;     /**< Compute capability. */
+        size_t smpb            = 0;     /**< Maximum shared memory per block. */
+        bool   vmm             = false; /**< Virtual memory support. */
+        size_t vmm_granularity = 0;     /**< Granularity of virtual memory. */
+        size_t total_vram      = 0;     /**< Total video RAM available on the device. */
+    };
+
+    cann_device_info devices[GGML_CANN_MAX_DEVICES] = {}; /**< Array of CANN device information. */
+};
+
+const ggml_cann_device_info & ggml_cann_info();  // Returns a read-only reference to a ggml_cann_device_info; defined outside this header.
+
+void ggml_cann_set_device(int32_t device);  // NOTE(review): presumably makes `device` the active CANN device for the caller; confirm in the definition.
+
+std::optional<std::string> get_env_as_lowercase(const std::string & name);  // NOTE(review): by its name, reads environment variable `name` lowercased, empty optional when unset; confirm.
+bool parse_bool(const std::string & value);  // NOTE(review): accepted spellings are not visible here; this header passes "on" as a default.
+int parse_integer(const std::string & value);  // NOTE(review): behavior on malformed input is not visible here; this header passes "12" as a default.
+
+/**
+ * @brief Abstract base class for memory pools used by CANN.
+ */
+struct ggml_cann_pool {
+ /**
+ * @brief Virtual destructor for the memory pool.
+ */
+ virtual ~ggml_cann_pool() = default;
+
+ /**
+ * @brief Allocates memory from the pool.
+ *
+ * @param size The size of the memory block to allocate.
+ * @param actual_size Pointer to a variable where the actual allocated size
+ * will be stored.
+ * @return Pointer to the allocated memory block.
+ */
+ virtual void * alloc(size_t size, size_t * actual_size) = 0;
+
+ /**
+ * @brief Frees a previously allocated memory block.
+ *
+ * @param ptr Pointer to the memory block to free.
+ * @param size Size of the memory block to free.
+ * @note All CANN operators run asynchronously. Make sure the memory stays
+ * available until the operator that uses it has finished.
+ */
+ virtual void free(void * ptr, size_t size) = 0;
+};
+
+/**
+ * @brief Scope-bound owner of a single block taken from a ggml_cann_pool.
+ *
+ * A block obtained through alloc() is handed back to the pool it came from
+ * when this object is destroyed. Objects of this type can be neither copied
+ * nor moved.
+ */
+struct ggml_cann_pool_alloc {
+    // Copy and move operations are all deleted: exactly one object ever
+    // refers to a given block.
+    ggml_cann_pool_alloc(const ggml_cann_pool_alloc &)             = delete;
+    ggml_cann_pool_alloc(ggml_cann_pool_alloc &&)                  = delete;
+    ggml_cann_pool_alloc & operator=(const ggml_cann_pool_alloc &) = delete;
+    ggml_cann_pool_alloc & operator=(ggml_cann_pool_alloc &&)      = delete;
+
+    /**
+     * @brief Creates an object with no pool and no block attached.
+     */
+    ggml_cann_pool_alloc() = default;
+
+    /**
+     * @brief Binds the object to a pool without allocating anything yet.
+     * @param pool Pool that later alloc(size) calls will draw from.
+     */
+    explicit ggml_cann_pool_alloc(ggml_cann_pool & pool) : pool(&pool) {}
+
+    /**
+     * @brief Binds the object to a pool and immediately allocates a block.
+     * @param pool Pool to allocate from.
+     * @param size Requested size of the block.
+     */
+    ggml_cann_pool_alloc(ggml_cann_pool & pool, size_t size) : pool(&pool) { alloc(size); }
+
+    /**
+     * @brief Returns the held block, if any, to the pool.
+     */
+    ~ggml_cann_pool_alloc() {
+        if (ptr == nullptr) {
+            return;  // nothing was allocated
+        }
+        pool->free(ptr, actual_size);
+    }
+
+    /**
+     * @brief Allocates a block from the bound pool.
+     *
+     * Asserts that a pool is bound and that no block is held yet.
+     *
+     * @param size Requested size of the block.
+     * @return Pointer to the allocated block; the size reported by the pool
+     * is stored in actual_size.
+     */
+    void * alloc(size_t size) {
+        GGML_ASSERT(pool != nullptr);
+        GGML_ASSERT(ptr == nullptr);
+        ptr = pool->alloc(size, &actual_size);
+        return ptr;
+    }
+
+    /**
+     * @brief Rebinds the object to the given pool, then allocates from it.
+     * @param pool Pool to allocate from.
+     * @param size Requested size of the block.
+     * @return Pointer to the allocated block.
+     */
+    void * alloc(ggml_cann_pool & pool, size_t size) {
+        this->pool = &pool;
+        return alloc(size);
+    }
+
+    /**
+     * @brief Accessor for the held block.
+     * @return Pointer to the block, or nullptr when nothing has been allocated.
+     */
+    void * get() { return ptr; }
+
+    ggml_cann_pool * pool        = nullptr; /**< Pool the block is taken from and returned to. */
+    void *           ptr         = nullptr; /**< Held block; nullptr while nothing is allocated. */
+    size_t           actual_size = 0;       /**< Size reported by the pool for the held block. */
+};
+
+#ifdef USE_ACL_GRAPH
+/**
+ * @brief Recorded properties of one ggml graph node, used to decide whether a
+ * later node is equivalent to the recorded one.
+ */
+struct ggml_graph_node_properties {
+    // Destination tensor: data address, shape and strides.
+    void *  node_address;
+    int64_t ne[GGML_MAX_DIMS];
+    size_t  nb[GGML_MAX_DIMS];
+
+    // Source tensors: data address, shape and strides per source slot.
+    void *  src_address[GGML_MAX_SRC];
+    int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS];
+    size_t  src_nb[GGML_MAX_SRC][GGML_MAX_DIMS];
+
+    // Operation and its raw parameter words.
+    ggml_op node_op;
+    int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
+
+    /**
+     * @brief Tell whether a ggml tensor node agrees with the recorded properties.
+     *
+     * Compared fields: destination address, op type, destination shape and
+     * strides, and for each source slot its presence, address, shape and
+     * strides. Address comparisons are skipped when the node is a
+     * GGML_OP_VIEW. The op parameters are compared only for GGML_OP_SCALE,
+     * GGML_OP_UNARY and GGML_OP_GLU nodes.
+     *
+     * @param node The current ggml tensor node.
+     * @return true if every compared field agrees; false otherwise.
+     */
+    bool has_matching_properties(ggml_tensor * node) {
+        // View nodes are exempt from every data-address comparison below.
+        const bool compare_addresses = node->op != GGML_OP_VIEW;
+
+        if (compare_addresses && node->data != node_address) {
+            return false;
+        }
+        if (node->op != node_op) {
+            return false;
+        }
+
+        for (int dim = 0; dim < GGML_MAX_DIMS; ++dim) {
+            if (node->ne[dim] != ne[dim] || node->nb[dim] != nb[dim]) {
+                return false;
+            }
+        }
+
+        for (int slot = 0; slot < GGML_MAX_SRC; ++slot) {
+            const ggml_tensor * source = node->src[slot];
+
+            if (source == nullptr) {
+                // An empty slot only matches a slot that was recorded as empty.
+                if (src_address[slot] != nullptr) {
+                    return false;
+                }
+                continue;
+            }
+
+            if (compare_addresses && source->data != src_address[slot]) {
+                return false;
+            }
+            for (int dim = 0; dim < GGML_MAX_DIMS; ++dim) {
+                if (source->ne[dim] != src_ne[slot][dim] || source->nb[dim] != src_nb[slot][dim]) {
+                    return false;
+                }
+            }
+        }
+
+        switch (node->op) {
+            case GGML_OP_SCALE:
+            case GGML_OP_UNARY:
+            case GGML_OP_GLU:
+                return memcmp(op_params, node->op_params, GGML_MAX_OP_PARAMS) == 0;
+            default:
+                return true;
+        }
+    }
+};
+
+/**
+ * @brief A captured CANN graph handle together with the per-node properties
+ * of the ggml graph it was recorded for.
+ *
+ * The destructor destroys the handle, so the type is non-copyable: a copy
+ * would destroy the same handle twice.
+ */
+struct ggml_cann_graph {
+    ggml_cann_graph() = default;
+
+    ggml_cann_graph(const ggml_cann_graph &)             = delete;
+    ggml_cann_graph & operator=(const ggml_cann_graph &) = delete;
+
+    /**
+     * @brief Destroys the graph handle if one has been set.
+     */
+    ~ggml_cann_graph() {
+        if (graph != nullptr) {
+            ACL_CHECK(aclmdlRIDestroy(graph));
+        }
+    }
+
+    aclmdlRI graph = nullptr; /**< Graph handle; nullptr until one is assigned. */
+
+    std::vector<ggml_graph_node_properties> ggml_graph_properties; /**< One entry per node of the recorded ggml graph. */
+
+    /**
+     * @brief Create a new CANN graph from a ggml computation graph.
+     *
+     * This function creates a new ggml_cann_graph object and fills its node properties
+     * (operation type, dimensions, strides, input sources, and operation parameters)
+     * based on the current ggml computation graph. The `graph` handle of the new
+     * object is left as nullptr.
+     *
+     * Each node in the ggml graph is mapped to a property entry in the new CANN graph:
+     * - node address
+     * - operation type
+     * - shape (ne) and strides (nb)
+     * - source tensor addresses, shapes and strides (zeroed for absent sources)
+     * - operation parameters
+     *
+     * @param cgraph The current ggml computation graph.
+     * @return Pointer to the newly created, heap-allocated ggml_cann_graph object.
+     * The caller owns it and must eventually delete it (directly or by handing
+     * it to an owner that does).
+     */
+    static ggml_cann_graph * create_from_cgraph(ggml_cgraph * cgraph) {
+        // Held by a unique_ptr while it is being filled so that an exception
+        // (e.g. from resize) does not leak the object.
+        auto new_graph = std::make_unique<ggml_cann_graph>();
+        new_graph->ggml_graph_properties.resize(cgraph->n_nodes);
+
+        for (int node_idx = 0; node_idx < cgraph->n_nodes; ++node_idx) {
+            ggml_tensor * node = cgraph->nodes[node_idx];
+            auto &        prop = new_graph->ggml_graph_properties[node_idx];
+
+            prop.node_address = node->data;
+            prop.node_op      = node->op;
+
+            std::copy_n(node->ne, GGML_MAX_DIMS, prop.ne);
+            std::copy_n(node->nb, GGML_MAX_DIMS, prop.nb);
+
+            for (int src = 0; src < GGML_MAX_SRC; ++src) {
+                if (node->src[src]) {
+                    prop.src_address[src] = node->src[src]->data;
+                    std::copy_n(node->src[src]->ne, GGML_MAX_DIMS, prop.src_ne[src]);
+                    std::copy_n(node->src[src]->nb, GGML_MAX_DIMS, prop.src_nb[src]);
+                } else {
+                    // Absent source: record a null address and zeroed shape/strides.
+                    prop.src_address[src] = nullptr;
+                    std::fill_n(prop.src_ne[src], GGML_MAX_DIMS, 0);
+                    std::fill_n(prop.src_nb[src], GGML_MAX_DIMS, 0);
+                }
+            }
+
+            memcpy(prop.op_params, node->op_params, GGML_MAX_OP_PARAMS);
+        }
+
+        return new_graph.release();
+    }
+
+    /**
+     * @brief Check whether this CANN graph matches the given ggml computation graph.
+     *
+     * The graphs match when they have the same number of nodes and every node
+     * passes ggml_graph_node_properties::has_matching_properties() against the
+     * entry recorded at the same index.
+     *
+     * @param cgraph The current ggml computation graph.
+     * @return true if this CANN graph matches the ggml graph; false otherwise.
+     */
+    bool matches_cgraph(ggml_cgraph * cgraph) {
+        if (this->ggml_graph_properties.size() != static_cast<size_t>(cgraph->n_nodes)) {
+            return false;
+        }
+
+        for (int i = 0; i < cgraph->n_nodes; ++i) {
+            if (!this->ggml_graph_properties[i].has_matching_properties(cgraph->nodes[i])) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+};
+
+/**
+ * @brief LRU cache for managing ggml_cann_graph objects.
+ *
+ * The cache keeps owning raw pointers to ggml_cann_graph objects in a list
+ * ordered from most recently used (front) to least recently used (back) and
+ * enforces a maximum capacity. It provides methods to push new graphs,
+ * move a matching graph to the front, and clear the cache.
+ *
+ * Because the cache deletes the graphs it holds, it is non-copyable.
+ */
+struct ggml_cann_graph_lru_cache {
+    size_t capacity; /**< Maximum number of graphs in the cache. */
+
+    std::list<ggml_cann_graph *> cache_list; /**< List storing cached graphs as raw pointers. */
+
+    /**
+     * @brief Reads the capacity from GGML_CANN_GRAPH_CACHE_CAPACITY, using "12"
+     * when get_env_as_lowercase() yields no value.
+     */
+    ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env_as_lowercase("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); }
+
+    ggml_cann_graph_lru_cache(const ggml_cann_graph_lru_cache &)             = delete;
+    ggml_cann_graph_lru_cache & operator=(const ggml_cann_graph_lru_cache &) = delete;
+
+    /**
+     * @brief Push a new graph to the front of the cache.
+     * If the cache is already at capacity, the least recently used graph is
+     * deleted first. The new graph is always stored, so with a capacity of 0
+     * the cache still holds the most recently pushed graph.
+     * @param new_node Pointer to the new ggml_cann_graph to cache.
+     * Ownership is transferred to the cache (cache will delete it).
+     */
+    void push(ggml_cann_graph * new_node) {
+        // The emptiness check keeps back()/pop_back() off an empty list when
+        // capacity is 0.
+        if (!cache_list.empty() && cache_list.size() >= capacity) {
+            ggml_cann_graph * old = cache_list.back();
+            cache_list.pop_back();
+            delete old;  // free the old graph
+        }
+        cache_list.push_front(new_node);
+    }
+
+    /**
+     * @brief Clear all graphs from the cache (also frees memory).
+     */
+    void clear() {
+        for (auto ptr : cache_list) {
+            delete ptr;
+        }
+        cache_list.clear();
+    }
+
+    /**
+     * @brief Destructor that clears the cache and frees all cached graphs.
+     */
+    ~ggml_cann_graph_lru_cache() { clear(); }
+
+    /**
+     * @brief Find a cached CANN graph that matches the given ggml graph and move it to front.
+     *
+     * This function iterates through the cached CANN graphs from most to least
+     * recently used and compares them against the given ggml computation graph.
+     * The first matching graph is promoted to the front of the LRU cache, where
+     * the caller can reach it through cache_list.front().
+     *
+     * @param cgraph The current ggml computation graph.
+     * @return true if a matching graph was found and promoted; false otherwise.
+     */
+    bool find_and_move_to_front(ggml_cgraph * cgraph) {
+        for (auto it = cache_list.begin(); it != cache_list.end(); ++it) {
+            if ((*it)->matches_cgraph(cgraph)) {
+                // splice relinks the node in place: no element is erased, so
+                // nothing is read after its storage has been released.
+                cache_list.splice(cache_list.begin(), cache_list, it);
+                return true;
+            }
+        }
+        return false;
+    }
+};
+#endif // USE_ACL_GRAPH
+
+/**
+ * @brief Cached RoPE buffers together with the parameters they were built for.
+ *
+ * The destructor releases every non-null buffer, so the type is
+ * non-copyable: a copy would release the same buffers twice.
+ */
+struct ggml_cann_rope_cache {
+    ggml_cann_rope_cache() = default;
+
+    ggml_cann_rope_cache(const ggml_cann_rope_cache &)             = delete;
+    ggml_cann_rope_cache & operator=(const ggml_cann_rope_cache &) = delete;
+
+    /**
+     * @brief Releases every buffer that has been set: the `*_host` pointers
+     * with free(), all others with aclrtFree().
+     */
+    ~ggml_cann_rope_cache() {
+        if (theta_scale_cache) {
+            ACL_CHECK(aclrtFree(theta_scale_cache));
+        }
+        if (sin_cache) {
+            ACL_CHECK(aclrtFree(sin_cache));
+        }
+        if (cos_cache) {
+            ACL_CHECK(aclrtFree(cos_cache));
+        }
+        if (position_select_index) {
+            ACL_CHECK(aclrtFree(position_select_index));
+        }
+        if (theta_scale_exp_host) {
+            free(theta_scale_exp_host);
+        }
+        if (position_select_index_host) {
+            free(position_select_index_host);
+        }
+        if (yarn_ramp_cache) {
+            ACL_CHECK(aclrtFree(yarn_ramp_cache));
+        }
+    }
+
+    /**
+     * @brief Tells whether the given parameters are identical to the stored ones.
+     *
+     * Floating-point parameters are compared with exact equality. The `cached`
+     * flag takes no part in the comparison.
+     *
+     * @param sections Array of 4 values, compared element by element.
+     * @return true if every parameter equals its stored counterpart.
+     */
+    bool equal(int64_t   theta_scale_length,
+               int64_t   position_length,
+               float     ext_factor,
+               float     theta_scale,
+               float     freq_scale,
+               float     attn_factor,
+               bool      is_neox,
+               bool      indep_sects,
+               bool      mrope_used,
+               bool      is_imrope,
+               const int sections[4]) const {
+        return this->theta_scale_length == theta_scale_length && this->position_length == position_length &&
+               this->ext_factor == ext_factor && this->theta_scale == theta_scale && this->freq_scale == freq_scale &&
+               this->attn_factor == attn_factor && this->is_neox == is_neox && this->indep_sects == indep_sects &&
+               this->mrope_used == mrope_used && this->is_imrope == is_imrope && this->sections[0] == sections[0] &&
+               this->sections[1] == sections[1] && this->sections[2] == sections[2] && this->sections[3] == sections[3];
+    }
+
+    /**
+     * @brief Stores the given parameters as the ones the cache is valid for.
+     *
+     * Only the parameter fields are written; the buffers and the `cached`
+     * flag are left untouched.
+     *
+     * @param sections Array of 4 values, copied element by element.
+     */
+    void set(int64_t   theta_scale_length,
+             int64_t   position_length,
+             float     ext_factor,
+             float     theta_scale,
+             float     freq_scale,
+             float     attn_factor,
+             bool      is_neox,
+             bool      indep_sects,
+             bool      mrope_used,
+             bool      is_imrope,
+             const int sections[4]) {
+        this->theta_scale_length = theta_scale_length;
+        this->position_length    = position_length;
+        this->ext_factor         = ext_factor;
+        this->theta_scale        = theta_scale;
+        this->freq_scale         = freq_scale;
+        this->attn_factor        = attn_factor;
+        this->is_neox            = is_neox;
+        this->indep_sects        = indep_sects;
+        this->mrope_used         = mrope_used;
+        this->is_imrope          = is_imrope;
+        this->sections[0]        = sections[0];
+        this->sections[1]        = sections[1];
+        this->sections[2]        = sections[2];
+        this->sections[3]        = sections[3];
+    }
+
+    // memory cache, prepare before inferencing.
+    void *  theta_scale_cache          = nullptr;
+    float * theta_scale_exp_host       = nullptr;
+    int *   position_select_index_host = nullptr;
+    void *  position_select_index      = nullptr;
+    void *  yarn_ramp_cache            = nullptr;
+    // sin/cos cache, used only to accelerate first layer on each device
+    void *  sin_cache                  = nullptr;
+    void *  cos_cache                  = nullptr;
+    // Properties to check before reusing the sincos cache
+    int64_t theta_scale_length         = 0;
+    int64_t position_length            = 0;
+    bool    cached                     = false;
+    float   ext_factor                 = 0.0f;
+    float   theta_scale                = 0.0f;
+    float   freq_scale                 = 0.0f;
+    float   attn_factor                = 0.0f;
+    bool    is_neox                    = false;
+    bool    indep_sects                = false;
+    bool    mrope_used                 = false;
+    int     sections[4]                = { 0, 0, 0, 0 };
+    bool    is_imrope                  = false;
+};
+
+/**
+ * @brief Owner of a single buffer that is released with aclrtFree().
+ *
+ * The destructor releases the buffer, so the type is non-copyable: a copy
+ * would release the same buffer twice.
+ */
+struct ggml_cann_tensor_cache {
+    ggml_cann_tensor_cache() = default;
+
+    ggml_cann_tensor_cache(const ggml_cann_tensor_cache &)             = delete;
+    ggml_cann_tensor_cache & operator=(const ggml_cann_tensor_cache &) = delete;
+
+    /**
+     * @brief Releases the buffer if one has been set.
+     */
+    ~ggml_cann_tensor_cache() {
+        if (cache != nullptr) {
+            ACL_CHECK(aclrtFree(cache));
+        }
+    }
+
+    void *  cache = nullptr; /**< Cached buffer; nullptr while nothing is cached. */
+    int64_t size  = 0;       /**< Size associated with `cache`. NOTE(review): unit (bytes vs. elements) is not visible in this header. */
+};
+
+/**
+ * @brief Context for managing CANN backend operations.
+ *
+ * Owns the per-device event, streams, caches and memory pool. The event and
+ * the streams are destroyed in the destructor; the type is non-copyable.
+ */
+struct ggml_backend_cann_context {
+    int32_t     device;               /**< Device ID. */
+    std::string name;                 /**< Name of the device. */
+    std::string description;          /**< Description of the device. */
+    aclrtEvent  copy_event = nullptr; /**< Event for managing copy operations. */
+#ifdef USE_ACL_GRAPH
+    /// Cached CANN ACL graph used for executing the current ggml computation graph.
+    ggml_cann_graph_lru_cache graph_lru_cache;
+    bool                      acl_graph_mode = true;
+#endif
+    // Not assigned anywhere in this header, so it gets a defined default
+    // instead of an indeterminate value.
+    bool                   async_mode = false;
+    // Rope Cache
+    ggml_cann_rope_cache   rope_cache;
+    // Constant Pool
+    ggml_cann_tensor_cache rms_norm_one_tensor_cache;
+    ggml_cann_tensor_cache rms_norm_zero_tensor_cache;
+
+    aclrtStream streams[GGML_CANN_MAX_STREAMS] = { nullptr }; /**< Array of streams for the device. */
+
+    /**
+     * @brief Constructor for initializing the context with a given device.
+     *
+     * Sets the device, names the context "CANN<device>" and takes the
+     * description from aclrtGetSocName(). With USE_ACL_GRAPH defined, the
+     * graph mode is read from GGML_CANN_ACL_GRAPH (default "on") and logged.
+     *
+     * @param device Device ID.
+     */
+    explicit ggml_backend_cann_context(int device) : device(device), name("CANN" + std::to_string(device)) {
+        ggml_cann_set_device(device);
+        description = aclrtGetSocName();
+
+#ifdef USE_ACL_GRAPH
+        acl_graph_mode = parse_bool(get_env_as_lowercase("GGML_CANN_ACL_GRAPH").value_or("on"));
+        GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER",
+                      acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
+#endif
+    }
+
+    ggml_backend_cann_context(const ggml_backend_cann_context &)             = delete;
+    ggml_backend_cann_context & operator=(const ggml_backend_cann_context &) = delete;
+
+    /**
+     * @brief Destructor for cleaning up resources.
+     *
+     * Sets the device, then destroys the copy event and every stream that
+     * was created.
+     */
+    ~ggml_backend_cann_context() {
+        ggml_cann_set_device(device);
+        if (copy_event != nullptr) {
+            ACL_CHECK(aclrtDestroyEvent(copy_event));
+        }
+        for (int i = 0; i < GGML_CANN_MAX_STREAMS; ++i) {
+            if (streams[i] != nullptr) {
+                ACL_CHECK(aclrtDestroyStream(streams[i]));
+            }
+        }
+    }
+
+    /**
+     * @brief Get or create a stream for a given index.
+     * @param stream Index of the stream; must be in [0, GGML_CANN_MAX_STREAMS).
+     * An out-of-range index trips an assertion instead of indexing past the
+     * `streams` array.
+     * @return The stream corresponding to the given index.
+     */
+    aclrtStream stream(int stream) {
+        GGML_ASSERT(stream >= 0 && stream < GGML_CANN_MAX_STREAMS);
+        if (streams[stream] == nullptr) {
+            // The device is set before the stream is created. The original
+            // author's concern: without this, the stream might be created and
+            // later destroyed under mismatched thread contexts (the author
+            // also notes that no mismatch was actually observed).
+            ACL_CHECK(aclrtSetDevice(device));
+            ACL_CHECK(aclrtCreateStream(&streams[stream]));
+        }
+        return streams[stream];
+    }
+
+    /**
+     * @brief Get or create the default stream (index 0).
+     * @return The default stream.
+     */
+    aclrtStream stream() { return stream(0); }
+
+    // TODO: each stream should have a memory pool.
+    std::unique_ptr<ggml_cann_pool> mem_pool; /**< Memory pool for the device. */
+
+    /**
+     * @brief Create a new memory pool for a given device.
+     * @param device Device ID.
+     * @return A unique pointer to the new memory pool.
+     */
+    static std::unique_ptr<ggml_cann_pool> new_pool_for_device(int device);
+
+    /**
+     * @brief Get or create the memory pool for the context.
+     *
+     * The pool is created on first use via new_pool_for_device(device).
+     *
+     * @return Reference to the memory pool.
+     */
+    ggml_cann_pool & pool() {
+        if (mem_pool == nullptr) {
+            mem_pool = new_pool_for_device(device);
+        }
+        return *mem_pool;
+    }
+};
+
+#endif // CANN_COMMON_H