author     Mitja Felicijan <mitja.felicijan@gmail.com>  2026-02-12 20:57:17 +0100
committer  Mitja Felicijan <mitja.felicijan@gmail.com>  2026-02-12 20:57:17 +0100
commit     b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch)
tree       211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-musa
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-musa')
-rw-r--r--  llama.cpp/ggml/src/ggml-musa/CMakeLists.txt  125
-rw-r--r--  llama.cpp/ggml/src/ggml-musa/mudnn.cu        112
-rw-r--r--  llama.cpp/ggml/src/ggml-musa/mudnn.cuh        12
3 files changed, 249 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt b/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt
new file mode 100644
index 0000000..d76cb51
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt
@@ -0,0 +1,125 @@
+if (NOT EXISTS $ENV{MUSA_PATH})
+    if (NOT EXISTS /opt/musa)
+        set(MUSA_PATH /usr/local/musa)
+    else()
+        set(MUSA_PATH /opt/musa)
+    endif()
+else()
+    set(MUSA_PATH $ENV{MUSA_PATH})
+endif()
+
+set(CMAKE_C_COMPILER "${MUSA_PATH}/bin/clang")
+set(CMAKE_C_EXTENSIONS OFF)
+set(CMAKE_CXX_COMPILER "${MUSA_PATH}/bin/clang++")
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+list(APPEND CMAKE_MODULE_PATH "${MUSA_PATH}/cmake")
+
+find_package(MUSAToolkit)
+
+if (MUSAToolkit_FOUND)
+    message(STATUS "MUSA Toolkit found")
+
+    if (NOT DEFINED MUSA_ARCHITECTURES)
+        set(MUSA_ARCHITECTURES "21;22;31")
+    endif()
+    message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")
+
+    file(GLOB   GGML_HEADERS_MUSA "../ggml-cuda/*.cuh")
+    list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h")
+    list(APPEND GGML_HEADERS_MUSA "../ggml-musa/mudnn.cuh")
+
+    file(GLOB GGML_SOURCES_MUSA "../ggml-cuda/*.cu")
+    file(GLOB SRCS "../ggml-cuda/template-instances/fattn-tile*.cu")
+    list(APPEND GGML_SOURCES_MUSA ${SRCS})
+    file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu")
+    list(APPEND GGML_SOURCES_MUSA ${SRCS})
+    file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")
+    list(APPEND GGML_SOURCES_MUSA ${SRCS})
+
+    if (GGML_MUSA_MUDNN_COPY)
+        file(GLOB SRCS "../ggml-musa/*.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        add_compile_definitions(GGML_MUSA_MUDNN_COPY)
+    endif()
+
+    if (GGML_CUDA_FA_ALL_QUANTS)
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
+    else()
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+    endif()
+
+    set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)
+    foreach(SOURCE ${GGML_SOURCES_MUSA})
+        set(COMPILE_FLAGS "-Od3 -fno-strict-aliasing -ffast-math -fsigned-char -x musa -mtgpu -fmusa-flush-denormals-to-zero")
+        foreach(ARCH ${MUSA_ARCHITECTURES})
+            set(COMPILE_FLAGS "${COMPILE_FLAGS} --cuda-gpu-arch=mp_${ARCH}")
+        endforeach()
+        set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+    endforeach()
+
+    ggml_add_backend_library(ggml-musa
+        ${GGML_HEADERS_MUSA}
+        ${GGML_SOURCES_MUSA}
+    )
+
+    # TODO: do not use CUDA definitions for MUSA
+    if (NOT GGML_BACKEND_DL)
+        target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
+    endif()
+
+    add_compile_definitions(GGML_USE_MUSA)
+    add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
+
+    if (GGML_MUSA_GRAPHS)
+        add_compile_definitions(GGML_MUSA_GRAPHS)
+    endif()
+
+    if (GGML_CUDA_FORCE_MMQ)
+        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
+    endif()
+
+    if (GGML_CUDA_FORCE_CUBLAS)
+        add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
+    endif()
+
+    if (GGML_CUDA_NO_VMM)
+        add_compile_definitions(GGML_CUDA_NO_VMM)
+    endif()
+
+    if (NOT GGML_CUDA_FA)
+        add_compile_definitions(GGML_CUDA_NO_FA)
+    endif()
+
+    if (GGML_CUDA_NO_PEER_COPY)
+        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
+    endif()
+
+    if (GGML_STATIC)
+        target_link_libraries(ggml-musa PRIVATE MUSA::musart_static MUSA::mublas_static)
+        # TODO: mudnn has not provided static libraries yet
+        # if (GGML_MUSA_MUDNN_COPY)
+        #     target_link_libraries(ggml-musa PRIVATE mudnn_static)
+        # endif()
+    else()
+        target_link_libraries(ggml-musa PRIVATE MUSA::musart MUSA::mublas)
+        if (GGML_MUSA_MUDNN_COPY)
+            target_link_libraries(ggml-musa PRIVATE mudnn)
+        endif()
+    endif()
+
+    if (GGML_CUDA_NO_VMM)
+        # No VMM requested, no need to link directly with the musa driver lib (libmusa.so)
+    else()
+        target_link_libraries(ggml-musa PRIVATE MUSA::musa_driver)
+    endif()
+else()
+    message(FATAL_ERROR "MUSA Toolkit not found")
+endif()
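
Note: the bulk of this CMakeLists.txt reuses the ggml-cuda sources and steers them through preprocessor definitions (GGML_USE_MUSA, GGML_MUSA_MUDNN_COPY, GGML_CUDA_NO_FA, and friends). A minimal sketch of the gating pattern those definitions enable in the shared sources follows; dispatch_copy is a hypothetical helper invented for illustration, not a function from this diff:

// Illustrative only: how GGML_USE_MUSA / GGML_MUSA_MUDNN_COPY typically gate
// a code path. The helper name is hypothetical; the macros are the ones
// defined by the CMakeLists.txt above (cuda* symbols are mapped to their
// musa* equivalents in MUSA builds).
#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
#include "ggml-musa/mudnn.cuh"   // declares mudnnMemcpyAsync
#endif

static void dispatch_copy(ggml_backend_cuda_context & ctx,
                          ggml_tensor * dst, const ggml_tensor * src) {
#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
    // muDNN-backed copy, compiled in only when -DGGML_MUSA_MUDNN_COPY=ON
    CUDA_CHECK(mudnnMemcpyAsync(ctx, dst, src));
#else
    // Generic fallback: plain async device-to-device copy
    CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src),
                               cudaMemcpyDeviceToDevice, ctx.stream()));
#endif
}

The ggml-cuda copy path is the intended consumer of GGML_MUSA_MUDNN_COPY, but the exact wiring lives outside this diff.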
diff --git a/llama.cpp/ggml/src/ggml-musa/mudnn.cu b/llama.cpp/ggml/src/ggml-musa/mudnn.cu
new file mode 100644
index 0000000..020c170
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/mudnn.cu
@@ -0,0 +1,112 @@
+#include <mutex>
+#include <mudnn.h>
+
+#include "mudnn.cuh"
+
+namespace mudnn = musa::dnn;
+
+// Returns a human-readable error string for mudnn::Status
+const char* mudnnGetErrorString(mudnn::Status err) {
+    switch (err) {
+        case mudnn::Status::SUCCESS:
+            return "Success";
+        case mudnn::Status::INVALID_PARAMETER:
+            return "Invalid parameter";
+        case mudnn::Status::NOT_INITIALIZED:
+            return "Not initialized";
+        case mudnn::Status::ALLOC_FAILED:
+            return "Allocation failed";
+        case mudnn::Status::NOT_SUPPORTED:
+            return "Not supported";
+        case mudnn::Status::INTERNAL_ERROR:
+            return "Internal error";
+        case mudnn::Status::ARCH_MISMATCH:
+            return "Architecture mismatch";
+        case mudnn::Status::EXECUTION_FAILED:
+            return "Execution failed";
+        default:
+            return "Unknown mudnn status";
+    }
+}
+
+// Error checking macro for MUDNN calls
+#define MUDNN_CHECK(err) CUDA_CHECK_GEN(err, mudnn::Status::SUCCESS, mudnnGetErrorString)
+
+namespace {
+    // Thread-safe cache for mudnn::Handle objects per device
+    std::unordered_map<int, std::unique_ptr<mudnn::Handle>> handle_cache;
+    std::mutex handle_cache_mutex;
+
+    mudnn::Handle* get_cached_handle(int device_id) {
+        std::lock_guard<std::mutex> lock(handle_cache_mutex);
+        auto it = handle_cache.find(device_id);
+        if (it != handle_cache.end()) {
+            return it->second.get();
+        }
+        auto handle = std::make_unique<mudnn::Handle>(device_id);
+        mudnn::Handle* handle_ptr = handle.get();
+        handle_cache[device_id] = std::move(handle);
+        return handle_ptr;
+    }
+}
+
+// Extracts dimensions and strides from a ggml_tensor
+int get_ggml_dims_and_strides(const ggml_tensor* tensor,
+                              std::vector<int64_t>& dims,
+                              std::vector<int64_t>& strides) {
+    const int ndims = ggml_n_dims(tensor);
+    const size_t element_size = ggml_element_size(tensor);
+
+    dims.resize(ndims);
+    strides.resize(ndims);
+
+    for (int i = 0; i < ndims; ++i) {
+        dims[i] = tensor->ne[i];
+        strides[i] = tensor->nb[i] / static_cast<int64_t>(element_size);
+    }
+    return ndims;
+}
+
+// Converts ggml_type to mudnn::Tensor::Type
+mudnn::Tensor::Type ggml_type_to_mudnn_type(ggml_type type) {
+    switch (type) {
+        case GGML_TYPE_F32:
+            return mudnn::Tensor::Type::FLOAT;
+        case GGML_TYPE_F16:
+            return mudnn::Tensor::Type::HALF;
+
+        // TODO: Add support for other types
+
+        default:
+            MUDNN_CHECK(mudnn::Status::NOT_SUPPORTED);
+    }
+
+    return mudnn::Tensor::Type::FLOAT; // Default fallback
+}
+
+// Asynchronous memory copy using mudnn::Unary::IDENTITY
+musaError_t mudnnMemcpyAsync(ggml_backend_cuda_context& ctx, const ggml_tensor* dst, const ggml_tensor* src) {
+    mudnn::Tensor tensor_dst, tensor_src;
+
+    MUDNN_CHECK(tensor_dst.SetType(ggml_type_to_mudnn_type(dst->type)));
+    MUDNN_CHECK(tensor_src.SetType(ggml_type_to_mudnn_type(src->type)));
+
+    std::vector<int64_t> dims, strides;
+    const int ndims = get_ggml_dims_and_strides(src, dims, strides);
+
+    MUDNN_CHECK(tensor_dst.SetNdInfo(ndims, dims.data(), strides.data()));
+    MUDNN_CHECK(tensor_src.SetNdInfo(ndims, dims.data(), strides.data()));
+    MUDNN_CHECK(tensor_dst.SetAddr(dst->data));
+    MUDNN_CHECK(tensor_src.SetAddr(src->data));
+
+    mudnn::Unary op;
+    MUDNN_CHECK(op.SetMode(mudnn::Unary::Mode::IDENTITY));
+    MUDNN_CHECK(op.SetAlpha(0.0f));
+    MUDNN_CHECK(op.SetBeta(0.0f));
+
+    mudnn::Handle* handle = get_cached_handle(ctx.device);
+    MUDNN_CHECK(handle->SetStream(ctx.stream()));
+    MUDNN_CHECK(op.Run(*handle, tensor_dst, tensor_src));
+
+    return musaSuccess;
+}
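
Note on the stride conversion in get_ggml_dims_and_strides: ggml stores nb[] as byte strides, while muDNN's SetNdInfo expects element strides, hence the division by element size. A minimal standalone check of that conversion, assuming the usual ggml CPU-side API; this test is an illustration, not part of the diff:

#include <cassert>
#include <cstdint>
#include <vector>
#include "ggml.h"

int main() {
    ggml_init_params params{};
    params.mem_size   = 16 * 1024 * 1024;
    params.mem_buffer = nullptr;
    params.no_alloc   = false;
    ggml_context * ctx = ggml_init(params);

    // 2-D F32 tensor: 8 elements per row, 4 rows
    ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);

    std::vector<int64_t> dims, strides;
    const int ndims = get_ggml_dims_and_strides(t, dims, strides);

    assert(ndims == 2);
    assert(dims[0] == 8 && dims[1] == 4);
    // nb[0] = 4 bytes, nb[1] = 8 * 4 = 32 bytes -> element strides {1, 8}
    assert(strides[0] == 1 && strides[1] == 8);

    ggml_free(ctx);
    return 0;
}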
diff --git a/llama.cpp/ggml/src/ggml-musa/mudnn.cuh b/llama.cpp/ggml/src/ggml-musa/mudnn.cuh
new file mode 100644
index 0000000..c301285
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/mudnn.cuh
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "ggml-cuda/common.cuh"
+#include "ggml.h"
+
+// Asynchronously copies data from src tensor to dst tensor using the provided context.
+// Returns a musaError_t indicating success or failure.
+musaError_t mudnnMemcpyAsync(
+    ggml_backend_cuda_context &ctx,
+    const ggml_tensor *dst,
+    const ggml_tensor *src
+);
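
One usage caveat: mudnnMemcpyAsync in mudnn.cu applies src's dims and strides to both tensor descriptors, so dst must match src's shape and layout. A sketch of a call site under that assumption; the wrapper below is hypothetical, not part of this diff:

#include "mudnn.cuh"

// Hypothetical wrapper: copy src into dst on ctx's stream and abort on error.
// Assumes dst and src share shape/layout, per the SetNdInfo usage in mudnn.cu.
static void copy_tensor_async(ggml_backend_cuda_context & ctx,
                              ggml_tensor * dst, const ggml_tensor * src) {
    const musaError_t err = mudnnMemcpyAsync(ctx, dst, src);
    if (err != musaSuccess) {
        GGML_ABORT("%s: mudnnMemcpyAsync failed: %d\n", __func__, (int) err);
    }
}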