| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 | /llama.cpp/ggml/src/ggml-musa |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-musa')

| mode | file | lines added |
|---|---|---|
| -rw-r--r-- | llama.cpp/ggml/src/ggml-musa/CMakeLists.txt | 125 |
| -rw-r--r-- | llama.cpp/ggml/src/ggml-musa/mudnn.cu | 112 |
| -rw-r--r-- | llama.cpp/ggml/src/ggml-musa/mudnn.cuh | 12 |
3 files changed, 249 insertions, 0 deletions
```diff
diff --git a/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt b/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt
new file mode 100644
index 0000000..d76cb51
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt
@@ -0,0 +1,125 @@
+if (NOT EXISTS $ENV{MUSA_PATH})
+    if (NOT EXISTS /opt/musa)
+        set(MUSA_PATH /usr/local/musa)
+    else()
+        set(MUSA_PATH /opt/musa)
+    endif()
+else()
+    set(MUSA_PATH $ENV{MUSA_PATH})
+endif()
+
+set(CMAKE_C_COMPILER "${MUSA_PATH}/bin/clang")
+set(CMAKE_C_EXTENSIONS OFF)
+set(CMAKE_CXX_COMPILER "${MUSA_PATH}/bin/clang++")
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+list(APPEND CMAKE_MODULE_PATH "${MUSA_PATH}/cmake")
+
+find_package(MUSAToolkit)
+
+if (MUSAToolkit_FOUND)
+    message(STATUS "MUSA Toolkit found")
+
+    if (NOT DEFINED MUSA_ARCHITECTURES)
+        set(MUSA_ARCHITECTURES "21;22;31")
+    endif()
+    message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")
+
+    file(GLOB GGML_HEADERS_MUSA "../ggml-cuda/*.cuh")
+    list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h")
+    list(APPEND GGML_HEADERS_MUSA "../ggml-musa/mudnn.cuh")
+
+    file(GLOB GGML_SOURCES_MUSA "../ggml-cuda/*.cu")
+    file(GLOB SRCS "../ggml-cuda/template-instances/fattn-tile*.cu")
+    list(APPEND GGML_SOURCES_MUSA ${SRCS})
+    file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu")
+    list(APPEND GGML_SOURCES_MUSA ${SRCS})
+    file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")
+    list(APPEND GGML_SOURCES_MUSA ${SRCS})
+
+    if (GGML_MUSA_MUDNN_COPY)
+        file(GLOB SRCS "../ggml-musa/*.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        add_compile_definitions(GGML_MUSA_MUDNN_COPY)
+    endif()
+
+    if (GGML_CUDA_FA_ALL_QUANTS)
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
+    else()
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+        file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+        list(APPEND GGML_SOURCES_MUSA ${SRCS})
+    endif()
+
+    set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)
+    foreach(SOURCE ${GGML_SOURCES_MUSA})
+        set(COMPILE_FLAGS "-Od3 -fno-strict-aliasing -ffast-math -fsigned-char -x musa -mtgpu -fmusa-flush-denormals-to-zero")
+        foreach(ARCH ${MUSA_ARCHITECTURES})
+            set(COMPILE_FLAGS "${COMPILE_FLAGS} --cuda-gpu-arch=mp_${ARCH}")
+        endforeach()
+        set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+    endforeach()
+
+    ggml_add_backend_library(ggml-musa
+        ${GGML_HEADERS_MUSA}
+        ${GGML_SOURCES_MUSA}
+    )
+
+    # TODO: do not use CUDA definitions for MUSA
+    if (NOT GGML_BACKEND_DL)
+        target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
+    endif()
+
+    add_compile_definitions(GGML_USE_MUSA)
+    add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
+
+    if (GGML_MUSA_GRAPHS)
+        add_compile_definitions(GGML_MUSA_GRAPHS)
+    endif()
+
+    if (GGML_CUDA_FORCE_MMQ)
+        add_compile_definitions(GGML_CUDA_FORCE_MMQ)
+    endif()
+
+    if (GGML_CUDA_FORCE_CUBLAS)
+        add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
+    endif()
+
+    if (GGML_CUDA_NO_VMM)
+        add_compile_definitions(GGML_CUDA_NO_VMM)
+    endif()
+
+    if (NOT GGML_CUDA_FA)
+        add_compile_definitions(GGML_CUDA_NO_FA)
+    endif()
+
+    if (GGML_CUDA_NO_PEER_COPY)
+        add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
+    endif()
+
+    if (GGML_STATIC)
+        target_link_libraries(ggml-musa PRIVATE MUSA::musart_static MUSA::mublas_static)
+        # TODO: mudnn has not provided static libraries yet
+        # if (GGML_MUSA_MUDNN_COPY)
+        #     target_link_libraries(ggml-musa PRIVATE mudnn_static)
+        # endif()
+    else()
+        target_link_libraries(ggml-musa PRIVATE MUSA::musart MUSA::mublas)
+        if (GGML_MUSA_MUDNN_COPY)
+            target_link_libraries(ggml-musa PRIVATE mudnn)
+        endif()
+    endif()
+
+    if (GGML_CUDA_NO_VMM)
+        # No VMM requested, no need to link directly with the musa driver lib (libmusa.so)
+    else()
+        target_link_libraries(ggml-musa PRIVATE MUSA::musa_driver)
+    endif()
+else()
+    message(FATAL_ERROR "MUSA Toolkit not found")
+endif()
```
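The `GGML_MUSA_MUDNN_COPY` option above only compiles in the `ggml-musa/*.cu` sources and exports a compile definition; whether tensor copies actually go through muDNN is decided at compile time in the shared CUDA/MUSA sources. The sketch below is illustrative only: the helper name `copy_tensor_sketch` and the plain-memcpy fallback are assumptions, not code from this commit, but the preprocessor gating shows the kind of dispatch such a definition enables.

```cpp
// Illustrative sketch, not part of this commit: how code built from the shared
// ggml-cuda sources might consume the GGML_MUSA_MUDNN_COPY definition set above.
#include "ggml-cuda/common.cuh"        // ggml_backend_cuda_context, CUDA_CHECK

#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
#include "ggml-musa/mudnn.cuh"         // declares mudnnMemcpyAsync
#endif

// Hypothetical helper: copy src into dst on the context's stream.
static void copy_tensor_sketch(ggml_backend_cuda_context & ctx,
                               const ggml_tensor * src, ggml_tensor * dst) {
#if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
    // MUSA build with muDNN enabled: use the identity-op copy from mudnn.cu.
    CUDA_CHECK(mudnnMemcpyAsync(ctx, dst, src));
#else
    // Default path: plain device-to-device async memcpy.
    CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src),
                               cudaMemcpyDeviceToDevice, ctx.stream()));
#endif
}
```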
```diff
diff --git a/llama.cpp/ggml/src/ggml-musa/mudnn.cu b/llama.cpp/ggml/src/ggml-musa/mudnn.cu
new file mode 100644
index 0000000..020c170
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/mudnn.cu
@@ -0,0 +1,112 @@
+#include <mutex>
+#include <mudnn.h>
+
+#include "mudnn.cuh"
+
+namespace mudnn = musa::dnn;
+
+// Returns a human-readable error string for mudnn::Status
+const char* mudnnGetErrorString(mudnn::Status err) {
+    switch (err) {
+        case mudnn::Status::SUCCESS:
+            return "Success";
+        case mudnn::Status::INVALID_PARAMETER:
+            return "Invalid parameter";
+        case mudnn::Status::NOT_INITIALIZED:
+            return "Not initialized";
+        case mudnn::Status::ALLOC_FAILED:
+            return "Allocation failed";
+        case mudnn::Status::NOT_SUPPORTED:
+            return "Not supported";
+        case mudnn::Status::INTERNAL_ERROR:
+            return "Internal error";
+        case mudnn::Status::ARCH_MISMATCH:
+            return "Architecture mismatch";
+        case mudnn::Status::EXECUTION_FAILED:
+            return "Execution failed";
+        default:
+            return "Unknown mudnn status";
+    }
+}
+
+// Error checking macro for MUDNN calls
+#define MUDNN_CHECK(err) CUDA_CHECK_GEN(err, mudnn::Status::SUCCESS, mudnnGetErrorString)
+
+namespace {
+    // Thread-safe cache for mudnn::Handle objects per device
+    std::unordered_map<int, std::unique_ptr<mudnn::Handle>> handle_cache;
+    std::mutex handle_cache_mutex;
+
+    mudnn::Handle* get_cached_handle(int device_id) {
+        std::lock_guard<std::mutex> lock(handle_cache_mutex);
+        auto it = handle_cache.find(device_id);
+        if (it != handle_cache.end()) {
+            return it->second.get();
+        }
+        auto handle = std::make_unique<mudnn::Handle>(device_id);
+        mudnn::Handle* handle_ptr = handle.get();
+        handle_cache[device_id] = std::move(handle);
+        return handle_ptr;
+    }
+}
+
+// Extracts dimensions and strides from a ggml_tensor
+int get_ggml_dims_and_strides(const ggml_tensor* tensor,
+                              std::vector<int64_t>& dims,
+                              std::vector<int64_t>& strides) {
+    const int ndims = ggml_n_dims(tensor);
+    const size_t element_size = ggml_element_size(tensor);
+
+    dims.resize(ndims);
+    strides.resize(ndims);
+
+    for (int i = 0; i < ndims; ++i) {
+        dims[i] = tensor->ne[i];
+        strides[i] = tensor->nb[i] / static_cast<int64_t>(element_size);
+    }
+    return ndims;
+}
+
+// Converts ggml_type to mudnn::Tensor::Type
+mudnn::Tensor::Type ggml_type_to_mudnn_type(ggml_type type) {
+    switch (type) {
+        case GGML_TYPE_F32:
+            return mudnn::Tensor::Type::FLOAT;
+        case GGML_TYPE_F16:
+            return mudnn::Tensor::Type::HALF;
+
+        // TODO: Add support for other types
+
+        default:
+            MUDNN_CHECK(mudnn::Status::NOT_SUPPORTED);
+    }
+
+    return mudnn::Tensor::Type::FLOAT; // Default fallback
+}
+
+// Asynchronous memory copy using mudnn::Unary::IDENTITY
+musaError_t mudnnMemcpyAsync(ggml_backend_cuda_context& ctx, const ggml_tensor* dst, const ggml_tensor* src) {
+    mudnn::Tensor tensor_dst, tensor_src;
+
+    MUDNN_CHECK(tensor_dst.SetType(ggml_type_to_mudnn_type(dst->type)));
+    MUDNN_CHECK(tensor_src.SetType(ggml_type_to_mudnn_type(src->type)));
+
+    std::vector<int64_t> dims, strides;
+    const int ndims = get_ggml_dims_and_strides(src, dims, strides);
+
+    MUDNN_CHECK(tensor_dst.SetNdInfo(ndims, dims.data(), strides.data()));
+    MUDNN_CHECK(tensor_src.SetNdInfo(ndims, dims.data(), strides.data()));
+    MUDNN_CHECK(tensor_dst.SetAddr(dst->data));
+    MUDNN_CHECK(tensor_src.SetAddr(src->data));
+
+    mudnn::Unary op;
+    MUDNN_CHECK(op.SetMode(mudnn::Unary::Mode::IDENTITY));
+    MUDNN_CHECK(op.SetAlpha(0.0f));
+    MUDNN_CHECK(op.SetBeta(0.0f));
+
+    mudnn::Handle* handle = get_cached_handle(ctx.device);
+    MUDNN_CHECK(handle->SetStream(ctx.stream()));
+    MUDNN_CHECK(op.Run(*handle, tensor_dst, tensor_src));
+
+    return musaSuccess;
+}
```
```diff
diff --git a/llama.cpp/ggml/src/ggml-musa/mudnn.cuh b/llama.cpp/ggml/src/ggml-musa/mudnn.cuh
new file mode 100644
index 0000000..c301285
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/mudnn.cuh
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "ggml-cuda/common.cuh"
+#include "ggml.h"
+
+// Asynchronously copies data from src tensor to dst tensor using the provided context.
+// Returns a musaError_t indicating success or failure.
+musaError_t mudnnMemcpyAsync(
+    ggml_backend_cuda_context &ctx,
+    const ggml_tensor *dst,
+    const ggml_tensor *src
+);
```
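For context, `mudnnMemcpyAsync` describes both tensors with the source's dimensions and strides and enqueues the copy on the context's stream, so it returns before the data has actually moved. A minimal usage sketch follows; the helper name, the assertions, and the explicit synchronization are assumptions for illustration, not code from this commit.

```cpp
// Minimal usage sketch (assumed setup: ctx is an initialized ggml_backend_cuda_context,
// dst and src are ggml tensors of the same type and shape with device allocations).
#include "ggml-cuda/common.cuh"
#include "ggml-musa/mudnn.cuh"

static void copy_tensor_blocking_sketch(ggml_backend_cuda_context & ctx,
                                        ggml_tensor * dst, const ggml_tensor * src) {
    GGML_ASSERT(dst->type == src->type);           // mudnn.cu only maps F32 and F16
    GGML_ASSERT(ggml_are_same_shape(dst, src));    // both tensors are described by src's dims/strides

    // Enqueue the identity copy; this returns before the copy has finished.
    const musaError_t err = mudnnMemcpyAsync(ctx, dst, src);
    GGML_ASSERT(err == musaSuccess);

    // Synchronize only if the result is needed immediately (e.g. before a host read).
    CUDA_CHECK(cudaStreamSynchronize(ctx.stream()));
}
```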
