Diffstat (limited to 'llama.cpp/ggml/src/ggml-musa')
 -rw-r--r--  llama.cpp/ggml/src/ggml-musa/CMakeLists.txt | 125
 -rw-r--r--  llama.cpp/ggml/src/ggml-musa/mudnn.cu       | 112
 -rw-r--r--  llama.cpp/ggml/src/ggml-musa/mudnn.cuh      |  12
 3 files changed, 249 insertions(+), 0 deletions(-)
diff --git a/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt b/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt
new file mode 100644
index 0000000..d76cb51
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt
@@ -0,0 +1,125 @@
+if (NOT EXISTS $ENV{MUSA_PATH})
+ if (NOT EXISTS /opt/musa)
+ set(MUSA_PATH /usr/local/musa)
+ else()
+ set(MUSA_PATH /opt/musa)
+ endif()
+else()
+ set(MUSA_PATH $ENV{MUSA_PATH})
+endif()
+
+set(CMAKE_C_COMPILER "${MUSA_PATH}/bin/clang")
+set(CMAKE_C_EXTENSIONS OFF)
+set(CMAKE_CXX_COMPILER "${MUSA_PATH}/bin/clang++")
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+list(APPEND CMAKE_MODULE_PATH "${MUSA_PATH}/cmake")
+
+find_package(MUSAToolkit)
+
+if (MUSAToolkit_FOUND)
+ message(STATUS "MUSA Toolkit found")
+
+ if (NOT DEFINED MUSA_ARCHITECTURES)
+ set(MUSA_ARCHITECTURES "21;22;31")
+ endif()
+ message(STATUS "Using MUSA architectures: ${MUSA_ARCHITECTURES}")
+
+ file(GLOB GGML_HEADERS_MUSA "../ggml-cuda/*.cuh")
+ list(APPEND GGML_HEADERS_MUSA "../../include/ggml-cuda.h")
+ list(APPEND GGML_HEADERS_MUSA "../ggml-musa/mudnn.cuh")
+
+ file(GLOB GGML_SOURCES_MUSA "../ggml-cuda/*.cu")
+ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-tile*.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-mma*.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ file(GLOB SRCS "../ggml-cuda/template-instances/mmq*.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+
+ if (GGML_MUSA_MUDNN_COPY)
+ file(GLOB SRCS "../ggml-musa/*.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ add_compile_definitions(GGML_MUSA_MUDNN_COPY)
+ endif()
+
+ if (GGML_CUDA_FA_ALL_QUANTS)
+ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS)
+ else()
+ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ file(GLOB SRCS "../ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
+ list(APPEND GGML_SOURCES_MUSA ${SRCS})
+ endif()
+
+ set_source_files_properties(${GGML_SOURCES_MUSA} PROPERTIES LANGUAGE CXX)
+ foreach(SOURCE ${GGML_SOURCES_MUSA})
+ set(COMPILE_FLAGS "-Od3 -fno-strict-aliasing -ffast-math -fsigned-char -x musa -mtgpu -fmusa-flush-denormals-to-zero")
+ foreach(ARCH ${MUSA_ARCHITECTURES})
+ set(COMPILE_FLAGS "${COMPILE_FLAGS} --cuda-gpu-arch=mp_${ARCH}")
+ endforeach()
+ set_property(SOURCE ${SOURCE} PROPERTY COMPILE_FLAGS ${COMPILE_FLAGS})
+ endforeach()
+
+ ggml_add_backend_library(ggml-musa
+ ${GGML_HEADERS_MUSA}
+ ${GGML_SOURCES_MUSA}
+ )
+
+ # TODO: do not use CUDA definitions for MUSA
+ if (NOT GGML_BACKEND_DL)
+ target_compile_definitions(ggml PUBLIC GGML_USE_CUDA)
+ endif()
+
+ add_compile_definitions(GGML_USE_MUSA)
+ add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${GGML_CUDA_PEER_MAX_BATCH_SIZE})
+
+ if (GGML_MUSA_GRAPHS)
+ add_compile_definitions(GGML_MUSA_GRAPHS)
+ endif()
+
+ if (GGML_CUDA_FORCE_MMQ)
+ add_compile_definitions(GGML_CUDA_FORCE_MMQ)
+ endif()
+
+ if (GGML_CUDA_FORCE_CUBLAS)
+ add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
+ endif()
+
+ if (GGML_CUDA_NO_VMM)
+ add_compile_definitions(GGML_CUDA_NO_VMM)
+ endif()
+
+ if (NOT GGML_CUDA_FA)
+ add_compile_definitions(GGML_CUDA_NO_FA)
+ endif()
+
+ if (GGML_CUDA_NO_PEER_COPY)
+ add_compile_definitions(GGML_CUDA_NO_PEER_COPY)
+ endif()
+
+ if (GGML_STATIC)
+ target_link_libraries(ggml-musa PRIVATE MUSA::musart_static MUSA::mublas_static)
+        # TODO: mudnn does not provide static libraries yet
+ # if (GGML_MUSA_MUDNN_COPY)
+ # target_link_libraries(ggml-musa PRIVATE mudnn_static)
+ # endif()
+ else()
+ target_link_libraries(ggml-musa PRIVATE MUSA::musart MUSA::mublas)
+ if (GGML_MUSA_MUDNN_COPY)
+ target_link_libraries(ggml-musa PRIVATE mudnn)
+ endif()
+ endif()
+
+ if (GGML_CUDA_NO_VMM)
+        # No VMM requested, so there is no need to link directly with the MUSA driver lib (libmusa.so)
+ else()
+ target_link_libraries(ggml-musa PRIVATE MUSA::musa_driver)
+ endif()
+else()
+ message(FATAL_ERROR "MUSA Toolkit not found")
+endif()
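
For reference, a minimal configure sketch against the options defined above (the top-level GGML_MUSA toggle and the out-of-tree build directory are assumptions about the parent build, not part of this file; MUSA_ARCHITECTURES and GGML_MUSA_MUDNN_COPY are the cache variables this file reads):

    MUSA_PATH=/usr/local/musa cmake -B build -DGGML_MUSA=ON \
        -DMUSA_ARCHITECTURES="21;22;31" -DGGML_MUSA_MUDNN_COPY=ON
    cmake --build build
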
diff --git a/llama.cpp/ggml/src/ggml-musa/mudnn.cu b/llama.cpp/ggml/src/ggml-musa/mudnn.cu
new file mode 100644
index 0000000..020c170
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/mudnn.cu
@@ -0,0 +1,112 @@
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <vector>
+
+#include <mudnn.h>
+
+#include "mudnn.cuh"
+
+namespace mudnn = musa::dnn;
+
+// Returns a human-readable error string for mudnn::Status
+const char* mudnnGetErrorString(mudnn::Status err) {
+ switch (err) {
+ case mudnn::Status::SUCCESS:
+ return "Success";
+ case mudnn::Status::INVALID_PARAMETER:
+ return "Invalid parameter";
+ case mudnn::Status::NOT_INITIALIZED:
+ return "Not initialized";
+ case mudnn::Status::ALLOC_FAILED:
+ return "Allocation failed";
+ case mudnn::Status::NOT_SUPPORTED:
+ return "Not supported";
+ case mudnn::Status::INTERNAL_ERROR:
+ return "Internal error";
+ case mudnn::Status::ARCH_MISMATCH:
+ return "Architecture mismatch";
+ case mudnn::Status::EXECUTION_FAILED:
+ return "Execution failed";
+ default:
+ return "Unknown mudnn status";
+ }
+}
+
+// Error checking macro for MUDNN calls
+#define MUDNN_CHECK(err) CUDA_CHECK_GEN(err, mudnn::Status::SUCCESS, mudnnGetErrorString)
+
+namespace {
+ // Thread-safe cache for mudnn::Handle objects per device
+ std::unordered_map<int, std::unique_ptr<mudnn::Handle>> handle_cache;
+ std::mutex handle_cache_mutex;
+
+ mudnn::Handle* get_cached_handle(int device_id) {
+ std::lock_guard<std::mutex> lock(handle_cache_mutex);
+ auto it = handle_cache.find(device_id);
+ if (it != handle_cache.end()) {
+ return it->second.get();
+ }
+ auto handle = std::make_unique<mudnn::Handle>(device_id);
+ mudnn::Handle* handle_ptr = handle.get();
+ handle_cache[device_id] = std::move(handle);
+ return handle_ptr;
+ }
+}
+
+// Extracts dims and element-unit strides from a ggml_tensor (ggml's nb[] is in bytes); returns ndims
+int get_ggml_dims_and_strides(const ggml_tensor* tensor,
+ std::vector<int64_t>& dims,
+ std::vector<int64_t>& strides) {
+ const int ndims = ggml_n_dims(tensor);
+ const size_t element_size = ggml_element_size(tensor);
+
+ dims.resize(ndims);
+ strides.resize(ndims);
+
+ for (int i = 0; i < ndims; ++i) {
+ dims[i] = tensor->ne[i];
+ strides[i] = tensor->nb[i] / static_cast<int64_t>(element_size);
+ }
+ return ndims;
+}
+
+// Converts ggml_type to mudnn::Tensor::Type
+mudnn::Tensor::Type ggml_type_to_mudnn_type(ggml_type type) {
+ switch (type) {
+ case GGML_TYPE_F32:
+ return mudnn::Tensor::Type::FLOAT;
+ case GGML_TYPE_F16:
+ return mudnn::Tensor::Type::HALF;
+
+ // TODO: Add support for other types
+
+ default:
+ MUDNN_CHECK(mudnn::Status::NOT_SUPPORTED);
+ }
+
+    return mudnn::Tensor::Type::FLOAT; // unreachable: MUDNN_CHECK above aborts; silences missing-return warnings
+}
+
+// Asynchronous memory copy using mudnn::Unary::IDENTITY
+musaError_t mudnnMemcpyAsync(ggml_backend_cuda_context& ctx, const ggml_tensor* dst, const ggml_tensor* src) {
+ mudnn::Tensor tensor_dst, tensor_src;
+
+ MUDNN_CHECK(tensor_dst.SetType(ggml_type_to_mudnn_type(dst->type)));
+ MUDNN_CHECK(tensor_src.SetType(ggml_type_to_mudnn_type(src->type)));
+
+ std::vector<int64_t> dims, strides;
+ const int ndims = get_ggml_dims_and_strides(src, dims, strides);
+
+ MUDNN_CHECK(tensor_dst.SetNdInfo(ndims, dims.data(), strides.data()));
+ MUDNN_CHECK(tensor_src.SetNdInfo(ndims, dims.data(), strides.data()));
+ MUDNN_CHECK(tensor_dst.SetAddr(dst->data));
+ MUDNN_CHECK(tensor_src.SetAddr(src->data));
+
+ mudnn::Unary op;
+ MUDNN_CHECK(op.SetMode(mudnn::Unary::Mode::IDENTITY));
+ MUDNN_CHECK(op.SetAlpha(0.0f));
+ MUDNN_CHECK(op.SetBeta(0.0f));
+
+ mudnn::Handle* handle = get_cached_handle(ctx.device);
+ MUDNN_CHECK(handle->SetStream(ctx.stream()));
+ MUDNN_CHECK(op.Run(*handle, tensor_dst, tensor_src));
+
+ return musaSuccess;
+}
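
For context, a hedged sketch of how a copy path in the shared CUDA/MUSA backend might dispatch to mudnnMemcpyAsync when GGML_MUSA_MUDNN_COPY is enabled. ggml_cuda_cpy_fallback is a hypothetical stand-in for the backend's generic copy kernel; CUDA_CHECK and ggml_is_contiguous come from the surrounding codebase:

    // Sketch only: use the muDNN identity copy when types match and both
    // tensors are contiguous; otherwise fall back to the generic kernel.
    static void copy_tensor(ggml_backend_cuda_context & ctx,
                            const ggml_tensor * src, ggml_tensor * dst) {
    #if defined(GGML_USE_MUSA) && defined(GGML_MUSA_MUDNN_COPY)
        if (src->type == dst->type && ggml_is_contiguous(src) && ggml_is_contiguous(dst)) {
            CUDA_CHECK(mudnnMemcpyAsync(ctx, dst, src));
            return;
        }
    #endif
        ggml_cuda_cpy_fallback(ctx, src, dst); // hypothetical generic path
    }
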
diff --git a/llama.cpp/ggml/src/ggml-musa/mudnn.cuh b/llama.cpp/ggml/src/ggml-musa/mudnn.cuh
new file mode 100644
index 0000000..c301285
--- /dev/null
+++ b/llama.cpp/ggml/src/ggml-musa/mudnn.cuh
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "ggml-cuda/common.cuh"
+#include "ggml.h"
+
+// Asynchronously copies data from src tensor to dst tensor using the provided context.
+// Returns a musaError_t indicating success or failure.
+musaError_t mudnnMemcpyAsync(
+ ggml_backend_cuda_context &ctx,
+ const ggml_tensor *dst,
+ const ggml_tensor *src
+);