From b333b06772c89d96aacb5490d6a219fba7c09cc6 Mon Sep 17 00:00:00 2001 From: Mitja Felicijan Date: Thu, 12 Feb 2026 20:57:17 +0100 Subject: Engage! --- llama.cpp/ggml/src/ggml-cuda/convert.cuh | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 llama.cpp/ggml/src/ggml-cuda/convert.cuh (limited to 'llama.cpp/ggml/src/ggml-cuda/convert.cuh') diff --git a/llama.cpp/ggml/src/ggml-cuda/convert.cuh b/llama.cpp/ggml/src/ggml-cuda/convert.cuh new file mode 100644 index 0000000..09f9a33 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-cuda/convert.cuh @@ -0,0 +1,56 @@ +#pragma once +#include "common.cuh" + +#define CUDA_DEQUANTIZE_BLOCK_SIZE 256 + +template +using to_t_cuda_t = void (*)(const void * x, T * y, int64_t k, cudaStream_t stream); + +typedef to_t_cuda_t to_fp32_cuda_t; +typedef to_t_cuda_t to_fp16_cuda_t; +typedef to_t_cuda_t to_bf16_cuda_t; + +to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type); + +to_bf16_cuda_t ggml_get_to_bf16_cuda(ggml_type type); + +to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type); + +// TODO more general support for non-contiguous inputs + +template +using to_t_nc_cuda_t = void (*)(const void * x, T * y, + int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03, + int64_t s01, int64_t s02, int64_t s03, cudaStream_t stream); + +typedef to_t_nc_cuda_t to_fp32_nc_cuda_t; +typedef to_t_nc_cuda_t to_fp16_nc_cuda_t; +typedef to_t_nc_cuda_t to_bf16_nc_cuda_t; + +to_fp32_nc_cuda_t ggml_get_to_fp32_nc_cuda(ggml_type type); +to_fp16_nc_cuda_t ggml_get_to_fp16_nc_cuda(ggml_type type); +to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type); + +template + __host__ __device__ inline dst_t ggml_cuda_cast(src_t x) { + if constexpr (std::is_same_v) { + return x; + } else if constexpr(std::is_same_v) { + return __float2bfloat16(float(x)); + } else if constexpr(std::is_same_v) { + return __bfloat162float(x); + } else if constexpr(std::is_same_v && std::is_same_v) { + return __float22half2_rn(x); + } else if constexpr(std::is_same_v && std::is_same_v) { + // bypass compile error on cuda 12.0.1 +#ifdef GGML_USE_HIP + return __float22bfloat162_rn(x); +#else + return {x.x, x.y}; +#endif // GGML_USE_HIP + } else if constexpr(std::is_same_v) { + return int32_t(x); + } else { + return float(x); + } +} -- cgit v1.2.3