diff options
| author | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
|---|---|---|
| committer | Mitja Felicijan <mitja.felicijan@gmail.com> | 2026-02-12 20:57:17 +0100 |
| commit | b333b06772c89d96aacb5490d6a219fba7c09cc6 (patch) | |
| tree | 211df60083a5946baa2ed61d33d8121b7e251b06 /llama.cpp/ggml/src/ggml-cuda/clamp.cu | |
| download | llmnpc-b333b06772c89d96aacb5490d6a219fba7c09cc6.tar.gz | |
Engage!
Diffstat (limited to 'llama.cpp/ggml/src/ggml-cuda/clamp.cu')
| -rw-r--r-- | llama.cpp/ggml/src/ggml-cuda/clamp.cu | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/llama.cpp/ggml/src/ggml-cuda/clamp.cu b/llama.cpp/ggml/src/ggml-cuda/clamp.cu new file mode 100644 index 0000000..fe415e7 --- /dev/null +++ b/llama.cpp/ggml/src/ggml-cuda/clamp.cu @@ -0,0 +1,45 @@ +#include "clamp.cuh" + +static __device__ __forceinline__ float op_clamp(float x, float min, float max) { + return fminf(fmaxf(x, min), max); +} + +template <class T> +static __global__ void op_clamp_kernel(const T * x, T * dst, const T min, const T max, const int k) { + const int i = blockDim.x*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + + dst[i] = (T)op_clamp((float)x[i], (float)min, (float)max); +} + +template <class T> +static void clamp_cuda(const T * x, T * dst, const T min, const T max, const int k, cudaStream_t stream) { + const int num_blocks = (k + CUDA_CLAMP_BLOCK_SIZE - 1) / CUDA_CLAMP_BLOCK_SIZE; + op_clamp_kernel<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k); +} + + +void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const void * src0_d = src0->data; + void * dst_d = dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); + GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); + GGML_ASSERT(src0->type == dst->type); + + float min; + float max; + memcpy(&min, dst->op_params, sizeof(float)); + memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); + + if (src0->type == GGML_TYPE_F16) { + clamp_cuda((const half *)src0_d, (half *)dst_d, (half)min, (half)max, ggml_nelements(src0), stream); + } else { + clamp_cuda((const float *)src0_d, (float *)dst_d, (float)min, (float)max, ggml_nelements(src0), stream); + } +} |
