From 39132750cc880f0efc9bcc1afa522d56aa58c9de Mon Sep 17 00:00:00 2001
From: bhalimi <benjamin.halimi@cea.fr>
Date: Tue, 15 Oct 2024 14:51:32 +0000
Subject: [PATCH] add sqrt cuda backend

---
 .../aidge/backend/cuda/operator/SqrtImpl.hpp  | 59 +++++++++++++++++++
 .../cuda/operator/SqrtImpl_CUDA_kernels.hpp   | 36 +++++++++++
 src/operator/SqrtImpl.cpp                     | 57 ++++++++++++++++++
 src/operator/SqrtImpl_CUDA_kernels.cu         | 53 +++++++++++++++++
 4 files changed, 205 insertions(+)
 create mode 100644 include/aidge/backend/cuda/operator/SqrtImpl.hpp
 create mode 100644 include/aidge/backend/cuda/operator/SqrtImpl_CUDA_kernels.hpp
 create mode 100644 src/operator/SqrtImpl.cpp
 create mode 100644 src/operator/SqrtImpl_CUDA_kernels.cu

diff --git a/include/aidge/backend/cuda/operator/SqrtImpl.hpp b/include/aidge/backend/cuda/operator/SqrtImpl.hpp
new file mode 100644
index 0000000..5b4b053
--- /dev/null
+++ b/include/aidge/backend/cuda/operator/SqrtImpl.hpp
@@ -0,0 +1,59 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_BACKEND_CUDA_OPERATOR_SQRTIMPL_H_
+#define AIDGE_BACKEND_CUDA_OPERATOR_SQRTIMPL_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include <cudnn.h>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Sqrt.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+
+namespace Aidge {
+class SqrtImpl_cuda : public OperatorImpl {
+private:
+    std::shared_ptr<Tensor> mInputFallback;
+    std::shared_ptr<Tensor> mOutputGradFallback;
+
+public:
+    SqrtImpl_cuda(const Sqrt_Op &op) : OperatorImpl(op, "cuda") {}
+
+    static std::unique_ptr<SqrtImpl_cuda> create(const Sqrt_Op &op) {
+        return std::make_unique<SqrtImpl_cuda>(op);
+    }
+
+public:
+    void forward();
+    void backward();
+    // ~SqrtImpl_cuda();
+
+private:
+    template <class T> void forward_(const Tensor& input);
+    template <class T> void backward_(const Tensor& output_grad);
+};
+
+namespace {
+// add cuda backend to Sqrt_Op implementation registry
+static Registrar<Sqrt_Op> registrarSqrtImpl_cuda("cuda", Aidge::SqrtImpl_cuda::create);
+}  // namespace
+
+}  // namespace Aidge
+
+#endif /* AIDGE_BACKEND_CUDA_OPERATOR_SQRTIMPL_H_ */
diff --git a/include/aidge/backend/cuda/operator/SqrtImpl_CUDA_kernels.hpp b/include/aidge/backend/cuda/operator/SqrtImpl_CUDA_kernels.hpp
new file mode 100644
index 0000000..4f0fdc8
--- /dev/null
+++ b/include/aidge/backend/cuda/operator/SqrtImpl_CUDA_kernels.hpp
@@ -0,0 +1,36 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CUDA_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CUDA_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_
+
+#include <stdexcept>
+#include <cfloat>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cuda_fp16.h>
+
+#include "aidge/data/Data.hpp"
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+
+template <class T>
+void SqrtImpl_cuda_forward_kernel(const T* input, T* output, int size);                           
+
+}
+#endif /* AIDGE_CUDA_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ */
+
+
+
+
+
diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp
new file mode 100644
index 0000000..0b134d9
--- /dev/null
+++ b/src/operator/SqrtImpl.cpp
@@ -0,0 +1,57 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <vector>
+
+#include "aidge/backend/cuda/data/TensorImpl.hpp"
+#include "aidge/backend/cuda/operator/SqrtImpl.hpp"
+#include "aidge/backend/cuda/operator/SqrtImpl_CUDA_kernels.hpp"
+#include "aidge/backend/cuda/utils/CudaContext.hpp"
+#include "aidge/backend/cuda/utils/CudaUtils.hpp"
+#include "aidge/operator/Sqrt.hpp"
+#include "aidge/utils/Types.h"
+
+void Aidge::SqrtImpl_cuda::forward() 
+{
+    const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
+    assert(mOp.getRawInput(0) && "missing input #0");
+    const auto& input = op.getInput(0)->refCastFrom(mInputFallback, *op.getOutput(0));
+
+    switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) 
+    {
+        case DataType::Float64:
+            forward_<double>(input);
+            break;
+        case DataType::Float32:
+            forward_<float>(input);
+            break;
+        case DataType::Float16:
+            forward_<half>(input);
+            break;
+        default:
+            AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
+    }
+}
+
+template <class T>
+void Aidge::SqrtImpl_cuda::forward_(const Tensor& input) 
+{
+    const OperatorTensor& op = static_cast<const OperatorTensor&>(mOp);
+    
+    const T * inputPtr = static_cast<const T*>(input.getImpl()->rawPtr());
+    T * outputPtr = static_cast<T*>(op.getOutput(0)->getImpl()->rawPtr());
+
+    Aidge::SqrtImpl_cuda_forward_kernel<T>(inputPtr, outputPtr, static_cast<int>(op.getOutput(0)->size()));
+}
+
+// TODO ...
+void Aidge::SqrtImpl_cuda::backward() {}
diff --git a/src/operator/SqrtImpl_CUDA_kernels.cu b/src/operator/SqrtImpl_CUDA_kernels.cu
new file mode 100644
index 0000000..949323a
--- /dev/null
+++ b/src/operator/SqrtImpl_CUDA_kernels.cu
@@ -0,0 +1,53 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cuda/operator/SqrtImpl_CUDA_kernels.hpp"
+
+// Base template for floating-point types (float, double)
+template<typename T>
+__device__ T sqrt_helper(T x) {
+    return std::sqrt(x);  
+}
+
+// Specialization for half-precision type using CUDA's half
+template<>
+__device__ half sqrt_helper<half>(half x) {
+    float x_float = __half2float(x);  
+    return __float2half(std::sqrt(x_float));  
+}
+
+template <class T>
+__global__ void SqrtImpl_cuda_forward_kernel_(const T* input, T* output, int size) 
+{
+    const size_t index = blockIdx.x * blockDim.x + threadIdx.x;
+    const size_t stride = blockDim.x * gridDim.x;
+
+    //if (idx >= size) return;
+
+    for (size_t i = index; i < size; i += stride)
+        output[index] = sqrt_helper(input[index]);
+}
+
+template <class T>
+void Aidge::SqrtImpl_cuda_forward_kernel(const T* input, T* output, int size)
+{
+    int blockSize = 256;
+    int numBlocks = (size + blockSize - 1) / blockSize;
+
+    // Launch the kernel
+    SqrtImpl_cuda_forward_kernel_<<<numBlocks, blockSize>>>(input, output, size);
+};
+
+template void Aidge::SqrtImpl_cuda_forward_kernel<double>(const double* input, double* output, int size);
+
+template void Aidge::SqrtImpl_cuda_forward_kernel<float>(const float* input, float* output, int size);
+
+template void Aidge::SqrtImpl_cuda_forward_kernel<half>(const half* input, half* output, int size);
\ No newline at end of file
-- 
GitLab