diff --git a/include/aidge/backend/cuda/data/TensorImpl.hpp b/include/aidge/backend/cuda/data/TensorImpl.hpp
index 88e67d0fb5987ee10ff8db7faa66ae4202aae9fc..be6f9f3ded1e7507af4d4c54a4e6cc6ecfc0438b 100644
--- a/include/aidge/backend/cuda/data/TensorImpl.hpp
+++ b/include/aidge/backend/cuda/data/TensorImpl.hpp
@@ -76,14 +76,19 @@ public:
     // native interface
     const future_std::span<T>& data() const { return mData; }
 
+    std::size_t size() const override { return mData.size(); }
     std::size_t scalarSize() const override { return sizeof(T); }
 
     void setDevice(int device) override {
         mDevice = device;
     }
 
-    void copy(const void *src, NbElts_t length) override {
-        CHECK_CUDA_STATUS(cudaMemcpy(rawPtr(), src, length * sizeof(T), cudaMemcpyDeviceToDevice));
+    // NOTE(review): no bounds check here — caller must ensure offset + length fits
+    // the allocated buffer. The 'offset' default must stay in sync with the base
+    // declaration (default arguments on virtual functions are bound statically).
+    void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override {
+        void* dst = static_cast<void*>(static_cast<T*>(rawPtr()) + offset);
+        CHECK_CUDA_STATUS(cudaMemcpy(dst, src, length * sizeof(T), cudaMemcpyDeviceToDevice));
     }
 
     void copyCast(const void *src, NbElts_t length, const DataType srcDt) override {
@@ -177,10 +179,6 @@ public:
         return mData.data();
     };
 
-    void* getRaw(std::size_t idx) {
-        return static_cast<void*>(static_cast<T*>(rawPtr()) + idx);
-    }
-
     const cudnnTensorDescriptor_t& getCudnnTensorDesc() const override {
         if (mCudnnTensor == nullptr) {
             CHECK_CUDNN_STATUS(cudnnCreateTensorDescriptor(&mCudnnTensor));
@@ -214,10 +212,10 @@ public:
         return mCudnnTensor;
     }
 
-    virtual ~TensorImpl_cuda() {
-        if (mCudnnTensor != nullptr)
-            cudnnDestroyTensorDescriptor(mCudnnTensor);
-    }
+    void* getRawPtr(NbElts_t idx) override final {
+        AIDGE_ASSERT(idx < mData.size(), "idx out of range");
+        return static_cast<void*>(static_cast<T*>(rawPtr()) + idx);
+    };
 
     void setRawPtr(void *ptr, NbElts_t length) override final {
         AIDGE_ASSERT(length >= mTensor.size(), "trying to set raw pointer of insufficient capacity");
@@ -225,6 +223,11 @@ public:
         mDataOwner.reset();
     };
 
+    virtual ~TensorImpl_cuda() {
+        if (mCudnnTensor != nullptr)
+            cudnnDestroyTensorDescriptor(mCudnnTensor);
+    }
+
 private:
     void lazyInit() {
         if (mData.size() < mTensor.size()) {
diff --git a/include/aidge/backend/cuda/utils/CudaUtils.hpp b/include/aidge/backend/cuda/utils/CudaUtils.hpp
index 76d7ea48e02473deeaa2cb0801a292623a666a1d..2f66d0e778778400f0b7def345619d635cc37674 100644
--- a/include/aidge/backend/cuda/utils/CudaUtils.hpp
+++ b/include/aidge/backend/cuda/utils/CudaUtils.hpp
@@ -67,24 +67,19 @@ namespace Cuda {
     // Enable Peer-to-Peer communications between devices
     // when it is possible
     void setMultiDevicePeerAccess(unsigned int size, unsigned int* devices);
 
     // CuDNN scaling parameters are typically "alpha" and "beta".
     // Their type must be "float" for HALF and FLOAT (default template)
     // and "double" for DOUBLE (specialized template)
     template <class T>
     struct cudnn_scaling_type {
         typedef float type;
     };
 
     template <>
     struct cudnn_scaling_type<double> {
         typedef double type;
     };
-
-    template <class T>
-    struct cuda_type {
-        typedef T type;
-    };
 }
 }
 
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 95a6aa523257f9fcd527460315dfc66e846ff714..9c3684e89f6b27133ca99be16b332c4e9f9a27b1 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -120,8 +120,8 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
 template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2) {
     const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
-    typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
+    const typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
 
     CHECK_CUDNN_STATUS(
         cudnnConvolutionForward(CudaContext::cudnnHandle(),