diff --git a/include/aidge/backend/cuda/data/TensorImpl.hpp b/include/aidge/backend/cuda/data/TensorImpl.hpp
index 88e67d0fb5987ee10ff8db7faa66ae4202aae9fc..be6f9f3ded1e7507af4d4c54a4e6cc6ecfc0438b 100644
--- a/include/aidge/backend/cuda/data/TensorImpl.hpp
+++ b/include/aidge/backend/cuda/data/TensorImpl.hpp
@@ -76,14 +76,16 @@ public:
     // native interface
     const future_std::span<T>& data() const { return mData; }
 
+    std::size_t size() const override { return mData.size(); }
+
     std::size_t scalarSize() const override { return sizeof(T); }
 
     void setDevice(int device) override { mDevice = device; }
 
-    void copy(const void *src, NbElts_t length) override {
-        CHECK_CUDA_STATUS(cudaMemcpy(rawPtr(), src, length * sizeof(T), cudaMemcpyDeviceToDevice));
+    void copy(const void *src, NbElts_t length, NbElts_t offset = 0) override {
+        void* dst = static_cast<void*>(static_cast<T*>(rawPtr()) + offset);
+        CHECK_CUDA_STATUS(cudaMemcpy(dst, src, length * sizeof(T), cudaMemcpyDeviceToDevice));
     }
 
     void copyCast(const void *src, NbElts_t length, const DataType srcDt) override {
@@ -177,10 +179,6 @@ public:
         return mData.data();
     };
 
-    void* getRaw(std::size_t idx) {
-        return static_cast<void*>(static_cast<T*>(rawPtr()) + idx);
-    }
-
     const cudnnTensorDescriptor_t& getCudnnTensorDesc() const override {
         if (mCudnnTensor == nullptr) {
             CHECK_CUDNN_STATUS(cudnnCreateTensorDescriptor(&mCudnnTensor));
@@ -214,10 +212,10 @@ public:
         return mCudnnTensor;
    }
 
-    virtual ~TensorImpl_cuda() {
-        if (mCudnnTensor != nullptr)
-            cudnnDestroyTensorDescriptor(mCudnnTensor);
-    }
+    void* getRawPtr(NbElts_t idx) override final {
+        AIDGE_ASSERT(idx < mData.size(), "idx out of range");
+        return static_cast<void*>(static_cast<T*>(rawPtr()) + idx);
+    };
 
     void setRawPtr(void *ptr, NbElts_t length) override final {
         AIDGE_ASSERT(length >= mTensor.size(), "trying to set raw pointer of insufficient capacity");
@@ -225,6 +223,11 @@ public:
         mDataOwner.reset();
     };
 
+    virtual ~TensorImpl_cuda() {
+        if (mCudnnTensor != nullptr)
+            cudnnDestroyTensorDescriptor(mCudnnTensor);
+    }
+
 private:
     void lazyInit() {
         if (mData.size() < mTensor.size()) {
diff --git a/include/aidge/backend/cuda/utils/CudaUtils.hpp b/include/aidge/backend/cuda/utils/CudaUtils.hpp
index 76d7ea48e02473deeaa2cb0801a292623a666a1d..2f66d0e778778400f0b7def345619d635cc37674 100644
--- a/include/aidge/backend/cuda/utils/CudaUtils.hpp
+++ b/include/aidge/backend/cuda/utils/CudaUtils.hpp
@@ -67,24 +67,6 @@ namespace Cuda {
     // Enable Peer-to-Peer communications between devices
     // when it is possible
     void setMultiDevicePeerAccess(unsigned int size, unsigned int* devices);
-
-    // CuDNN scaling parameters are typically "alpha" and "beta".
-    // Their type must be "float" for HALF and FLOAT (default template)
-    // and "double" for DOUBLE (specialized template)
-    template <class T>
-    struct cudnn_scaling_type {
-        typedef float type;
-    };
-
-    template <>
-    struct cudnn_scaling_type<double> {
-        typedef double type;
-    };
-
-    template <class T>
-    struct cuda_type {
-        typedef T type;
-    };
 }
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 95a6aa523257f9fcd527460315dfc66e846ff714..9c3684e89f6b27133ca99be16b332c4e9f9a27b1 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -120,8 +120,8 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
 template <Aidge::DimIdx_t DIM>
 template <class T>
 void Aidge::ConvImpl_cuda<DIM>::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2) {
-    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
-    typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
+    const T alpha = 1.0f;
+    const T beta = 0.0f;
 
     CHECK_CUDNN_STATUS(
         cudnnConvolutionForward(CudaContext::cudnnHandle(),
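Usage note (not part of the patch): the `offset` parameter added to `copy()` lets a caller write into the middle of an existing device allocation, the pattern needed when assembling one tensor from several source buffers. Below is a minimal standalone sketch of that semantics against the plain CUDA runtime; `copyAtOffset` and `CHECK` are hypothetical stand-ins for `TensorImpl_cuda<T>::copy()` and `CHECK_CUDA_STATUS`, not Aidge code.

```cpp
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <cuda_runtime.h>

// Hypothetical stand-in for CHECK_CUDA_STATUS: abort on any CUDA error.
#define CHECK(expr)                                                   \
    do {                                                              \
        cudaError_t status = (expr);                                  \
        if (status != cudaSuccess) {                                  \
            std::fprintf(stderr, "%s\n", cudaGetErrorString(status)); \
            std::abort();                                             \
        }                                                             \
    } while (0)

// Mirrors the new TensorImpl_cuda<T>::copy(src, length, offset):
// device-to-device copy of `length` elements into `dst + offset`.
template <class T>
void copyAtOffset(T* dst, const T* src, std::size_t length, std::size_t offset) {
    CHECK(cudaMemcpy(dst + offset, src, length * sizeof(T), cudaMemcpyDeviceToDevice));
}

int main() {
    constexpr std::size_t n = 4;
    const std::vector<float> a(n, 1.0f), b(n, 2.0f);
    std::vector<float> out(2 * n);

    float *dA = nullptr, *dB = nullptr, *dOut = nullptr;
    CHECK(cudaMalloc(&dA, n * sizeof(float)));
    CHECK(cudaMalloc(&dB, n * sizeof(float)));
    CHECK(cudaMalloc(&dOut, 2 * n * sizeof(float)));
    CHECK(cudaMemcpy(dA, a.data(), n * sizeof(float), cudaMemcpyHostToDevice));
    CHECK(cudaMemcpy(dB, b.data(), n * sizeof(float), cudaMemcpyHostToDevice));

    // Assemble dOut from two source buffers: the second chunk lands at
    // element offset n, exactly what the added `offset` argument enables.
    copyAtOffset(dOut, dA, n, 0);
    copyAtOffset(dOut, dB, n, n);

    CHECK(cudaMemcpy(out.data(), dOut, 2 * n * sizeof(float), cudaMemcpyDeviceToHost));
    // out == {1,1,1,1, 2,2,2,2}
    CHECK(cudaFree(dA));
    CHECK(cudaFree(dB));
    CHECK(cudaFree(dOut));
    return 0;
}
```

The same pointer arithmetic backs the bounds-checked `getRawPtr(idx)` the patch adds in place of the unchecked `getRaw(idx)`.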
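For context on the `alpha`/`beta` change in ConvImpl.cpp: cuDNN receives its scaling factors by host pointer, so the C type behind that pointer must match what cuDNN expects for the tensor's data type; the comment deleted from CudaUtils.hpp documents that mapping (`float` for HALF and FLOAT, `double` for DOUBLE). The sketch below illustrates the convention with `cudnnScaleTensor`; it is separate from the patch, and the shape and values are arbitrary. Error handling is reduced to aborts for brevity.

```cpp
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <cuda_runtime.h>
#include <cudnn.h>

int main() {
    cudnnHandle_t handle;
    if (cudnnCreate(&handle) != CUDNN_STATUS_SUCCESS) std::abort();

    // Describe a 1x1x2x2 float tensor.
    cudnnTensorDescriptor_t desc;
    cudnnCreateTensorDescriptor(&desc);
    cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 2, 2);

    std::vector<float> host{1.f, 2.f, 3.f, 4.f};
    float* dev = nullptr;
    cudaMalloc(&dev, host.size() * sizeof(float));
    cudaMemcpy(dev, host.data(), host.size() * sizeof(float), cudaMemcpyHostToDevice);

    // The scaling factor is passed as a host pointer: for CUDNN_DATA_FLOAT
    // (and HALF) it must point to a float, for CUDNN_DATA_DOUBLE to a double.
    const float alpha = 2.0f;
    if (cudnnScaleTensor(handle, desc, dev, &alpha) != CUDNN_STATUS_SUCCESS) std::abort();

    cudaMemcpy(host.data(), dev, host.size() * sizeof(float), cudaMemcpyDeviceToHost);
    std::printf("%g %g %g %g\n", host[0], host[1], host[2], host[3]); // 2 4 6 8

    cudaFree(dev);
    cudnnDestroyTensorDescriptor(desc);
    cudnnDestroy(handle);
    return 0;
}
```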