diff --git a/include/aidge/backend/cuda/operator/FCImpl.hpp b/include/aidge/backend/cuda/operator/FCImpl.hpp index b06d42e64ce5272e975f0dcf4039ccd78f24f78a..ee2f1c57a27fe80f2b2ab4f3b3ae68bfa0cca24b 100644 --- a/include/aidge/backend/cuda/operator/FCImpl.hpp +++ b/include/aidge/backend/cuda/operator/FCImpl.hpp @@ -29,7 +29,7 @@ namespace Aidge { class FCImplForward_cuda : public Registrable<FCImplForward_cuda, std::tuple<DataType>, - void(unsigned int , unsigned int , unsigned int, bool, const void* , const void* , const void* , void*)> {}; + void(std::size_t , std::size_t, std::size_t, bool, const void* , const void* , const void* , void*)> {}; class FCImpl_cuda : public OperatorImpl { private: // CuDNN specific variables @@ -47,7 +47,7 @@ public: // ~FCImpl_cuda(); private: - template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, DimSize_t outChannels); + template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, std::size_t outChannels); }; namespace { diff --git a/include/aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp b/include/aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp index fac838f5ee7b11c67736e6ed83df4aa876b2825b..a4f3e4ad59c66379f404a704b8f4110f25200a4f 100644 --- a/include/aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp +++ b/include/aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp @@ -24,7 +24,7 @@ namespace Aidge { template<class T> -void fc_forward_cuda(DimSize_t nbInputs, DimSize_t inChannels, DimSize_t outChannels, bool noBias, const void *input, const void *weights, const void *bias, void *output); +void fc_forward_cuda(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const void *input, const void *weights, const void *bias, void *output); namespace { static Registrar<FCImplForward_cuda> registrarFCImpl2DForward_cuda_Float32({DataType::Float32}, Aidge::fc_forward_cuda<float>); diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index 5b2183b56208f8f9d8d1d972dc26fe9c03835694..3beeab373ddaea3563e4c4f1e6644e37769acfed 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -14,13 +14,13 @@ #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for #include <vector> -#include <iostream> + #include "aidge/utils/Types.h" #include "aidge/operator/FC.hpp" #include "aidge/backend/cuda/data/TensorImpl.hpp" -#include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp" #include "aidge/backend/cuda/operator/FCImpl.hpp" +#include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp" #include "aidge/backend/cuda/utils/CudaContext.hpp" @@ -34,9 +34,9 @@ void Aidge::FCImpl_cuda::forward() { const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))); - const FC_Op& fcOp = static_cast<const FC_Op&>(mOp); + const auto& fcOp = static_cast<const FC_Op&>(mOp); + std::size_t outChannels = static_cast<std::size_t>(fcOp.template getAttr<FCAttr::OutChannels>()); bool noBias = fcOp.template getAttr<FCAttr::NoBias>(); - DimSize_t outChannels = fcOp.template getAttr<FCAttr::OutChannels>(); if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) { forward_<double>(input0, input1, input2, noBias, outChannels); } @@ -46,7 +46,7 @@ void Aidge::FCImpl_cuda::forward() { } template<class T> -void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, DimSize_t outChannels) +void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, std::size_t outChannels) { Aidge::fc_forward_cuda<T>( input0.dims()[0], diff --git a/src/operator/FCImpl_CUDA_kernels.cu b/src/operator/FCImpl_CUDA_kernels.cu index 14d731f33e1a8edc5a6126d5d1f026b7d2af64c9..cf98aabf4b9275368bbdcd12b3a379b52a10681e 100644 --- a/src/operator/FCImpl_CUDA_kernels.cu +++ b/src/operator/FCImpl_CUDA_kernels.cu @@ -14,7 +14,7 @@ template<class T> __global__ -void fc_forward_cuda_kernel(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias,const T* input, const T* weights, const T* bias, T *output) +void fc_forward_cuda_kernel(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const T* input, const T* weights, const T* bias, T *output) { const std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -32,7 +32,7 @@ void fc_forward_cuda_kernel(std::size_t nbInputs, std::size_t inChannels, std::s namespace Aidge{ template<class T> -void fc_forward_cuda(DimSize_t nbInputs, DimSize_t inChannels, DimSize_t outChannels, bool noBias, const void* input_, const void* weights_, const void* bias_, void* output_) +void fc_forward_cuda(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const void* input_, const void* weights_, const void* bias_, void* output_) { const T* input = static_cast<const T*>(input_); const T* weights = static_cast<const T*>(weights_); @@ -41,8 +41,8 @@ void fc_forward_cuda(DimSize_t nbInputs, DimSize_t inChannels, DimSize_t outChan const dim3 blocksPerGrid = {(static_cast<unsigned int>(inChannels) + 255) / 256, 1, static_cast<unsigned int>(outChannels)}; const dim3 threadsPerBlocks = {256, 1, 1}; - fc_forward_cuda_kernel<<<blocksPerGrid, threadsPerBlocks>>>(nbInputs, inChannels, outChannels, noBias, input, weights, bias, output); + CHECK_CUDA_STATUS(cudaPeekAtLastError()); } }