Commit a5711c18, authored by Houssem ROUIS

Changed arguments to std::size_t for the FC kernel

parent f4454951
This commit is part of merge request !10.
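The practical motivation, not stated in the commit message but the usual reason for this kind of change, is that unsigned int is only 32 bits wide on common LP64 platforms, while std::size_t matches the pointer width and is the natural type for tensor element counts. A minimal sketch of the silent narrowing the wider type avoids (the element count here is an assumed example):

    #include <cstddef>
    #include <iostream>

    // Passing a 64-bit element count through an unsigned int parameter
    // silently wraps modulo 2^32 on LP64 platforms.
    int main() {
        const std::size_t nbElements = (1ULL << 32) + 7;                  // > UINT_MAX
        const unsigned int truncated = static_cast<unsigned int>(nbElements);
        std::cout << nbElements << " -> " << truncated << '\n';          // 4294967303 -> 7
        return 0;
    }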
@@ -29,7 +29,7 @@
 namespace Aidge {
 class FCImplForward_cuda : public Registrable<FCImplForward_cuda,
     std::tuple<DataType>,
-    void(unsigned int, unsigned int, unsigned int, bool, const void*, const void*, const void*, void*)> {};
+    void(std::size_t, std::size_t, std::size_t, bool, const void*, const void*, const void*, void*)> {};
 class FCImpl_cuda : public OperatorImpl {
 private:
     // CuDNN specific variables
@@ -47,7 +47,7 @@ public:
     // ~FCImpl_cuda();
 private:
-    template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, DimSize_t outChannels);
+    template <class T> void forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, std::size_t outChannels);
 };
 namespace {
@@ -24,7 +24,7 @@
 namespace Aidge {
 template<class T>
-void fc_forward_cuda(DimSize_t nbInputs, DimSize_t inChannels, DimSize_t outChannels, bool noBias, const void *input, const void *weights, const void *bias, void *output);
+void fc_forward_cuda(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const void *input, const void *weights, const void *bias, void *output);
 namespace {
 static Registrar<FCImplForward_cuda> registrarFCImpl2DForward_cuda_Float32({DataType::Float32}, Aidge::fc_forward_cuda<float>);
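The Registrar line above is the registration half of Aidge's kernel-dispatch mechanism; changing the signature in the Registrable declaration is what forces every registered kernel, including fc_forward_cuda<float> here, to adopt std::size_t in lockstep. A self-contained sketch of the pattern with simplified, hypothetical names (KernelKey, kernelRegistry, and fc_forward_stub are illustrations, not the Aidge API):

    #include <cstddef>
    #include <functional>
    #include <map>

    // One registry entry per data type; static Registrar objects populate
    // the map before main() runs, and the operator looks kernels up at runtime.
    enum class KernelKey { Float32, Float64 };

    using FcKernel = std::function<void(std::size_t, std::size_t, std::size_t, bool,
                                        const void*, const void*, const void*, void*)>;

    std::map<KernelKey, FcKernel>& kernelRegistry() {
        static std::map<KernelKey, FcKernel> registry;
        return registry;
    }

    struct Registrar {
        Registrar(KernelKey key, FcKernel kernel) { kernelRegistry()[key] = std::move(kernel); }
    };

    template <class T>
    void fc_forward_stub(std::size_t, std::size_t, std::size_t, bool,
                         const void*, const void*, const void*, void*) { /* launch kernel here */ }

    // Mirrors: static Registrar<FCImplForward_cuda> ...({DataType::Float32}, fc_forward_cuda<float>);
    static Registrar registrarFloat32(KernelKey::Float32, fc_forward_stub<float>);
    static Registrar registrarFloat64(KernelKey::Float64, fc_forward_stub<double>);

    int main() {
        // Dispatch on the data type, as FCImpl_cuda::forward() does on its output tensor.
        kernelRegistry().at(KernelKey::Float32)(1, 8, 4, false, nullptr, nullptr, nullptr, nullptr);
        return 0;
    }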
@@ -14,13 +14,13 @@
 #include <numeric>  // std::accumulate
 #include <thread>   // std::this_thread::sleep_for
 #include <vector>
-#include <iostream>
 #include "aidge/utils/Types.h"
 #include "aidge/operator/FC.hpp"
 #include "aidge/backend/cuda/data/TensorImpl.hpp"
-#include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp"
 #include "aidge/backend/cuda/operator/FCImpl.hpp"
+#include "aidge/backend/cuda/operator/FCImpl_CUDA_kernels.hpp"
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
@@ -34,9 +34,9 @@ void Aidge::FCImpl_cuda::forward() {
     const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
     const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
-    const FC_Op& fcOp = static_cast<const FC_Op&>(mOp);
+    const auto& fcOp = static_cast<const FC_Op&>(mOp);
+    std::size_t outChannels = static_cast<std::size_t>(fcOp.template getAttr<FCAttr::OutChannels>());
     bool noBias = fcOp.template getAttr<FCAttr::NoBias>();
-    DimSize_t outChannels = fcOp.template getAttr<FCAttr::OutChannels>();
     if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) {
         forward_<double>(input0, input1, input2, noBias, outChannels);
     }
@@ -46,7 +46,7 @@ void Aidge::FCImpl_cuda::forward() {
 }
 template<class T>
-void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, DimSize_t outChannels)
+void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, std::size_t outChannels)
 {
     Aidge::fc_forward_cuda<T>(
         input0.dims()[0],
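For reference, the computation behind this interface: with input0.dims()[0] as nbInputs (the batch size), the FC forward is output[n][o] = sum over i of input[n][i] * weights[o][i], plus bias[o] unless noBias is set. A plain CPU sketch under assumed row-major layouts (input of shape nbInputs x inChannels, weights of shape outChannels x inChannels); it mirrors the fc_forward_cuda signature but is not the Aidge kernel's code:

    #include <cstddef>

    template <class T>
    void fc_forward_reference(std::size_t nbInputs, std::size_t inChannels,
                              std::size_t outChannels, bool noBias,
                              const T* input, const T* weights, const T* bias, T* output) {
        for (std::size_t n = 0; n < nbInputs; ++n) {
            for (std::size_t o = 0; o < outChannels; ++o) {
                T acc = noBias ? T(0) : bias[o];          // optional bias term
                for (std::size_t i = 0; i < inChannels; ++i) {
                    acc += input[n * inChannels + i] * weights[o * inChannels + i];
                }
                output[n * outChannels + o] = acc;
            }
        }
    }

    int main() {
        const float input[2 * 3]   = {1, 2, 3, 4, 5, 6};
        const float weights[2 * 3] = {1, 0, 0, 0, 1, 0};
        const float bias[2]        = {0.5f, -0.5f};
        float output[2 * 2]        = {};
        fc_forward_reference<float>(2, 3, 2, false, input, weights, bias, output);
        // output is {1.5, 1.5, 4.5, 4.5}
        return 0;
    }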
@@ -14,7 +14,7 @@
 template<class T>
 __global__
-void fc_forward_cuda_kernel(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias,const T* input, const T* weights, const T* bias, T *output)
+void fc_forward_cuda_kernel(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const T* input, const T* weights, const T* bias, T *output)
 {
     const std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -32,7 +32,7 @@ void fc_forward_cuda_kernel(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const T* input, const T* weights, const T* bias, T *output)
 namespace Aidge{
 template<class T>
-void fc_forward_cuda(DimSize_t nbInputs, DimSize_t inChannels, DimSize_t outChannels, bool noBias, const void* input_, const void* weights_, const void* bias_, void* output_)
+void fc_forward_cuda(std::size_t nbInputs, std::size_t inChannels, std::size_t outChannels, bool noBias, const void* input_, const void* weights_, const void* bias_, void* output_)
 {
     const T* input = static_cast<const T*>(input_);
     const T* weights = static_cast<const T*>(weights_);
@@ -41,8 +41,8 @@ void fc_forward_cuda(DimSize_t nbInputs, DimSize_t inChannels, DimSize_t outChannels, bool noBias, const void* input_, const void* weights_, const void* bias_, void* output_)
     const dim3 blocksPerGrid = {(static_cast<unsigned int>(inChannels) + 255) / 256, 1, static_cast<unsigned int>(outChannels)};
     const dim3 threadsPerBlocks = {256, 1, 1};
     fc_forward_cuda_kernel<<<blocksPerGrid, threadsPerBlocks>>>(nbInputs, inChannels, outChannels, noBias, input, weights, bias, output);
     CHECK_CUDA_STATUS(cudaPeekAtLastError());
 }
 }
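Two details of this launch configuration are worth noting. First, the grid tiles inChannels across 256-thread blocks in x and gives each output channel its own grid-z layer. Second, the static_cast<unsigned int> calls remain necessary because dim3 components are unsigned int, so the std::size_t change widens the host-side interface without widening the launch configuration itself (CUDA additionally caps gridDim.z at 65535). A worked example with assumed sizes:

    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t inChannels  = 1000;   // assumed example sizes
        const std::size_t outChannels = 10;

        // Same arithmetic as blocksPerGrid above: ceil(inChannels / 256) blocks in x.
        const unsigned int gridX = (static_cast<unsigned int>(inChannels) + 255) / 256;   // = 4
        std::printf("blocksPerGrid = {%u, 1, %u}, threadsPerBlocks = {256, 1, 1}\n",
                    gridX, static_cast<unsigned int>(outChannels));

        // 4 blocks * 256 threads = 1024 >= 1000, so the kernel presumably masks
        // the 24 surplus threads with a bounds check on idx (body elided above).
        return 0;
    }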