Skip to content
Snippets Groups Projects
Commit ace61c0d authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

minor cleanups

parent 48c45350
No related branches found
No related tags found
3 merge requests!15version 0.2.0,!12Lenetop,!10Lenet operators
...@@ -33,6 +33,7 @@ private: ...@@ -33,6 +33,7 @@ private:
// CuDNN specific variables // CuDNN specific variables
cudnnPoolingDescriptor_t mAvgPoolingDesc = nullptr; cudnnPoolingDescriptor_t mAvgPoolingDesc = nullptr;
cudnnPoolingMode_t mMode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; cudnnPoolingMode_t mMode = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
std::shared_ptr<Tensor> mInputFallback;
public: public:
AvgPoolingImpl_cuda(const AvgPooling_Op<DIM> &op) : OperatorImpl(op) {} AvgPoolingImpl_cuda(const AvgPooling_Op<DIM> &op) : OperatorImpl(op) {}
......
...@@ -32,7 +32,9 @@ class FCImplForward_cuda : public Registrable<FCImplForward_cuda, ...@@ -32,7 +32,9 @@ class FCImplForward_cuda : public Registrable<FCImplForward_cuda,
void(std::size_t , std::size_t, std::size_t, bool, const void* , const void* , const void* , void*)> {}; void(std::size_t , std::size_t, std::size_t, bool, const void* , const void* , const void* , void*)> {};
class FCImpl_cuda : public OperatorImpl { class FCImpl_cuda : public OperatorImpl {
private: private:
// CuDNN specific variables std::shared_ptr<Tensor> mInput0Fallback;
std::shared_ptr<Tensor> mInput1Fallback;
std::shared_ptr<Tensor> mInput2Fallback;
public: public:
......
...@@ -32,11 +32,5 @@ cublasStatus_t cublasGemm(cublasHandle_t handle, ...@@ -32,11 +32,5 @@ cublasStatus_t cublasGemm(cublasHandle_t handle,
const T *B, int ldb, const T *B, int ldb,
const T *beta, const T *beta,
T *C, int ldc); T *C, int ldc);
// cublasGemm(cublasContext*&, cublasOperation_t, cublasOperation_t, int&, int&, int&,
// const type*,
// const __half*&, int&,
// const __half*&, int&,
// const type*,
// __half*&, int&)’
} }
#endif /* AIDGE_CUDA_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */ #endif /* AIDGE_CUDA_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
...@@ -33,6 +33,7 @@ private: ...@@ -33,6 +33,7 @@ private:
// CuDNN specific variables // CuDNN specific variables
cudnnPoolingDescriptor_t mMaxPoolingDesc = nullptr; cudnnPoolingDescriptor_t mMaxPoolingDesc = nullptr;
cudnnPoolingMode_t mMode = CUDNN_POOLING_MAX; cudnnPoolingMode_t mMode = CUDNN_POOLING_MAX;
std::shared_ptr<Tensor> mInputFallback;
public: public:
MaxPoolingImpl_cuda(const MaxPooling_Op<DIM> &op) : OperatorImpl(op) {} MaxPoolingImpl_cuda(const MaxPooling_Op<DIM> &op) : OperatorImpl(op) {}
......
...@@ -35,6 +35,7 @@ private: ...@@ -35,6 +35,7 @@ private:
#else #else
cudnnActivationMode_t mReLUDesc = nullptr; cudnnActivationMode_t mReLUDesc = nullptr;
#endif #endif
std::shared_ptr<Tensor> mInputFallback;
public: public:
ReLUImpl_cuda(const ReLU_Op &op) : OperatorImpl(op) {} ReLUImpl_cuda(const ReLU_Op &op) : OperatorImpl(op) {}
......
...@@ -25,8 +25,7 @@ void Aidge::AvgPoolingImpl_cuda<DIM>::forward() { ...@@ -25,8 +25,7 @@ void Aidge::AvgPoolingImpl_cuda<DIM>::forward() {
assert(mOp.getRawInput(0) && "missing input #0"); assert(mOp.getRawInput(0) && "missing input #0");
std::shared_ptr<Tensor> inputFallback; const auto& input = op.getInput(0)->refCastFrom(mInputFallback, *op.getOutput(0));
const auto& input = std::static_pointer_cast<Tensor>(op.getRawInput(0))->refCastFrom(inputFallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
// Lazy-initialize CuDNN AvgPooling descriptor // Lazy-initialize CuDNN AvgPooling descriptor
if (mAvgPoolingDesc == nullptr) { if (mAvgPoolingDesc == nullptr) {
......
...@@ -28,15 +28,14 @@ void Aidge::FCImpl_cuda::forward() { ...@@ -28,15 +28,14 @@ void Aidge::FCImpl_cuda::forward() {
assert(mOp.getRawInput(1) && "missing input #1"); assert(mOp.getRawInput(1) && "missing input #1");
assert(mOp.getRawInput(2) && "missing input #2"); assert(mOp.getRawInput(2) && "missing input #2");
std::shared_ptr<Tensor> inputFallback, input1Fallback, input2Fallback;
const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(inputFallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
const auto& fcOp = static_cast<const FC_Op&>(mOp); const auto& fcOp = static_cast<const FC_Op&>(mOp);
bool noBias = fcOp.template getAttr<FCAttr::NoBias>(); bool noBias = fcOp.template getAttr<FCAttr::NoBias>();
std::size_t outChannels = static_cast<std::size_t>(fcOp.template getAttr<FCAttr::OutChannels>()); std::size_t outChannels = static_cast<std::size_t>(fcOp.template getAttr<FCAttr::OutChannels>());
const auto& input0 = fcOp.getInput(0)->refCastFrom(mInput0Fallback, *fcOp.getOutput(0));
const auto& input1 = fcOp.getInput(1)->refCastFrom(mInput1Fallback, *fcOp.getOutput(0));
const auto& input2 = fcOp.getInput(2)->refCastFrom(mInput2Fallback, *fcOp.getOutput(0));
switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) { switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
case DataType::Float64: case DataType::Float64:
forward_<double>(input0, input1, input2, noBias, outChannels); forward_<double>(input0, input1, input2, noBias, outChannels);
...@@ -55,17 +54,19 @@ void Aidge::FCImpl_cuda::forward() { ...@@ -55,17 +54,19 @@ void Aidge::FCImpl_cuda::forward() {
template<class T> template<class T>
void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, std::size_t outChannels) void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, const Tensor& input2, bool noBias, std::size_t outChannels)
{ {
const T * input = static_cast<const T*>(input0.getImpl()->rawPtr()); const T * input = static_cast<const T*>(input0.getImpl()->rawPtr());
const T * weights = static_cast<const T*>(input1.getImpl()->rawPtr()); const T * weights = static_cast<const T*>(input1.getImpl()->rawPtr());
T * output = static_cast<T*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); T * output = static_cast<T*>(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
// Performing output = T(weights) * input
// [n x m] = [n x k] * [k x m]
// cublas is column-major so instead of transposing inputs, computing output [m x n] and transposing output, we compute output as [n x m]
int n = outChannels; int n = outChannels;
int m = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->size()/n; int m = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->size()/n;
int k = input0.size()/m; int k = input0.size()/m;
int lda = k; int lda = k; // leading dimension of weights
int ldb = k; int ldb = k; // leading dimension of input
int ldc = n; int ldc = n; // leading dimension of output
const T alpha = 1.0f; const T alpha = 1.0f;
const T beta = 0.0f; const T beta = 0.0f;
CHECK_CUBLAS_STATUS(cublasGemm(CudaContext::cublasHandle(), CHECK_CUBLAS_STATUS(cublasGemm(CudaContext::cublasHandle(),
...@@ -93,7 +94,8 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co ...@@ -93,7 +94,8 @@ void Aidge::FCImpl_cuda::forward_(const Tensor& input0, const Tensor& input1, co
m * sizeof(T), m * sizeof(T),
cudaMemcpyHostToDevice)); cudaMemcpyHostToDevice));
const T * biases = static_cast<const T*>(input2.getImpl()->rawPtr()); const T * biases = static_cast<const T*>(input2.getImpl()->rawPtr());
// Performing output = biases * onesVector + output
// [n x m] = [n x 1] * [1 x m] + [n x m]
CHECK_CUBLAS_STATUS(cublasGemm(CudaContext::cublasHandle(), CHECK_CUBLAS_STATUS(cublasGemm(CudaContext::cublasHandle(),
CUBLAS_OP_N, CUBLAS_OP_N,
CUBLAS_OP_N, CUBLAS_OP_N,
......
...@@ -25,8 +25,7 @@ void Aidge::MaxPoolingImpl_cuda<DIM>::forward() { ...@@ -25,8 +25,7 @@ void Aidge::MaxPoolingImpl_cuda<DIM>::forward() {
assert(mOp.getRawInput(0) && "missing input #0"); assert(mOp.getRawInput(0) && "missing input #0");
std::shared_ptr<Tensor> inputFallback; const auto& input = op.getInput(0)->refCastFrom(mInputFallback, *op.getOutput(0));
const auto& input = std::static_pointer_cast<Tensor>(op.getRawInput(0))->refCastFrom(inputFallback, *std::static_pointer_cast<Tensor>(op.getRawOutput(0)));
// Lazy-initialize CuDNN MaxPooling descriptor // Lazy-initialize CuDNN MaxPooling descriptor
if (mMaxPoolingDesc == nullptr) { if (mMaxPoolingDesc == nullptr) {
......
...@@ -24,8 +24,7 @@ void Aidge::ReLUImpl_cuda::forward() { ...@@ -24,8 +24,7 @@ void Aidge::ReLUImpl_cuda::forward() {
assert(mOp.getRawInput(0) && "missing input #0"); assert(mOp.getRawInput(0) && "missing input #0");
std::shared_ptr<Tensor> inputFallback; const auto& input = op.getInput(0)->refCastFrom(mInputFallback, *op.getOutput(0));
const auto& input = std::static_pointer_cast<Tensor>(op.getRawInput(0))->refCastFrom(inputFallback, *std::static_pointer_cast<Tensor>(op.getRawOutput(0)));
// Lazy-initialize CuDNN ReLU descriptor // Lazy-initialize CuDNN ReLU descriptor
if (mReLUDesc == nullptr) { if (mReLUDesc == nullptr) {
...@@ -38,11 +37,18 @@ void Aidge::ReLUImpl_cuda::forward() { ...@@ -38,11 +37,18 @@ void Aidge::ReLUImpl_cuda::forward() {
#endif #endif
} }
if (std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType() == DataType::Float64) { switch(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()) {
forward_<double>(input); case DataType::Float64:
} forward_<double>(input);
else { break;
forward_<float>(input); case DataType::Float32:
forward_<float>(input);
break;
case DataType::Float16:
forward_<half>(input);
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "Data type is not supported by Backend Cuda");
} }
} }
...@@ -64,7 +70,9 @@ void Aidge::ReLUImpl_cuda::forward_(const Tensor& input) { ...@@ -64,7 +70,9 @@ void Aidge::ReLUImpl_cuda::forward_(const Tensor& input) {
// Destructor: release the CuDNN activation descriptor if it was ever created.
Aidge::ReLUImpl_cuda::~ReLUImpl_cuda() {
    if (mReLUDesc == nullptr) {
        return;  // forward() was never run, nothing to release
    }
#if CUDNN_VERSION >= 5000
    // Opaque activation descriptors only exist from CuDNN 5.0 onwards;
    // older versions store a plain mode enum that needs no teardown.
    cudnnDestroyActivationDescriptor(mReLUDesc);
#endif
}
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment