Commit 4b2e0d5a authored by Olivier BICHLER

Full implementation of ConvImpl

parent 31d7319e
Pipeline #31792 failed
@@ -13,8 +13,13 @@
 #include "aidge/backend/cuda/utils/CudaContext.hpp"
 
 namespace Aidge {
+class TensorImpl_cuda_ {
+public:
+    virtual const cudnnTensorDescriptor_t& getCudnnTensorDesc() const = 0;
+};
+
 template <class T>
-class TensorImpl_cuda : public TensorImpl {
+class TensorImpl_cuda : public TensorImpl, public TensorImpl_cuda_ {
 private:
     const Tensor &mTensor; // Impl needs to access Tensor information, but is not
                            // supposed to change it!
@@ -53,7 +58,7 @@ class TensorImpl_cuda : public TensorImpl {
         return mData;
     };
 
-    const cudnnTensorDescriptor_t& getCudnnTensorDesc() const {
+    const cudnnTensorDescriptor_t& getCudnnTensorDesc() const override {
         if (mCudnnTensor == nullptr) {
             CHECK_CUDNN_STATUS(cudnnCreateTensorDescriptor(&mCudnnTensor));
...
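The new TensorImpl_cuda_ base class is a small type-erasure interface: operator implementations only hold a plain TensorImpl pointer and cannot name TensorImpl_cuda<T> without knowing the element type, so the extra non-template base exposes the cuDNN tensor descriptor independently of T. A minimal standalone sketch of the idiom (hypothetical names, not Aidge's actual classes):

#include <cassert>

struct Impl {                          // stands in for Aidge::TensorImpl
    virtual ~Impl() = default;
};

struct CudaImplBase {                  // stands in for TensorImpl_cuda_
    virtual ~CudaImplBase() = default;
    virtual const int& desc() const = 0;  // stand-in for cudnnTensorDescriptor_t&
};

template <class T>
struct CudaImpl : Impl, CudaImplBase { // stands in for TensorImpl_cuda<T>
    int mDesc = sizeof(T);             // illustrative payload only
    const int& desc() const override { return mDesc; }
};

int main() {
    CudaImpl<float> impl;
    Impl* erased = &impl;              // the element type is no longer visible
    // The side-cast succeeds for any T, which is exactly what the ConvImpl
    // code below relies on when calling getCudnnTensorDesc():
    assert(dynamic_cast<CudaImplBase*>(erased)->desc() == sizeof(float));
    return 0;
}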
@@ -31,7 +31,7 @@ namespace Aidge {
 template <DimIdx_t DIM>
 class ConvImpl_cuda : public OperatorImpl {
 private:
     const Conv_Op<DIM> &mOp;
     std::array<NbElts_t, 3> mNbConsumedData;
     std::array<NbElts_t, 1> mNbProducedData;
@@ -43,7 +43,7 @@ class ConvImpl_cuda : public OperatorImpl {
     cudnnConvolutionFwdAlgo_t mFwdAlgo;
     cudnnConvolutionDescriptor_t mConvDesc;
 
 public:
     ConvImpl_cuda(const Conv_Op<DIM> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {
         CHECK_CUDNN_STATUS(cudnnCreateConvolutionDescriptor(&mConvDesc));
     }
@@ -52,7 +52,7 @@ class ConvImpl_cuda : public OperatorImpl {
         return std::make_unique<ConvImpl_cuda>(op);
     }
 
 public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
     NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
@@ -65,6 +65,9 @@ class ConvImpl_cuda : public OperatorImpl {
     void backward();
 
     ~ConvImpl_cuda();
+
+private:
+    template <class T> void forward_();
 };
 
 namespace {
...
@@ -77,7 +77,6 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
     assert(mOp.getInput(1) && "missing input #1");
-    assert(mOp.getInput(2) && "missing input #2");
 
     const std::vector<int> strides(mOp.template get<ConvParam::StrideDims>().rbegin(), mOp.template get<ConvParam::StrideDims>().rend());
     const std::vector<int> paddings(DIM, 0);
@@ -111,22 +110,22 @@
     int returnAlgoCounts = 0;
     std::vector<cudnnConvolutionFwdAlgoPerf_t> returnFwdAlgo(maxAlgoIterations);
 
 /**************************************************************************************************************
 https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnFindConvolutionForwardAlgorithm
 This function attempts all cuDNN algorithms (including CUDNN_TENSOR_OP_MATH and CUDNN_DEFAULT_MATH
 versions of algorithms where CUDNN_TENSOR_OP_MATH may be available) for cudnnConvolutionForward(),
 using memory allocated via cudaMalloc(), and outputs performance metrics to a user-allocated array
 of cudnnConvolutionFwdAlgoPerf_t. These metrics are written in sorted fashion where the first element
 has the lowest compute time. The total number of resulting algorithms can be queried through
 the API cudnnGetConvolutionForwardMaxCount().
 ***************************************************************************************************************/
 
     CHECK_CUDNN_STATUS(cudnnFindConvolutionForwardAlgorithm(
         CudaContext::cudnnHandle(),
-        static_cast<TensorImpl_cuda<float>*>(mOp.getInput(0)->getImpl().get())->getCudnnTensorDesc(), // FIXME: PLAIN WRONG
+        dynamic_cast<TensorImpl_cuda_*>(mOp.getInput(0)->getImpl().get())->getCudnnTensorDesc(),
         mFilterDesc,
         mConvDesc,
-        static_cast<TensorImpl_cuda<float>*>(mOp.getOutput(0)->getImpl().get())->getCudnnTensorDesc(), // FIXME: PLAIN WRONG
+        dynamic_cast<TensorImpl_cuda_*>(mOp.getOutput(0)->getImpl().get())->getCudnnTensorDesc(),
         maxAlgoIterations,
         &returnAlgoCounts,
         &returnFwdAlgo[0]));
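The maxAlgoIterations bound used above is initialized in elided context; with the cuDNN API it would typically come from cudnnGetConvolutionForwardAlgorithmMaxCount, as in the hedged sketch below (error handling via the same CHECK_CUDNN_STATUS macro as the surrounding code). Because the returned performance results are sorted by compute time, picking returnFwdAlgo[0].algo afterwards selects the fastest algorithm.

// Presumably how maxAlgoIterations is obtained just before this hunk;
// cudnnGetConvolutionForwardAlgorithmMaxCount is the standard cuDNN call
// for sizing the perf-results array:
int maxAlgoIterations = 0;
CHECK_CUDNN_STATUS(cudnnGetConvolutionForwardAlgorithmMaxCount(
    CudaContext::cudnnHandle(), &maxAlgoIterations));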
@@ -171,6 +170,66 @@ the API cudnnGetConvolutionForwardMaxCount().
     //    << std::endl;
     }
 
     mFwdAlgo = returnFwdAlgo[0].algo;
+
+    size_t workspaceSize = 0;
+    CHECK_CUDNN_STATUS(cudnnGetConvolutionForwardWorkspaceSize(
+        CudaContext::cudnnHandle(),
+        dynamic_cast<TensorImpl_cuda_*>(mOp.getInput(0)->getImpl().get())->getCudnnTensorDesc(),
+        mFilterDesc,
+        mConvDesc,
+        dynamic_cast<TensorImpl_cuda_*>(mOp.getOutput(0)->getImpl().get())->getCudnnTensorDesc(),
+        mFwdAlgo,
+        &workspaceSize));
+
+    if (mWorkspaceSize != workspaceSize) {
+        if (mWorkspace != nullptr) {
+            cudaFree(mWorkspace);
+            mWorkspaceSize = 0;
+        }
+
+        CHECK_CUDA_STATUS(cudaMalloc(&mWorkspace, workspaceSize));
+        mWorkspaceSize = workspaceSize;
+    }
+
+    if (mOp.getOutput(0)->dataType() == DataType::Float64) {
+        forward_<double>();
+    }
+    else {
+        forward_<float>();
+    }
+}
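The added block above is the standard cuDNN workspace handling: cudnnGetConvolutionForwardWorkspaceSize reports how much scratch memory the chosen algorithm needs, and mWorkspace is reallocated only when that size actually changes between calls. The trailing if/else then bridges the tensor's runtime data type to the statically typed forward_<T>(); note that every type other than Float64 currently falls through to float. A standalone sketch of this runtime-to-compile-time dispatch idiom (illustrative names, not Aidge's actual API):

#include <cstdio>

// Minimal model of the dispatch used by forward() above; the enum is a
// stand-in for Aidge's DataType, which has more members than shown here.
enum class DType { Float64, Float32 };

template <class T>
void forward_() { std::printf("running with %zu-byte elements\n", sizeof(T)); }

void forward(DType dt) {
    // Select the template instantiation from the runtime data type, exactly
    // as the committed code does with mOp.getOutput(0)->dataType():
    if (dt == DType::Float64) forward_<double>();
    else                      forward_<float>();
}

int main() { forward(DType::Float32); return 0; }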
+
+template <Aidge::DimIdx_t DIM>
+template <class T>
+void Aidge::ConvImpl_cuda<DIM>::forward_() {
+    const typename Cuda::cudnn_scaling_type<T>::type alpha = 1.0f;
+    typename Cuda::cudnn_scaling_type<T>::type beta = 0.0f;
+
+    CHECK_CUDNN_STATUS(
+        cudnnConvolutionForward(CudaContext::cudnnHandle(),
+            &alpha,
+            dynamic_cast<TensorImpl_cuda_*>(mOp.getInput(0)->getImpl().get())->getCudnnTensorDesc(),
+            mOp.getInput(0)->getImpl()->rawPtr(),
+            mFilterDesc,
+            mOp.getInput(1)->getImpl()->rawPtr(),
+            mConvDesc,
+            mFwdAlgo,
+            mWorkspace,
+            mWorkspaceSize,
+            &beta,
+            dynamic_cast<TensorImpl_cuda_*>(mOp.getOutput(0)->getImpl().get())->getCudnnTensorDesc(),
+            mOp.getOutput(0)->getImpl()->rawPtr()));
+
+    if (mOp.getInput(2) != nullptr) {
+        CHECK_CUDNN_STATUS(cudnnAddTensor(CudaContext::cudnnHandle(),
+            &alpha,
+            dynamic_cast<TensorImpl_cuda_*>(mOp.getInput(2)->getImpl().get())->getCudnnTensorDesc(),
+            mOp.getInput(2)->getImpl()->rawPtr(),
+            &alpha,
+            dynamic_cast<TensorImpl_cuda_*>(mOp.getOutput(0)->getImpl().get())->getCudnnTensorDesc(),
+            mOp.getOutput(0)->getImpl()->rawPtr()));
+    }
 }
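cuDNN's scaling convention explains the alpha/beta handling here: cudnnConvolutionForward computes y = alpha * conv(x, w) + beta * y, so alpha = 1 and beta = 0 simply overwrite the output, while cudnnAddTensor computes C = alpha * A + beta * C, so passing &alpha for both factors accumulates the optional bias into the freshly written result. The Cuda::cudnn_scaling_type trait exists because cuDNN expects scaling factors of type float for float (and half) tensors but double for double tensors; below is a minimal sketch of what it presumably looks like (an assumption about Aidge's helper, not its actual definition):

#include <type_traits>

// Hedged sketch of Cuda::cudnn_scaling_type, assuming it mirrors cuDNN's
// rule: alpha/beta are float for float/half data, double for double data.
template <class T>
struct cudnn_scaling_type { using type = float; };

template <>
struct cudnn_scaling_type<double> { using type = double; };

static_assert(std::is_same<cudnn_scaling_type<float>::type, float>::value, "");
static_assert(std::is_same<cudnn_scaling_type<double>::type, double>::value, "");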
 
 template <Aidge::DimIdx_t DIM>
...
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <array>
#include <catch2/catch_test_macros.hpp>
#include "Test_cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
using namespace Aidge;
TEST_CASE("[gpu/operator] Conv(forward)") {
SECTION("Classic Conv") {
std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv");
myConv->getOperator()->setDatatype(DataType::Float32);
myConv->getOperator()->setBackend("cuda");
std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
{
{
{{ 0, 1, 2},
{ 3, 4, 5},
{ 6, 7, 8}},
{{ 9, 10, 11},
{ 12, 13, 14},
{ 15, 16, 17}},
{{ 18, 19, 20},
{ 21, 22, 23},
{ 24, 25, 26}}
},
{
{{ 27, 28, 29},
{ 30, 31, 32},
{ 33, 34, 35}},
{{ 36, 37, 38},
{ 39, 40, 41},
{ 42, 43, 44}},
{{ 45, 46, 47},
{ 48, 49, 50},
{ 51, 52, 53}}
},
{
{{ 54, 55, 56},
{ 57, 58, 59},
{ 60, 61, 62}},
{{ 63, 64, 65},
{ 66, 67, 68},
{ 69, 70, 71}},
{{ 72, 73, 74},
{ 75, 76, 77},
{ 78, 79, 80}}
},
{
{{ 81, 82, 83},
{ 84, 85, 86},
{ 87, 88, 89}},
{{ 90, 91, 92},
{ 93, 94, 95},
{ 96, 97, 98}},
{{ 99, 100, 101},
{102, 103, 104},
{105, 106, 107}}
}
}
});
std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{
{
{{ 0, 1, 2, 3, 4},
{ 5, 6, 7, 8, 9},
{ 10, 11, 12, 13, 14},
{ 15, 16, 17, 18, 19},
{ 20, 21, 22, 23, 24}},
{{ 25, 26, 27, 28, 29},
{ 30, 31, 32, 33, 34},
{ 35, 36, 37, 38, 39},
{ 40, 41, 42, 43, 44},
{ 45, 46, 47, 48, 49}},
{{ 50, 51, 52, 53, 54},
{ 55, 56, 57, 58, 59},
{ 60, 61, 62, 63, 64},
{ 65, 66, 67, 68, 69},
{ 70, 71, 72, 73, 74}}
},
{
{{ 75, 76, 77, 78, 79},
{ 80, 81, 82, 83, 84},
{ 85, 86, 87, 88, 89},
{ 90, 91, 92, 93, 94},
{ 95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}
}
}
});
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
{
{
{{ 15226, 15577, 15928},
{ 16981, 17332, 17683},
{ 18736, 19087, 19438}},
{{ 37818, 38898, 39978},
{ 43218, 44298, 45378},
{ 48618, 49698, 50778}},
{{ 60426, 62235, 64044},
{ 69471, 71280, 73089},
{ 78516, 80325, 82134}},
{{ 83016, 85554, 88092},
{ 95706, 98244, 100782},
{108396, 110934, 113472}}
},
{
{{ 41551, 41902, 42253},
{ 43306, 43657, 44008},
{ 45061, 45412, 45763}},
{{118818, 119898, 120978},
{124218, 125298, 126378},
{129618, 130698, 131778}},
{{196101, 197910, 199719},
{205146, 206955, 208764},
{214191, 216000, 217809}},
{{273366, 275904, 278442},
{286056, 288594, 291132},
{298746, 301284, 303822}}
}
}
});
myConv->getOperator()->associateInput(0,myInput);
myConv->getOperator()->associateInput(1,myWeights);
myConv->getOperator()->associateInput(2,myBias);
myConv->getOperator()->computeOutputDims();
myConv->forward();
// myConv->getOperator()->getOutput(0)->print();
REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput);
}
}
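As a sanity check on the expected values: with a 5x5 input, a 3x3 kernel, stride 1 and no padding, the spatial output size is (5 - 3) / 1 + 1 = 3, matching the 2x4x3x3 shape of myOutput. The values themselves can be spot-checked because both the weights and the input are consecutive integers: for output position (n=0, c=0, y=0, x=0), the three 3x3 kernel slices (values 0-8, 9-17, 18-26) dotted with the three top-left input patches (starting at 0, 25, 50) give 312 + 3723 + 11184 = 15219, and adding myBias[0] = 7 yields 15226, the first value in myOutput. The same check as a standalone snippet:

#include <cassert>

// Recomputes output(0,0,0,0) of the test above from the fill pattern:
// weight(0,c,i,j) = 9*c + 3*i + j and input(0,c,i,j) = 25*c + 5*i + j.
int main() {
    long acc = 7;  // myBias[0]
    for (int c = 0; c < 3; ++c)          // input channels
        for (int i = 0; i < 3; ++i)      // kernel rows
            for (int j = 0; j < 3; ++j)  // kernel columns
                acc += static_cast<long>(9 * c + 3 * i + j) * (25 * c + 5 * i + j);
    assert(acc == 15226);  // matches myOutput(0,0,0,0)
    return 0;
}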