Skip to content
Snippets Groups Projects
Commit 09fc9acb authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

add ConvDepthWiseImpl

parent 5e9dd771
No related branches found
No related tags found
2 merge requests!32version 0.2.1,!14MobileNet operators
......@@ -21,11 +21,13 @@
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
namespace Aidge {
template <DimIdx_t DIM>
class ConvImpl_cuda : public OperatorImpl {
......@@ -42,14 +44,19 @@ private:
std::shared_ptr<Tensor> mInput0Fallback;
std::shared_ptr<Tensor> mInput1Fallback;
std::shared_ptr<Tensor> mInput2Fallback;
bool mDepthWise = false;
public:
ConvImpl_cuda(const Conv_Op<DIM> &op) : OperatorImpl(op, "cuda") {}
ConvImpl_cuda(const Operator&op, bool depthWise = false) : OperatorImpl(op, "cuda"), mDepthWise(depthWise) {}
/// Registry factory: builds the cuda backend implementation for a standard
/// Conv operator (depthwise flag left at its default, false).
static std::unique_ptr<ConvImpl_cuda> create(const Conv_Op<DIM> &op) {
return std::make_unique<ConvImpl_cuda>(op);
}
/// Registry factory for ConvDepthWise: reuses ConvImpl_cuda with the
/// depthwise flag set, so forward() reads ConvDepthWiseAttr attributes
/// instead of ConvAttr when building the cuDNN descriptors.
static std::unique_ptr<ConvImpl_cuda> createDW(const ConvDepthWise_Op<DIM> &op) {
return std::make_unique<ConvImpl_cuda>(op, true);
}
public:
void forward();
void backward();
......@@ -61,8 +68,8 @@ private:
};
namespace {
// Register the cuda backend implementation for BOTH 2-D convolution
// operators: the standard Conv_Op<2> and the depthwise ConvDepthWise_Op<2>
// (the latter routed through createDW, which sets the depthwise flag).
static Registrar<Conv_Op<2>> registrarConvImpl_cuda("cuda", Aidge::ConvImpl_cuda<2>::create);
static Registrar<ConvDepthWise_Op<2>> registrarConvDepthWiseImpl_cuda("cuda", Aidge::ConvImpl_cuda<2>::createDW);
} // namespace
} // namespace Aidge
......
......@@ -16,6 +16,7 @@
#include "aidge/backend/cuda/operator/ConvImpl.hpp"
#include "aidge/backend/cuda/utils/CudaUtils.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
#include "aidge/utils/Types.h"
template <Aidge::DimIdx_t DIM>
......@@ -33,19 +34,27 @@ void Aidge::ConvImpl_cuda<DIM>::forward() {
// Lazy-initialize CuDNN convolution descriptor
if (mConvDesc == nullptr) {
const Conv_Op<DIM>& convOp = static_cast<const Conv_Op<DIM>&>(mOp);
const std::vector<int> strides(convOp.template getAttr<ConvAttr::StrideDims>().begin(), convOp.template getAttr<ConvAttr::StrideDims>().end());
const std::vector<int> paddings(DIM, 0);
const std::vector<int> upscales(convOp.template getAttr<ConvAttr::DilationDims>().begin(), convOp.template getAttr<ConvAttr::DilationDims>().end());
CHECK_CUDNN_STATUS(cudnnCreateConvolutionDescriptor(&mConvDesc));
CHECK_CUDNN_STATUS(cudnnSetConvolutionNdDescriptor(mConvDesc,
DIM,
&paddings[0],
&strides[0],
&upscales[0],
CUDNN_CROSS_CORRELATION,
DataTypeToCudnn(op.getOutput(0)->dataType())));
std::vector<int> strides, upscales;
if (mDepthWise) {
const ConvDepthWise_Op<DIM>& convDWOp = static_cast<const ConvDepthWise_Op<DIM>&>(mOp);
strides = std::vector<int>(convDWOp.template getAttr<ConvDepthWiseAttr::StrideDims>().begin(), convDWOp.template getAttr<ConvDepthWiseAttr::StrideDims>().end());
upscales = std::vector<int>(convDWOp.template getAttr<ConvDepthWiseAttr::DilationDims>().begin(), convDWOp.template getAttr<ConvDepthWiseAttr::DilationDims>().end());
}
else {
const Conv_Op<DIM>& convOp = static_cast<const Conv_Op<DIM>&>(mOp);
strides = std::vector<int>(convOp.template getAttr<ConvAttr::StrideDims>().begin(), convOp.template getAttr<ConvAttr::StrideDims>().end());
upscales = std::vector<int>(convOp.template getAttr<ConvAttr::DilationDims>().begin(), convOp.template getAttr<ConvAttr::DilationDims>().end());
}
CHECK_CUDNN_STATUS(cudnnCreateConvolutionDescriptor(&mConvDesc));
CHECK_CUDNN_STATUS(cudnnSetConvolutionNdDescriptor(mConvDesc,
DIM,
&paddings[0],
&strides[0],
&upscales[0],
CUDNN_CROSS_CORRELATION,
DataTypeToCudnn(op.getOutput(0)->dataType())));
}
// Lazy-initialize CuDNN filter descriptor
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <array>
#include <cmath>
#include <cstddef>
#include <vector>

#include <catch2/catch_test_macros.hpp>

#include "Test_cuda.hpp"

#include "aidge/data/Tensor.hpp"

#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
using namespace Aidge;
/**
 * Checks the CUDA ConvDepthWise forward pass against precomputed reference
 * values: a 2x4x5x5 NCHW input convolved depthwise (one 3x3 kernel per
 * channel, stride 1, no padding) plus a per-channel bias, yielding 2x4x3x3.
 *
 * Fixes vs. previous version:
 *  - tagged [gpu]/[GPU] instead of [cpu]/[CPU] — this test runs on cuda;
 *  - host result buffer is a std::vector, so no leak when REQUIRE throws;
 *  - loop index is std::size_t (no signed/unsigned comparison);
 *  - reference pointer hoisted out of the comparison loop.
 */
TEST_CASE("[gpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][GPU]") {
    std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3,3}, "mycdw");
    auto op = std::static_pointer_cast<OperatorTensor>(myCDW->getOperator());
    // One 1x3x3 kernel per channel (depthwise: in-channel multiplier of 1).
    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<float,4,1,3,3> {
        {
            {{
                {  0,  1,  2},
                {  3,  4,  5},
                {  6,  7,  8}
            }},
            {{
                { 27, 28, 29},
                { 30, 31, 32},
                { 33, 34, 35}
            }},
            {{
                { 54, 55, 56},
                { 57, 58, 59},
                { 60, 61, 62}
            }},
            {{
                { 81, 82, 83},
                { 84, 85, 86},
                { 87, 88, 89}
            }}
        }
    });
    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,4> {{7,0,9,0}});
    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,4,5,5> { //NCHW
        {
            {
                {{  0,   1,   2,   3,   4},
                 {  5,   6,   7,   8,   9},
                 { 10,  11,  12,  13,  14},
                 { 15,  16,  17,  18,  19},
                 { 20,  21,  22,  23,  24}},
                {{ 25,  26,  27,  28,  29},
                 { 30,  31,  32,  33,  34},
                 { 35,  36,  37,  38,  39},
                 { 40,  41,  42,  43,  44},
                 { 45,  46,  47,  48,  49}},
                {{ 50,  51,  52,  53,  54},
                 { 55,  56,  57,  58,  59},
                 { 60,  61,  62,  63,  64},
                 { 65,  66,  67,  68,  69},
                 { 70,  71,  72,  73,  74}},
                {{ 75,  76,  77,  78,  79},
                 { 80,  81,  82,  83,  84},
                 { 85,  86,  87,  88,  89},
                 { 90,  91,  92,  93,  94},
                 { 95,  96,  97,  98,  99}}
            },
            {
                {{100, 101, 102, 103, 104},
                 {105, 106, 107, 108, 109},
                 {110, 111, 112, 113, 114},
                 {115, 116, 117, 118, 119},
                 {120, 121, 122, 123, 124}},
                {{125, 126, 127, 128, 129},
                 {130, 131, 132, 133, 134},
                 {135, 136, 137, 138, 139},
                 {140, 141, 142, 143, 144},
                 {145, 146, 147, 148, 149}},
                {{150, 151, 152, 153, 154},
                 {155, 156, 157, 158, 159},
                 {160, 161, 162, 163, 164},
                 {165, 166, 167, 168, 169},
                 {170, 171, 172, 173, 174}},
                {{175, 176, 177, 178, 179},
                 {180, 181, 182, 183, 184},
                 {185, 186, 187, 188, 189},
                 {190, 191, 192, 193, 194},
                 {195, 196, 197, 198, 199}}
            }
        }
    });
    // Precomputed expected output (includes the {7,0,9,0} bias).
    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,4,3,3> {
        {
            {
                {{   319,    355,    391},
                 {   499,    535,    571},
                 {   679,    715,    751}},
                {{  8745,   9024,   9303},
                 { 10140,  10419,  10698},
                 { 11535,  11814,  12093}},
                {{ 29337,  29859,  30381},
                 { 31947,  32469,  32991},
                 { 34557,  35079,  35601}},
                {{ 62061,  62826,  63591},
                 { 65886,  66651,  67416},
                 { 69711,  70476,  71241}}
            },
            {
                {{  3919,   3955,   3991},
                 {  4099,   4135,   4171},
                 {  4279,   4315,   4351}},
                {{ 36645,  36924,  37203},
                 { 38040,  38319,  38598},
                 { 39435,  39714,  39993}},
                {{ 81537,  82059,  82581},
                 { 84147,  84669,  85191},
                 { 86757,  87279,  87801}},
                {{138561, 139326, 140091},
                 {142386, 143151, 143916},
                 {146211, 146976, 147741}}
            }
        }
    });
    myInput->setBackend("cuda");
    myWeights->setBackend("cuda");
    myBias->setBackend("cuda");
    op->associateInput(0, myInput);
    op->associateInput(1, myWeights);
    op->associateInput(2, myBias);
    op->setDataType(DataType::Float32);
    op->setBackend("cuda");
    op->computeOutputDims();
    myCDW->forward();

    // Copy the device result back to host. A std::vector (instead of a raw
    // new[]/delete[] pair) guarantees the buffer is released even when a
    // failing REQUIRE throws out of this scope.
    std::vector<float> computedOutput(myOutput->size());
    cudaMemcpy(computedOutput.data(), op->getOutput(0)->getImpl()->rawPtr(),
               sizeof(float) * computedOutput.size(), cudaMemcpyDeviceToHost);

    const float* expected = static_cast<float*>(myOutput->getImpl()->rawPtr());
    for (std::size_t i = 0; i < computedOutput.size(); ++i) {
        REQUIRE(std::fabs(computedOutput[i] - expected[i]) < 1e-6f);
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment