Commit 993b769e authored by Adam Maroni

[Issue #250]: Add backward CPU implementation and unit testing for MaxPooling2D CPU

parent f1813259
2 merge requests: !166 Update 0.5.0 -> 0.6.0, !153 MaxPooling2D backward implementation for CPU backend
Pipeline #69852 passed
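For orientation, a minimal standalone sketch (not part of the commit, with a hypothetical helper name) of the gradient-routing rule that the backward kernel below implements: within each pooling window, only the element holding the maximum receives a gradient contribution.

#include <algorithm>
#include <array>
#include <cstddef>
#include <iterator>

// Hypothetical illustration only: backward pass of a single pooling window.
// The kernel in this commit applies the same rule per (batch, channel, outX, outY)
// window, adding 1 to the gradient of the window's maximum input element.
template <class T, std::size_t N>
void maxPoolWindowBackward(const std::array<T, N>& window, std::array<T, N>& grad) {
    const auto maxIt = std::max_element(window.begin(), window.end());
    const std::size_t argMax =
        static_cast<std::size_t>(std::distance(window.begin(), maxIt));
    grad[argMax] += static_cast<T>(1);
}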
@@ -27,6 +27,13 @@ namespace Aidge {
// Operator implementation entry point for the backend
using MaxPooling2D_Op = MaxPooling_Op<2>;
using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const bool,
const std::array<DimSize_t, 4> &,
const void *,
void *),
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
......
@@ -114,8 +114,8 @@ void MaxPoolingImpl2D_cpu_forward_kernel(
auto inputYPostDilation = strideYoffset + dilatedkernelY;
if (inputXPostDilation < dims[2] && inputYPostDilation < dims[3]){
const I inputValue = input[
inputBaseIndex + inputXPostDilation * dims[3]
+ inputYPostDilation
];
if (!valid || inputValue > poolValue) {
poolValue = inputValue;
@@ -131,16 +131,141 @@ void MaxPoolingImpl2D_cpu_forward_kernel(
}
}
template <class I, class O>
void MaxPoolingImpl2D_cpu_backward_kernel(
const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 2>& dilations,
const bool ceilMode,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *grad_
)
{
const I *input = static_cast<const I *>(input_);
I *grad = static_cast<I *>(grad_);
// Fill the gradient with 0 to avoid garbage data
std::fill(grad,
grad + (dims[0] * dims[1] * dims[2] * dims[3]),
static_cast<I>(0)
);
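// Output spatial sizes follow the no-padding pooling formula
// out = (in - (kernel - 1) * dilation - 1 + stride) / stride,
// rounded up when ceilMode is set and down otherwise.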
// output H size
auto hOut = static_cast<float>(
dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]
) / static_cast<float>(strideDims[0]);
const std::size_t outXSize = ceilMode
? static_cast<std::size_t>(std::ceil(hOut))
: static_cast<std::size_t>(std::floor(hOut));
// output W size
auto wOut = static_cast<float>(
dims[3] - ( kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]
) / static_cast<float>(strideDims[1]);
const std::size_t outYSize = ceilMode
? static_cast<std::size_t>(std::ceil(wOut))
: static_cast<std::size_t>(std::floor(wOut));
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch){
for (std::size_t channel = 0; channel < dims[1]; ++channel){
auto batchChannelIndex = (channel + batch * dims[1]);
const std::size_t inputBaseIndex = batchChannelIndex * dims[2] * dims[3];
for (std::size_t outX = 0; outX < outXSize; ++outX) {
const signedsize negStrideX = static_cast<signedsize>(
-outX * strideDims[0]
);
const std::size_t kernelXMin = static_cast<std::size_t>(
std::max(negStrideX, signedsize(0))
);
/* Compute kernelXMax */
std::size_t kernelXMax = dims[2] + negStrideX;
if ((static_cast<signedsize>(dims[2]) + negStrideX) < 0){
kernelXMax = 0;
}
else if (kernelXMax > kernelDims[0]){
kernelXMax = kernelDims[0];
}
for (std::size_t outY = 0; outY < outYSize; ++outY) {
const signedsize negStrideY = static_cast<signedsize>(-outY * strideDims[1]);
const std::size_t kernelYMin = static_cast<std::size_t>(
std::max(negStrideY, signedsize(0))
);
/* Compute kernelYMax */
std::size_t kernelYMax = dims[3] + negStrideY;
const std::size_t strideXoffset = outX * strideDims[0];
const std::size_t strideYoffset = outY * strideDims[1];
I poolValue(0.0);
bool valid = false;
if (static_cast<signedsize>(dims[3]) + negStrideY < 0){
kernelYMax = 0;
}
else if(kernelYMax > kernelDims[1]){
kernelYMax = kernelDims[1];
}
std::size_t saveIndex = 0;
for (unsigned int kY = kernelYMin; kY < kernelYMax ; ++kY){
for (unsigned int kX = kernelXMin; kX < kernelXMax; ++kX){
// Apply dilation factor to kernel indices
const std::size_t dilatedkernelX = kX * dilations[0];
const std::size_t dilatedkernelY = kY * dilations[1];
// Ensure indices are within bounds
auto inputXPostDilation = strideXoffset + dilatedkernelX;
auto inputYPostDilation = strideYoffset + dilatedkernelY;
if (inputXPostDilation < dims[2] && inputYPostDilation < dims[3]){
std::size_t inputIndex =
inputBaseIndex + inputXPostDilation * dims[3]
+ inputYPostDilation;
const I inputValue = input[inputIndex];
if (!valid || inputValue > poolValue) {
poolValue = inputValue;
saveIndex = inputIndex;
valid = true;
}
}
}
}
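// Route the gradient to the argmax position: each pooling window adds 1 to the
// gradient of its selected input element (the upstream output gradient is not
// applied by this kernel).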
if (valid){
grad[saveIndex]++;
}
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(MaxPoolingImpl2D_cpu,
{DataType::Float32},
{
ProdConso::inPlaceModel,
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>,
Aidge::MaxPoolingImpl2D_cpu_backward_kernel<float, float>,
}
);
REGISTRAR(MaxPoolingImpl2D_cpu,
{DataType::Float64},
{
ProdConso::inPlaceModel,
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>,
Aidge::MaxPoolingImpl2D_cpu_backward_kernel<double, double>,
}
);
REGISTRAR(MaxPoolingImpl2D_cpu,
{DataType::Int32},
{
ProdConso::inPlaceModel,
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>,
Aidge::MaxPoolingImpl2D_cpu_backward_kernel<int32_t, int32_t>,
}
);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ */
@@ -25,7 +25,8 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() {
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
// Find the correct kernel type
const auto impl =
Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(op_.strideDims(),
@@ -39,5 +40,19 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() {
template <>
void Aidge::MaxPoolingImpl2D_cpu::backward() {
const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp);
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
// Find the correct kernel type
const auto impl =
Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
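// The kernel is given the forward input values and the input gradient buffer,
// which it fills in place.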
impl.backward(op_.strideDims(),
op_.kernelDims(),
op_.dilations(),
op_.ceilMode(),
op_.getInput(0)->template dims<4>(),
getCPUPtr(mOp.getRawInput(0)),
op_.getInput(0)->grad()->getImpl()->rawPtr());
}
@@ -55,7 +55,11 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
}
});
SECTION("Stride") {
std::shared_ptr<MaxPooling_Op<2>> op =
std::make_shared<MaxPooling_Op<2>>(
std::array<std::size_t, 2>({2, 2}),
std::array<std::size_t, 2>({2, 2})
);
Tensor myOutput = Array4D<float,2,2,2,2> {
{
@@ -172,4 +176,181 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
op2->getOutput(0)->print();
REQUIRE(*(op2->getOutput(0)) == *myOutput5);
}
}
TEST_CASE("[cpu/operator] MaxPooling(backward)", "[MaxPooling][CPU]") {
std::shared_ptr<Tensor> myInput =
std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
{
{
{{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277},
{0.7995, 0.3638, -1.4589, -1.0843, 1.0918},
{0.7147, 0.0936, -1.2902, 1.2037, 0.4874},
{-0.5981, 2.1184, -0.9175, 1.3859, 0.3305},
{-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}},
{{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859},
{-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
{ 2.2329, -0.5850, 0.0700, 1.2838, -1.7363},
{ 0.2139, 0.0624, -1.0689, -0.8221, -0.8038},
{ 0.1886, -0.7840, -0.2313, 0.2651, -1.6244}}
},
{
{{ 0.4371, 1.6417, 0.9129, 0.6325, 0.5438},
{-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
{1.7653, -1.6668, -1.0814, 0.6182, 1.2071},
{0.9541, -0.5133, 0.8664, -0.8892, 1.4585},
{1.0220, -0.5107, 0.1829, -0.2301, -0.4268}},
{{ 1.0429, 0.6279, -0.2875, 0.7187, -0.1500},
{1.6041, 2.9635, 1.4172, -0.7517, 0.5441},
{-0.2276, 0.0857, 0.6776, -0.1389, -0.0614},
{-0.1547, -0.3435, 0.0650, -0.5095, -1.8073},
{1.7217, 0.3999, -0.5953, 1.0604, -0.4126}}
}
}
});
SECTION("Stride") {
std::shared_ptr<MaxPooling_Op<2>> op =
std::make_shared<MaxPooling_Op<2>>(
std::array<std::size_t, 2>({2,2}),
std::array<std::size_t, 2>({2,2})
);
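// Kernel 2x2 with stride 2 and no dilation: pooling windows do not overlap,
// so the maximum of each window receives a gradient of exactly 1.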
Tensor grad = Array4D<float,2,2,5,5> {
{
{
{{0, 0, 0, 1, 0},
{1, 0, 0, 0, 0},
{0, 0, 0, 0, 0},
{0, 1, 0, 1, 0},
{0, 0, 0, 0, 0}},
{{0, 1, 0, 1, 0},
{0, 0, 0, 0, 0},
{1, 0, 0, 1, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}}
},
{
{{0, 1, 1, 0, 0},
{0, 0, 0, 0, 0},
{1, 0, 0, 0, 0},
{0, 0, 1, 0, 0},
{0, 0, 0, 0, 0}},
{{0, 0, 0, 0, 0},
{0, 1, 1, 0, 0},
{0, 1, 1, 0, 0},
{0, 0, 0, 0, 0},
{0, 0, 0, 0, 0}}
}
}
};
op->associateInput(0,myInput);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->backward();
//op->getInput(0)->grad()->print();
REQUIRE(*(op->getInput(0)->grad()) == grad);
}
SECTION("Dilation"){
std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2
auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator());
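// Kernel 2x2 with stride 2 and dilation 2: adjacent dilated windows share a row
// or column, so inputs that are the maximum of two windows receive a gradient of 2.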
Tensor grad = Array4D<float,2,2,5,5> {
{{{{0., 0., 0., 0., 1.},
{0., 0., 0., 0., 0.},
{2., 0., 0., 0., 1.},
{0., 0., 0., 0., 0.},
{0., 0., 0., 0., 0.}},
{{0., 0., 0., 0., 1.},
{0., 0., 0., 0., 0.},
{2., 0., 1., 0., 0.},
{0., 0., 0., 0., 0.},
{0., 0., 0., 0., 0.}}},
{{{0., 0., 0., 0., 0.},
{0., 0., 0., 0., 0.},
{2., 0., 0., 0., 2.},
{0., 0., 0., 0., 0.},
{0., 0., 0., 0., 0.}},
{{1., 0., 0., 0., 0.},
{0., 0., 0., 0., 0.},
{0., 0., 2., 0., 0.},
{0., 0., 0., 0., 0.},
{1., 0., 0., 0., 0.}}}}
};
myMaxPool->getOperator()->associateInput(0,myInput);
myMaxPool->getOperator()->setDataType(DataType::Float32);
myMaxPool->getOperator()->setBackend("cpu");
op->backward();
//op->getInput(0)->grad()->print();
REQUIRE(*(op->getInput(0)->grad()) == grad);
}
SECTION("Ceil mode"){
std::shared_ptr<Tensor> myInput4 =
std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW
{{{
{ 1, 2, 3, 4, 5},
{ 6, 7, 8, 9, 10},
{11, 12, 13, 14, 15},
{16, 17, 18, 19, 20},
{21, 22, 23, 24, 25}
}}}
});
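// 5x5 input, 2x2 kernel, stride 2: the output extent is (5 - 1 - 1 + 2) / 2 = 2.5
// per dimension, i.e. 3 with ceil_mode = true and 2 with ceil_mode = false.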
// MaxPool with ceil_mode = true
std::shared_ptr<Node> myMaxPool1 =
MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, true);
auto op1 = std::static_pointer_cast<OperatorTensor>(
myMaxPool1 -> getOperator()
);
Tensor grad = Array4D<float,1,1,5,5> {
{{{
{0, 0, 0, 0, 0},
{0, 1, 0, 1, 1},
{0, 0, 0, 0, 0},
{0, 1, 0, 1, 1},
{0, 1, 0, 1, 1}
}}}
};
op1->associateInput(0, myInput4);
op1->setDataType(DataType::Float32);
op1->setBackend("cpu");
op1->backward();
//op1->getInput(0)->grad()->print();
REQUIRE(*(op1->getInput(0)->grad()) == grad);
// MaxPool with ceil_mode = false
std::shared_ptr<Node> myMaxPool2 =
MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, false);
auto op2 = std::static_pointer_cast<OperatorTensor>(
myMaxPool2 -> getOperator()
);
Tensor grad2 = Array4D<float,1,1,5,5> {
{{{
{0, 0, 0, 0, 0},
{0, 1, 0, 1, 0},
{0, 0, 0, 0, 0},
{0, 1, 0, 1, 0},
{0, 0, 0, 0, 0}
}}}
};
//op2->resetInput(0);
op2->associateInput(0, myInput4);
op2->setDataType(DataType::Float32);
op2->setBackend("cpu");
myMaxPool2->backward();
op2->getInput(0)->grad()->print();
REQUIRE(*(op2->getInput(0)->grad()) == grad2);
}
}