From 993b769e64cd2dd43f012ee336414a727d06e104 Mon Sep 17 00:00:00 2001
From: Adam Maroni <adamaroni@hotmail.fr>
Date: Wed, 26 Mar 2025 12:53:20 +0100
Subject: [PATCH] [Issue #250]: Add backward CPU implementation and unit
 testing for MaxPooling2D CPU

---
 .../backend/cpu/operator/MaxPoolingImpl.hpp |   7 +
 .../cpu/operator/MaxPoolingImpl_kernels.hpp | 135 ++++++++++++-
 src/operator/MaxPoolingImpl.cpp             |  19 +-
 unit_tests/operator/Test_MaxPoolingImpl.cpp | 185 +++++++++++++++++-
 4 files changed, 337 insertions(+), 9 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
index 062088a1..804fb33a 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
@@ -27,6 +27,13 @@ namespace Aidge {
 // Operator implementation entry point for the backend
 using MaxPooling2D_Op = MaxPooling_Op<2>;
 using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
+    void(const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
+        const bool,
+        const std::array<DimSize_t, 4>&,
+        const void *,
+        void *),
     void(const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
index 21eefb02..9a52c149 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
@@ -114,8 +114,8 @@ void MaxPoolingImpl2D_cpu_forward_kernel(
                             auto inputYPostDilation = strideYoffset + dilatedkernelY;
                             if (inputXPostDilation < dims[2] && inputYPostDilation < dims[3]){
                                 const I inputValue = input[
-                                    inputBaseIndex + inputXPostDilation * dims[3]
-                                    + inputYPostDilation
+                                    inputBaseIndex + inputXPostDilation * dims[3] +
+                                    inputYPostDilation
                                 ];
                                 if (!valid || inputValue > poolValue) {
                                     poolValue = inputValue;
@@ -131,16 +131,141 @@ void MaxPoolingImpl2D_cpu_forward_kernel(
         }
     }
 }
 
+
+template <class I, class O>
+void MaxPoolingImpl2D_cpu_backward_kernel(
+    const std::array<DimSize_t, 2>& strideDims,
+    const std::array<DimSize_t, 2>& kernelDims,
+    const std::array<DimSize_t, 2>& dilations,
+    const bool ceilMode,
+    const std::array<DimSize_t, 4>& dims,
+    const void *input_,
+    void *grad_
+)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *grad = static_cast<O *>(grad_);
+
+    // Fill the gradient with 0 to avoid garbage data
+    std::fill(grad,
+        grad + (dims[0] * dims[1] * dims[2] * dims[3]),
+        static_cast<O>(0)
+    );
+
+    // Output H size
+    auto hOut = static_cast<float>(
+        dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]
+    ) / static_cast<float>(strideDims[0]);
+    const std::size_t outXSize = ceilMode
+        ? static_cast<std::size_t>(std::ceil(hOut))
+        : static_cast<std::size_t>(std::floor(hOut));
+
+    // Output W size
+    auto wOut = static_cast<float>(
+        dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]
+    ) / static_cast<float>(strideDims[1]);
+
+    const std::size_t outYSize = ceilMode
+        ? static_cast<std::size_t>(std::ceil(wOut))
+        : static_cast<std::size_t>(std::floor(wOut));
+
+    using signedsize = std::make_signed<std::size_t>::type;
+
+    for (std::size_t batch = 0; batch < dims[0]; ++batch){
+        for (std::size_t channel = 0; channel < dims[1]; ++channel){
+            auto batchChannelIndex = (channel + batch * dims[1]);
+            const std::size_t inputBaseIndex = batchChannelIndex * dims[2] * dims[3];
+            for (std::size_t outX = 0; outX < outXSize; ++outX) {
+                const signedsize negStrideX = static_cast<signedsize>(
+                    -outX * strideDims[0]
+                );
+                const std::size_t kernelXMin = static_cast<std::size_t>(
+                    std::max(negStrideX, signedsize(0))
+                );
+                /* Compute kernelXMax */
+                std::size_t kernelXMax = dims[2] + negStrideX;
+                if ((static_cast<signedsize>(dims[2]) + negStrideX) < 0){
+                    kernelXMax = 0;
+                }
+                else if (kernelXMax > kernelDims[0]){
+                    kernelXMax = kernelDims[0];
+                }
+                for (std::size_t outY = 0; outY < outYSize; ++outY) {
+                    const signedsize negStrideY = static_cast<signedsize>(-outY * strideDims[1]);
+                    const std::size_t kernelYMin = static_cast<std::size_t>(
+                        std::max(negStrideY, signedsize(0))
+                    );
+                    /* Compute kernelYMax */
+                    std::size_t kernelYMax = dims[3] + negStrideY;
+                    if (static_cast<signedsize>(dims[3]) + negStrideY < 0){
+                        kernelYMax = 0;
+                    }
+                    else if (kernelYMax > kernelDims[1]){
+                        kernelYMax = kernelDims[1];
+                    }
+                    const std::size_t strideXoffset = outX * strideDims[0];
+                    const std::size_t strideYoffset = outY * strideDims[1];
+                    I poolValue(0.0);
+                    bool valid = false;
+                    std::size_t saveIndex = 0;
+                    for (std::size_t kY = kernelYMin; kY < kernelYMax; ++kY){
+                        for (std::size_t kX = kernelXMin; kX < kernelXMax; ++kX){
+                            // Apply dilation factor to kernel indices
+                            const std::size_t dilatedkernelX = kX * dilations[0];
+                            const std::size_t dilatedkernelY = kY * dilations[1];
+                            // Ensure indices are within bounds
+                            auto inputXPostDilation = strideXoffset + dilatedkernelX;
+                            auto inputYPostDilation = strideYoffset + dilatedkernelY;
+                            if (inputXPostDilation < dims[2] && inputYPostDilation < dims[3]){
+                                const std::size_t inputIndex =
+                                    inputBaseIndex + inputXPostDilation * dims[3] +
+                                    inputYPostDilation;
+                                const I inputValue = input[inputIndex];
+                                if (!valid || inputValue > poolValue) {
+                                    poolValue = inputValue;
+                                    saveIndex = inputIndex;
+                                    valid = true;
+                                }
+                            }
+                        }
+                    }
+                    if (valid){
+                        grad[saveIndex]++; // implicit all-ones output gradient: count window selections
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+
 // Kernels registration to implementation entry point
 REGISTRAR(MaxPoolingImpl2D_cpu,
     {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr});
+    {
+        ProdConso::inPlaceModel,
+        Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>,
+        Aidge::MaxPoolingImpl2D_cpu_backward_kernel<float, float>
+    }
+);
 REGISTRAR(MaxPoolingImpl2D_cpu,
     {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr});
+    {
+        ProdConso::inPlaceModel,
+        Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>,
+        Aidge::MaxPoolingImpl2D_cpu_backward_kernel<double, double>
+    }
+);
 REGISTRAR(MaxPoolingImpl2D_cpu,
     {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
+    {
+        ProdConso::inPlaceModel,
+        Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>,
+        Aidge::MaxPoolingImpl2D_cpu_backward_kernel<int32_t, int32_t>
+    }
+);
 } // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ */
diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp
index 13ef75b0..42be049d 100644
--- a/src/operator/MaxPoolingImpl.cpp
+++ b/src/operator/MaxPoolingImpl.cpp
@@ -25,7 +25,8 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() {
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.strideDims(),
@@ -39,5 +40,19 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() {
 
 template <>
 void Aidge::MaxPoolingImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MaxPooling_Op<2> on backend cpu");
+    const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
+
+    // Find the correct kernel type
+    const auto impl =
+        Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+
+    // Call kernel
+    impl.backward(op_.strideDims(),
+                  op_.kernelDims(),
+                  op_.dilations(),
+                  op_.ceilMode(),
+                  op_.getInput(0)->template dims<4>(),
+                  getCPUPtr(mOp.getRawInput(0)),
+                  op_.getInput(0)->grad()->getImpl()->rawPtr());
 }
diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp
index d480fc30..2bc5e1ee 100644
--- a/unit_tests/operator/Test_MaxPoolingImpl.cpp
+++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp
@@ -55,7 +55,11 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
         }
     });
     SECTION("Stride") {
-        std::shared_ptr<MaxPooling_Op<2>> op = std::make_shared<MaxPooling_Op<2>>(std::array<std::size_t, 2>({2,2}), std::array<std::size_t, 2>({2,2}));
+        std::shared_ptr<MaxPooling_Op<2>> op =
+            std::make_shared<MaxPooling_Op<2>>(
+                std::array<std::size_t, 2>({2, 2}),
+                std::array<std::size_t, 2>({2, 2})
+            );
 
         Tensor myOutput = Array4D<float,2,2,2,2> {
             {
@@ -172,4 +176,181 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
         op2->getOutput(0)->print();
         REQUIRE(*(op2->getOutput(0)) == *myOutput5);
     }
-}
\ No newline at end of file
+}
+
+
+
+TEST_CASE("[cpu/operator] MaxPooling(backward)", "[MaxPooling][CPU]") {
+    std::shared_ptr<Tensor> myInput =
+        std::make_shared<Tensor>(Array4D<float,2,2,5,5> { // NCHW
+        {
+            {
+                {{-0.3848,  0.2166, -0.4373,  0.6142,  0.5277},
+                 { 0.7995,  0.3638, -1.4589, -1.0843,  1.0918},
+                 { 0.7147,  0.0936, -1.2902,  1.2037,  0.4874},
+                 {-0.5981,  2.1184, -0.9175,  1.3859,  0.3305},
+                 {-1.7700,  0.0563, -0.3914,  0.0538, -0.3955}},
+
+                {{-3.1409, -0.4554,  0.0524,  2.2291,  0.4859},
+                 {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
+                 { 2.2329, -0.5850,  0.0700,  1.2838, -1.7363},
+                 { 0.2139,  0.0624, -1.0689, -0.8221, -0.8038},
+                 { 0.1886, -0.7840, -0.2313,  0.2651, -1.6244}}
+            },
+            {
+                {{ 0.4371,  1.6417,  0.9129,  0.6325,  0.5438},
+                 {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
+                 { 1.7653, -1.6668, -1.0814,  0.6182,  1.2071},
+                 { 0.9541, -0.5133,  0.8664, -0.8892,  1.4585},
+                 { 1.0220, -0.5107,  0.1829, -0.2301, -0.4268}},
+
+                {{ 1.0429,  0.6279, -0.2875,  0.7187, -0.1500},
+                 { 1.6041,  2.9635,  1.4172, -0.7517,  0.5441},
+                 {-0.2276,  0.0857,  0.6776, -0.1389, -0.0614},
+                 {-0.1547, -0.3435,  0.0650, -0.5095, -1.8073},
+                 { 1.7217,  0.3999, -0.5953,  1.0604, -0.4126}}
+            }
+        }
+    });
+    SECTION("Stride") {
+        std::shared_ptr<MaxPooling_Op<2>> op =
+            std::make_shared<MaxPooling_Op<2>>(
+                std::array<std::size_t, 2>({2, 2}),
+                std::array<std::size_t, 2>({2, 2})
+            );
+
+        Tensor grad = Array4D<float,2,2,5,5> {
+            {
+                {
+                    {{0, 0, 0, 1, 0},
+                     {1, 0, 0, 0, 0},
+                     {0, 0, 0, 0, 0},
+                     {0, 1, 0, 1, 0},
+                     {0, 0, 0, 0, 0}},
+
+                    {{0, 1, 0, 1, 0},
+                     {0, 0, 0, 0, 0},
+                     {1, 0, 0, 1, 0},
+                     {0, 0, 0, 0, 0},
+                     {0, 0, 0, 0, 0}}
+                },
+                {
+                    {{0, 1, 1, 0, 0},
+                     {0, 0, 0, 0, 0},
+                     {1, 0, 0, 0, 0},
+                     {0, 0, 1, 0, 0},
+                     {0, 0, 0, 0, 0}},
+
+                    {{0, 0, 0, 0, 0},
+                     {0, 1, 1, 0, 0},
+                     {0, 1, 1, 0, 0},
+                     {0, 0, 0, 0, 0},
+                     {0, 0, 0, 0, 0}}
+                }
+            }
+        };
+        op->associateInput(0, myInput);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+        op->backward();
+        //op->getInput(0)->grad()->print();
+        REQUIRE(*(op->getInput(0)->grad()) == grad);
+    }
+    SECTION("Dilation") {
+        std::shared_ptr<Node> myMaxPool =
+            MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2
+        auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool->getOperator());
+
+        Tensor grad = Array4D<float,2,2,5,5> {
+            {{{{0., 0., 0., 0., 1.},
+               {0., 0., 0., 0., 0.},
+               {2., 0., 0., 0., 1.},
+               {0., 0., 0., 0., 0.},
+               {0., 0., 0., 0., 0.}},
+
+              {{0., 0., 0., 0., 1.},
+               {0., 0., 0., 0., 0.},
+               {2., 0., 1., 0., 0.},
+               {0., 0., 0., 0., 0.},
+               {0., 0., 0., 0., 0.}}},
+
+             {{{0., 0., 0., 0., 0.},
+               {0., 0., 0., 0., 0.},
+               {2., 0., 0., 0., 2.},
+               {0., 0., 0., 0., 0.},
+               {0., 0., 0., 0., 0.}},
+
+              {{1., 0., 0., 0., 0.},
+               {0., 0., 0., 0., 0.},
+               {0., 0., 2., 0., 0.},
+               {0., 0., 0., 0., 0.},
+               {1., 0., 0., 0., 0.}}}}}
+        };
+        op->associateInput(0, myInput);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+        op->backward();
+        //op->getInput(0)->grad()->print();
+        REQUIRE(*(op->getInput(0)->grad()) == grad);
+    }
+    SECTION("Ceil mode") {
+        std::shared_ptr<Tensor> myInput4 =
+            std::make_shared<Tensor>(Array4D<float,1,1,5,5> { // NCHW
+            {{{
+                { 1,  2,  3,  4,  5},
+                { 6,  7,  8,  9, 10},
+                {11, 12, 13, 14, 15},
+                {16, 17, 18, 19, 20},
+                {21, 22, 23, 24, 25}
+            }}}
+        });
+
+        // MaxPool with ceil_mode = true
+        std::shared_ptr<Node> myMaxPool1 =
+            MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, true);
+        auto op1 = std::static_pointer_cast<OperatorTensor>(
+            myMaxPool1->getOperator()
+        );
+        Tensor grad = Array4D<float,1,1,5,5> {
+            {{{
+                {0, 0, 0, 0, 0},
+                {0, 1, 0, 1, 1},
+                {0, 0, 0, 0, 0},
+                {0, 1, 0, 1, 1},
+                {0, 1, 0, 1, 1}
+            }}}
+        };
+
+        op1->associateInput(0, myInput4);
+        op1->setDataType(DataType::Float32);
+        op1->setBackend("cpu");
+        op1->backward();
+        //op1->getInput(0)->grad()->print();
+        REQUIRE(*(op1->getInput(0)->grad()) == grad);
+
+        // MaxPool with ceil_mode = false
+        std::shared_ptr<Node> myMaxPool2 =
+            MaxPooling({2,2}, "mycdw", {2,2}, {1,1}, false);
+        auto op2 = std::static_pointer_cast<OperatorTensor>(
+            myMaxPool2->getOperator()
+        );
+
+        Tensor grad2 = Array4D<float,1,1,5,5> {
+            {{{
+                {0, 0, 0, 0, 0},
+                {0, 1, 0, 1, 0},
+                {0, 0, 0, 0, 0},
+                {0, 1, 0, 1, 0},
+                {0, 0, 0, 0, 0}
+            }}}
+        };
+
+        op2->associateInput(0, myInput4);
+        op2->setDataType(DataType::Float32);
+        op2->setBackend("cpu");
+        op2->backward();
+        //op2->getInput(0)->grad()->print();
+        REQUIRE(*(op2->getInput(0)->grad()) == grad2);
+    }
+}
-- 
GitLab
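
Reviewer note on the output-size arithmetic: the backward kernel recomputes the
pooled output extent with the same ceil/floor formula as the forward pass, so
the two passes always agree on window placement. The standalone sketch below
(illustrative only; pooledSize is not an Aidge function) reproduces that
arithmetic and plugs in the unit-test shapes:

    #include <cmath>
    #include <cstddef>
    #include <iostream>

    // Illustrative helper (not part of the patch): pooled output extent along
    // one spatial axis, mirroring the kernel's hOut/wOut computation.
    std::size_t pooledSize(std::size_t in, std::size_t kernel,
                           std::size_t stride, std::size_t dilation,
                           bool ceilMode) {
        const float out =
            static_cast<float>(in - (kernel - 1) * dilation - 1 + stride)
            / static_cast<float>(stride);
        return ceilMode ? static_cast<std::size_t>(std::ceil(out))
                        : static_cast<std::size_t>(std::floor(out));
    }

    int main() {
        // "Ceil mode" test: 5x5 input, 2x2 kernel, stride 2, dilation 1.
        std::cout << pooledSize(5, 2, 2, 1, false) << '\n'; // 2 (floor(2.5))
        std::cout << pooledSize(5, 2, 2, 1, true)  << '\n'; // 3 (ceil(2.5))
        // "Dilation" test: dilation 2 gives an effective 3x3 window.
        std::cout << pooledSize(5, 2, 2, 2, false) << '\n'; // 2
        return 0;
    }

The 2-vs-3 difference is why the ceil-mode test expects extra gradient hits in
the last input row and column, while the floor-mode variant does not.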
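Reviewer note on grad[saveIndex]++: the kernel receives only the input tensor
and the input-gradient buffer, so it effectively back-propagates an implicit
all-ones output gradient, counting how many pooling windows selected each
input element; this is exactly what the unit tests assert. A general backward
pass would instead accumulate the upstream gradient at each argmax position.
Here is a minimal single-channel sketch of that generalization, assuming unit
dilation, floor mode, and a square kernel (maxPoolBackward1C is hypothetical,
not the Aidge API):

    #include <cstddef>
    #include <vector>

    // Hypothetical sketch (not the Aidge API): max-pool backward for one
    // channel with an explicit upstream gradient. The patch's kernel is the
    // special case outputGrad == all ones, so grad[saveIndex]++ suffices there.
    void maxPoolBackward1C(const std::vector<float>& input,      // H*W, row-major
                           const std::vector<float>& outputGrad, // hOut*wOut
                           std::vector<float>& inputGrad,        // H*W, zero-filled
                           std::size_t H, std::size_t W,
                           std::size_t k, std::size_t s) {
        const std::size_t hOut = (H - k) / s + 1; // floor mode, dilation 1
        const std::size_t wOut = (W - k) / s + 1;
        for (std::size_t oy = 0; oy < hOut; ++oy) {
            for (std::size_t ox = 0; ox < wOut; ++ox) {
                // Recompute the argmax of this window, as the forward pass did
                // (ties resolved by first occurrence, matching the > compare).
                std::size_t best = (oy * s) * W + (ox * s);
                for (std::size_t ky = 0; ky < k; ++ky) {
                    for (std::size_t kx = 0; kx < k; ++kx) {
                        const std::size_t idx = (oy * s + ky) * W + (ox * s + kx);
                        if (input[idx] > input[best]) { best = idx; }
                    }
                }
                // Route the upstream gradient to the winning element only.
                inputGrad[best] += outputGrad[oy * wOut + ox];
            }
        }
    }

    int main() {
        // 4x4 input, 2x2 kernel, stride 2 -> 2x2 output grid.
        const std::vector<float> in = { 1,  2,  3,  4,
                                        5,  6,  7,  8,
                                        9, 10, 11, 12,
                                       13, 14, 15, 16};
        const std::vector<float> og = {0.5f, 1.0f, 1.5f, 2.0f};
        std::vector<float> ig(16, 0.0f);
        maxPoolBackward1C(in, og, ig, 4, 4, 2, 2);
        // ig: 0.5 at index 5, 1.0 at index 7, 1.5 at index 13, 2.0 at index 15.
        return 0;
    }

Under this formulation, wiring the operator to consume op_.getOutput(0)->grad()
would only require replacing the increment with an accumulation of the matching
output-gradient element; the argmax recomputation stays the same.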