From 39117d5af7bca55f0aa55a13db0770479378ed90 Mon Sep 17 00:00:00 2001 From: hrouis <houssemeddine.rouis92@gmail.com> Date: Fri, 24 Jan 2025 16:08:17 +0100 Subject: [PATCH] add dilations to maxpool --- .../backend/cpu/operator/MaxPoolingImpl.hpp | 1 + .../cpu/operator/MaxPoolingImpl_kernels.hpp | 126 ++---------------- src/operator/MaxPoolingImpl.cpp | 1 + unit_tests/operator/Test_MaxPoolingImpl.cpp | 35 +++++ 4 files changed, 49 insertions(+), 114 deletions(-) diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp index 68cc3621..062088a1 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -28,6 +28,7 @@ namespace Aidge { using MaxPooling2D_Op = MaxPooling_Op<2>; using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>, void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, const std::array<DimSize_t, 2>&, const bool, const std::array<DimSize_t, 4> &, diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp index 7b6f04f1..250b11b0 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp @@ -35,28 +35,23 @@ namespace Aidge { template <class I, class O> void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 2>& dilations, const bool /*ceilMode*/, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { - // FIXME: missing convolution parameters as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) / static_cast<float>(strideDims[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) / static_cast<float>(strideDims[1]))); - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { @@ -77,12 +72,15 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD I poolValue(0.0); bool valid = false; - for (unsigned int channel = 0; channel < dims[1]; - ++channel){ - for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) { + // Apply dilation factor to kernel indices + const std::size_t dilated_sx = sx * dilations[0]; + const std::size_t dilated_sy = sy * dilations[1]; + + // Ensure indices are within bounds + if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) { + const I value = input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]; if (!valid || value > poolValue) { poolValue = value; @@ -98,106 +96,6 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD } } -//N2D2 version -/* -template <class T> -void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, - const Tensor<T>& - inputs, - const Descriptor& desc, - const T* beta, - Tensor<T>& outputs, - Tensor<ArgMax>& argMax, - bool useArgMax, - const Tensor<bool>& maps) -{ - const unsigned int size = inputs.dimB() * outputs.dimZ(); - -#if defined(_OPENMP) && _OPENMP >= 200805 -#pragma omp parallel for collapse(2) if (size > 16) -#else -#pragma omp parallel for if (inputs.dimB() > 4 && size > 16) -#endif - for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) { - for (unsigned int output = 0; output < outputs.dimZ(); ++output) { - for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) { - for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) { - const unsigned int sxMin = (unsigned int)std::max( - desc.padding[0] - (int)(ox * desc.stride[0]), 0); - const unsigned int syMin = (unsigned int)std::max( - desc.padding[1] - (int)(oy * desc.stride[1]), 0); - const unsigned int sxMax = Utils::clamp - <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0], - 0, - desc.pool[0]); - const unsigned int syMax = Utils::clamp - <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1], - 0, - desc.pool[1]); - - const int ix = (int)(ox * desc.stride[0]) - desc.padding[0]; - const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; - - T poolValue(0.0); - - // For each output, compute the pool value - if (useArgMax) { - const ArgMax inputMax - = argMax(ox, oy, output, batchPos); - - if (inputMax.valid) { - poolValue = inputs(inputMax.ix, - inputMax.iy, - inputMax.channel, - batchPos); - } - } - else { - unsigned int ixMax = 0; - unsigned int iyMax = 0; - unsigned int channelMax = 0; - bool valid = false; - - for (unsigned int channel = 0; channel < inputs.dimZ(); - ++channel) - { - if (!maps.empty() && !maps(output, channel)) - continue; - - for (unsigned int sy = syMin; sy < syMax; ++sy) { - for (unsigned int sx = sxMin; sx < sxMax; ++sx) - { - const T value = inputs(ix + sx, - iy + sy, - channel, - batchPos); - - if (!valid || value > poolValue) { - poolValue = value; - valid = true; - - ixMax = ix + sx; - iyMax = iy + sy; - channelMax = channel; - } - } - } - } - - argMax(ox, oy, output, batchPos) - = ArgMax(ixMax, iyMax, channelMax, valid); - } - - outputs(ox, oy, output, batchPos) - = (*alpha) * poolValue - + (*beta) * outputs(ox, oy, output, batchPos); - } - } - } - } -} - -*/ // Kernels registration to implementation entry point REGISTRAR(MaxPoolingImpl2D_cpu, diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 90075a39..13ef75b0 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -30,6 +30,7 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() { // Call kernel impl.forward(op_.strideDims(), op_.kernelDims(), + op_.dilations(), op_.ceilMode(), op_.getInput(0)->template dims<4>(), getCPUPtr(mOp.getRawInput(0)), diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp index de02df2b..6b7e6d2f 100644 --- a/unit_tests/operator/Test_MaxPoolingImpl.cpp +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -80,4 +80,39 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") { op->getOutput(0)->print(); REQUIRE(*(op->getOutput(0)) == myOutput); } + SECTION("Dilation") { + std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2 + auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator()); + + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { + { + { + { + {0.71470, 0.52770}, + {0.71470, 0.48740} + }, + { + {2.23290, 0.48590}, + {2.23290, 0.07000} + } + }, + { + { + {1.76530, 1.20710}, + {1.76530, 1.20710} + }, + { + {1.04290, 0.67760}, + {1.72170, 0.67760} + } + } + } + }); + myMaxPool->getOperator()->associateInput(0,myInput); + myMaxPool->getOperator()->setDataType(DataType::Float32); + myMaxPool->getOperator()->setBackend("cpu"); + myMaxPool->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *myOutput); + } } \ No newline at end of file -- GitLab