From 39117d5af7bca55f0aa55a13db0770479378ed90 Mon Sep 17 00:00:00 2001
From: hrouis <houssemeddine.rouis92@gmail.com>
Date: Fri, 24 Jan 2025 16:08:17 +0100
Subject: [PATCH] add dilations to maxpool

---
 .../backend/cpu/operator/MaxPoolingImpl.hpp   |   1 +
 .../cpu/operator/MaxPoolingImpl_kernels.hpp   | 126 ++----------------
 src/operator/MaxPoolingImpl.cpp               |   1 +
 unit_tests/operator/Test_MaxPoolingImpl.cpp   |  35 +++++
 4 files changed, 49 insertions(+), 114 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
index 68cc3621..062088a1 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
@@ -28,6 +28,7 @@ namespace Aidge {
 using MaxPooling2D_Op = MaxPooling_Op<2>;
 using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
     void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
                             const std::array<DimSize_t, 2>&,
                             const bool,
                             const std::array<DimSize_t, 4> &,
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
index 7b6f04f1..250b11b0 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
@@ -35,28 +35,23 @@ namespace Aidge {
 template <class I, class O>
 void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                                         const std::array<DimSize_t, 2>& kernelDims,
+                                        const std::array<DimSize_t, 2>& dilations,
                                         const bool /*ceilMode*/,
                                         const std::array<DimSize_t, 4> &dims,
                                         const void *input_,
                                         void *output_) {
-    // FIXME: missing convolution parameters as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]) /
                                 static_cast<float>(strideDims[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]) /
                                 static_cast<float>(strideDims[1])));
 
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input  (batch, ch, Xin, Yin)
-    // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation parameter into account
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
@@ -77,12 +72,15 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
                     I poolValue(0.0);
                     bool valid = false;
 
-                    for (unsigned int channel = 0; channel < dims[1];
-                            ++channel){
-                        for (unsigned int sy = syMin; sy < syMax; ++sy) {
-                            for (unsigned int sx = sxMin; sx < sxMax; ++sx)
-                            {
-                                const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+                    for (unsigned int sy = syMin; sy < syMax; ++sy) {
+                        for (unsigned int sx = sxMin; sx < sxMax; ++sx) {
+                            // Apply dilation factor to kernel indices
+                            const std::size_t dilated_sx = sx * dilations[0];
+                            const std::size_t dilated_sy = sy * dilations[1];
+
+                            // Ensure indices are within bounds
+                            if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) {
+                                const I value = input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)];
 
                                 if (!valid || value > poolValue) {
                                     poolValue = value;
@@ -98,106 +96,6 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
     }
 }
 
-//N2D2 version
-/*
-template <class T>
-void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
-                                              const Tensor<T>&
-                                              inputs,
-                                              const Descriptor& desc,
-                                              const T* beta,
-                                              Tensor<T>& outputs,
-                                              Tensor<ArgMax>& argMax,
-                                              bool useArgMax,
-                                              const Tensor<bool>& maps)
-{
-    const unsigned int size = inputs.dimB() * outputs.dimZ();
-
-#if defined(_OPENMP) && _OPENMP >= 200805
-#pragma omp parallel for collapse(2) if (size > 16)
-#else
-#pragma omp parallel for if (inputs.dimB() > 4 && size > 16)
-#endif
-    for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) {
-        for (unsigned int output = 0; output < outputs.dimZ(); ++output) {
-            for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) {
-                for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) {
-                    const unsigned int sxMin = (unsigned int)std::max(
-                        desc.padding[0] - (int)(ox * desc.stride[0]), 0);
-                    const unsigned int syMin = (unsigned int)std::max(
-                        desc.padding[1] - (int)(oy * desc.stride[1]), 0);
-                    const unsigned int sxMax = Utils::clamp
-                        <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
-                              0,
-                              desc.pool[0]);
-                    const unsigned int syMax = Utils::clamp
-                        <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
-                              0,
-                              desc.pool[1]);
-
-                    const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
-                    const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
-
-                    T poolValue(0.0);
-
-                    // For each output, compute the pool value
-                    if (useArgMax) {
-                        const ArgMax inputMax
-                            = argMax(ox, oy, output, batchPos);
-
-                        if (inputMax.valid) {
-                            poolValue = inputs(inputMax.ix,
-                                               inputMax.iy,
-                                               inputMax.channel,
-                                               batchPos);
-                        }
-                    }
-                    else {
-                        unsigned int ixMax = 0;
-                        unsigned int iyMax = 0;
-                        unsigned int channelMax = 0;
-                        bool valid = false;
-
-                        for (unsigned int channel = 0; channel < inputs.dimZ();
-                             ++channel)
-                        {
-                            if (!maps.empty() && !maps(output, channel))
-                                continue;
-
-                            for (unsigned int sy = syMin; sy < syMax; ++sy) {
-                                for (unsigned int sx = sxMin; sx < sxMax; ++sx)
-                                {
-                                    const T value = inputs(ix + sx,
-                                                                 iy + sy,
-                                                                 channel,
-                                                                 batchPos);
-
-                                    if (!valid || value > poolValue) {
-                                        poolValue = value;
-                                        valid = true;
-
-                                        ixMax = ix + sx;
-                                        iyMax = iy + sy;
-                                        channelMax = channel;
-                                    }
-                                }
-                            }
-                        }
-
-                        argMax(ox, oy, output, batchPos)
-                            = ArgMax(ixMax, iyMax, channelMax, valid);
-                    }
-
-                    outputs(ox, oy, output, batchPos)
-                        = (*alpha) * poolValue
-                          + (*beta) * outputs(ox, oy, output, batchPos);
-                }
-            }
-        }
-    }
-}
-
-*/
 
 // Kernels registration to implementation entry point
 REGISTRAR(MaxPoolingImpl2D_cpu,
diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp
index 90075a39..13ef75b0 100644
--- a/src/operator/MaxPoolingImpl.cpp
+++ b/src/operator/MaxPoolingImpl.cpp
@@ -30,6 +30,7 @@ void Aidge::MaxPoolingImpl2D_cpu::forward() {
     // Call kernel
     impl.forward(op_.strideDims(),
                 op_.kernelDims(),
+                op_.dilations(),
                 op_.ceilMode(),
                 op_.getInput(0)->template dims<4>(),
                 getCPUPtr(mOp.getRawInput(0)),
diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp
index de02df2b..6b7e6d2f 100644
--- a/unit_tests/operator/Test_MaxPoolingImpl.cpp
+++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp
@@ -80,4 +80,39 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
         op->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == myOutput);
     }
+    SECTION("Dilation") {
+        std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}, {2,2}); // Dilation 2x2
+        auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator());
+
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
+            {
+                {
+                    {
+                        {0.71470, 0.52770},
+                        {0.71470, 0.48740}
+                    },
+                    {
+                        {2.23290, 0.48590},
+                        {2.23290, 0.07000}
+                    }
+                },
+                {
+                    {
+                        {1.76530, 1.20710},
+                        {1.76530, 1.20710}
+                    },
+                    {
+                        {1.04290, 0.67760},
+                        {1.72170, 0.67760}
+                    }
+                }
+            }
+        });
+        myMaxPool->getOperator()->associateInput(0,myInput);
+        myMaxPool->getOperator()->setDataType(DataType::Float32);
+        myMaxPool->getOperator()->setBackend("cpu");
+        myMaxPool->forward();
+        op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *myOutput);
+    }
 }
\ No newline at end of file
-- 
GitLab