From 763e18886c06be0e55d2c2618cf16e6fe544cb13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=A9goire=20KUBLER?= <gregoire.kubler@proton.me>
Date: Thu, 17 Apr 2025 10:48:41 +0100
Subject: [PATCH] chore : alternative implementation to reduce boilerplate code

Sadly, this also lengthens execution time, as functions cannot be properly inlined when called through std::function.
See https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/-/merge_requests/160#note_3653885
for more information.
---
 .../backend/cpu/operator/PadImpl_kernels.hpp  | 321 +++++++-----------
 1 file changed, 114 insertions(+), 207 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
index 762d41fa..dfbd8edb 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
@@ -20,8 +20,8 @@
 #include <cstddef>   // std::size_t
 #include <cstdint>   // std::int32_t
 #include <fmt/base.h>
+#include <functional>
 #include <stdexcept>
-#include <type_traits>
 
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/utils/Registrar.hpp"
@@ -216,39 +216,51 @@ pad3DForwardConstant(const std::array<DimSize_t, 6> &beginEndBorders,
                      std::array<DimSize_t, 4> &oOffsets,
                      O *output) {
 
-    for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
-        oOffsets[2] = oX * oStrides[2] + oOffsets[1];
-
-        const SignedDimSize_t iX = oX - beginEndBorders[0];
-        if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) {
-            iOffsets[2] = iX * iStrides[2] + iOffsets[1];
-        } else {
-            std::fill(output + oOffsets[2],
-                      output + oOffsets[2] + oStrides[2],
-                      borderValue);
-            continue;
-        }
+    for (std::size_t batch = 0; batch < iDims[0]; ++batch) {
+        oOffsets[0] = batch * oStrides[0];
+        iOffsets[0] = batch * iStrides[0];
 
-        for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
-            oOffsets[3] = oY * oStrides[3] + oOffsets[2];
-            const SignedDimSize_t iY = oY - beginEndBorders[1];
-            if (iY >= 0 && iY < static_cast<SignedDimSize_t>(iDims[3])) {
-                iOffsets[3] = iY * iStrides[3] + iOffsets[2];
-            } else {
-                std::fill(output + oOffsets[3],
-                          output + oOffsets[3] + oStrides[3],
-                          borderValue);
-                continue;
-            }
+        for (std::size_t ch = 0; ch < iDims[1]; ++ch) {
+            iOffsets[1] = ch * iStrides[1] + iOffsets[0];
+            oOffsets[1] = ch * oStrides[1] + oOffsets[0];
+
+            for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
+                oOffsets[2] = oX * oStrides[2] + oOffsets[1];
+
+                const SignedDimSize_t iX = oX - beginEndBorders[0];
+                if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) {
+                    iOffsets[2] = iX * iStrides[2] + iOffsets[1];
+                } else {
+                    std::fill(output + oOffsets[2],
+                              output + oOffsets[2] + oStrides[2],
+                              borderValue);
+                    continue;
+                }
 
-            for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
-                const SignedDimSize_t iZ = oZ - beginEndBorders[2];
-                // if in bounds, takes corresponding value, otherwise takes
-                // default value
-                output[oOffsets[3] + oZ] =
-                    (iZ >= 0 && iZ < static_cast<SignedDimSize_t>(iDims[4]))
-                        ? input[iOffsets[3] + iZ]
-                        : borderValue;
+                for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
+                    oOffsets[3] = oY * oStrides[3] + oOffsets[2];
+                    const SignedDimSize_t iY = oY - beginEndBorders[1];
+                    if (iY >= 0 &&
+                        iY < static_cast<SignedDimSize_t>(iDims[3])) {
+                        iOffsets[3] = iY * iStrides[3] + iOffsets[2];
+                    } else {
+                        std::fill(output + oOffsets[3],
+                                  output + oOffsets[3] + oStrides[3],
+                                  borderValue);
+                        continue;
+                    }
+
+                    for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
+                        const SignedDimSize_t iZ = oZ - beginEndBorders[2];
+                        // if in bounds, takes corresponding value, otherwise
+                        // takes default value
+                        output[oOffsets[3] + oZ] =
+                            (iZ >= 0 &&
+                             iZ < static_cast<SignedDimSize_t>(iDims[4]))
+                                ? input[iOffsets[3] + iZ]
+                                : borderValue;
+                    }
+                }
             }
         }
     }
@@ -270,39 +282,6 @@ static inline DimSize_t padEdgeComputeInputCoord(const DimSize_t beginBorder,
                  static_cast<SignedDimSize_t>(oCoord - beginBorder))));
 }
 
-template <typename I, typename O>
-static inline void
-pad3DForwardEdge(const std::array<DimSize_t, 6> &beginEndBorders,
-                 const std::array<DimSize_t, 5> &iDims,
-                 const std::array<DimSize_t, 4> &iStrides,
-                 std::array<DimSize_t, 4> &iOffsets,
-                 const I *input,
-                 const std::array<DimSize_t, 3> &oDims,
-                 const std::array<DimSize_t, 4> &oStrides,
-                 std::array<DimSize_t, 4> &oOffsets,
-                 O *output) {
-    for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
-        oOffsets[2] = oX * oStrides[2] + oOffsets[1];
-        const DimSize_t iX =
-            padEdgeComputeInputCoord(beginEndBorders[0], iDims[2], oX);
-        iOffsets[2] = iX * iStrides[2] + iOffsets[1];
-
-        for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
-            oOffsets[3] = oY * oStrides[3] + oOffsets[2];
-            const DimSize_t iY =
-                padEdgeComputeInputCoord(beginEndBorders[1], iDims[3], oY);
-            iOffsets[3] = iY * iStrides[3] + iOffsets[2];
-
-            for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
-                const DimSize_t iZ =
-                    padEdgeComputeInputCoord(beginEndBorders[2], iDims[4], oZ);
-
-                output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
-            }
-        }
-    }
-}
-
 /**
  * @brief small inline fctn to generate the corresponding input coordinates of
  * an output coord in reflect padding along a given dimension.
@@ -316,10 +295,8 @@ padReflectComputeInputCoord(const DimSize_t beginBorder,
                             const DimSize_t oCoord) {
     SignedDimSize_t iCoord =
         std::abs(static_cast<SignedDimSize_t>(oCoord - beginBorder));
-
-    // Handle case where iCoord > iDim
-    // If so iCoord must be changed to (iDim - 1) - delta
-    // With delta = |iDim - 1 - icoord|
+    // Handle case where iCoord > iDim. If so, iCoord must be changed to
+    // (iDim - 1) - delta, with delta = |iDim - 1 - iCoord|.
     //
     // Since iCoord > iDim - 1, |(iDim - 1) - iCoord| <=> iCoord - (iDim - 1)
     // <=> iCoord + 1 - iDim
@@ -332,40 +309,6 @@ padReflectComputeInputCoord(const DimSize_t beginBorder,
     return iCoord;
 }
 
-template <typename I, typename O>
-static inline void
-pad3DForwardReflect(const std::array<DimSize_t, 6> &beginEndBorders,
-                    const std::array<DimSize_t, 5> &iDims,
-                    const std::array<DimSize_t, 4> &iStrides,
-                    std::array<DimSize_t, 4> &iOffsets,
-                    const I *input,
-                    const std::array<DimSize_t, 3> &oDims,
-                    const std::array<DimSize_t, 4> &oStrides,
-                    std::array<DimSize_t, 4> &oOffsets,
-                    O *output) {
-
-    for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
-        oOffsets[2] = oX * oStrides[2] + oOffsets[1];
-        DimSize_t iX =
-            padReflectComputeInputCoord(beginEndBorders[0], iDims[2], oX);
-        iOffsets[2] = iX * iStrides[2] + iOffsets[1];
-
-        for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
-            oOffsets[3] = oY * oStrides[3] + oOffsets[2];
-            DimSize_t iY =
-                padReflectComputeInputCoord(beginEndBorders[1], iDims[3], oY);
-            iOffsets[3] = iY * iStrides[3] + iOffsets[2];
-
-            for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
-                DimSize_t iZ = padReflectComputeInputCoord(beginEndBorders[2],
-                                                           iDims[4],
-                                                           oZ);
-                output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
-            }
-        }
-    }
-}
-
 /**
  * @brief small inline fctn to generate the corresponding input coordinates of
  * an output coord in wrap padding along a given dimension.
@@ -379,41 +322,6 @@ static inline DimSize_t padWrapComputeInputCoord(const DimSize_t beginBorder,
     return (iDim + oCoord - beginBorder) % iDim;
 }
 
-template <typename I, typename O>
-static inline void
-pad3DForwardWrap(const std::array<DimSize_t, 6> &beginEndBorders,
-                 const std::array<DimSize_t, 5> &iDims,
-                 const std::array<DimSize_t, 4> &iStrides,
-                 std::array<DimSize_t, 4> &iOffsets,
-                 const I *input,
-                 const std::array<DimSize_t, 3> &oDims,
-                 const std::array<DimSize_t, 4> &oStrides,
-                 std::array<DimSize_t, 4> &oOffsets,
-                 O *output) {
-    for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
-        oOffsets[2] = oX * oStrides[2] + oOffsets[1];
-
-        const DimSize_t iX =
-            padWrapComputeInputCoord(beginEndBorders[0], iDims[2], oX);
-        iOffsets[2] = iX * iStrides[2] + iOffsets[1];
-
-        for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
-            oOffsets[3] = oY * oStrides[3] + oOffsets[2];
-
-            const DimSize_t iY =
-                padWrapComputeInputCoord(beginEndBorders[1], iDims[3], oY);
-
-            iOffsets[3] = iY * iStrides[3] + iOffsets[2];
-
-            for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
-                const DimSize_t iZ =
-                    padWrapComputeInputCoord(beginEndBorders[2], iDims[4], oZ);
-                output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
-            }
-        }
-    }
-}
-
 /**
  * @brief Forward kernel for 2D Padding on CPU backend.
  * @tparam I Input data type.
@@ -456,6 +364,55 @@ void PadImpl3D_cpu_forward_kernel(
     std::array<DimSize_t, 4> oOffsets = {0, 0, 0, 0};
     std::array<DimSize_t, 4> iOffsets = {0, 0, 0, 0};
 
+    std::function<DimSize_t(const DimSize_t, const DimSize_t, const DimSize_t)>
+        getInputCoord;
+
+    switch (borderType) {
+    case PadBorderType::Constant: {
+        pad3DForwardConstant(beginEndBorders,
+                             static_cast<O>(borderValue),
+                             iDims,
+                             iStrides,
+                             iOffsets,
+                             input,
+                             oDims,
+                             oStrides,
+                             oOffsets,
+                             output);
+        return;
+    }
+    case PadBorderType::Zero: {
+        pad3DForwardConstant(beginEndBorders,
+                             static_cast<O>(0),
+                             iDims,
+                             iStrides,
+                             iOffsets,
+                             input,
+                             oDims,
+                             oStrides,
+                             oOffsets,
+                             output);
+        return;
+    }
+    case PadBorderType::Edge: {
+        getInputCoord = padEdgeComputeInputCoord;
+        break;
+    }
+    case PadBorderType::Reflect: {
+        getInputCoord = padReflectComputeInputCoord;
+        break;
+    }
+    case PadBorderType::Wrap: {
+        getInputCoord = padWrapComputeInputCoord;
+        break;
+    }
+    default: {
+        AIDGE_THROW_OR_ABORT(std::runtime_error,
+                             "Pad3D : unsupported padding method : {}.",
+                             borderType);
+    }
+    }
+
     for (std::size_t batch = 0; batch < iDims[0]; ++batch) {
         oOffsets[0] = batch * oStrides[0];
         iOffsets[0] = batch * iStrides[0];
@@ -464,75 +421,25 @@ void PadImpl3D_cpu_forward_kernel(
             iOffsets[1] = ch * iStrides[1] + iOffsets[0];
             oOffsets[1] = ch * oStrides[1] + oOffsets[0];
 
-            switch (borderType) {
-            case PadBorderType::Constant: {
-                pad3DForwardConstant(beginEndBorders,
-                                     static_cast<O>(borderValue),
-                                     iDims,
-                                     iStrides,
-                                     iOffsets,
-                                     input,
-                                     oDims,
-                                     oStrides,
-                                     oOffsets,
-                                     output);
-                break;
-            }
-            case PadBorderType::Zero: {
-                pad3DForwardConstant(beginEndBorders,
-                                     static_cast<O>(0),
-                                     iDims,
-                                     iStrides,
-                                     iOffsets,
-                                     input,
-                                     oDims,
-                                     oStrides,
-                                     oOffsets,
-                                     output);
-                break;
-            }
-            case PadBorderType::Edge: {
-                pad3DForwardEdge(beginEndBorders,
-                                 iDims,
-                                 iStrides,
-                                 iOffsets,
-                                 input,
-                                 oDims,
-                                 oStrides,
-                                 oOffsets,
-                                 output);
-                break;
-            }
-            case PadBorderType::Reflect: {
-                pad3DForwardReflect(beginEndBorders,
-                                    iDims,
-                                    iStrides,
-                                    iOffsets,
-                                    input,
-                                    oDims,
-                                    oStrides,
-                                    oOffsets,
-                                    output);
-                break;
-            }
-            case PadBorderType::Wrap: {
-                pad3DForwardWrap(beginEndBorders,
-                                 iDims,
-                                 iStrides,
-                                 iOffsets,
-                                 input,
-                                 oDims,
-                                 oStrides,
-                                 oOffsets,
-                                 output);
-                break;
-            }
-            default: {
-                AIDGE_THROW_OR_ABORT(
-                    std::runtime_error,
-                    "Pad3D : unsupported padding method : {}.",
-                    borderType);
-            }
+            for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
+                oOffsets[2] = oX * oStrides[2] + oOffsets[1];
+                const DimSize_t iX =
+                    getInputCoord(beginEndBorders[0], iDims[2], oX);
+                iOffsets[2] = iX * iStrides[2] + iOffsets[1];
+
+                for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
+                    oOffsets[3] = oY * oStrides[3] + oOffsets[2];
+                    const DimSize_t iY =
+                        getInputCoord(beginEndBorders[1], iDims[3], oY);
+                    iOffsets[3] = iY * iStrides[3] + iOffsets[2];
+
+                    for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
+                        const DimSize_t iZ =
+                            getInputCoord(beginEndBorders[2], iDims[4], oZ);
+
+                        output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
+                    }
+                }
             }
         }
     }
-- 
GitLab