diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
index 26c873c8fe7f140b09b31d0f1a9d4125acbcf50f..8ca23a4d39600af29065992804d75c42b822ea1b 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
@@ -131,7 +131,7 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
 
             for (std::uint32_t oy = 0; oy < oySize; ++oy) {
                 for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const std::size_t oIndexFull = oIndex + oy*oxSize + ox;
 
                     O outputValue = static_cast<O>(borderValue);
 
@@ -140,14 +140,14 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
                         std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
 
                         if (ix >= 0  && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0  && iy < static_cast<std::int32_t>(dims[2])) {
-                            outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                            outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
                         }
                     }
                     else if (borderType == PadBorderType::Edge) {
                         std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])));
                         std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])));
 
-                        outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                        outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
                     }
                     else if (borderType == PadBorderType::Reflect) {
                         std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
@@ -162,13 +162,13 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
                         if (iy >= static_cast<std::int32_t>(dims[2]))
                             iy = static_cast<std::int32_t>(dims[2]) - iy;
 
-                        outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                        outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
                     }
                     else if (borderType == PadBorderType::Wrap) {
                         std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]);
                         std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]);
 
-                        outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                        outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
                     }
 
                     output[oIndexFull] = outputValue;