diff --git a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp index 762d41fafce88d14ba0d3d002a0d7e50f1ec32af..dfbd8edb0876f4d04edc40418122166b2e205184 100644 --- a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp @@ -20,8 +20,8 @@ #include <cstddef> // std::size_t #include <cstdint> // std::int32_t #include <fmt/base.h> +#include <functional> #include <stdexcept> -#include <type_traits> #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/utils/Registrar.hpp" @@ -216,39 +216,51 @@ pad3DForwardConstant(const std::array<DimSize_t, 6> &beginEndBorders, std::array<DimSize_t, 4> &oOffsets, O *output) { - for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { - oOffsets[2] = oX * oStrides[2] + oOffsets[1]; - - const SignedDimSize_t iX = oX - beginEndBorders[0]; - if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) { - iOffsets[2] = iX * iStrides[2] + iOffsets[1]; - } else { - std::fill(output + oOffsets[2], - output + oOffsets[2] + oStrides[2], - borderValue); - continue; - } + for (std::size_t batch = 0; batch < iDims[0]; ++batch) { + oOffsets[0] = batch * oStrides[0]; + iOffsets[0] = batch * iStrides[0]; - for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { - oOffsets[3] = oY * oStrides[3] + oOffsets[2]; - const SignedDimSize_t iY = oY - beginEndBorders[1]; - if (iY >= 0 && iY < static_cast<SignedDimSize_t>(iDims[3])) { - iOffsets[3] = iY * iStrides[3] + iOffsets[2]; - } else { - std::fill(output + oOffsets[3], - output + oOffsets[3] + oStrides[3], - borderValue); - continue; - } + for (std::size_t ch = 0; ch < iDims[1]; ++ch) { + iOffsets[1] = ch * iStrides[1] + iOffsets[0]; + oOffsets[1] = ch * oStrides[1] + oOffsets[0]; + + for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + + const SignedDimSize_t iX = oX - beginEndBorders[0]; + if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) 
{ + iOffsets[2] = iX * iStrides[2] + iOffsets[1]; + } else { + std::fill(output + oOffsets[2], + output + oOffsets[2] + oStrides[2], + borderValue); + continue; + } - for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { - const SignedDimSize_t iZ = oZ - beginEndBorders[2]; - // if in bounds, takes corresponding value, otherwise takes - // default value - output[oOffsets[3] + oZ] = - (iZ >= 0 && iZ < static_cast<SignedDimSize_t>(iDims[4])) - ? input[iOffsets[3] + iZ] - : borderValue; + for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + const SignedDimSize_t iY = oY - beginEndBorders[1]; + if (iY >= 0 && + iY < static_cast<SignedDimSize_t>(iDims[3])) { + iOffsets[3] = iY * iStrides[3] + iOffsets[2]; + } else { + std::fill(output + oOffsets[3], + output + oOffsets[3] + oStrides[3], + borderValue); + continue; + } + + for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { + const SignedDimSize_t iZ = oZ - beginEndBorders[2]; + // if in bounds, takes corresponding value, otherwise + // takes default value + output[oOffsets[3] + oZ] = + (iZ >= 0 && + iZ < static_cast<SignedDimSize_t>(iDims[4])) + ? 
input[iOffsets[3] + iZ] + : borderValue; + } + } } } } @@ -270,39 +282,6 @@ static inline DimSize_t padEdgeComputeInputCoord(const DimSize_t beginBorder, static_cast<SignedDimSize_t>(oCoord - beginBorder)))); } -template <typename I, typename O> -static inline void -pad3DForwardEdge(const std::array<DimSize_t, 6> &beginEndBorders, - const std::array<DimSize_t, 5> &iDims, - const std::array<DimSize_t, 4> &iStrides, - std::array<DimSize_t, 4> &iOffsets, - const I *input, - const std::array<DimSize_t, 3> &oDims, - const std::array<DimSize_t, 4> &oStrides, - std::array<DimSize_t, 4> &oOffsets, - O *output) { - for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { - oOffsets[2] = oX * oStrides[2] + oOffsets[1]; - const DimSize_t iX = - padEdgeComputeInputCoord(beginEndBorders[0], iDims[2], oX); - iOffsets[2] = iX * iStrides[2] + iOffsets[1]; - - for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { - oOffsets[3] = oY * oStrides[3] + oOffsets[2]; - const DimSize_t iY = - padEdgeComputeInputCoord(beginEndBorders[1], iDims[3], oY); - iOffsets[3] = iY * iStrides[3] + iOffsets[2]; - - for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { - const DimSize_t iZ = - padEdgeComputeInputCoord(beginEndBorders[2], iDims[4], oZ); - - output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; - } - } - } -} - /** * @brief small inline fctn to generate the corresponding input coordinates of * an output coord in reflect padding along a given dimension. 
@@ -316,10 +295,8 @@ padReflectComputeInputCoord(const DimSize_t beginBorder, const DimSize_t oCoord) { SignedDimSize_t iCoord = std::abs(static_cast<SignedDimSize_t>(oCoord - beginBorder)); - - // Handle case where iCoord > iDim - // If so iCoord must be changed to (iDim - 1) - delta - // With delta = |iDim - 1 - icoord| + // Handle case where iCoord > iDim: if so, iCoord must be changed to + // (iDim - 1) - delta, with delta = |iDim - 1 - iCoord|. // // Since iCoord > iDim - 1, |(iDim - 1) - iCoord| <=> iCoord - (iDim - 1) // <=> iCoord + 1 - iDim @@ -332,40 +309,6 @@ padReflectComputeInputCoord(const DimSize_t beginBorder, return iCoord; } -template <typename I, typename O> -static inline void -pad3DForwardReflect(const std::array<DimSize_t, 6> &beginEndBorders, - const std::array<DimSize_t, 5> &iDims, - const std::array<DimSize_t, 4> &iStrides, - std::array<DimSize_t, 4> &iOffsets, - const I *input, - const std::array<DimSize_t, 3> &oDims, - const std::array<DimSize_t, 4> &oStrides, - std::array<DimSize_t, 4> &oOffsets, - O *output) { - - for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { - oOffsets[2] = oX * oStrides[2] + oOffsets[1]; - DimSize_t iX = - padReflectComputeInputCoord(beginEndBorders[0], iDims[2], oX); - iOffsets[2] = iX * iStrides[2] + iOffsets[1]; - - for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { - oOffsets[3] = oY * oStrides[3] + oOffsets[2]; - DimSize_t iY = - padReflectComputeInputCoord(beginEndBorders[1], iDims[3], oY); - iOffsets[3] = iY * iStrides[3] + iOffsets[2]; - - for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { - DimSize_t iZ = padReflectComputeInputCoord(beginEndBorders[2], - iDims[4], - oZ); - output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; - } - } - } -} - /** * @brief small inline fctn to generate the corresponding input coordinates of * an output coord in wrap padding along a given dimension. 
@@ -379,41 +322,6 @@ static inline DimSize_t padWrapComputeInputCoord(const DimSize_t beginBorder, return (iDim + oCoord - beginBorder) % iDim; } -template <typename I, typename O> -static inline void -pad3DForwardWrap(const std::array<DimSize_t, 6> &beginEndBorders, - const std::array<DimSize_t, 5> &iDims, - const std::array<DimSize_t, 4> &iStrides, - std::array<DimSize_t, 4> &iOffsets, - const I *input, - const std::array<DimSize_t, 3> &oDims, - const std::array<DimSize_t, 4> &oStrides, - std::array<DimSize_t, 4> &oOffsets, - O *output) { - for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { - oOffsets[2] = oX * oStrides[2] + oOffsets[1]; - - const DimSize_t iX = - padWrapComputeInputCoord(beginEndBorders[0], iDims[2], oX); - iOffsets[2] = iX * iStrides[2] + iOffsets[1]; - - for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { - oOffsets[3] = oY * oStrides[3] + oOffsets[2]; - - const DimSize_t iY = - padWrapComputeInputCoord(beginEndBorders[1], iDims[3], oY); - - iOffsets[3] = iY * iStrides[3] + iOffsets[2]; - - for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { - const DimSize_t iZ = - padWrapComputeInputCoord(beginEndBorders[2], iDims[4], oZ); - output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; - } - } - } -} - /** * @brief Forward kernel for 2D Padding on CPU backend. * @tparam I Input data type. 
@@ -456,6 +364,55 @@ void PadImpl3D_cpu_forward_kernel( std::array<DimSize_t, 4> oOffsets = {0, 0, 0, 0}; std::array<DimSize_t, 4> iOffsets = {0, 0, 0, 0}; + std::function<DimSize_t(const DimSize_t, const DimSize_t, const DimSize_t)> + getInputCoord; + + switch (borderType) { + case PadBorderType::Constant: { + pad3DForwardConstant(beginEndBorders, + static_cast<O>(borderValue), + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + return; + } + case PadBorderType::Zero: { + pad3DForwardConstant(beginEndBorders, + static_cast<O>(0), + iDims, + iStrides, + iOffsets, + input, + oDims, + oStrides, + oOffsets, + output); + return; + } + case PadBorderType::Edge: { + getInputCoord = padEdgeComputeInputCoord; + break; + } + case PadBorderType::Reflect: { + getInputCoord = padReflectComputeInputCoord; + break; + } + case PadBorderType::Wrap: { + getInputCoord = padWrapComputeInputCoord; + break; + } + default: { + AIDGE_THROW_OR_ABORT(std::runtime_error, + "Pad3D : unsupported padding method : {}.", + borderType); + } + } + for (std::size_t batch = 0; batch < iDims[0]; ++batch) { oOffsets[0] = batch * oStrides[0]; iOffsets[0] = batch * iStrides[0]; @@ -464,75 +421,25 @@ void PadImpl3D_cpu_forward_kernel( iOffsets[1] = ch * iStrides[1] + iOffsets[0]; oOffsets[1] = ch * oStrides[1] + oOffsets[0]; - switch (borderType) { - case PadBorderType::Constant: { - pad3DForwardConstant(beginEndBorders, - static_cast<O>(borderValue), - iDims, - iStrides, - iOffsets, - input, - oDims, - oStrides, - oOffsets, - output); - break; - } - case PadBorderType::Zero: { - pad3DForwardConstant(beginEndBorders, - static_cast<O>(0), - iDims, - iStrides, - iOffsets, - input, - oDims, - oStrides, - oOffsets, - output); - break; - } - case PadBorderType::Edge: { - pad3DForwardEdge(beginEndBorders, - iDims, - iStrides, - iOffsets, - input, - oDims, - oStrides, - oOffsets, - output); - break; - } - case PadBorderType::Reflect: { - pad3DForwardReflect(beginEndBorders, - 
iDims, - iStrides, - iOffsets, - input, - oDims, - oStrides, - oOffsets, - output); - break; - } - case PadBorderType::Wrap: { - pad3DForwardWrap(beginEndBorders, - iDims, - iStrides, - iOffsets, - input, - oDims, - oStrides, - oOffsets, - output); - break; - } - default: { - AIDGE_THROW_OR_ABORT( - std::runtime_error, - "Pad3D : unsupported padding method : {}.", - borderType); - } + for (DimSize_t oX = 0; oX < oDims[0]; ++oX) { + oOffsets[2] = oX * oStrides[2] + oOffsets[1]; + const DimSize_t iX = + getInputCoord(beginEndBorders[0], iDims[2], oX); + iOffsets[2] = iX * iStrides[2] + iOffsets[1]; + + for (DimSize_t oY = 0; oY < oDims[1]; ++oY) { + oOffsets[3] = oY * oStrides[3] + oOffsets[2]; + const DimSize_t iY = + getInputCoord(beginEndBorders[1], iDims[3], oY); + iOffsets[3] = iY * iStrides[3] + iOffsets[2]; + + for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) { + const DimSize_t iZ = + getInputCoord(beginEndBorders[2], iDims[4], oZ); + + output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ]; + } + } } } }