Skip to content
Snippets Groups Projects
Commit 763e1888 authored by Grégoire Kubler's avatar Grégoire Kubler
Browse files

chore : alternative implementation to reduce boilerplate code

Sadly, this also lengthens execution time, as functions cannot be properly inlined when called through std::function.
See !160 (comment 3653885)
for more information.
parent d0193dc6
No related branches found
No related tags found
No related merge requests found
......@@ -20,8 +20,8 @@
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <fmt/base.h>
#include <functional>
#include <stdexcept>
#include <type_traits>
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/utils/Registrar.hpp"
......@@ -216,39 +216,51 @@ pad3DForwardConstant(const std::array<DimSize_t, 6> &beginEndBorders,
std::array<DimSize_t, 4> &oOffsets,
O *output) {
for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
oOffsets[2] = oX * oStrides[2] + oOffsets[1];
const SignedDimSize_t iX = oX - beginEndBorders[0];
if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) {
iOffsets[2] = iX * iStrides[2] + iOffsets[1];
} else {
std::fill(output + oOffsets[2],
output + oOffsets[2] + oStrides[2],
borderValue);
continue;
}
for (std::size_t batch = 0; batch < iDims[0]; ++batch) {
oOffsets[0] = batch * oStrides[0];
iOffsets[0] = batch * iStrides[0];
for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
oOffsets[3] = oY * oStrides[3] + oOffsets[2];
const SignedDimSize_t iY = oY - beginEndBorders[1];
if (iY >= 0 && iY < static_cast<SignedDimSize_t>(iDims[3])) {
iOffsets[3] = iY * iStrides[3] + iOffsets[2];
} else {
std::fill(output + oOffsets[3],
output + oOffsets[3] + oStrides[3],
borderValue);
continue;
}
for (std::size_t ch = 0; ch < iDims[1]; ++ch) {
iOffsets[1] = ch * iStrides[1] + iOffsets[0];
oOffsets[1] = ch * oStrides[1] + oOffsets[0];
for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
oOffsets[2] = oX * oStrides[2] + oOffsets[1];
const SignedDimSize_t iX = oX - beginEndBorders[0];
if (iX >= 0 && iX < static_cast<SignedDimSize_t>(iDims[2])) {
iOffsets[2] = iX * iStrides[2] + iOffsets[1];
} else {
std::fill(output + oOffsets[2],
output + oOffsets[2] + oStrides[2],
borderValue);
continue;
}
for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
const SignedDimSize_t iZ = oZ - beginEndBorders[2];
// if in bounds, takes corresponding value, otherwise takes
// default value
output[oOffsets[3] + oZ] =
(iZ >= 0 && iZ < static_cast<SignedDimSize_t>(iDims[4]))
? input[iOffsets[3] + iZ]
: borderValue;
for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
oOffsets[3] = oY * oStrides[3] + oOffsets[2];
const SignedDimSize_t iY = oY - beginEndBorders[1];
if (iY >= 0 &&
iY < static_cast<SignedDimSize_t>(iDims[3])) {
iOffsets[3] = iY * iStrides[3] + iOffsets[2];
} else {
std::fill(output + oOffsets[3],
output + oOffsets[3] + oStrides[3],
borderValue);
continue;
}
for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
const SignedDimSize_t iZ = oZ - beginEndBorders[2];
// if in bounds, takes corresponding value, otherwise
// takes default value
output[oOffsets[3] + oZ] =
(iZ >= 0 &&
iZ < static_cast<SignedDimSize_t>(iDims[4]))
? input[iOffsets[3] + iZ]
: borderValue;
}
}
}
}
}
......@@ -270,39 +282,6 @@ static inline DimSize_t padEdgeComputeInputCoord(const DimSize_t beginBorder,
static_cast<SignedDimSize_t>(oCoord - beginBorder))));
}
template <typename I, typename O>
static inline void
pad3DForwardEdge(const std::array<DimSize_t, 6> &beginEndBorders,
const std::array<DimSize_t, 5> &iDims,
const std::array<DimSize_t, 4> &iStrides,
std::array<DimSize_t, 4> &iOffsets,
const I *input,
const std::array<DimSize_t, 3> &oDims,
const std::array<DimSize_t, 4> &oStrides,
std::array<DimSize_t, 4> &oOffsets,
O *output) {
for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
oOffsets[2] = oX * oStrides[2] + oOffsets[1];
const DimSize_t iX =
padEdgeComputeInputCoord(beginEndBorders[0], iDims[2], oX);
iOffsets[2] = iX * iStrides[2] + iOffsets[1];
for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
oOffsets[3] = oY * oStrides[3] + oOffsets[2];
const DimSize_t iY =
padEdgeComputeInputCoord(beginEndBorders[1], iDims[3], oY);
iOffsets[3] = iY * iStrides[3] + iOffsets[2];
for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
const DimSize_t iZ =
padEdgeComputeInputCoord(beginEndBorders[2], iDims[4], oZ);
output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
}
}
}
}
/**
* @brief Small inline function that computes the input coordinate matching
* an output coordinate in reflect padding along a given dimension.
......@@ -316,10 +295,8 @@ padReflectComputeInputCoord(const DimSize_t beginBorder,
const DimSize_t oCoord) {
SignedDimSize_t iCoord =
std::abs(static_cast<SignedDimSize_t>(oCoord - beginBorder));
// Handle case where iCoord > iDim
// If so iCoord must be changed to (iDim - 1) - delta
// With delta = |iDim - 1 - icoord|
// Handle the case where iCoord > iDim: if so, iCoord must be changed to
// (iDim - 1) - delta, with delta = |iDim - 1 - iCoord|.
//
// Since iCoord > iDim - 1, |(iDim - 1) - iCoord| <=> iCoord - (iDim - 1)
// <=> iCoord + 1 - iDim
......@@ -332,40 +309,6 @@ padReflectComputeInputCoord(const DimSize_t beginBorder,
return iCoord;
}
template <typename I, typename O>
static inline void
pad3DForwardReflect(const std::array<DimSize_t, 6> &beginEndBorders,
const std::array<DimSize_t, 5> &iDims,
const std::array<DimSize_t, 4> &iStrides,
std::array<DimSize_t, 4> &iOffsets,
const I *input,
const std::array<DimSize_t, 3> &oDims,
const std::array<DimSize_t, 4> &oStrides,
std::array<DimSize_t, 4> &oOffsets,
O *output) {
for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
oOffsets[2] = oX * oStrides[2] + oOffsets[1];
DimSize_t iX =
padReflectComputeInputCoord(beginEndBorders[0], iDims[2], oX);
iOffsets[2] = iX * iStrides[2] + iOffsets[1];
for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
oOffsets[3] = oY * oStrides[3] + oOffsets[2];
DimSize_t iY =
padReflectComputeInputCoord(beginEndBorders[1], iDims[3], oY);
iOffsets[3] = iY * iStrides[3] + iOffsets[2];
for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
DimSize_t iZ = padReflectComputeInputCoord(beginEndBorders[2],
iDims[4],
oZ);
output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
}
}
}
}
/**
* @brief Small inline function that computes the input coordinate matching
* an output coordinate in wrap padding along a given dimension.
......@@ -379,41 +322,6 @@ static inline DimSize_t padWrapComputeInputCoord(const DimSize_t beginBorder,
return (iDim + oCoord - beginBorder) % iDim;
}
template <typename I, typename O>
static inline void
pad3DForwardWrap(const std::array<DimSize_t, 6> &beginEndBorders,
const std::array<DimSize_t, 5> &iDims,
const std::array<DimSize_t, 4> &iStrides,
std::array<DimSize_t, 4> &iOffsets,
const I *input,
const std::array<DimSize_t, 3> &oDims,
const std::array<DimSize_t, 4> &oStrides,
std::array<DimSize_t, 4> &oOffsets,
O *output) {
for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
oOffsets[2] = oX * oStrides[2] + oOffsets[1];
const DimSize_t iX =
padWrapComputeInputCoord(beginEndBorders[0], iDims[2], oX);
iOffsets[2] = iX * iStrides[2] + iOffsets[1];
for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
oOffsets[3] = oY * oStrides[3] + oOffsets[2];
const DimSize_t iY =
padWrapComputeInputCoord(beginEndBorders[1], iDims[3], oY);
iOffsets[3] = iY * iStrides[3] + iOffsets[2];
for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
const DimSize_t iZ =
padWrapComputeInputCoord(beginEndBorders[2], iDims[4], oZ);
output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
}
}
}
}
/**
* @brief Forward kernel for 2D Padding on CPU backend.
* @tparam I Input data type.
......@@ -456,6 +364,55 @@ void PadImpl3D_cpu_forward_kernel(
std::array<DimSize_t, 4> oOffsets = {0, 0, 0, 0};
std::array<DimSize_t, 4> iOffsets = {0, 0, 0, 0};
std::function<DimSize_t(const DimSize_t, const DimSize_t, const DimSize_t)>
getInputCoord;
switch (borderType) {
case PadBorderType::Constant: {
pad3DForwardConstant(beginEndBorders,
static_cast<O>(borderValue),
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
return;
}
case PadBorderType::Zero: {
pad3DForwardConstant(beginEndBorders,
static_cast<O>(0),
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
return;
}
case PadBorderType::Edge: {
getInputCoord = padEdgeComputeInputCoord;
break;
}
case PadBorderType::Reflect: {
getInputCoord = padReflectComputeInputCoord;
break;
}
case PadBorderType::Wrap: {
getInputCoord = padWrapComputeInputCoord;
break;
}
default: {
AIDGE_THROW_OR_ABORT(std::runtime_error,
"Pad3D : unsupported padding method : {}.",
borderType);
}
}
for (std::size_t batch = 0; batch < iDims[0]; ++batch) {
oOffsets[0] = batch * oStrides[0];
iOffsets[0] = batch * iStrides[0];
......@@ -464,75 +421,25 @@ void PadImpl3D_cpu_forward_kernel(
iOffsets[1] = ch * iStrides[1] + iOffsets[0];
oOffsets[1] = ch * oStrides[1] + oOffsets[0];
switch (borderType) {
case PadBorderType::Constant: {
pad3DForwardConstant(beginEndBorders,
static_cast<O>(borderValue),
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
break;
}
case PadBorderType::Zero: {
pad3DForwardConstant(beginEndBorders,
static_cast<O>(0),
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
break;
}
case PadBorderType::Edge: {
pad3DForwardEdge(beginEndBorders,
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
break;
}
case PadBorderType::Reflect: {
pad3DForwardReflect(beginEndBorders,
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
break;
}
case PadBorderType::Wrap: {
pad3DForwardWrap(beginEndBorders,
iDims,
iStrides,
iOffsets,
input,
oDims,
oStrides,
oOffsets,
output);
break;
}
default: {
AIDGE_THROW_OR_ABORT(
std::runtime_error,
"Pad3D : unsupported padding method : {}.",
borderType);
}
for (DimSize_t oX = 0; oX < oDims[0]; ++oX) {
oOffsets[2] = oX * oStrides[2] + oOffsets[1];
const DimSize_t iX =
getInputCoord(beginEndBorders[0], iDims[2], oX);
iOffsets[2] = iX * iStrides[2] + iOffsets[1];
for (DimSize_t oY = 0; oY < oDims[1]; ++oY) {
oOffsets[3] = oY * oStrides[3] + oOffsets[2];
const DimSize_t iY =
getInputCoord(beginEndBorders[1], iDims[3], oY);
iOffsets[3] = iY * iStrides[3] + iOffsets[2];
for (DimSize_t oZ = 0; oZ < oDims[2]; ++oZ) {
const DimSize_t iZ =
getInputCoord(beginEndBorders[2], iDims[4], oZ);
output[oOffsets[3] + oZ] = input[iOffsets[3] + iZ];
}
}
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment