Commit f1813259 authored by Adam Maroni

Refactoring of MaxPoolingImpl_kernels.hpp

parent bd42c132
2 merge requests: !166 Update 0.5.0 -> 0.6.0, !153 MaxPooling2D backward implementation for CPU backend
@@ -14,6 +14,7 @@
#include <array>
#include <cmath>
#include <cstdint>
#include <tuple>
@@ -34,75 +35,101 @@ namespace Aidge {
 * @param output_ Output Tensor.
 */
template <class I, class O>
void MaxPoolingImpl2D_cpu_forward_kernel(
    const std::array<DimSize_t, 2>& strideDims,
    const std::array<DimSize_t, 2>& kernelDims,
    const std::array<DimSize_t, 2>& dilations,
    const bool ceilMode,
    const std::array<DimSize_t, 4> &dims,
    const void *input_,
    void *output_)
{
    const I *input = static_cast<const I *>(input_);
    O *output = static_cast<O *>(output_);

    // output H size
    auto hOut = static_cast<float>(
        dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]
    ) / static_cast<float>(strideDims[0]);
    const std::size_t outXSize = ceilMode
        ? static_cast<std::size_t>(std::ceil(hOut))
        : static_cast<std::size_t>(std::floor(hOut));

    // output W size
    auto wOut = static_cast<float>(
        dims[3] - (kernelDims[1] - 1) * dilations[1] - 1 + strideDims[1]
    ) / static_cast<float>(strideDims[1]);
    const std::size_t outYSize = ceilMode
        ? static_cast<std::size_t>(std::ceil(wOut))
        : static_cast<std::size_t>(std::floor(wOut));

    using signedsize = std::make_signed<std::size_t>::type;

    for (std::size_t batch = 0; batch < dims[0]; ++batch){
        for (std::size_t channel = 0; channel < dims[1]; ++channel){
            auto batchChannelIndex = (channel + batch * dims[1]);
            const std::size_t outputBaseIndex = batchChannelIndex * outXSize * outYSize;
            const std::size_t inputBaseIndex = batchChannelIndex * dims[2] * dims[3];
            for (std::size_t outX = 0; outX < outXSize; ++outX) {
                const signedsize negStrideX = static_cast<signedsize>(
                    -outX * strideDims[0]
                );
                const std::size_t kernelXMin = static_cast<std::size_t>(
                    std::max(negStrideX, signedsize(0))
                );
                /* Compute kernelXMax */
                std::size_t kernelXMax = dims[2] + negStrideX;
                if ((static_cast<signedsize>(dims[2]) + negStrideX) < 0){
                    kernelXMax = 0;
                }
                else if (kernelXMax > kernelDims[0]){
                    kernelXMax = kernelDims[0];
                }
                for (std::size_t outY = 0; outY < outYSize; ++outY) {
                    const signedsize negStrideY = static_cast<signedsize>(-outY * strideDims[1]);
                    const std::size_t kernelYMin = static_cast<std::size_t>(
                        std::max(negStrideY, signedsize(0))
                    );
                    /* Compute kernelYMax */
                    std::size_t kernelYMax = dims[3] + negStrideY;
                    const std::size_t outputIndex = outputBaseIndex + outX * outYSize + outY;
                    const std::size_t strideXoffset = outX * strideDims[0];
                    const std::size_t strideYoffset = outY * strideDims[1];
                    I poolValue(0.0);
                    bool valid = false;
                    if (static_cast<signedsize>(dims[3]) + negStrideY < 0){
                        kernelYMax = 0;
                    }
                    else if (kernelYMax > kernelDims[1]){
                        kernelYMax = kernelDims[1];
                    }
                    for (unsigned int kY = kernelYMin; kY < kernelYMax; ++kY){
                        for (unsigned int kX = kernelXMin; kX < kernelXMax; ++kX){
                            // Apply dilation factor to kernel indices
                            const std::size_t dilatedkernelX = kX * dilations[0];
                            const std::size_t dilatedkernelY = kY * dilations[1];
                            // Ensure indices are within bounds
                            auto inputXPostDilation = strideXoffset + dilatedkernelX;
                            auto inputYPostDilation = strideYoffset + dilatedkernelY;
                            if (inputXPostDilation < dims[2] && inputYPostDilation < dims[3]){
                                const I inputValue = input[
                                    inputBaseIndex + inputXPostDilation * dims[3]
                                    + inputYPostDilation
                                ];
                                if (!valid || inputValue > poolValue) {
                                    poolValue = inputValue;
                                    valid = true;
                                }
                            }
                        }
                    }
                    output[outputIndex] = poolValue;
                }
            }
        }
    }
}
// Kernels registration to implementation entry point
REGISTRAR(MaxPoolingImpl2D_cpu,
...
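For reference, a minimal standalone sketch (not part of this commit) of how the refactored kernel could be exercised. It assumes the header can be included on its own under the path shown, that the kernel lives in namespace Aidge as the hunk header indicates, and uses a hypothetical 1x1x4x4 NCHW input; with stride 2, kernel 2, dilation 1 and ceilMode = false, the output-size formula gives floor((4 - (2-1)*1 - 1 + 2) / 2) = 2 in each spatial dimension.

// Usage sketch under the assumptions above; the include path is illustrative.
#include <array>
#include <cstddef>
#include <iostream>

#include "aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp" // assumed path

int main() {
    // One batch, one channel, 4x4 input (NCHW layout).
    const std::array<Aidge::DimSize_t, 4> dims{1, 1, 4, 4};
    const float input[16] = { 1,  2,  3,  4,
                              5,  6,  7,  8,
                              9, 10, 11, 12,
                             13, 14, 15, 16};
    float output[4] = {}; // 2x2 result for stride 2, kernel 2, dilation 1

    Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>(
        {2, 2},   // strideDims
        {2, 2},   // kernelDims
        {1, 1},   // dilations
        false,    // ceilMode
        dims,
        static_cast<const void*>(input),
        static_cast<void*>(output));

    // Each output element is the max of one non-overlapping 2x2 block:
    // expected 6 8 / 14 16.
    for (std::size_t i = 0; i < 4; ++i) {
        std::cout << output[i] << (i % 2 == 1 ? '\n' : ' ');
    }
    return 0;
}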