diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
index adea96ca43a1ad9d2a49777426913ca4676e4f32..7c76657f7d100255f49ab1e675672407c5fbbaf8 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
@@ -28,8 +28,10 @@ namespace Aidge {
 using AvgPooling2D_Op = AvgPooling_Op<2>;
 using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>,
     void(const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 4>&,
+        bool,
         const void *,
         void *)>;
 
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
index f6da9dcb026101b93de862499d42ae8734532d52..68dbfbe7302c392d87d9c895612c7ace9292ef57 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
@@ -35,66 +35,59 @@ namespace Aidge {
 template <class I, class O>
 void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                                          const std::array<DimSize_t, 2>& kernelDims,
+                                         const std::array<DimSize_t, 2>& dilations,
                                          const std::array<DimSize_t, 4> &dims,
+                                         bool ceilMode,
                                          const void *input_,
                                          void *output_) {
-    // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
+    // Output spatial size: the effective (dilated) kernel extent is
+    // (k - 1) * dilation + 1; ceilMode rounds the stride division up.
+    auto compute_output_size = [&](DimSize_t inputDim, DimSize_t kernelDim, DimSize_t stride, DimSize_t dilation) {
+        DimSize_t effectiveKernelDim = (kernelDim - 1) * dilation + 1;
+        float result = static_cast<float>(inputDim - effectiveKernelDim + stride) / static_cast<float>(stride);
+        return ceilMode ? static_cast<DimSize_t>(std::ceil(result))
+                        : static_cast<DimSize_t>(std::floor(result));
+    };
-    // output H size
-    const std::size_t oxSize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
-                            static_cast<float>(strideDims[0])));
-    // output W size
-    const std::size_t oySize =
-        static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
-                            static_cast<float>(strideDims[1])));
+    const std::size_t oxSize = compute_output_size(dims[2], kernelDims[0], strideDims[0], dilations[0]);
+    const std::size_t oySize = compute_output_size(dims[3], kernelDims[1], strideDims[1], dilations[1]);
 
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input (batch, ch, Xin, Yin)
-    // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
+
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
+            const std::size_t oIndex = (ch + batch * dims[1]) * oxSize * oySize;
+            const std::size_t iIndex = (ch + batch * dims[1]) * dims[2] * dims[3];
+            std::fill(output + oIndex, output + (oIndex + oxSize * oySize), 0);
+
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
+                // H window: the first sampled row is ox * stride; rows are taken
+                // every dilations[0] steps, clipped at the border (no padding).
+                const std::size_t startx = ox * strideDims[0];
+                const std::size_t sxMax = std::min(dims[2], startx + (kernelDims[0] - 1) * dilations[0] + 1);
+
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
-                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const std::size_t ix = ox * strideDims[0];
-                    const std::size_t iy = oy * strideDims[1];
+                    // W window, same construction as the H window.
+                    const std::size_t starty = oy * strideDims[1];
+                    const std::size_t syMax = std::min(dims[3], starty + (kernelDims[1] - 1) * dilations[1] + 1);
 
-                    if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] += static_cast<O>(
-                                               input[iIndex + (ix+0)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+0)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+0)*dims[3] + (iy+2)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+2)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9);
-                    } else {
-                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                            for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
-                            }
+                    const std::size_t oIndexFull = oIndex + ox * oySize + oy;
+                    O sum = static_cast<O>(0);
+                    std::size_t count = 0;
+
+                    // Average over the in-bounds samples of the dilated window.
+                    for (std::size_t sx = startx; sx < sxMax; sx += dilations[0]) {
+                        for (std::size_t sy = starty; sy < syMax; sy += dilations[1]) {
+                            sum += static_cast<O>(input[iIndex + sx * dims[3] + sy]);
+                            ++count;
                         }
-                        // padding not used
-                        output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin);
                     }
+
+                    // With ceilMode the last window can fall fully outside the
+                    // input; guard against dividing by zero in that case.
+                    output[oIndexFull] = (count > 0) ? sum / static_cast<O>(count)
+                                                     : static_cast<O>(0);
                 }
             }
         }
     }
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 01a5e8cf1772161f5cf98d3a8bd52f43ac7a1d0d..eb5ef87bd16620cdef33330bd7b39ece1783ecfc 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -32,7 +32,9 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() {
     // Call kernel
     impl.forward(op_.strideDims(),
                  op_.kernelDims(),
+                 op_.dilations(),
                  op_.getInput(0)->template dims<4>(),
+                 op_.ceilMode(),
                  getCPUPtr(op_.getInput(0)),
                  getCPUPtr(op_.getOutput(0)));
 }
diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp
index 978a89e5a6fa2cfa7079fc4d6ccd709994eb537c..148298d5f44b9f7744ab18bcfc5ce675f77a784c 100644
--- a/unit_tests/operator/Test_AndImpl.cpp
+++ b/unit_tests/operator/Test_AndImpl.cpp
@@ -160,28 +160,6 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
     }
 
     SECTION("Broadcasting") {
-<<<<<<< HEAD
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-        {                                       //
-            {                                   //
-                {{10, 20},{22, 23},{20, 20}},   //
-                {{10, 15},{10, 29},{20, 20}},   //
-                {{26, 25},{33, 20},{10, 20}}    //
-            }                                   //
-        }                                       //
-        });                                     //
-
-        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}});
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-        {                                       //
-            {                                   //
-                {{ 1, 1},{ 0, 0},{ 0, 1}},      //
-                {{ 1, 0},{ 1, 0},{ 0, 1}},      //
-                {{ 0, 0},{ 0, 1},{ 1, 1}}       //
-            }                                   //
-        }                                       //
-        });                                     //
-=======
         std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float, 1, 2, 2, 2>{
             {
                 {{{1, 0}, {1, 0}},
@@ -193,7 +171,6 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
                 {{{1, 0}, {1, 0}},
                  {{1, 0}, {0, 0}}}}
         });
->>>>>>> fix and kernel and unit tests
 
         std::shared_ptr<Node> myAnd = And();
         auto op = std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());