From af6a86d49a65c280b9588e44698b562cc2676f8c Mon Sep 17 00:00:00 2001 From: IKucher <Inna.KUCHER@cea.fr> Date: Wed, 4 Oct 2023 09:43:31 +0000 Subject: [PATCH] max pool implem + unit_test --- include/aidge/backend/cpu.hpp | 1 + .../backend/cpu/operator/MaxPoolingImpl.hpp | 70 ++++++ .../MaxPoolingImpl_forward_kernels.hpp | 211 ++++++++++++++++++ src/operator/MaxPoolingImpl.cpp | 80 +++++++ unit_tests/operator/Test_MaxPoolingImpl.cpp | 82 +++++++ 5 files changed, 444 insertions(+) create mode 100644 include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp create mode 100644 include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp create mode 100644 src/operator/MaxPoolingImpl.cpp create mode 100644 unit_tests/operator/Test_MaxPoolingImpl.cpp diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 59dc3cec..336d549a 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -15,6 +15,7 @@ #include "aidge/backend/cpu/data/TensorImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp new file mode 100644 index 00000000..a29be1f3 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -0,0 +1,70 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ +#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ + +#include <array> +#include <memory> +#include <tuple> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/MaxPooling.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// class MaxPooling_Op; + +// compute kernel registry for forward and backward +class MaxPoolingImpl2DForward_cpu + : public Registrable<MaxPoolingImpl2DForward_cpu, + std::tuple<DataType, DataType>, + void(const MaxPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; +class MaxPoolingImpl2DBackward_cpu + : public Registrable<MaxPoolingImpl2DBackward_cpu, + std::tuple<DataType, DataType>, + void(const MaxPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + +class MaxPoolingImpl2D_cpu : public OperatorImpl { + private: + const MaxPooling_Op<2> &mOp; + std::array<NbElts_t, 1> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData; + + public: + MaxPoolingImpl2D_cpu(const MaxPooling_Op<2> &op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} + + static std::unique_ptr<MaxPoolingImpl2D_cpu> create(const MaxPooling_Op<2> &op) { + return std::make_unique<MaxPoolingImpl2D_cpu>(op); + } + + public: + NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; + + void forward(); + + void backward(); +}; + +namespace { +// add cpu backend to MaxPooling_Op<2> implementation registry +static Registrar<MaxPooling_Op<2>> registrarMaxPoolingImpl2D_cpu("cpu", Aidge::MaxPoolingImpl2D_cpu::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp new file mode 100644 index 00000000..c278e271 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp @@ -0,0 +1,211 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" +#include "aidge/utils/Types.h" +#include "aidge/data/Data.hpp" +#include <array> +#include <tuple> +#include <cmath> + +namespace Aidge { +/** + * @brief Forward kernel for 2D MaxPoolingolution on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param output_ Output Tensor. + */ +template <class I, class O> +void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Parameters ¶ms, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, ch, Xin, Yin) + // weight (outCh, ch, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < dims[1]; ++ch) { + const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const std::size_t ix = ox * std::get<0>(params)[0]; + const std::size_t iy = oy * std::get<0>(params)[1]; + + I poolValue(0.0); + + for (unsigned int channel = 0; channel < dims[1]; + ++channel){ + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) + { + const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + + if (!valid || value > poolValue) { + poolValue = value; + } + } + } + } + output[oIndexFull] = poolValue; + } + } + } + } +} + +//N2D2 version +/* +template <class T> +void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha, + const Tensor<T>& + inputs, + const Descriptor& desc, + const T* beta, + Tensor<T>& outputs, + Tensor<ArgMax>& argMax, + bool useArgMax, + const Tensor<bool>& maps) +{ + const unsigned int size = inputs.dimB() * outputs.dimZ(); + +#if defined(_OPENMP) && _OPENMP >= 200805 +#pragma omp parallel for collapse(2) if (size > 16) +#else +#pragma omp parallel for if (inputs.dimB() > 4 && size > 16) +#endif + for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) { + for (unsigned int output = 0; output < outputs.dimZ(); ++output) { + for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) { + for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) { + const unsigned int sxMin = (unsigned int)std::max( + desc.padding[0] - (int)(ox * desc.stride[0]), 0); + const unsigned int syMin = (unsigned int)std::max( + desc.padding[1] - (int)(oy * desc.stride[1]), 0); + const unsigned int sxMax = Utils::clamp + <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0], + 0, + desc.pool[0]); + const unsigned int syMax = Utils::clamp + <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1], + 0, + desc.pool[1]); + + const int ix = (int)(ox * desc.stride[0]) - desc.padding[0]; + const int iy = (int)(oy * desc.stride[1]) - desc.padding[1]; + + T poolValue(0.0); + + // For each output, compute the pool value + if (useArgMax) { + const ArgMax inputMax + = argMax(ox, oy, output, batchPos); + + if (inputMax.valid) { + poolValue = inputs(inputMax.ix, + inputMax.iy, + inputMax.channel, + batchPos); + } + } + else { + unsigned int ixMax = 0; + unsigned int iyMax = 0; + unsigned int channelMax = 0; + bool valid = false; + + for (unsigned int channel = 0; channel < inputs.dimZ(); + ++channel) + { + if (!maps.empty() && !maps(output, channel)) + continue; + + for (unsigned int sy = syMin; sy < syMax; ++sy) { + for (unsigned int sx = sxMin; sx < sxMax; ++sx) + { + const T value = inputs(ix + sx, + iy + sy, + channel, + batchPos); + + if (!valid || value > poolValue) { + poolValue = value; + valid = true; + + ixMax = ix + sx; + iyMax = iy + sy; + channelMax = channel; + } + } + } + } + + argMax(ox, oy, output, batchPos) + = ArgMax(ixMax, iyMax, channelMax, valid); + } + + outputs(ox, oy, output, batchPos) + = (*alpha) * poolValue + + (*beta) * outputs(ox, oy, output, batchPos); + } + } + } + } +} + +*/ + +namespace { +static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float32( + std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>); +static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int, int>); +static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, + Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ */ diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp new file mode 100644 index 00000000..a4aa5f09 --- /dev/null +++ b/src/operator/MaxPoolingImpl.cpp @@ -0,0 +1,80 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <numeric> +#include <thread> +#include <vector> + +#include "aidge/utils/Types.h" +#include "aidge/operator/MaxPooling.hpp" + +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" +#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp" + +Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { + assert(mOp.getInput(inputIdx) && "requires valid input"); + + // Requires the whole tensors + const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); + + return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>()); +} + +Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { + // for the direct convolution algorithm, convolutions can be in-place, if + // there is no padding! + return 0; +} + +Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const { + // Requires the whole tensors, regardless of available data on inputs + assert(outputIdx == 0 && "operator has only one output"); + (void) outputIdx; + + const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); +} + +Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { + assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); + return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; +} + +Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const { + assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size())); + return mNbProducedData[static_cast<std::size_t>(outputIdx)]; +} +void Aidge::MaxPoolingImpl2D_cpu::updateConsummerProducer(){ + // Update producer-consumer data + for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum + // amount for a forward pass + mNbProducedData[0] += getRequiredMemory(0, {}); +} +void Aidge::MaxPoolingImpl2D_cpu::forward() { + // FIXME: uncomment the following code once memory handling will work + assert(mOp.getInput(0) && "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = + Registrar<MaxPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()}); + + // Call kernel + kernelFunc(mOp.getParams(), + mOp.getInput(0)->dims<4>(), + mOp.getInput(0)->getImpl()->rawPtr(), + mOp.getOutput(0)->getImpl()->rawPtr()); + +} + +void Aidge::MaxPoolingImpl2D_cpu::backward() { printf("Not implemented yet.\n"); } diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp new file mode 100644 index 00000000..83fa7eaa --- /dev/null +++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp @@ -0,0 +1,82 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <memory> +#include <cstdlib> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/MaxPooling.hpp" + +#include "aidge/backend/cpu.hpp" + +using namespace Aidge; + + +TEST_CASE("[cpu/operator] MaxPooling(forward)") { + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW + { + { + {{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277}, + {0.7995, 0.3638, -1.4589, -1.0843, 1.0918}, + {0.7147, 0.0936, -1.2902, 1.2037, 0.4874}, + {-0.5981, 2.1184, -0.9175, 1.3859, 0.3305}, + {-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}}, + + {{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859}, + {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152}, + { 2.2329, -0.5850, 0.0700, 1.2838, -1.7363}, + { 0.2139, 0.0624, -1.0689, -0.8221, -0.8038}, + { 0.1886, -0.7840, -0.2313, 0.2651, -1.6244}} + }, + { + {{ 0.4371, 1.6417, 0.9129, 0.6325, 0.5438}, + {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011}, + {1.7653, -1.6668, -1.0814, 0.6182, 1.2071}, + {0.9541, -0.5133, 0.8664, -0.8892, 1.4585}, + {1.0220, -0.5107, 0.1829, -0.2301, -0.4268}}, + + {{ 1.0429, 0.6279, -0.2875, 0.7187, -0.1500}, + {1.6041, 2.9635, 1.4172, -0.7517, 0.5441}, + {-0.2276, 0.0857, 0.6776, -0.1389, -0.0614}, + {-0.1547, -0.3435, 0.0650, -0.5095, -1.8073}, + {1.7217, 0.3999, -0.5953, 1.0604, -0.4126}} + } + } + }); + SECTION("Stride") { + std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2}); + myMaxPool->getOperator()->setDatatype(DataType::Float32); + myMaxPool->getOperator()->setBackend("cpu"); + + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> { + { + { + {{ 0.7995, 0.6142}, + { 2.1184, 1.3859}}, + {{ -0.4554, 2.2291}, + { 2.2329, 1.2838}} + }, + { + {{1.6417, 0.9129}, + {1.7653, 0.8664}}, + {{2.9635, 1.4172}, + {0.0857, 0.6776}} + } + } + }); + myMaxPool->getOperator()->associateInput(0,myInput); + myMaxPool->getOperator()->computeOutputDims(); + myMaxPool->forward(); + myMaxPool->getOperator()->getOutput(0)->print(); + REQUIRE(*(myMaxPool->getOperator()->getOutput(0)) == *myOutput); + } +} \ No newline at end of file -- GitLab