Commit 8c5764b4 authored by Cyril Moineau's avatar Cyril Moineau
Browse files

Merge branch 'maxPoolImplem' into 'master'

Implementation of Max Pool in cpp

See merge request eclipse/aidge/aidge_backend_cpu!12
parents 039f265a 6308e639
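
For context, here is a minimal usage sketch of the new operator, distilled from the unit test added at the end of this MR (the node name and the myInput tensor are illustrative):

    #include "aidge/data/Tensor.hpp"
    #include "aidge/operator/MaxPooling.hpp"
    #include "aidge/backend/cpu.hpp"

    // 2x2 kernel, stride {2,2}, running on the new cpu backend implementation
    std::shared_ptr<Aidge::Node> myMaxPool = Aidge::MaxPooling({2,2}, "mymaxpool", {2,2});
    myMaxPool->getOperator()->setDatatype(Aidge::DataType::Float32);
    myMaxPool->getOperator()->setBackend("cpu");
    myMaxPool->getOperator()->associateInput(0, myInput); // myInput: an NCHW float Tensor
    myMaxPool->getOperator()->computeOutputDims();
    myMaxPool->forward();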
@@ -15,6 +15,7 @@
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MAXPOOLINGIMPL_H_
#define AIDGE_CPU_OPERATOR_MAXPOOLINGIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// class MaxPooling_Op;
// compute kernel registry for forward and backward
class MaxPoolingImpl2DForward_cpu
: public Registrable<MaxPoolingImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
class MaxPoolingImpl2DBackward_cpu
: public Registrable<MaxPoolingImpl2DBackward_cpu,
std::tuple<DataType, DataType>,
void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
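// a kernel is registered under an (input DataType, output DataType) key;
// forward() looks this pair up at runtime to pick the matching template instantiation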
class MaxPoolingImpl2D_cpu : public OperatorImpl {
private:
const MaxPooling_Op<2> &mOp;
std::array<NbElts_t, 1> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
MaxPoolingImpl2D_cpu(const MaxPooling_Op<2> &op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
static std::unique_ptr<MaxPoolingImpl2D_cpu> create(const MaxPooling_Op<2> &op) {
return std::make_unique<MaxPoolingImpl2D_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void backward();
};
namespace {
// add cpu backend to MaxPooling_Op<2> implementation registry
static Registrar<MaxPooling_Op<2>> registrarMaxPoolingImpl2D_cpu("cpu", Aidge::MaxPoolingImpl2D_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MAXPOOLINGIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MAXPOOLINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_MAXPOOLINGIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/data/Data.hpp"
#include <array>
#include <tuple>
#include <cmath>
namespace Aidge {
/**
 * @brief Forward kernel for 2D MaxPooling on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param attrs tuple of Attributes from the Operator
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
std::array<DimSize_t, 2> strideDims = std::get<0>(attrs);
std::array<DimSize_t, 2> kernelDims = std::get<1>(attrs);
std::array<DimSize_t, 4> paddingDims = std::get<2>(attrs);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + paddingDims[0] + paddingDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + paddingDims[1] + paddingDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
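    // e.g. with the 5x5 input, 2x2 kernel, stride {2,2} and zero padding used in
    // the unit test below: floor((5 + 0 + 0 - 2 + 2) / 2) = 2, hence a 2x2 output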
    // layout: output (batch, ch, Xout, Yout), input (batch, ch, Xin, Yin)
    // note: pooling has no weights; a dilation parameter is not taken into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
            for (std::size_t ox = 0; ox < oxSize; ++ox) {
                const signedsize difx = static_cast<signedsize>(paddingDims[0]) - static_cast<signedsize>(ox * strideDims[0]);
                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
                for (std::size_t oy = 0; oy < oySize; ++oy) {
                    const signedsize dify = static_cast<signedsize>(paddingDims[1]) - static_cast<signedsize>(oy * strideDims[1]);
                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
                    // top-left corner of the pooling window in input coordinates;
                    // may be negative with padding, but sxMin/syMin keep accesses in range
                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]) - static_cast<signedsize>(paddingDims[0]);
                    const signedsize iy = static_cast<signedsize>(oy * strideDims[1]) - static_cast<signedsize>(paddingDims[1]);
                    I poolValue(0.0);
                    bool valid = false;
                    // channels are already handled by the enclosing ch loop through
                    // iIndex/oIndex, so only the spatial window is scanned here
                    for (std::size_t sy = syMin; sy < syMax; ++sy) {
                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                            const I value = input[iIndex
                                                  + static_cast<std::size_t>(ix + static_cast<signedsize>(sx))*dims[3]
                                                  + static_cast<std::size_t>(iy + static_cast<signedsize>(sy))];
                            if (!valid || value > poolValue) {
                                poolValue = value;
                                valid = true;
                            }
                        }
                    }
                    output[oIndexFull] = poolValue;
                }
            }
        }
    }
}
// N2D2 version
/*
template <class T>
void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
                                              const Tensor<T>& inputs,
                                              const Descriptor& desc,
                                              const T* beta,
                                              Tensor<T>& outputs,
                                              Tensor<ArgMax>& argMax,
                                              bool useArgMax,
                                              const Tensor<bool>& maps)
{
const unsigned int size = inputs.dimB() * outputs.dimZ();
#if defined(_OPENMP) && _OPENMP >= 200805
#pragma omp parallel for collapse(2) if (size > 16)
#else
#pragma omp parallel for if (inputs.dimB() > 4 && size > 16)
#endif
for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) {
for (unsigned int output = 0; output < outputs.dimZ(); ++output) {
for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) {
for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) {
const unsigned int sxMin = (unsigned int)std::max(
desc.padding[0] - (int)(ox * desc.stride[0]), 0);
const unsigned int syMin = (unsigned int)std::max(
desc.padding[1] - (int)(oy * desc.stride[1]), 0);
const unsigned int sxMax = Utils::clamp
<int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
0,
desc.pool[0]);
const unsigned int syMax = Utils::clamp
<int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
0,
desc.pool[1]);
const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
T poolValue(0.0);
// For each output, compute the pool value
if (useArgMax) {
const ArgMax inputMax
= argMax(ox, oy, output, batchPos);
if (inputMax.valid) {
poolValue = inputs(inputMax.ix,
inputMax.iy,
inputMax.channel,
batchPos);
}
}
else {
unsigned int ixMax = 0;
unsigned int iyMax = 0;
unsigned int channelMax = 0;
bool valid = false;
for (unsigned int channel = 0; channel < inputs.dimZ();
++channel)
{
if (!maps.empty() && !maps(output, channel))
continue;
for (unsigned int sy = syMin; sy < syMax; ++sy) {
for (unsigned int sx = sxMin; sx < sxMax; ++sx)
{
const T value = inputs(ix + sx,
iy + sy,
channel,
batchPos);
if (!valid || value > poolValue) {
poolValue = value;
valid = true;
ixMax = ix + sx;
iyMax = iy + sy;
channelMax = channel;
}
}
}
}
argMax(ox, oy, output, batchPos)
= ArgMax(ixMax, iyMax, channelMax, valid);
}
outputs(ox, oy, output, batchPos)
= (*alpha) * poolValue
+ (*beta) * outputs(ox, oy, output, batchPos);
}
}
}
}
}
*/
namespace {
static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float32(
std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}),
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>);
static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int, int>);
static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MAXPOOLINGIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <cstdio>
#include <numeric>
#include <thread>
#include <vector>
#include "aidge/utils/Types.h"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp"
Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
assert(mOp.getInput(inputIdx) && "requires valid input");
// Requires the whole tensors
const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
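    // total element count of the input, e.g. 2*2*5*5 = 100 for the NCHW test input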
return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
    // max pooling can be computed in-place if there is no padding!
return 0;
}
Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const {
// Requires the whole tensors, regardless of available data on inputs
assert(outputIdx == 0 && "operator has only one output");
(void) outputIdx;
const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
}
Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
}
Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
return mNbProducedData[static_cast<std::size_t>(outputIdx)];
}
void Aidge::MaxPoolingImpl2D_cpu::updateConsummerProducer(){
// Update producer-consumer data
for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
        // each input is consumed by the minimum amount needed for a forward pass
        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));
mNbProducedData[0] += getRequiredMemory(0, {});
}
void Aidge::MaxPoolingImpl2D_cpu::forward() {
    // FIXME: uncomment the following code once memory handling works
assert(mOp.getInput(0) && "missing input #0");
// Find the correct kernel type
auto kernelFunc =
Registrar<MaxPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
// Call kernel
kernelFunc(mOp.getStaticAttributes(),
mOp.getInput(0)->dims<4>(),
mOp.getInput(0)->getImpl()->rawPtr(),
mOp.getOutput(0)->getImpl()->rawPtr());
}
void Aidge::MaxPoolingImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <memory>
#include <cstdlib>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/backend/cpu.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] MaxPooling(forward)") {
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
{
{
{{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277},
{0.7995, 0.3638, -1.4589, -1.0843, 1.0918},
{0.7147, 0.0936, -1.2902, 1.2037, 0.4874},
{-0.5981, 2.1184, -0.9175, 1.3859, 0.3305},
{-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}},
{{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859},
{-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
{ 2.2329, -0.5850, 0.0700, 1.2838, -1.7363},
{ 0.2139, 0.0624, -1.0689, -0.8221, -0.8038},
{ 0.1886, -0.7840, -0.2313, 0.2651, -1.6244}}
},
{
{{ 0.4371, 1.6417, 0.9129, 0.6325, 0.5438},
{-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
{1.7653, -1.6668, -1.0814, 0.6182, 1.2071},
{0.9541, -0.5133, 0.8664, -0.8892, 1.4585},
{1.0220, -0.5107, 0.1829, -0.2301, -0.4268}},
{{ 1.0429, 0.6279, -0.2875, 0.7187, -0.1500},
{1.6041, 2.9635, 1.4172, -0.7517, 0.5441},
{-0.2276, 0.0857, 0.6776, -0.1389, -0.0614},
{-0.1547, -0.3435, 0.0650, -0.5095, -1.8073},
{1.7217, 0.3999, -0.5953, 1.0604, -0.4126}}
}
}
});
SECTION("Stride") {
        std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mymaxpool", {2,2});
myMaxPool->getOperator()->setDatatype(DataType::Float32);
myMaxPool->getOperator()->setBackend("cpu");
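        // expected output: the max over each non-overlapping 2x2 window, e.g. the first
        // window of the first channel gives max(-0.3848, 0.2166, 0.7995, 0.3638) = 0.7995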
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
{
{
{{ 0.7995, 0.6142},
{ 2.1184, 1.3859}},
{{ -0.4554, 2.2291},
{ 2.2329, 1.2838}}
},
{
{{1.6417, 0.9129},
{1.7653, 0.8664}},
{{2.9635, 1.4172},
{0.0857, 0.6776}}
}
}
});
myMaxPool->getOperator()->associateInput(0,myInput);
myMaxPool->getOperator()->computeOutputDims();
myMaxPool->forward();
myMaxPool->getOperator()->getOutput(0)->print();
REQUIRE(*(myMaxPool->getOperator()->getOutput(0)) == *myOutput);
}
}
\ No newline at end of file