Skip to content
Snippets Groups Projects
Commit 102f1995 authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

add ReduceSum op

parent 60319519
No related branches found
No related tags found
2 merge requests!93Release v0.3.0,!75Learning backend cuda
......@@ -30,6 +30,7 @@
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/backend/cpu/operator/PowImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
......
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_
#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Registry key type for the ReduceSum forward kernels on the CPU backend.
// Maps a (input DataType, output DataType) pair to a kernel with signature:
//   (axes to reduce, keepDims flag, input dims, raw input ptr, raw output ptr).
// Concrete kernels are registered in ReduceSumImpl_forward_kernels.hpp.
class ReduceSumImplForward_cpu
: public Registrable<ReduceSumImplForward_cpu,
std::tuple<DataType, DataType>,
void(const std::vector<std::int32_t>&,
DimSize_t,
const std::vector<DimSize_t>&,
const void *,
void *)> {};
// Registry key type for a ReduceSum backward kernel.
// NOTE(review): no backward kernel is registered anywhere in this commit and
// the "1D" in the name looks copy-pasted from another operator's
// implementation — presumably a placeholder; confirm before relying on it.
class ReduceSumImpl1DBackward_cpu
: public Registrable<ReduceSumImpl1DBackward_cpu,
std::tuple<DataType, DataType>,
void(const std::vector<std::int32_t>&,
DimSize_t,
const std::vector<DimSize_t>&,
const void *,
void *)> {};
// CPU OperatorImpl for ReduceSum: at forward() time it looks up the kernel
// registered for the tensors' datatypes and invokes it (see ReduceSumImpl.cpp).
class ReduceSumImpl_cpu : public OperatorImpl {
public:
ReduceSumImpl_cpu(const ReduceSum_Op& op) : OperatorImpl(op, "cpu") {}
// Factory used by the Registrar below to build this backend implementation.
static std::unique_ptr<ReduceSumImpl_cpu> create(const ReduceSum_Op &op) {
return std::make_unique<ReduceSumImpl_cpu>(op);
}
public:
void forward() override;
};
// Registers this implementation as the "cpu" backend for ReduceSum_Op.
namespace {
static Registrar<ReduceSum_Op> registrarReduceSumImpl_cpu("cpu", Aidge::ReduceSumImpl_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_
#include <algorithm> // std::for_each
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <functional> //std::multiplies
#include <numeric> //std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
/**
 * @brief CPU forward kernel for the ReduceSum operator.
 *
 * Sums the input tensor over the given axes and writes the result into the
 * caller-allocated output buffer. keepDims only affects the reported output
 * dims (handled by the operator), not the element count, so it is unused here.
 *
 * @tparam I input scalar type
 * @tparam O output scalar type
 * @param axes axes to reduce; when several are given they are assumed sorted
 *             in ascending order — TODO confirm the operator guarantees this
 * @param inputDims dimensions of the input tensor
 * @param input_ raw pointer to the input buffer
 * @param output_ raw pointer to the output buffer (already sized)
 */
template <class I, class O>
void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
                    DimSize_t /*keepDims*/,
                    const std::vector<DimSize_t>& inputDims,
                    const void* input_,
                    void* output_) {

    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

    const std::size_t nb_dims = inputDims.size();
    // Seed with std::size_t{1} (not the int literal 1) so the accumulator is
    // std::size_t and cannot overflow for large tensors.
    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t{1}, std::multiplies<std::size_t>());

    // 0-d (scalar) tensor: nothing to reduce, avoid the inputDims[nb_dims-1]
    // access below.
    if (nb_dims == 0) {
        output[0] = static_cast<O>(input[0]);
        return;
    }

    if (axes.size() == 1) {
        // Single-axis fast path: one pass over the data, no temporary buffer.
        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], std::size_t{1}, std::multiplies<std::size_t>());
        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims - 1 - axes[0], std::size_t{1}, std::multiplies<std::size_t>());

        const std::size_t dim_i = inputDims[axes[0]];
        for (std::size_t pre = 0; pre < stride_pre; ++pre) {
            for (std::size_t post = 0; post < stride_post; ++post) {
                const std::size_t idx_i = pre * dim_i * stride_post + post;
                const std::size_t idx_o = pre * stride_post + post;
                O sum = 0;
                for (std::size_t i = 0; i < dim_i; ++i) {
                    sum += input[idx_i + i * stride_post];
                }
                output[idx_o] = sum;
            }
        }
    } else {
        std::size_t outputElements = totalElements;

        // stride_post[i]: elements spanned by one step along axis i.
        std::vector<std::size_t> stride_post(nb_dims);
        stride_post[nb_dims - 1] = 1;
        for (std::size_t i = nb_dims - 2; i != static_cast<std::size_t>(-1); --i) {
            stride_post[i] = stride_post[i + 1] * inputDims[i + 1];
        }
        // stride_pre[i]: number of outer slices before axis i.
        std::vector<std::size_t> stride_pre(nb_dims);
        stride_pre[0] = 1;
        for (std::size_t i = 1; i < nb_dims; ++i) {
            stride_pre[i] = stride_pre[i - 1] * inputDims[i - 1];
        }

        // Reduce one axis at a time, ping-ponging through an owned buffer.
        // std::vector replaces the original raw new[]/delete[]: exception-safe
        // and leak-free (RAII), identical access pattern.
        std::vector<I> accumulationBuffer;
        const I* inputAccumulation = input;
        for (const auto& axisInt : axes) {
            const std::size_t a = static_cast<std::size_t>(axisInt);
            outputElements /= inputDims[a];
            std::vector<I> outputAccumulation(outputElements);

            const std::size_t dim_i = inputDims[a];
            for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
                for (std::size_t post = 0; post < stride_post[a]; ++post) {
                    const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
                    const std::size_t idx_o = pre * stride_post[a] + post;
                    I sum = 0;
                    for (std::size_t i = 0; i < dim_i; ++i) {
                        sum += inputAccumulation[idx_i + i * stride_post[a]];
                    }
                    outputAccumulation[idx_o] = sum;
                }
            }

            // Axis a is gone: pre-strides of the later (ascending) axes shrink
            // by its extent; their post-strides never included axis a.
            std::for_each(stride_pre.begin() + a + 1, stride_pre.end(), [dim_i](std::size_t& val) { val /= dim_i; });

            // Keep the new buffer alive as the next iteration's input.
            accumulationBuffer = std::move(outputAccumulation);
            inputAccumulation = accumulationBuffer.data();
        }

        // Copy the final accumulation into the caller's output buffer
        // (also covers the degenerate empty-axes case, where it copies the
        // input unchanged).
        std::copy(inputAccumulation, inputAccumulation + outputElements, output);
    }
}
// Register the templated kernel for each supported (input, output) datatype
// pair. The dispatch in ReduceSumImpl_cpu::forward() looks these up by the
// tensors' runtime datatypes.
namespace {
static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>);
static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ReduceSumImpl_cpu_forward_kernel<int, int>);
static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
#include <memory>
#include <vector>
#include "aidge/utils/Types.h"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp"
void Aidge::ReduceSumImpl_cpu::forward() {
const ReduceSum_Op& op_ = dynamic_cast<const ReduceSum_Op&>(mOp);
// Find the correct kernel type
auto kernelFunc = Registrar<ReduceSumImplForward_cpu>::create({
op_.getInput(0)->dataType(),
op_.getOutput(0)->dataType()});
// Call kernel
kernelFunc(op_.axes(),
op_.keepDims(),
op_.getInput(0)->dims(),
op_.getInput(0)->getImpl()->rawPtr(),
op_.getOutput(0)->getImpl()->rawPtr());
}
......@@ -13,7 +13,7 @@
#include <memory>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ArgMax.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/backend/cpu.hpp"
......@@ -21,96 +21,173 @@
using namespace Aidge;
// Unit tests for the CPU ReduceSum forward implementation.
// NOTE(review): this region of the scrape was unified-diff residue that
// interleaved the removed ArgMax tests with the added ReduceSum tests; the
// post-commit ReduceSum test case is reconstructed below from the added lines.
TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
    SECTION("KeepDims") {
        SECTION("test 1") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            // Reducing axis 1 with keepDims=1 keeps a size-1 middle dim.
            Tensor myOutput = Tensor(Array3D<float,3,1,2> {
                {
                    {{ 25.0, 3.0 }},
                    {{ 70.0, 3.0 }},
                    {{ 115.0, 3.0 }}
                }
            });

            std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();

            REQUIRE(*(op->getOutput(0)) == myOutput);
        }
        SECTION("test 2") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> {
                {
                    {
                        { 0.0, 0.0 },
                        { 1.0, 1.0 },
                        { 2.0, 2.0 }
                    },
                    {
                        { 3.0, 3.0 },
                        { 4.0, 4.0 },
                        { 5.0, 5.0 }
                    },
                    {
                        { 6.0, 6.0 },
                        { 7.0, 7.0 },
                        { 8.0, 8.0 }
                    }
                }
            });
            // Reducing axes {1, 2} with keepDims=1 keeps two size-1 dims.
            Tensor myOutput = Tensor(Array3D<float,3,1,1> {
                {
                    {{ 6.0 }},
                    {{ 24.0 }},
                    {{ 42.0 }}
                }
            });

            std::shared_ptr<Node> myReduceSum = ReduceSum({1, 2}, 1);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            myOutput.print();
            op->getOutput(0)->print();
            REQUIRE(*(op->getOutput(0)) == myOutput);
        }
    }
    SECTION("not_KeepDims") {
        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
            {
                {
                    { 5.0, 1.0 },
                    { 20.0, 2.0 }
                },
                {
                    { 30.0, 1.0 },
                    { 40.0, 2.0 }
                },
                {
                    { 55.0, 1.0 },
                    { 60.0, 2.0 }
                }
            }
        });
        // keepDims=0 squeezes the reduced axis out of the output shape.
        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
            {
                { 25.0, 3.0 },
                { 70.0, 3.0 },
                { 115.0, 3.0 }
            }
        });

        std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 0);
        auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
        op->associateInput(0,myInput);
        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        myReduceSum->forward();
        op->getOutput(0)->print();

        REQUIRE(*(op->getOutput(0)) == *myOutput);
    }
    SECTION("all_axes") {
        SECTION("1") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
                {
                    {
                        { 5.0, 1.0 },
                        { 20.0, 2.0 }
                    },
                    {
                        { 30.0, 1.0 },
                        { 40.0, 2.0 }
                    },
                    {
                        { 55.0, 1.0 },
                        { 60.0, 2.0 }
                    }
                }
            });
            // Reducing every axis yields the grand total as a 1-element tensor.
            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
                {219.0}
            });

            std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1, 2}, 0);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();

            REQUIRE(*(op->getOutput(0)) == *myOutput);
        }
        SECTION("2") {
            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> {
               {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f},
                { 0.000766f, 0.272162f, 0.503560f, 0.044163f},
                { 0.049755f, 0.000305f, 0.143634f, 0.013253f},
                { 0.096258f, 0.311231f, 0.358143f, 0.000452f},
                { 0.468617f, 0.015693f, 0.145316f, 0.000105f}}
            });
            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
                {2.587094f}
            });

            std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1}, 0);
            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
            op->associateInput(0,myInput);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            myReduceSum->forward();
            op->getOutput(0)->print();
            // Float accumulation: compare with a tolerance, not exact equality.
            REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput));
        }
    }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment