Commit 10cfb6dd authored by Houssem ROUIS's avatar Houssem ROUIS

add ReduceMean operator

parent 0ad72e30
Merge requests: !50 version 0.2.0, !20 Vit operators
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// class ReduceMean_Op;
// compute kernel registry for forward and backward
// DIM 1
class ReduceMeanImpl1DForward_cpu
    : public Registrable<ReduceMeanImpl1DForward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl1DBackward_cpu
    : public Registrable<ReduceMeanImpl1DBackward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};

// DIM 2
class ReduceMeanImpl2DForward_cpu
    : public Registrable<ReduceMeanImpl2DForward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl2DBackward_cpu
    : public Registrable<ReduceMeanImpl2DBackward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};

// DIM 3
class ReduceMeanImpl3DForward_cpu
    : public Registrable<ReduceMeanImpl3DForward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl3DBackward_cpu
    : public Registrable<ReduceMeanImpl3DBackward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
class ReduceMeanImpl1D_cpu : public OperatorImpl {
public:
    ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op) {}

    static std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) {
        return std::make_unique<ReduceMeanImpl1D_cpu>(op);
    }

public:
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

class ReduceMeanImpl2D_cpu : public OperatorImpl {
public:
    ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op) {}

    static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) {
        return std::make_unique<ReduceMeanImpl2D_cpu>(op);
    }

public:
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};

class ReduceMeanImpl3D_cpu : public OperatorImpl {
public:
    ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op) {}

    static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) {
        return std::make_unique<ReduceMeanImpl3D_cpu>(op);
    }

public:
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    void forward() override;
};
namespace {
// add cpu backend to the ReduceMean_Op<2> and ReduceMean_Op<3> implementation registries (ReduceMean_Op<1> is not registered yet)
static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */
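
The Registrable/Registrar pair above keys each CPU kernel on an (input DataType, output DataType) tuple, so that forward() can pick the right template instantiation at runtime. The following is a minimal standalone sketch of that dispatch pattern, for illustration only: the DataType enum, the map layout and the kernel signature are simplified stand-ins, not the actual Aidge Registrar implementation.

#include <cstddef>
#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <tuple>
#include <vector>

// Simplified stand-ins for the Aidge types used above (illustration only).
enum class DataType { Float32, Float64, Int32 };
using KernelFunc = std::function<void(const std::vector<std::size_t>&, const void*, void*)>;

// A registry mapping an (input type, output type) pair to a type-erased kernel.
static std::map<std::tuple<DataType, DataType>, KernelFunc>& kernelRegistry() {
    static std::map<std::tuple<DataType, DataType>, KernelFunc> registry;
    return registry;
}

// Register a float -> float kernel at static-initialization time, mirroring the
// static Registrar<...> objects in the anonymous namespace above.
static const bool registeredFloat32 = [] {
    kernelRegistry()[std::make_tuple(DataType::Float32, DataType::Float32)] =
        [](const std::vector<std::size_t>& dims, const void* in_, void* out_) {
            const float* in = static_cast<const float*>(in_);
            float* out = static_cast<float*>(out_);
            std::size_t size = 1;
            for (std::size_t d : dims) { size *= d; }
            for (std::size_t i = 0; i < size; ++i) { out[i] = in[i]; } // placeholder body
        };
    return true;
}();

int main() {
    // Runtime lookup from the tensors' data types, as ReduceMeanImplXD_cpu::forward() does.
    auto it = kernelRegistry().find(std::make_tuple(DataType::Float32, DataType::Float32));
    if (it == kernelRegistry().end()) {
        throw std::runtime_error("no kernel registered for this type pair");
    }
    std::vector<float> in{1.f, 2.f, 3.f, 4.f}, out(4);
    it->second({2, 2}, in.data(), out.data());
    std::cout << out[0] << " " << out[3] << std::endl; // prints: 1 4
    return 0;
}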
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include <array>
#include <cstddef>
#include <algorithm>
#include "aidge/data/Data.hpp"
namespace Aidge {
template <class I, class O, DimSize_t DIM>
void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs& attrs,
                                       const std::vector<DimSize_t>& inputDims,
                                       const void* input_,
                                       void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

    const DimSize_t keepDims = std::get<1>(attrs);

    // Calculate the total number of elements in the input array
    std::size_t totalElements = 1;
    for (std::size_t dimSize : inputDims) {
        totalElements *= dimSize;
    }

    // Temporary arrays storing the intermediate input/output of each per-axis reduction
    std::vector<I> tempInArray(input, input + totalElements);
    std::vector<I> tempOutArray(input, input + totalElements);
    std::vector<std::size_t> currentDims = inputDims;
    std::size_t addedElems = 0;

    // Reduce the requested axes one at a time
    for (std::size_t i = 0; i < DIM; ++i) {
        addedElems = 0;
        I* tempOutArrayPtr = tempOutArray.data();

        std::size_t axis = std::get<0>(attrs)[i];
        // If previously reduced axes were erased (keepDims == false), the remaining
        // axis indices shift down by one for each smaller axis already removed.
        if (!keepDims) {
            for (std::size_t p = 0; p < i; ++p) {
                if (std::get<0>(attrs)[p] < std::get<0>(attrs)[i]) {
                    --axis;
                }
            }
        }

        std::size_t nbElemAfterAxis = 1;
        std::size_t nbElemBeforeAxis = 1;
        for (std::size_t d = 0; d < currentDims.size(); ++d) {
            nbElemAfterAxis *= (d > axis) ? currentDims[d] : 1;
            nbElemBeforeAxis *= (d < axis) ? currentDims[d] : 1;
        }

        for (std::size_t j = 0; j < nbElemBeforeAxis; ++j) {
            for (std::size_t k = 0; k < nbElemAfterAxis; ++k) {
                I mean = 0;
                for (std::size_t l = 0; l < currentDims[axis]; ++l) {
                    // In row-major layout the stride along the reduced axis is nbElemAfterAxis
                    const std::size_t idx = j * (nbElemAfterAxis * currentDims[axis]) + l * nbElemAfterAxis + k;
                    mean += tempInArray[idx];
                }
                tempOutArrayPtr[addedElems] = mean / static_cast<I>(currentDims[axis]);
                addedElems++;
            }
        }

        // The output of this reduction becomes the input of the next one
        tempInArray.assign(tempOutArray.begin(), tempOutArray.begin() + addedElems);
        if (keepDims)
            currentDims[axis] = 1;
        else
            currentDims.erase(currentDims.begin() + axis);
    }
    std::copy_n(tempInArray.data(), addedElems, output);
}
namespace {
// DIM = 1
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float, 1>);
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int, 1>);
static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double, 1>);
// DIM = 2
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float, 2>);
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int, 2>);
static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double, 2>);
// DIM = 3
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32(
        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float, 3>);
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32(
        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int, 3>);
static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64(
        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double, 3>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ */
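
To make the index arithmetic in the kernel above concrete, the following standalone snippet reduces axis 1 of the 3x2x2 tensor used in the unit test further down, using the same before-axis/after-axis decomposition. It does not depend on Aidge and simply prints the per-slice means (12.5 1.5 35 1.5 57.5 1.5).

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // 3x2x2 input, row-major, same values as the unit test below.
    const std::vector<float> input{ 5.0f, 1.0f, 20.0f, 2.0f,
                                   30.0f, 1.0f, 40.0f, 2.0f,
                                   55.0f, 1.0f, 60.0f, 2.0f};
    const std::vector<std::size_t> dims{3, 2, 2};
    const std::size_t axis = 1;

    // Row-major decomposition around the reduced axis.
    std::size_t before = 1, after = 1;
    for (std::size_t d = 0; d < dims.size(); ++d) {
        before *= (d < axis) ? dims[d] : 1;
        after  *= (d > axis) ? dims[d] : 1;
    }

    std::vector<float> output;
    for (std::size_t j = 0; j < before; ++j) {
        for (std::size_t k = 0; k < after; ++k) {
            float sum = 0.0f;
            for (std::size_t l = 0; l < dims[axis]; ++l) {
                // Stride along the reduced axis is `after` elements in row-major layout.
                sum += input[j * dims[axis] * after + l * after + k];
            }
            output.push_back(sum / static_cast<float>(dims[axis]));
        }
    }

    for (float v : output) { std::cout << v << " "; } // 12.5 1.5 35 1.5 57.5 1.5
    std::cout << std::endl;
    return 0;
}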
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <vector>
#include "aidge/utils/Types.h"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp"
Aidge::NbElts_t Aidge::ReduceMeanImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
    // this implementation can be in-place
    return 0;
}

Aidge::NbElts_t Aidge::ReduceMeanImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
    // this implementation can be in-place
    return 0;
}

Aidge::NbElts_t Aidge::ReduceMeanImpl3D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
    // this implementation can be in-place
    return 0;
}
void Aidge::ReduceMeanImpl1D_cpu::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");

    // Find the correct kernel type
    auto kernelFunc = Registrar<ReduceMeanImpl1DForward_cpu>::create({
        mOp.getInput(0)->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel
    kernelFunc(dynamic_cast<const ReduceMean_Op<1>&>(mOp).getStaticAttributes(),
               mOp.getInput(0)->dims(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());
}

void Aidge::ReduceMeanImpl2D_cpu::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");

    // Find the correct kernel type
    auto kernelFunc = Registrar<ReduceMeanImpl2DForward_cpu>::create({
        mOp.getInput(0)->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel
    kernelFunc(dynamic_cast<const ReduceMean_Op<2>&>(mOp).getStaticAttributes(),
               mOp.getInput(0)->dims(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());
}

void Aidge::ReduceMeanImpl3D_cpu::forward() {
    // FIXME: uncomment the following code once memory handling will work
    assert(mOp.getInput(0) && "missing input #0");

    // Find the correct kernel type
    auto kernelFunc = Registrar<ReduceMeanImpl3DForward_cpu>::create({
        mOp.getInput(0)->dataType(),
        mOp.getOutput(0)->dataType()});

    // Call kernel
    kernelFunc(dynamic_cast<const ReduceMean_Op<3>&>(mOp).getStaticAttributes(),
               mOp.getInput(0)->dims(),
               mOp.getInput(0)->getImpl()->rawPtr(),
               mOp.getOutput(0)->getImpl()->rawPtr());
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>

#include <cmath>
#include <memory>

#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/backend/cpu.hpp"
using namespace Aidge;
TEST_CASE("[cpu/operator] ReduceMean(forward)") {
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
{
{
{ 5.0, 1.0 },
{ 20.0, 2.0 }
},
{
{ 30.0, 1.0 },
{ 40.0, 2.0 }
},
{
{ 55.0, 1.0 },
{ 60.0, 2.0 }
}
}
});
std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array3D<float,3,1,2> {
{
{{ 12.5, 1.5 }},
{{ 35.0, 1.5 }},
{{ 57.5, 1.5 }}
}
});
//TODO fix case of DIM=1
std::shared_ptr<Node> myReduceMean = ReduceMean({1,1});
myReduceMean->getOperator()->setDatatype(DataType::Float32);
myReduceMean->getOperator()->setBackend("cpu");
myReduceMean->getOperator()->associateInput(0,myInput);
myReduceMean->getOperator()->computeOutputDims();
myReduceMean->forward();
myReduceMean->getOperator()->getOutput(0)->print();
float* resPtr = static_cast<float*>(myReduceMean->getOperator()->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(myOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< myOutput->size(); ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
}
}
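
For reference, the expected tensor for a reduction over axis 0 of the same input, a case not covered by the committed test, can be derived by hand in a few lines. This is a standalone sketch independent of the Aidge API.

#include <iostream>

int main() {
    // Same 3x2x2 input as the test above; averaging the three 2x2 slices
    // element-wise (i.e. reducing axis 0) yields a 1x2x2 result.
    const float input[3][2][2] = {{{ 5.0f, 1.0f}, {20.0f, 2.0f}},
                                  {{30.0f, 1.0f}, {40.0f, 2.0f}},
                                  {{55.0f, 1.0f}, {60.0f, 2.0f}}};
    for (int r = 0; r < 2; ++r) {
        for (int c = 0; c < 2; ++c) {
            float sum = 0.0f;
            for (int s = 0; s < 3; ++s) { sum += input[s][r][c]; }
            std::cout << sum / 3.0f << " "; // prints: 30 1 40 2
        }
    }
    std::cout << std::endl;
    return 0;
}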