From 701120eae040f240b29362209e045385833c82b6 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Sun, 24 Mar 2024 13:57:59 +0000
Subject: [PATCH] Change ReduceMean attribute 'axes' from static array to
 dynamic vector

Change the type of the 'axes' attribute to allow runtime deduction of the 'axes' dimensions in the ReduceMean operator used by the MSE loss function.
---
 .../backend/cpu/operator/ReduceMeanImpl.hpp   | 138 ++++++++++--------
 .../ReduceMeanImpl_forward_kernels.hpp        |  76 +++++-----
 src/operator/ReduceMeanImpl.cpp               | 114 ++++++++-------
 3 files changed, 189 insertions(+), 139 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
index 16c5c56d..e2b72883 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
@@ -25,55 +25,22 @@
 namespace Aidge {
 // class ReduceMean_Op;
 
-// compute kernel registry for forward and backward
-// DIM 1
-class ReduceMeanImpl1DForward_cpu
-    : public Registrable<ReduceMeanImpl1DForward_cpu,
+// Compute kernel registry for forward and backward, shared by every number of reduced axes
+class ReduceMeanImplForward_cpu
+    : public Registrable<ReduceMeanImplForward_cpu,
                          std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
+                         void(const ReduceMean_Op::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
 class ReduceMeanImpl1DBackward_cpu
     : public Registrable<ReduceMeanImpl1DBackward_cpu,
                          std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)> {};
+                         void(const ReduceMean_Op::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)> {};
 
-// DIM 2
-class ReduceMeanImpl2DForward_cpu
-    : public Registrable<ReduceMeanImpl2DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
-class ReduceMeanImpl2DBackward_cpu
-    : public Registrable<ReduceMeanImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)> {};
-// DIM 3
-class ReduceMeanImpl3DForward_cpu
-    : public Registrable<ReduceMeanImpl3DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
-class ReduceMeanImpl3DBackward_cpu
-    : public Registrable<ReduceMeanImpl3DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
-
-class ReduceMeanImpl1D_cpu : public OperatorImpl {
-   public:
-    ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) {
-        return std::make_unique<ReduceMeanImpl1D_cpu>(op);
-    }
-
-   public:
-    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    void forward() override;
-};
-
-class ReduceMeanImpl2D_cpu : public OperatorImpl {
+class ReduceMeanImpl_cpu : public OperatorImpl {
    public:
-    ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op, "cpu") {}
+    ReduceMeanImpl_cpu(const ReduceMean_Op& op) : OperatorImpl(op, "cpu") {}
 
-    static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) {
-        return std::make_unique<ReduceMeanImpl2D_cpu>(op);
+    static std::unique_ptr<ReduceMeanImpl_cpu> create(const ReduceMean_Op &op) {
+        return std::make_unique<ReduceMeanImpl_cpu>(op);
     }
 
    public:
@@ -81,23 +48,80 @@ class ReduceMeanImpl2D_cpu : public OperatorImpl {
     void forward() override;
 };
 
-class ReduceMeanImpl3D_cpu : public OperatorImpl {
-   public:
-    ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) {
-        return std::make_unique<ReduceMeanImpl3D_cpu>(op);
-    }
-
-   public:
-    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    void forward() override;
-};
+// // compute kernel registry for forward and backward
+// // DIM 1
+// class ReduceMeanImpl1DForward_cpu
+//     : public Registrable<ReduceMeanImpl1DForward_cpu,
+//                          std::tuple<DataType, DataType>,
+//                          void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
+// class ReduceMeanImpl1DBackward_cpu
+//     : public Registrable<ReduceMeanImpl1DBackward_cpu,
+//                          std::tuple<DataType, DataType>,
+//                          void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)> {};
+
+// // DIM 2
+// class ReduceMeanImpl2DForward_cpu
+//     : public Registrable<ReduceMeanImpl2DForward_cpu,
+//                          std::tuple<DataType, DataType>,
+//                          void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
+// class ReduceMeanImpl2DBackward_cpu
+//     : public Registrable<ReduceMeanImpl2DBackward_cpu,
+//                          std::tuple<DataType, DataType>,
+//                          void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)> {};
+// // DIM 3
+// class ReduceMeanImpl3DForward_cpu
+//     : public Registrable<ReduceMeanImpl3DForward_cpu,
+//                          std::tuple<DataType, DataType>,
+//                          void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
+// class ReduceMeanImpl3DBackward_cpu
+//     : public Registrable<ReduceMeanImpl3DBackward_cpu,
+//                          std::tuple<DataType, DataType>,
+//                          void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
+
+// class ReduceMeanImpl1D_cpu : public OperatorImpl {
+//    public:
+//     ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op, "cpu") {}
+
+//     static std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) {
+//         return std::make_unique<ReduceMeanImpl1D_cpu>(op);
+//     }
+
+//    public:
+//     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+//     void forward() override;
+// };
+
+// class ReduceMeanImpl2D_cpu : public OperatorImpl {
+//    public:
+//     ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op, "cpu") {}
+
+//     static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) {
+//         return std::make_unique<ReduceMeanImpl2D_cpu>(op);
+//     }
+
+//    public:
+//     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+//     void forward() override;
+// };
+
+// class ReduceMeanImpl3D_cpu : public OperatorImpl {
+//    public:
+//     ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op, "cpu") {}
+
+//     static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) {
+//         return std::make_unique<ReduceMeanImpl3D_cpu>(op);
+//     }
+
+//    public:
+//     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+//     void forward() override;
+// };
 namespace {
 // add cpu backend to ReduceMean_Op<2> implementation registry
-static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create);
-static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
-static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
+static Registrar<ReduceMean_Op> registrarReduceMeanImpl_cpu("cpu", Aidge::ReduceMeanImpl_cpu::create);
+// static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create);
+// static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
+// static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
 }  // namespace
 }  // namespace Aidge
 
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
index 46eb61f2..25796f22 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
@@ -12,10 +12,12 @@
 #ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
 
-#include <cstddef>
-#include <algorithm>   // std::copy, std::for_each
-#include <numeric>     //std::accumulate
+#include <algorithm>   // std::for_each
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::int32_t
 #include <functional>  //std::multiplies
+#include <numeric>     //std::accumulate
+#include <vector>
 
 #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
 #include "aidge/data/Data.hpp"
@@ -23,8 +25,8 @@
 #include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
-template <class I, class O, DimSize_t DIM>
-void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs& attrs,
+template <class I, class O>
+void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attrs,
                                      const std::vector<DimSize_t>& inputDims,
                                      const void* input_,
                                      void* output_) {
@@ -32,14 +34,15 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs&
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
+    const std::vector<std::int32_t>& axes = std::get<0>(attrs);
     const std::size_t nb_dims = inputDims.size();
     const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
 
-    if (DIM == 1) {
-        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + std::get<0>(attrs)[0], 1, std::multiplies<std::size_t>());
-        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - std::get<0>(attrs)[0], 1, std::multiplies<std::size_t>());
+    if (axes.size() == 1) {
+        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
+        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>());
 
-        const std::size_t dim_i = inputDims[std::get<0>(attrs)[0]];
+        const std::size_t dim_i = inputDims[axes[0]];
         for (std::size_t pre = 0; pre < stride_pre; ++pre) {
             for (std::size_t post = 0; post < stride_post; ++post) {
                 const std::size_t idx_i = pre * dim_i * stride_post + post;
@@ -68,7 +71,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs&
         const I* inputAccumulation = input;
         I* outputAccumulation = nullptr;
 
-        for (const auto& axisInt : std::get<0>(attrs)) {
+        for (const auto& axisInt : axes) {
             const std::size_t a = static_cast<std::size_t>(axisInt);
             outputElements /= inputDims[a];
             outputAccumulation = new I[outputElements];
@@ -103,29 +106,36 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op<DIM>::Attrs&
 }
 
 namespace {
-// DIM = 1
-static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,1>);
-static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,1>);
-static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,1>);
-
-// DIM = 2
-static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,2>);
-static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,2>);
-static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,2>);
-
-// DIM = 3
-static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,3>);
-static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,3>);
-static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,3>);
+static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>);
+static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int>);
+static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>);
+
+// // DIM = 1
+// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32(
+//         {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,1>);
+// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32(
+//         {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,1>);
+// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64(
+//         {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,1>);
+
+// // DIM = 2
+// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32(
+//         {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,2>);
+// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32(
+//         {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,2>);
+// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64(
+//         {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,2>);
+
+// // DIM = 3
+// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32(
+//         {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,3>);
+// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32(
+//         {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,3>);
+// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64(
+//         {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,3>);
 }  // namespace
 }  // namespace Aidge
 
diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp
index e31a53d8..82f96f11 100644
--- a/src/operator/ReduceMeanImpl.cpp
+++ b/src/operator/ReduceMeanImpl.cpp
@@ -9,71 +9,87 @@
  *
  ********************************************************************************/
 
-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
+
+#include <memory>
 #include <vector>
 
 #include "aidge/utils/Types.h"
 #include "aidge/operator/ReduceMean.hpp"
-
-#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
 #include "aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp"
-Aidge::NbElts_t Aidge::ReduceMeanImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
-    // this implementation can be in-place
-    return 0;
-}
-Aidge::NbElts_t Aidge::ReduceMeanImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
-    // this implementation can be in-place
-    return 0;
-}
-Aidge::NbElts_t Aidge::ReduceMeanImpl3D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+
+Aidge::NbElts_t Aidge::ReduceMeanImpl_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return 0;
 }
+// Aidge::NbElts_t Aidge::ReduceMeanImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+//     // this implementation can be in-place
+//     return 0;
+// }
+// Aidge::NbElts_t Aidge::ReduceMeanImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+//     // this implementation can be in-place
+//     return 0;
+// }
+// Aidge::NbElts_t Aidge::ReduceMeanImpl3D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+//     // this implementation can be in-place
+//     return 0;
+// }
 
-void Aidge::ReduceMeanImpl1D_cpu::forward() {
-
+void Aidge::ReduceMeanImpl_cpu::forward() {
+    const ReduceMean_Op& op_ = dynamic_cast<const ReduceMean_Op&>(mOp);
     // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<ReduceMeanImpl1DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<ReduceMeanImplForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const ReduceMean_Op<1>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+    kernelFunc(op_.getStaticAttributes(),
+               op_.getInput(0)->dims(),
+               op_.getInput(0)->getImpl()->rawPtr(),
+               op_.getOutput(0)->getImpl()->rawPtr());
 }
 
-void Aidge::ReduceMeanImpl2D_cpu::forward() {
+// void Aidge::ReduceMeanImpl1D_cpu::forward() {
 
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<ReduceMeanImpl2DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+//     // Find the correct kernel type
+//     auto kernelFunc =
+//             Registrar<ReduceMeanImpl1DForward_cpu>::create({
+//         std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
+//         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
 
-    // Call kernel
-    kernelFunc(dynamic_cast<const ReduceMean_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
+//     // Call kernel
+//     kernelFunc(dynamic_cast<const ReduceMean_Op<1>&>(mOp).getStaticAttributes(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+// }
 
-void Aidge::ReduceMeanImpl3D_cpu::forward() {
+// void Aidge::ReduceMeanImpl2D_cpu::forward() {
 
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<ReduceMeanImpl3DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+//     // Find the correct kernel type
+//     auto kernelFunc =
+//             Registrar<ReduceMeanImpl2DForward_cpu>::create({
+//         std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
+//         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
 
-    // Call kernel
-    kernelFunc(dynamic_cast<const ReduceMean_Op<3>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
\ No newline at end of file
+//     // Call kernel
+//     kernelFunc(dynamic_cast<const ReduceMean_Op<2>&>(mOp).getStaticAttributes(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+// }
+
+// void Aidge::ReduceMeanImpl3D_cpu::forward() {
+
+//     // Find the correct kernel type
+//     auto kernelFunc =
+//             Registrar<ReduceMeanImpl3DForward_cpu>::create({
+//         std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
+//         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+
+//     // Call kernel
+//     kernelFunc(dynamic_cast<const ReduceMean_Op<3>&>(mOp).getStaticAttributes(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
+//                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+// }
\ No newline at end of file
-- 
GitLab