Merge remote-tracking branch 'origin/dev' into scheduling

8ecdc264 · Olivier BICHLER · 67a6da5c · f805a9af · 8ecdc264 · 8ecdc264
Commit 8ecdc264 authored 11 months ago by Olivier BICHLER
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,10 +7,11 @@ file(READ "${CMAKE_SOURCE_DIR}/project_name.txt" project)
 message(STATUS "Project name: ${project}")
 message(STATUS "Project version: ${version}")
-# Note : project name is {project} and python module name is also {project} 
+# Note : project name is {project} and python module name is also {project}
 set(module_name _${project}) # target name
 project(${project})
+# CMake initialises every target's CXX_STANDARD property from the
+# CMAKE_CXX_STANDARD variable; a plain variable named CXX_STANDARD is never
+# read by CMake. C++14 is needed by the sources (e.g. std::make_unique).
+set(CMAKE_CXX_STANDARD 14)
 ##############################################
 # Define options
@@ -18,6 +19,7 @@ option(PYBIND "python binding" ON)
 option(WERROR "Warning as error" OFF)
 option(TEST "Enable tests" ON)
 option(COVERAGE "Enable coverage" OFF)
+option(ENABLE_ASAN "Enable ASan (AddressSanitizer) for runtime analysis of memory use (over/underflow, memory leak, ...)" OFF)
 ##############################################
 # Import utils CMakeLists
@@ -34,7 +36,6 @@ find_package(aidge_core REQUIRED)
 ##############################################
 # Create target and set properties
 file(GLOB_RECURSE src_files "src/*.cpp")
 file(GLOB_RECURSE inc_files "include/*.hpp")
@@ -43,9 +44,23 @@ target_link_libraries(${module_name}
    PUBLIC
        _aidge_core # _ is added because we link the target not the project
 )
 #Set target properties
 set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
+# Optional AddressSanitizer instrumentation, driven by the ENABLE_ASAN option.
+if( ${ENABLE_ASAN} )
+    message("Building ${module_name} with ASAN.")
+    # -fno-omit-frame-pointer keeps frame pointers so sanitizer reports carry
+    # usable stack traces.
+    set(SANITIZE_FLAGS -fsanitize=address -fno-omit-frame-pointer)
+    # The sanitizer flag is also needed at link time; PUBLIC propagates it to
+    # every target that links against ${module_name}.
+    target_link_libraries(${module_name}
+        PUBLIC
+            -fsanitize=address
+    )
+    # Compile flags are PRIVATE: only this target's own sources are instrumented.
+    target_compile_options(${module_name}
+        PRIVATE
+            ${SANITIZE_FLAGS}
+    )
+endif()
 target_include_directories(${module_name}
    PUBLIC
        $<INSTALL_INTERFACE:include>
@@ -60,7 +75,7 @@ if (PYBIND)
    # Handles Python + pybind11 headers dependencies
    target_link_libraries(${module_name}
-        PUBLIC 
+        PUBLIC
            pybind11::pybind11
        PRIVATE
            Python::Python
@@ -99,8 +114,8 @@ install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 install(EXPORT ${project}-targets
 FILE "${project}-targets.cmake"
 DESTINATION ${INSTALL_CONFIGDIR}
- COMPONENT ${module_name} 
+ COMPONENT ${module_name}
-)  
+)
 #Create a ConfigVersion.cmake file
 include(CMakePackageConfigHelpers)

--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -23,6 +23,7 @@
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/GatherImpl.hpp"
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 #include "aidge/backend/cpu/operator/MemorizeImpl.hpp"

--- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -12,10 +12,10 @@
 #ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
-#include "aidge/utils/Registrar.hpp"
 #include <algorithm>
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 namespace Aidge {
 // template <class I, class W, class B, class O>

--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
+#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
+#include <memory>
+#include <vector>
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/GlobalAveragePooling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+namespace Aidge {
+// class GlobalAveragePooling_Op;
+/// Registry of CPU forward kernels for GlobalAveragePooling.
+/// Kernels are keyed by a (DataType, DataType) tuple -- forward() looks them up
+/// with the input and output data types -- and take the input dimensions, a
+/// const pointer to the input data and a pointer to the output data.
+class GlobalAveragePoolingImplForward_cpu
+    : public Registrable<
+          GlobalAveragePoolingImplForward_cpu, std::tuple<DataType, DataType>,
+          void(const std::vector<DimSize_t> &, const void *, void *)> {};
+/// Registry of CPU backward kernels for GlobalAveragePooling, with the same key
+/// and function signature as the forward registry.
+/// NOTE(review): no backward kernel appears to be registered in this change --
+/// confirm whether one is expected.
+class GlobalAveragePoolingImplBackward_cpu
+    : public Registrable<
+          GlobalAveragePoolingImplBackward_cpu, std::tuple<DataType, DataType>,
+          void(const std::vector<DimSize_t> &, const void *, void *)> {};
+/// CPU implementation of the GlobalAveragePooling operator.
+class GlobalAveragePoolingImpl_cpu : public OperatorImpl {
+public:
+  /// Binds the implementation to @p op; "cpu" is forwarded as the second
+  /// OperatorImpl argument (presumably the backend name).
+  GlobalAveragePoolingImpl_cpu(const GlobalAveragePooling_Op &op)
+      : OperatorImpl(op, "cpu") {}
+  /// Factory: returns a newly allocated implementation bound to @p op.
+  static std::unique_ptr<GlobalAveragePoolingImpl_cpu>
+  create(const GlobalAveragePooling_Op &op) {
+    return std::make_unique<GlobalAveragePoolingImpl_cpu>(op);
+  }
+  /// Runs the forward computation; defined out of line in the matching .cpp.
+  void forward() override;
+};
+namespace {
+// Registers GlobalAveragePoolingImpl_cpu::create under the key "cpu" for
+// GlobalAveragePooling_Op.
+// NOTE(review): a `static` object in an anonymous namespace inside a header
+// is instantiated once per including translation unit -- confirm Registrar
+// tolerates the same key being registered repeatedly.
+static Registrar<GlobalAveragePooling_Op> registrarGlobalAveragePoolingImpl_cpu(
+    "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create);
+}
+} // namespace Aidge
+#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_
+#include <cstddef>
+#include <functional>  // std::multiplies
+#include <numeric>     // std::accumulate
+#include <vector>
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+namespace Aidge {
+/**
+ * @brief CPU forward kernel of GlobalAveragePooling.
+ *
+ * dims[0] is used as the batch axis and dims[1] as the channel axis. For every
+ * (batch, channel) pair the mean of all remaining elements of that channel is
+ * written to output[batch * dims[1] + channel].
+ *
+ * @tparam I element type of the input buffer; also the accumulator type, so
+ *           integer inputs can overflow on large channels and yield a
+ *           truncated mean.
+ * @tparam O element type of the output buffer.
+ * @param dims    dimensions of the input; at least 3 are required.
+ * @param input_  input buffer, read as a contiguous array of I.
+ * @param output_ output buffer, written as dims[0] * dims[1] values of O.
+ */
+template <class I, class O>
+void GlobalAveragePoolingImpl_cpu_forward_kernel(
+    const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
+  // error checking
+  AIDGE_ASSERT(dims.size() >= 3,"GlobalAveragePool needs at least a 3 dimensions "
+               "input, number of input dim : {}",
+               dims.size());
+  const DimSize_t nb_batches = dims[0];
+  const DimSize_t nb_channels = dims[1];
+  // Sizes are obtained by multiplication only, so a zero-sized batch or
+  // channel axis cannot trigger an integer division by zero.
+  const DimSize_t in_channel_nb_elems =
+      std::accumulate(dims.begin() + 2, dims.end(), std::size_t(1),
+                      std::multiplies<std::size_t>());
+  const DimSize_t in_batch_nb_elems = nb_channels * in_channel_nb_elems;
+  // An empty batch or channel axis means an empty output: nothing to compute.
+  if (nb_batches == 0 || nb_channels == 0) {
+    return;
+  }
+  // The mean of zero elements is undefined (0 / 0).
+  AIDGE_ASSERT(in_channel_nb_elems > 0,
+               "GlobalAveragePool cannot average over an empty channel");
+  // computation
+  const I *input = static_cast<const I *>(input_);
+  O *output = static_cast<O *>(output_);
+  // parse channel by channel and fill each output with the average of the
+  // values in the channel
+  for (DimSize_t batch = 0; batch < nb_batches; ++batch) {
+    for (DimSize_t channel = 0; channel < nb_channels; ++channel) {
+      // Plain pointer arithmetic: std::next would require <iterator>, which
+      // this header does not include.
+      const I *channel_start = input + ((batch * in_batch_nb_elems) +
+                                        (channel * in_channel_nb_elems));
+      // Left-to-right sum, same summation order as an explicit loop.
+      const I sum = std::accumulate(
+          channel_start, channel_start + in_channel_nb_elems, I(0));
+      output[batch * nb_channels + channel] =
+          sum / static_cast<I>(in_channel_nb_elems);
+    }
+  }
+}
+// Then we add the Registrar declaration for different input/output types
+namespace {
+// One registrar per supported (input, output) DataType pair; each binds the
+// key to the matching instantiation of the forward kernel template.
+static Registrar<GlobalAveragePoolingImplForward_cpu>
+    registrarGlobalAveragePoolingImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32},
+        Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>);
+// NOTE(review): maps DataType::Int32 to `int`, which assumes a 32-bit int on
+// the target platform -- confirm.
+static Registrar<GlobalAveragePoolingImplForward_cpu>
+    registrarGlobalAveragePoolingImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int, int>);
+static Registrar<GlobalAveragePoolingImplForward_cpu>
+    registrarGlobalAveragePoolingImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>);
+} // namespace
+} // namespace Aidge
+#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_FORWARD_KERNEL_H_ */
--- a/src/operator/GlobalAveragePoolingImpl.cpp
+++ b/src/operator/GlobalAveragePoolingImpl.cpp
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
+#include <functional>
+#include <memory>
+#include <vector>
+#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/GlobalAveragePooling.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+// Forward pass: selects the kernel registered for the operator's input/output
+// data types and runs it on the raw tensor buffers.
+void Aidge::GlobalAveragePoolingImpl_cpu::forward()
+{
+    const auto& gapOp = static_cast<const GlobalAveragePooling_Op&>(mOp);
+
+    // An input tensor must be connected before anything is dereferenced
+    AIDGE_ASSERT(gapOp.getInput(0), "missing input 0");
+
+    const auto& src = gapOp.getInput(0);
+    const auto& dst = gapOp.getOutput(0);
+
+    // Look up the kernel registered for this (input type, output type) pair
+    auto kernel = Registrar<GlobalAveragePoolingImplForward_cpu>::create(
+        {src->dataType(), dst->dataType()});
+
+    // Arguments: input dimensions, input buffer, output buffer
+    kernel(src->dims(), src->getImpl()->rawPtr(), dst->getImpl()->rawPtr());
+}
\ No newline at end of file
--- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+#include <aidge/utils/Types.h>
+#include <catch2/catch_test_macros.hpp>
+#include <chrono>
+#include <cmath>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
+#include <iostream>
+#include <memory>
+#include <numeric> // std::accumulate
+#include <ostream>
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/GlobalAveragePooling.hpp"
+#include "aidge/utils/TensorUtils.hpp"
+// Debug helper: writes the tensor's dimensions to std::cout on one line, then
+// calls the tensor's own print().
+void print_tensor(Aidge::Tensor &T) {
+  const auto &shape = T.dims();
+  std::cout << "Tensor : size =  [";
+  for (auto it = shape.begin(); it != shape.end(); ++it) {
+    std::cout << *it << " , ";
+  }
+  std::cout << "]" << std::endl;
+  T.print();
+}
+namespace Aidge {
+TEST_CASE("[cpu/operator] GlobalAveragePooling",
+          "[GlobalAveragePooling][CPU]") {
+  // Number of iterations of the "random testing" section
+  constexpr std::uint16_t NBTRIALS = 10;
+  // Create a random number generator
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_real_distribution<float> valueDist(
+      0.1f, 1.1f); // Random float values between 0.1 and 1.1
+  // Size of each individual dimension: 2 to 10
+  std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                         std::size_t(10));
+  // Number of dimensions for the failure case: 1 or 2 (fewer than the kernel
+  // accepts)
+  std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1),
+                                                           std::size_t(2));
+  // Number of dimensions for the valid cases: 3 to 7
+  std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3),
+                                                            std::size_t(7));
+  // Create the GlobalAveragePooling operator
+  std::shared_ptr<Node> globAvgPool = GlobalAveragePooling();
+  auto op =
+      std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator());
+  op->setDataType(DataType::Float32);
+  op->setBackend("cpu");
+  // Create the input Tensor
+  std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
+  op->associateInput(0, T0);
+  T0->setDataType(DataType::Float32);
+  T0->setBackend("cpu");
+  // Create results Tensor (holds the expected values the output is compared to)
+  std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
+  Tres->setDataType(DataType::Float32);
+  Tres->setBackend("cpu");
+  // To measure the execution time of the operator's forward() call,
+  // accumulated over the sections that run
+  std::chrono::time_point<std::chrono::system_clock> start;
+  std::chrono::time_point<std::chrono::system_clock> end;
+  std::chrono::duration<double, std::micro> duration{};
+  int number_of_operation{0};
+  SECTION("GlobalAveragePoolingImpl_cpu::forward()") {
+    // Failure case: the forward kernel asserts that the input has at least 3
+    // dimensions, so forward() must throw for a 1-D or 2-D input.
+    SECTION(
+        "1-2Dim > not enough dimensions leads to function throwing an error") {
+      // generate a random tensors
+      const std::size_t nbDims = nbLowDimsDist(gen);
+      std::vector<std::size_t> dims;
+      for (std::size_t i = 0; i < nbDims; ++i) {
+        dims.push_back(dimSizeDist(gen));
+      }
+      const std::size_t nb_elements =
+          std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1),
+                          std::multiplies<std::size_t>());
+      float *array0 = new float[nb_elements];
+      for (std::size_t i = 0; i < nb_elements; ++i) {
+        array0[i] = valueDist(gen);
+      }
+      // input0
+      T0->resize(dims);
+      T0->getImpl()->setRawPtr(array0, nb_elements);
+      REQUIRE_THROWS(globAvgPool->forward());
+      // NOTE(review): T0 was handed array0 through setRawPtr and is still alive
+      // after this delete -- confirm it is not read again before being re-set.
+      delete[] array0;
+    }
+    SECTION("3+Dim") {
+      // Sanity check: the mean of a constant tensor is that constant, so every
+      // expected output value equals the fill value.
+      SECTION("Fill a tensor with all values set as N will result with every "
+              "output being N") {
+        // generate the tensor
+        const std::size_t nbDims = nbHighDimsDist(gen);
+        std::vector<std::size_t> dims_in;
+        for (std::size_t i = 0; i < nbDims; ++i) {
+          dims_in.push_back(dimSizeDist(gen));
+        }
+        // create in nb_elems
+        const std::size_t in_nb_elems =
+            std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
+                            std::multiplies<std::size_t>());
+        const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
+        const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
+        // NOTE(review): the comment below counts one division per channel "for
+        // every batch", but only dims_in[1] is added, not
+        // dims_in[0] * dims_in[1] -- confirm the intended count.
+        number_of_operation +=
+            in_nb_elems +
+            dims_in[1]; //  averaging per channel : 1 addition per element in
+                        //  the channel + 1 division this for every batch
+        // create out nb_elems
+        std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+        const std::size_t out_nb_elems =
+            std::accumulate(dims_out.cbegin(), dims_out.cend(), std::size_t(1),
+                            std::multiplies<std::size_t>());
+        const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
+        // iterate over each batch/channel
+        float *array0 = new float[in_nb_elems];
+        float *result = new float[out_nb_elems];
+        // single fill value shared by the input and the expected output
+        float val = valueDist(gen);
+        for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
+          for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
+            for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
+            {
+              array0[batch * in_batch_nb_elems + channel * in_channel_nb_elems +
+                     i] = val;
+            }
+            result[batch * out_batch_nb_elems + channel] = val;
+          }
+        }
+        // input0
+        T0->resize(dims_in);
+        T0->getImpl()->setRawPtr(array0, in_nb_elems);
+        // results
+        Tres->resize(dims_out);
+        Tres->getImpl()->setRawPtr(result, out_nb_elems);
+        op->computeOutputDims();
+        // only the forward() call itself is timed
+        start = std::chrono::system_clock::now();
+        REQUIRE_NOTHROW(globAvgPool->forward());
+        end = std::chrono::system_clock::now();
+        duration +=
+            std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+        // the output must match the expected tensor in rank, shape and values
+        REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+        for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+          REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+        }
+        REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+        // NOTE(review): not reached if a REQUIRE above fails, so both buffers
+        // leak in that case.
+        delete[] array0;
+        delete[] result;
+      }
+      // Randomised check: NBTRIALS tensors of random rank, shape and content
+      // are compared against a per-channel mean computed inline in the test.
+      SECTION("random testing") {
+        for (int trial = 0; trial < NBTRIALS; ++trial) {
+          // generate the tensor
+          const std::size_t nbDims = nbHighDimsDist(gen);
+          std::vector<std::size_t> dims_in;
+          for (std::size_t i = 0; i < nbDims; ++i) {
+            dims_in.push_back(dimSizeDist(gen));
+          }
+          // create in nb_elems
+          const std::size_t in_nb_elems =
+              std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
+                              std::multiplies<std::size_t>());
+          const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
+          const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
+          // NOTE(review): the comment below counts one division per channel
+          // "for every batch", but only dims_in[1] is added, not
+          // dims_in[0] * dims_in[1] -- confirm the intended count.
+          number_of_operation +=
+              in_nb_elems +
+              dims_in[1]; //  averaging per channel : 1 addition per element in
+                          //  the channel + 1 division this for every batch
+          // create out nb_elems
+          std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+          const std::size_t out_nb_elems =
+              std::accumulate(dims_out.cbegin(), dims_out.cend(),
+                              std::size_t(1), std::multiplies<std::size_t>());
+          const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
+          // iterate over each batch/channel
+          float *array0 = new float[in_nb_elems];
+          float *result = new float[out_nb_elems];
+          for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
+            for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
+              // reference mean, accumulated in float in the same element order
+              // as the values are written to the input
+              float channel_sum = 0;
+              for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
+              {
+                float val = valueDist(gen);
+                array0[batch * in_batch_nb_elems +
+                       channel * in_channel_nb_elems + i] = val;
+                channel_sum += val;
+              }
+              result[batch * out_batch_nb_elems + channel] =
+                  channel_sum / in_channel_nb_elems;
+            }
+          }
+          // input0
+          T0->resize(dims_in);
+          T0->getImpl()->setRawPtr(array0, in_nb_elems);
+          // results
+          Tres->resize(dims_out);
+          Tres->getImpl()->setRawPtr(result, out_nb_elems);
+          op->computeOutputDims();
+          // only the forward() call itself is timed
+          start = std::chrono::system_clock::now();
+          REQUIRE_NOTHROW(globAvgPool->forward());
+          end = std::chrono::system_clock::now();
+          duration += std::chrono::duration_cast<std::chrono::microseconds>(
+              end - start);
+          // the output must match the expected tensor in rank, shape and values
+          REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+            REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+          }
+          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+          // NOTE(review): not reached if a REQUIRE above fails, so both
+          // buffers leak in that case.
+          delete[] array0;
+          delete[] result;
+        }
+      }
+      SECTION("Using result from a pytorch function as groundtruth") {
+        DimSize_t batch_size = 2;
+        DimSize_t channels = 3;
+        DimSize_t height = 4;
+        DimSize_t width = 3;
+        DimSize_t depth = 2;
+        SECTION("2D_img") {
+          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
+                                               width};
+          const std::vector<DimSize_t> out_dims{batch_size, channels};
+          DimSize_t in_nb_elems = batch_size * channels * height * width;
+          DimSize_t out_nb_elems = batch_size * channels;
+          number_of_operation +=
+              in_nb_elems +
+              channels; //  averaging per channel : 1 addition per element in
+                        //  the channel + 1 division this for every batch
+          auto input = new float[in_nb_elems];
+          auto result = new float[out_nb_elems];
+          input[0] = 0.1807716;
+          input[1] = -0.0699881;
+          input[2] = -0.3596235;
+          input[3] = -0.9152045;
+          input[4] = 0.6257653;
+          input[5] = 0.0255099;
+          input[6] = 0.9545137;
+          input[7] = 0.0643485;
+          input[8] = 0.3611506;
+          input[9] = 1.1678782;
+          input[10] = -1.3498932;
+          input[11] = -0.5101767;
+          input[12] = 0.2359577;
+          input[13] = -0.2397784;
+          input[14] = -0.9211147;
+          input[15] = 1.5432971;
+          input[16] = 1.3488258;
+          input[17] = -0.1396417;
+          input[18] = 0.2857972;
+          input[19] = 0.9651205;
+          input[20] = -2.0371499;
+          input[21] = 0.4931363;
+          input[22] = 1.4869986;
+          input[23] = 0.5910330;
+          input[24] = 0.1260297;
+          input[25] = -1.5626874;
+          input[26] = -1.1601028;
+          input[27] = -0.3348408;
+          input[28] = 0.4477722;
+          input[29] = -0.8016447;
+          input[30] = 1.5236114;
+          input[31] = 2.5085869;
+          input[32] = -0.6630959;
+          input[33] = -0.2512752;
+          input[34] = 1.0101448;
+          input[35] = 0.1215468;
+          input[36] = 0.1583993;
+          input[37] = 1.1340188;
+          input[38] = -1.1538976;
+          input[39] = -0.2983968;
+          input[40] = -0.5075365;
+          input[41] = -0.9239212;
+          input[42] = 0.5467061;
+          input[43] = -1.4947776;
+          input[44] = -1.2057148;
+          input[45] = 0.5718198;
+          input[46] = -0.5973545;
+          input[47] = -0.6936757;
+          input[48] = 1.6455388;
+          input[49] = -0.8029931;
+          input[50] = 1.3514109;
+          input[51] = -0.2759193;
+          input[52] = -1.5108346;
+          input[53] = 2.1047730;
+          input[54] = 2.7629590;
+          input[55] = -1.7465292;
+          input[56] = 0.8353187;
+          input[57] = -1.9560477;
+          input[58] = -0.8002653;
+          input[59] = -0.5044988;
+          input[60] = -0.0711742;
+          input[61] = -0.5130699;
+          input[62] = -1.0307810;
+          input[63] = 0.9154347;
+          input[64] = -0.2282317;
+          input[65] = -0.6884708;
+          input[66] = 0.1832259;
+          input[67] = 0.6003584;
+          input[68] = -1.5429375;
+          input[69] = -0.3465560;
+          input[70] = -0.1476223;
+          input[71] = 0.6469797;
+          result[0] = 0.0145876;
+          result[1] = 0.3010401;
+          result[2] = 0.0803371;
+          result[3] = -0.3720275;
+          result[4] = 0.0919094;
+          result[5] = -0.1852371;
+          // input0
+          T0->resize(in_dims);
+          T0->getImpl()->setRawPtr(input, in_nb_elems);
+          // results
+          Tres->resize(out_dims);
+          Tres->getImpl()->setRawPtr(result, out_nb_elems);
+          op->computeOutputDims();
+          start = std::chrono::system_clock::now();
+          REQUIRE_NOTHROW(globAvgPool->forward());
+          end = std::chrono::system_clock::now();
+          duration += std::chrono::duration_cast<std::chrono::microseconds>(
+              end - start);
+          REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+            REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+          }
+          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+          delete[] input;
+          delete[] result;
+        }
+        SECTION("3D_img") {
+          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
+                                               width, depth};
+          const std::vector<DimSize_t> out_dims{batch_size, channels};
+          DimSize_t in_nb_elems =
+              batch_size * channels * height * width * depth;
+          number_of_operation +=
+              in_nb_elems +
+              channels; //  averaging per channel : 1 addition per element in
+                        //  the channel + 1 division this for every batch
+          DimSize_t out_nb_elems = batch_size * channels;
+          auto input = new float[in_nb_elems];
+          auto result = new float[out_nb_elems];
+          input[0] = 0.0061403;
+          input[1] = -0.9665052;
+          input[2] = 0.3582928;
+          input[3] = 0.1072854;
+          input[4] = 1.2463317;
+          input[5] = 1.2460036;
+          input[6] = 0.3534451;
+          input[7] = 0.9425349;
+          input[8] = -0.2103887;
+          input[9] = -0.7959853;
+          input[10] = 0.1297970;
+          input[11] = -1.9445597;
+          input[12] = 0.0609514;
+          input[13] = -0.2379328;
+          input[14] = 1.9020044;
+          input[15] = -1.1762751;
+          input[16] = 0.3404147;
+          input[17] = 1.1685153;
+          input[18] = -0.6526139;
+          input[19] = 0.3767620;
+          input[20] = 0.1887376;
+          input[21] = 0.5154487;
+          input[22] = 0.6371427;
+          input[23] = -0.3948864;
+          input[24] = -1.1571540;
+          input[25] = 0.2896117;
+          input[26] = 0.6163548;
+          input[27] = -0.4370409;
+          input[28] = 0.6589766;
+          input[29] = 0.6587803;
+          input[30] = -1.3702172;
+          input[31] = -1.6210355;
+          input[32] = 0.5872851;
+          input[33] = 0.2860694;
+          input[34] = 0.0082870;
+          input[35] = -0.2523253;
+          input[36] = -1.3247224;
+          input[37] = 0.1891782;
+          input[38] = 0.0211001;
+          input[39] = 0.9404197;
+          input[40] = -0.5576900;
+          input[41] = -0.6939272;
+          input[42] = -0.3252473;
+          input[43] = 1.2439330;
+          input[44] = -1.1671864;
+          input[45] = -0.4091243;
+          input[46] = 1.2600617;
+          input[47] = -1.5630058;
+          input[48] = 1.1346143;
+          input[49] = -0.0823837;
+          input[50] = 0.2893163;
+          input[51] = 0.8357732;
+          input[52] = -0.2449911;
+          input[53] = 0.2712233;
+          input[54] = 0.0936364;
+          input[55] = -0.8834321;
+          input[56] = -0.3274170;
+          input[57] = 0.0783938;
+          input[58] = -0.3807656;
+          input[59] = 0.3775077;
+          input[60] = 0.1119123;
+          input[61] = 2.3142793;
+          input[62] = -0.7989057;
+          input[63] = -0.5643027;
+          input[64] = -1.1346605;
+          input[65] = 0.1705271;
+          input[66] = 0.9946650;
+          input[67] = 1.2625724;
+          input[68] = 1.6218156;
+          input[69] = 1.0774711;
+          input[70] = 0.5947813;
+          input[71] = -1.5290873;
+          input[72] = 2.0437069;
+          input[73] = -0.1656267;
+          input[74] = 0.0870704;
+          input[75] = -0.5276564;
+          input[76] = -0.1002882;
+          input[77] = 1.0539219;
+          input[78] = -0.6230739;
+          input[79] = -1.5905718;
+          input[80] = -0.9741858;
+          input[81] = -0.1869211;
+          input[82] = 0.5816050;
+          input[83] = -2.6339815;
+          input[84] = -1.0764544;
+          input[85] = 2.5903966;
+          input[86] = 0.4940658;
+          input[87] = 0.4671729;
+          input[88] = 0.6588292;
+          input[89] = -0.7257792;
+          input[90] = 1.4280071;
+          input[91] = -1.2187740;
+          input[92] = 0.7380729;
+          input[93] = -1.1599953;
+          input[94] = -1.4355115;
+          input[95] = -1.5304037;
+          input[96] = 0.8474578;
+          input[97] = 0.0774260;
+          input[98] = 0.5433396;
+          input[99] = -0.8438400;
+          input[100] = -0.1089903;
+          input[101] = -0.6354192;
+          input[102] = 0.8772392;
+          input[103] = 0.2844733;
+          input[104] = 0.0975270;
+          input[105] = -0.9785872;
+          input[106] = -0.4320499;
+          input[107] = -1.4937501;
+          input[108] = -2.0644901;
+          input[109] = 0.0851217;
+          input[110] = 0.6644159;
+          input[111] = 0.4168026;
+          input[112] = 0.0958830;
+          input[113] = -1.5699565;
+          input[114] = 0.3739572;
+          input[115] = -0.1420672;
+          input[116] = -0.7864021;
+          input[117] = 0.2443752;
+          input[118] = -0.9811850;
+          input[119] = -0.0698569;
+          input[120] = 0.1463890;
+          input[121] = 0.2536245;
+          input[122] = 0.2136150;
+          input[123] = 0.3113698;
+          input[124] = 1.8353856;
+          input[125] = 1.4473228;
+          input[126] = -0.7373698;
+          input[127] = 0.2485314;
+          input[128] = -0.4789796;
+          input[129] = -0.3396149;
+          input[130] = 0.6438198;
+          input[131] = 0.7287521;
+          input[132] = -1.5119252;
+          input[133] = -0.1006494;
+          input[134] = 1.8955028;
+          input[135] = 1.0871323;
+          input[136] = 0.3620502;
+          input[137] = -0.8826663;
+          input[138] = 1.2220223;
+          input[139] = -1.2817260;
+          input[140] = 1.4153577;
+          input[141] = 0.4148015;
+          input[142] = 1.3458617;
+          input[143] = 1.9718349;
+          result[0] = 0.1333608;
+          result[1] = -0.1716091;
+          result[2] = 0.2201060;
+          result[3] = -0.1585989;
+          result[4] = -0.2291074;
+          result[5] = 0.4254351;
+          // input0
+          T0->resize(in_dims);
+          T0->getImpl()->setRawPtr(input, in_nb_elems);
+          // results
+          Tres->resize(out_dims);
+          Tres->getImpl()->setRawPtr(result, out_nb_elems);
+          op->computeOutputDims();
+          start = std::chrono::system_clock::now();
+          REQUIRE_NOTHROW(globAvgPool->forward());
+          end = std::chrono::system_clock::now();
+          duration += std::chrono::duration_cast<std::chrono::microseconds>(
+              end - start);
+          REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+            REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+          }
+          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+          delete[] input;
+          delete[] result;
+        }
+      }
+      std::cout << "GlobalAveragePooling total execution time : "
+                << duration.count() << "µs" << std::endl;
+      std::cout << "Number of operations : " << number_of_operation
+                << std::endl;
+      std::cout << "Operation / µs = " << number_of_operation / duration.count()
+                << std::endl;
+    }
+  }
+}
+} // namespace Aidge