From 2407e191c6a10e6bca0eb5d07493948dc6bf913c Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Tue, 11 Mar 2025 20:11:38 +0100
Subject: [PATCH 1/3] Fixed wrong computation in int

---
 .../cpu/operator/AvgPoolingImpl_kernels.hpp   | 22 ++++++++++++++++---
 .../GlobalAveragePoolingImpl_kernels.hpp      |  2 +-
 .../cpu/operator/ReduceMeanImpl_kernels.hpp   | 13 ++++++-----
 unit_tests/operator/Test_AvgPoolingImpl.cpp   | 21 ++++++++++++++++++
 .../Test_GlobalAveragePoolingImpl.cpp         | 21 ++++++++++++++++++
 5 files changed, 70 insertions(+), 9 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
index 78f8446a..1671759d 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
@@ -23,6 +23,22 @@
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
+
+template <typename T>
+using Acc_T = typename std::conditional<std::is_floating_point<T>::value, T, double>::type;
+
+template <typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+castFromFloat(T value) {
+  return value;
+}
+
+template <typename T>
+typename std::enable_if<!std::is_floating_point<T>::value, T>::type
+castFromFloat(double value) {
+  return static_cast<T>(std::nearbyint(value));
+}
+
 /**
  * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
  * @tparam I Input data type.
@@ -79,7 +95,7 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
                     const std::size_t ix = ox * strideDims[0];
                     const std::size_t iy = oy * strideDims[1];
 
-                    O sum = static_cast<O>(0);
+                    Acc_T<I> sum = static_cast<Acc_T<I>>(0);
                     std::size_t count = 0;
 
                     for (unsigned int sy = syMin; sy < syMax; ++sy) {
@@ -90,13 +106,13 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
 
                             // Ensure within bounds
                             if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) {
-                                sum += static_cast<O>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]);
+                                sum += static_cast<Acc_T<I>>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]);
                                 ++count;
                             }
                         }
                     }
 
-                    output[oIndexFull] = count > 0 ? sum / static_cast<O>(count) : 0;
+                    output[oIndexFull] = count > 0 ? castFromFloat<O>(sum / count) : 0;
                 }
             }
         }
diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
index d5e5561d..7a47ccf3 100644
--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
@@ -38,7 +38,7 @@ stableMean(const T* vec, size_t size) {
 
 // Specialization for integers: perform the mean computation in float
 template <typename T>
-typename std::enable_if<!std::is_floating_point<T>::value, T>::type
+typename std::enable_if<!std::is_floating_point<T>::value, double>::type
 stableMean(const T* vec, size_t size) {
   double mean = 0;
   for (size_t i = 0; i < size; ++i) {
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
index 864b89c4..a1562322 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
@@ -25,6 +25,9 @@
 #include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
+// NOTE(review): Acc_T is also defined (identically) in AvgPoolingImpl_kernels.hpp within
+// namespace Aidge; alias templates cannot be redefined, so including both headers in one
+// TU will fail to compile — consider moving Acc_T to a shared header. TODO confirm.
+template <typename T>
+using Acc_T = typename std::conditional<std::is_floating_point<T>::value, T, double>::type;
 
 template <typename T>
 typename std::enable_if<std::is_floating_point<T>::value, T>::type
@@ -38,7 +41,7 @@ stableMean(const T* vec, size_t len, size_t stride) {
 
 // Specialization for integers: perform the mean computation in float
 template <typename T>
-typename std::enable_if<!std::is_floating_point<T>::value, T>::type
+typename std::enable_if<!std::is_floating_point<T>::value, double>::type
 stableMean(const T* vec, size_t len, size_t stride) {
   double mean = 0;
   for (size_t i = 0; i < len; ++i) {
@@ -102,13 +105,13 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
         }
 
         // Type should be the return type of stableMean<I>(), which is always floating point
-        const decltype(stableMean<I>(input, 0, 0))* inputAccumulation = nullptr;
-        decltype(stableMean<I>(input, 0, 0))* outputAccumulation = nullptr;
+        const Acc_T<I>* inputAccumulation = nullptr;
+        Acc_T<I>* outputAccumulation = nullptr;
 
         for (const auto& axisInt : axes) {
             const std::size_t a = static_cast<std::size_t>(axisInt);
             outputElements /= inputDims[a];
-            outputAccumulation = new I[outputElements];
+            outputAccumulation = new Acc_T<I>[outputElements];
             const std::size_t dim_i = inputDims[a];
             for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
                 for (std::size_t post = 0; post < stride_post[a]; ++post) {
@@ -118,7 +121,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
                         outputAccumulation[idx_o] = stableMean<I>(input + idx_i, dim_i, stride_post[a]);
                     }
                     else {
-                        outputAccumulation[idx_o] = stableMean<I>(inputAccumulation + idx_i, dim_i, stride_post[a]);
+                        outputAccumulation[idx_o] = stableMean<Acc_T<I>>(inputAccumulation + idx_i, dim_i, stride_post[a]);
                     }
                 }
             }
diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp
index f116934c..d0299ab5 100644
--- a/unit_tests/operator/Test_AvgPoolingImpl.cpp
+++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp
@@ -201,4 +201,25 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
         op2->getOutput(0)->print();
         REQUIRE(*(op2->getOutput(0)) == *myOutput5);
     }
+
+    SECTION("Simple test") {
+      std::shared_ptr<Tensor> tensor =
+          std::make_shared<Tensor>(Array4D<int32_t, 1, 1, 7, 7>{{{{
+              {0, 8, 26, 35, 49, 45, 22},
+              {2, 24, 48, 66, 60, 46, 26},
+              {8, 41, 64, 68, 39, 18, 9},
+              {10, 48, 72, 76, 42, 14, 9},
+              {6, 29, 52, 65, 27, 7, 3},
+              {1, 9, 24, 31, 18, 7, 1},
+              {0, 0, 4, 6, 7, 1, 1}}}}});
+
+        auto op = AvgPooling2D_Op({7, 7});
+        op.setDataType(DataType::Int32);
+        op.setBackend("cpu");
+
+        op.associateInput(0, tensor);
+        op.forwardDims();
+        op.forward();
+        REQUIRE(op.getOutput(0)->get<int32_t>(0) == 26);
+    }
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
index 8e8536ac..0fd7d84b 100644
--- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
@@ -558,6 +558,27 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
       Log::info("Number of operations : {}\n", number_of_operation);
       Log::info("Operation / µs = {}\n", number_of_operation / duration.count());
     }
+  
+    SECTION("Simple test") {
+      std::shared_ptr<Tensor> tensor =
+          std::make_shared<Tensor>(Array4D<int32_t, 1, 1, 7, 7>{{{{
+              {0, 8, 26, 35, 49, 45, 22},
+              {2, 24, 48, 66, 60, 46, 26},
+              {8, 41, 64, 68, 39, 18, 9},
+              {10, 48, 72, 76, 42, 14, 9},
+              {6, 29, 52, 65, 27, 7, 3},
+              {1, 9, 24, 31, 18, 7, 1},
+              {0, 0, 4, 6, 7, 1, 1}}}}});
+
+        auto op = GlobalAveragePooling_Op();
+        op.setDataType(DataType::Int32);
+        op.setBackend("cpu");
+
+        op.associateInput(0, tensor);
+        op.forwardDims();
+        op.forward();
+        REQUIRE(op.getOutput(0)->get<int32_t>(0) == 26);
+    }
   }
 }
 } // namespace Aidge
-- 
GitLab


From ea9a0a70e58900bbc54aeded143e7a37f62bcf92 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Tue, 11 Mar 2025 20:14:53 +0100
Subject: [PATCH 2/3] Removed unrelated change

---
 unit_tests/scheduler/Test_Scheduler.cpp | 53 -------------------------
 1 file changed, 53 deletions(-)

diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index be87e8ac..54e57ec4 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -17,7 +17,6 @@
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
-#include "aidge/operator/GenericOperator.hpp"
 #include "aidge/operator/Memorize.hpp"
 #include "aidge/operator/Pop.hpp"
 #include "aidge/operator/Stack.hpp"
@@ -29,7 +28,6 @@
 #include "aidge/operator/MetaOperator.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/scheduler/ParallelScheduler.hpp"
-#include "aidge/graph/Testing.hpp"
 
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
@@ -522,57 +520,6 @@ TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") {
     REQUIRE(*output == *expectedOutput);
 }
 
-TEST_CASE("[cpu/scheduler] Branch", "[scheduler]") {
-    std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
-            Array2D<float, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
-
-    std::shared_ptr<GraphView> g = Sequential({
-        Producer(in, "input"),
-        Parallel({
-            Sequential({
-                GenericOperator("b0_op1", {InputCategory::Data}, 1),
-                GenericOperator("b0_op2", {InputCategory::Data}, 1),
-                GenericOperator("b0_op3", {InputCategory::Data}, 1),
-                GenericOperator("b0_op4", {InputCategory::Data}, 1),
-                GenericOperator("b0_op5", {InputCategory::Data}, 1)
-            }),
-            Sequential({
-                GenericOperator("b1_op1", {InputCategory::Data}, 1),
-                GenericOperator("b1_op2", {InputCategory::Data}, 1),
-                GenericOperator("b1_op3", {InputCategory::Data}, 1)
-            }),
-            Sequential({
-                GenericOperator("b2_op1", {InputCategory::Data}, 1)
-            })
-        }),
-        GenericOperator("op1", {InputCategory::Data, InputCategory::Data, InputCategory::Data}, 1),
-        GenericOperator("op2", {InputCategory::Data}, 1),
-        GenericOperator("op3", {InputCategory::Data}, 1)
-    });
-
-    g->save("branch_forwarded");
-
-    auto scheduler = SequentialScheduler(g);
-    scheduler.generateScheduling();
-    scheduler.saveStaticSchedulingDiagram("branch_scheduling");
-
-    // Default scheduling order is not necessarily determinist, but is garanteed to be correct in every case.
-    // This behavior might change in the future.
-    auto seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::Default);
-    fmt::println("seqSchedule = {}", seqSchedule);
-
-    scheduler.tagForkBranches();
-    g->save("branch_forwarded_tag");
-
-    seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::ShortestBranchFirst);
-    REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{
-        "Producer", "b2_op1", "b1_op1", "b1_op2", "b1_op3", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "op1", "op2", "op3"});
-
-    seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::LonguestBranchFirst);
-    REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{
-        "Producer", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "b1_op1", "b1_op2", "b1_op3", "b2_op1", "op1", "op2", "op3"});
-}
-
 #ifdef WITH_OPENSSL
 TEST_CASE("[cpu/scheduler] Select", "[scheduler]") {
     std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
-- 
GitLab


From d34c46219694ac7084073c097a2a6de04b223af9 Mon Sep 17 00:00:00 2001
From: Olivier BICHLER <olivier.bichler@cea.fr>
Date: Sun, 16 Mar 2025 11:30:52 +0100
Subject: [PATCH 3/3] Revert "Removed unrelated change"

This reverts commit ea9a0a70e58900bbc54aeded143e7a37f62bcf92.
---
 unit_tests/scheduler/Test_Scheduler.cpp | 53 +++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index 54e57ec4..be87e8ac 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -17,6 +17,7 @@
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
+#include "aidge/operator/GenericOperator.hpp"
 #include "aidge/operator/Memorize.hpp"
 #include "aidge/operator/Pop.hpp"
 #include "aidge/operator/Stack.hpp"
@@ -28,6 +29,7 @@
 #include "aidge/operator/MetaOperator.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/scheduler/ParallelScheduler.hpp"
+#include "aidge/graph/Testing.hpp"
 
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
@@ -520,6 +522,57 @@ TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") {
     REQUIRE(*output == *expectedOutput);
 }
 
+TEST_CASE("[cpu/scheduler] Branch", "[scheduler]") {
+    std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
+            Array2D<float, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
+
+    std::shared_ptr<GraphView> g = Sequential({
+        Producer(in, "input"),
+        Parallel({
+            Sequential({
+                GenericOperator("b0_op1", {InputCategory::Data}, 1),
+                GenericOperator("b0_op2", {InputCategory::Data}, 1),
+                GenericOperator("b0_op3", {InputCategory::Data}, 1),
+                GenericOperator("b0_op4", {InputCategory::Data}, 1),
+                GenericOperator("b0_op5", {InputCategory::Data}, 1)
+            }),
+            Sequential({
+                GenericOperator("b1_op1", {InputCategory::Data}, 1),
+                GenericOperator("b1_op2", {InputCategory::Data}, 1),
+                GenericOperator("b1_op3", {InputCategory::Data}, 1)
+            }),
+            Sequential({
+                GenericOperator("b2_op1", {InputCategory::Data}, 1)
+            })
+        }),
+        GenericOperator("op1", {InputCategory::Data, InputCategory::Data, InputCategory::Data}, 1),
+        GenericOperator("op2", {InputCategory::Data}, 1),
+        GenericOperator("op3", {InputCategory::Data}, 1)
+    });
+
+    g->save("branch_forwarded");
+
+    auto scheduler = SequentialScheduler(g);
+    scheduler.generateScheduling();
+    scheduler.saveStaticSchedulingDiagram("branch_scheduling");
+
+    // Default scheduling order is not necessarily deterministic, but is guaranteed to be correct in every case.
+    // This behavior might change in the future.
+    auto seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::Default);
+    fmt::println("seqSchedule = {}", seqSchedule);
+
+    scheduler.tagForkBranches();
+    g->save("branch_forwarded_tag");
+
+    seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::ShortestBranchFirst);
+    REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{
+        "Producer", "b2_op1", "b1_op1", "b1_op2", "b1_op3", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "op1", "op2", "op3"});
+
+    seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::LonguestBranchFirst);
+    REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{
+        "Producer", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "b1_op1", "b1_op2", "b1_op3", "b2_op1", "op1", "op2", "op3"});
+}
+
 #ifdef WITH_OPENSSL
 TEST_CASE("[cpu/scheduler] Select", "[scheduler]") {
     std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
-- 
GitLab