From 2407e191c6a10e6bca0eb5d07493948dc6bf913c Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Tue, 11 Mar 2025 20:11:38 +0100 Subject: [PATCH 1/3] Fixed wrong computation in int --- .../cpu/operator/AvgPoolingImpl_kernels.hpp | 22 ++++++++++++++++--- .../GlobalAveragePoolingImpl_kernels.hpp | 2 +- .../cpu/operator/ReduceMeanImpl_kernels.hpp | 13 ++++++----- unit_tests/operator/Test_AvgPoolingImpl.cpp | 21 ++++++++++++++++++ .../Test_GlobalAveragePoolingImpl.cpp | 21 ++++++++++++++++++ 5 files changed, 70 insertions(+), 9 deletions(-) diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp index 78f8446a..1671759d 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp @@ -23,6 +23,22 @@ #include "aidge/utils/Types.h" namespace Aidge { + +template <typename T> +using Acc_T = typename std::conditional<std::is_floating_point<T>::value, T, double>::type; + +template <typename T> +typename std::enable_if<std::is_floating_point<T>::value, T>::type +castFromFloat(T value) { + return value; +} + +template <typename T> +typename std::enable_if<!std::is_floating_point<T>::value, T>::type +castFromFloat(double value) { + return static_cast<T>(std::nearbyint(value)); +} + /** * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. * @tparam I Input data type. 
@@ -79,7 +95,7 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD const std::size_t ix = ox * strideDims[0]; const std::size_t iy = oy * strideDims[1]; - O sum = static_cast<O>(0); + Acc_T<I> sum = static_cast<Acc_T<I>>(0); std::size_t count = 0; for (unsigned int sy = syMin; sy < syMax; ++sy) { @@ -90,13 +106,13 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD // Ensure within bounds if ((ix + dilated_sx) < dims[2] && (iy + dilated_sy) < dims[3]) { - sum += static_cast<O>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]); + sum += static_cast<Acc_T<I>>(input[iIndex + (ix + dilated_sx) * dims[3] + (iy + dilated_sy)]); ++count; } } } - output[oIndexFull] = count > 0 ? sum / static_cast<O>(count) : 0; + output[oIndexFull] = count > 0 ? castFromFloat<O>(sum / count) : 0; } } } diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp index d5e5561d..7a47ccf3 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp @@ -38,7 +38,7 @@ stableMean(const T* vec, size_t size) { // Specialization for integers: perform the mean computation in float template <typename T> -typename std::enable_if<!std::is_floating_point<T>::value, T>::type +typename std::enable_if<!std::is_floating_point<T>::value, double>::type stableMean(const T* vec, size_t size) { double mean = 0; for (size_t i = 0; i < size; ++i) { diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp index 864b89c4..a1562322 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp @@ -25,6 +25,9 @@ #include "aidge/utils/Registrar.hpp" namespace Aidge { + +template 
<typename T> +using Acc_T = typename std::conditional<std::is_floating_point<T>::value, T, double>::type; template <typename T> typename std::enable_if<std::is_floating_point<T>::value, T>::type @@ -38,7 +41,7 @@ stableMean(const T* vec, size_t len, size_t stride) { // Specialization for integers: perform the mean computation in float template <typename T> -typename std::enable_if<!std::is_floating_point<T>::value, T>::type +typename std::enable_if<!std::is_floating_point<T>::value, double>::type stableMean(const T* vec, size_t len, size_t stride) { double mean = 0; for (size_t i = 0; i < len; ++i) { @@ -102,13 +105,13 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, } // Type should be the return type of stableMean<I>(), which is always floating point - const decltype(stableMean<I>(input, 0, 0))* inputAccumulation = nullptr; - decltype(stableMean<I>(input, 0, 0))* outputAccumulation = nullptr; + const Acc_T<I>* inputAccumulation = nullptr; + Acc_T<I>* outputAccumulation = nullptr; for (const auto& axisInt : axes) { const std::size_t a = static_cast<std::size_t>(axisInt); outputElements /= inputDims[a]; - outputAccumulation = new I[outputElements]; + outputAccumulation = new Acc_T<I>[outputElements]; const std::size_t dim_i = inputDims[a]; for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) { for (std::size_t post = 0; post < stride_post[a]; ++post) { @@ -118,7 +121,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, outputAccumulation[idx_o] = stableMean<I>(input + idx_i, dim_i, stride_post[a]); } else { - outputAccumulation[idx_o] = stableMean<I>(inputAccumulation + idx_i, dim_i, stride_post[a]); + outputAccumulation[idx_o] = stableMean<Acc_T<I>>(inputAccumulation + idx_i, dim_i, stride_post[a]); } } } diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp index f116934c..d0299ab5 100644 --- a/unit_tests/operator/Test_AvgPoolingImpl.cpp +++ 
b/unit_tests/operator/Test_AvgPoolingImpl.cpp @@ -201,4 +201,25 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") { op2->getOutput(0)->print(); REQUIRE(*(op2->getOutput(0)) == *myOutput5); } + + SECTION("Simple test") { + std::shared_ptr<Tensor> tensor = + std::make_shared<Tensor>(Array4D<int32_t, 1, 1, 7, 7>{{{{ + {0, 8, 26, 35, 49, 45, 22}, + {2, 24, 48, 66, 60, 46, 26}, + {8, 41, 64, 68, 39, 18, 9}, + {10, 48, 72, 76, 42, 14, 9}, + {6, 29, 52, 65, 27, 7, 3}, + {1, 9, 24, 31, 18, 7, 1}, + {0, 0, 4, 6, 7, 1, 1}}}}}); + + auto op = AvgPooling2D_Op({7, 7}); + op.setDataType(DataType::Int32); + op.setBackend("cpu"); + + op.associateInput(0, tensor); + op.forwardDims(); + op.forward(); + REQUIRE(op.getOutput(0)->get<int32_t>(0) == 26); + } } \ No newline at end of file diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp index 8e8536ac..0fd7d84b 100644 --- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp +++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp @@ -558,6 +558,27 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", Log::info("Number of operations : {}\n", number_of_operation); Log::info("Operation / µs = {}\n", number_of_operation / duration.count()); } + + SECTION("Simple test") { + std::shared_ptr<Tensor> tensor = + std::make_shared<Tensor>(Array4D<int32_t, 1, 1, 7, 7>{{{{ + {0, 8, 26, 35, 49, 45, 22}, + {2, 24, 48, 66, 60, 46, 26}, + {8, 41, 64, 68, 39, 18, 9}, + {10, 48, 72, 76, 42, 14, 9}, + {6, 29, 52, 65, 27, 7, 3}, + {1, 9, 24, 31, 18, 7, 1}, + {0, 0, 4, 6, 7, 1, 1}}}}}); + + auto op = GlobalAveragePooling_Op(); + op.setDataType(DataType::Int32); + op.setBackend("cpu"); + + op.associateInput(0, tensor); + op.forwardDims(); + op.forward(); + REQUIRE(op.getOutput(0)->get<int32_t>(0) == 26); + } } } } // namespace Aidge -- GitLab From ea9a0a70e58900bbc54aeded143e7a37f62bcf92 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> 
Date: Tue, 11 Mar 2025 20:14:53 +0100 Subject: [PATCH 2/3] Removed unrelated change --- unit_tests/scheduler/Test_Scheduler.cpp | 53 ------------------------- 1 file changed, 53 deletions(-) diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index be87e8ac..54e57ec4 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -17,7 +17,6 @@ #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" -#include "aidge/operator/GenericOperator.hpp" #include "aidge/operator/Memorize.hpp" #include "aidge/operator/Pop.hpp" #include "aidge/operator/Stack.hpp" @@ -29,7 +28,6 @@ #include "aidge/operator/MetaOperator.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" -#include "aidge/graph/Testing.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" @@ -522,57 +520,6 @@ TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") { REQUIRE(*output == *expectedOutput); } -TEST_CASE("[cpu/scheduler] Branch", "[scheduler]") { - std::shared_ptr<Tensor> in = std::make_shared<Tensor>( - Array2D<float, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); - - std::shared_ptr<GraphView> g = Sequential({ - Producer(in, "input"), - Parallel({ - Sequential({ - GenericOperator("b0_op1", {InputCategory::Data}, 1), - GenericOperator("b0_op2", {InputCategory::Data}, 1), - GenericOperator("b0_op3", {InputCategory::Data}, 1), - GenericOperator("b0_op4", {InputCategory::Data}, 1), - GenericOperator("b0_op5", {InputCategory::Data}, 1) - }), - Sequential({ - GenericOperator("b1_op1", {InputCategory::Data}, 1), - GenericOperator("b1_op2", {InputCategory::Data}, 1), - GenericOperator("b1_op3", {InputCategory::Data}, 1) - }), - Sequential({ - GenericOperator("b2_op1", {InputCategory::Data}, 1) - }) - }), - GenericOperator("op1", {InputCategory::Data, InputCategory::Data, InputCategory::Data}, 
1), - GenericOperator("op2", {InputCategory::Data}, 1), - GenericOperator("op3", {InputCategory::Data}, 1) - }); - - g->save("branch_forwarded"); - - auto scheduler = SequentialScheduler(g); - scheduler.generateScheduling(); - scheduler.saveStaticSchedulingDiagram("branch_scheduling"); - - // Default scheduling order is not necessarily determinist, but is garanteed to be correct in every case. - // This behavior might change in the future. - auto seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::Default); - fmt::println("seqSchedule = {}", seqSchedule); - - scheduler.tagForkBranches(); - g->save("branch_forwarded_tag"); - - seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::ShortestBranchFirst); - REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{ - "Producer", "b2_op1", "b1_op1", "b1_op2", "b1_op3", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "op1", "op2", "op3"}); - - seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::LonguestBranchFirst); - REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{ - "Producer", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "b1_op1", "b1_op2", "b1_op3", "b2_op1", "op1", "op2", "op3"}); -} - #ifdef WITH_OPENSSL TEST_CASE("[cpu/scheduler] Select", "[scheduler]") { std::shared_ptr<Tensor> in = std::make_shared<Tensor>( -- GitLab From d34c46219694ac7084073c097a2a6de04b223af9 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Sun, 16 Mar 2025 11:30:52 +0100 Subject: [PATCH 3/3] Revert "Removed unrelated change" This reverts commit ea9a0a70e58900bbc54aeded143e7a37f62bcf92. 
--- unit_tests/scheduler/Test_Scheduler.cpp | 53 +++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 54e57ec4..be87e8ac 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -17,6 +17,7 @@ #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" +#include "aidge/operator/GenericOperator.hpp" #include "aidge/operator/Memorize.hpp" #include "aidge/operator/Pop.hpp" #include "aidge/operator/Stack.hpp" @@ -28,6 +29,7 @@ #include "aidge/operator/MetaOperator.hpp" #include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/scheduler/ParallelScheduler.hpp" +#include "aidge/graph/Testing.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" @@ -520,6 +522,57 @@ TEST_CASE("[cpu/scheduler] Accumulate", "[scheduler]") { REQUIRE(*output == *expectedOutput); } +TEST_CASE("[cpu/scheduler] Branch", "[scheduler]") { + std::shared_ptr<Tensor> in = std::make_shared<Tensor>( + Array2D<float, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); + + std::shared_ptr<GraphView> g = Sequential({ + Producer(in, "input"), + Parallel({ + Sequential({ + GenericOperator("b0_op1", {InputCategory::Data}, 1), + GenericOperator("b0_op2", {InputCategory::Data}, 1), + GenericOperator("b0_op3", {InputCategory::Data}, 1), + GenericOperator("b0_op4", {InputCategory::Data}, 1), + GenericOperator("b0_op5", {InputCategory::Data}, 1) + }), + Sequential({ + GenericOperator("b1_op1", {InputCategory::Data}, 1), + GenericOperator("b1_op2", {InputCategory::Data}, 1), + GenericOperator("b1_op3", {InputCategory::Data}, 1) + }), + Sequential({ + GenericOperator("b2_op1", {InputCategory::Data}, 1) + }) + }), + GenericOperator("op1", {InputCategory::Data, InputCategory::Data, InputCategory::Data}, 1), + GenericOperator("op2", {InputCategory::Data}, 1), + GenericOperator("op3", 
{InputCategory::Data}, 1) + }); + + g->save("branch_forwarded"); + + auto scheduler = SequentialScheduler(g); + scheduler.generateScheduling(); + scheduler.saveStaticSchedulingDiagram("branch_scheduling"); + + // Default scheduling order is not necessarily deterministic, but is guaranteed to be correct in every case. + // This behavior might change in the future. + auto seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::Default); + fmt::println("seqSchedule = {}", seqSchedule); + + scheduler.tagForkBranches(); + g->save("branch_forwarded_tag"); + + seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::ShortestBranchFirst); + REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{ + "Producer", "b2_op1", "b1_op1", "b1_op2", "b1_op3", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "op1", "op2", "op3"}); + + seqSchedule = scheduler.Scheduler::getSequentialStaticScheduling(0, Scheduler::SchedulingPolicy::LonguestBranchFirst); + REQUIRE(nodePtrTo(seqSchedule, nodePtrToType) == std::vector<std::string>{ + "Producer", "b0_op1", "b0_op2", "b0_op3", "b0_op4", "b0_op5", "b1_op1", "b1_op2", "b1_op3", "b2_op1", "op1", "op2", "op3"}); +} + #ifdef WITH_OPENSSL TEST_CASE("[cpu/scheduler] Select", "[scheduler]") { std::shared_ptr<Tensor> in = std::make_shared<Tensor>( -- GitLab