Merge branch 'dev' of https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu into dev

30d2af82 · Lucas Lopez · 35e720e9 · 9d427c11 · 30d2af82 · 30d2af82
Commit 30d2af82 authored 4 months ago by Lucas Lopez
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,8 @@ file(GLOB_RECURSE inc_files "include/*.hpp")
 add_library(${module_name} ${src_files} ${inc_files})
 target_link_libraries(${module_name}
+    PRIVATE
+        fmt::fmt
    PUBLIC
        _aidge_core # _ is added because we link the exported target and not the project
 )

--- a/include/aidge/backend/cpu/operator/MulImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl.hpp
@@ -34,6 +34,7 @@ using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
        const std::size_t,
        const std::vector<std::size_t>,
        const std::vector<std::size_t>,
+        const std::vector<std::size_t>,
        const void*,
        const void*,
        const void*,

--- a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
@@ -149,61 +149,53 @@ void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
 template <class I1, class I2, class O>
 void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
-                                 const std::size_t input1Length,
+                                  const std::size_t input1Length,
-                                 const std::size_t grad0Length,
+                                  const std::size_t gradOutputLength,
-                                 const std::vector<std::size_t> input0Dims,
+                                  const std::vector<std::size_t>& dims0,
-                                 const std::vector<std::size_t> input1Dims,
+                                  const std::vector<std::size_t>& dims1,
-                                 const void* input0_,
+                                  const std::vector<std::size_t>& outputDims,
-                                 const void* input1_,
+                                  const void* input0_,
-                                 const void* grad_output_,
+                                  const void* input1_,
-                                 void* gradientInput0,
+                                  const void* grad_output_,
-                                 void* gradientInput1)
+                                  void* gradientInput0_,
+                                  void* gradientInput1_)
 {
-    const auto* input0 = static_cast<const I1*>(input0_);
+    const I1* input0 = static_cast<const I1*>(input0_);
-    const auto* input1 = static_cast<const I1*>(input1_);
+    const I2* input1 = static_cast<const I2*>(input1_);
-    const auto* grad_output = static_cast<const O*>(grad_output_);
+    const O* grad_output = static_cast<const O*>(grad_output_);
-    auto* grad_input_0 = static_cast<I1*>(gradientInput0);
+    auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
-    auto* grad_input_1 = static_cast<I2*>(gradientInput1);
+    auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
+    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
-    if(input0Dims.size() >= input1Dims.size())
+    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
-    {
-        AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
+    // Broadcast dims0 and dims1 to match the shape of outputDims
+    auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
-        for(auto i = 0U; i < input0Length; ++i)
+    auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
-        {
-            const auto indices = getMultiDimIndices(input1Dims, i);
+    for (std::size_t i = 0; i < gradOutputLength; ++i) {
-            const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
+        auto idxOutputGrad = getMultiDimIndices(outputDims, i);
+        std::vector<std::size_t> idxInput0(broadcastedDims0.size());
-            grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
+        std::vector<std::size_t> idxInput1(broadcastedDims1.size());
+        // Map output indices to input0 indices, considering broadcasting
+        for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
+            // If input0 is broadcast along this dimension (size == 1, including when both inputs have size 1), the index is 0.
+            // idxInput0 represents the multi-dimensional index of the input0 element
+            // contributing to the output at index i.
+            idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
        }
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
+        for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
-        {
+            idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
-            const auto indices = getMultiDimIndices(input1Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
-            grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
        }
-    } else {
+        // We have to access tensors with a flat index, hence the conversion
-        AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
+        auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
+        auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
-        for(auto i = 0U; i < input1Length; ++i)
+        grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]);
-        {
+        grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]);
-            const auto indices = getMultiDimIndices(input0Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
-            grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
-        }
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input0Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
-            grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
-        }
    }
 }

--- a/src/operator/MulImpl.cpp
+++ b/src/operator/MulImpl.cpp
@@ -58,6 +58,7 @@ void Aidge::MulImpl_cpu::backward() {
               /* grad0Length  */ out0grad->size(),
               /* input0Dims   */ in0->dims(),
               /* input1Dims   */ in1->dims(),
+               out0grad->dims(),
               getCPUPtr(in0),
               getCPUPtr(in1),
               getCPUPtr(out0grad),

--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -100,7 +100,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
        });                                     //
        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}});
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        Tensor expectedOutput = Array4D<int,3,3,3,2> {
            {                                               //
                {                                           //
                    {{ 120, 222},{ 124, 226},{ 128, 230}},  //
@@ -118,7 +118,7 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
                    {{ 144, 246},{ 148, 250},{152, 254}}    //
                }                                           //
            }                                               //
-        });                                                 //
+        };                                                 //
        std::shared_ptr<Node> myAdd_0 = Add();
        std::shared_ptr<Node> myAdd_1 = Add();
@@ -135,8 +135,8 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
        op_1->setBackend("cpu");
        myAdd_0->forward();
        myAdd_1->forward();
-        op_1->getOutput(0)->print();
+        Log::info("Add_1 Tensor:\n{}", *(op_1->getOutput(0)));
-        expectedOutput->print();
+        Log::info("Expected Add_1 Tensor:\n{}", expectedOutput);
-        REQUIRE(*op_1->getOutput(0) == *expectedOutput);
+        REQUIRE(*op_1->getOutput(0) == expectedOutput);
    }
 }
\ No newline at end of file
--- a/unit_tests/operator/Test_MulImpl.cpp
+++ b/unit_tests/operator/Test_MulImpl.cpp