Commit 5de68dcc authored by Jerome Hue, committed by Maxence Naud

chore: Improve and test Mul Backward kernel

- Rework the Mul backward kernel to make it more straightforward and easily
  adaptable to other element-wise kernels (sub, add, div).
- Add tests, including a new test with random values
parent 7302dd25
3 merge requests: !128 Draft: fix failed onnx tests, !122 [add] Element wise backward, !115 Draft: Add management for aidge module dependencies
Pipeline #63584 passed
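For context on the reworked backward kernel: when an input is broadcast, its gradient has to accumulate contributions from every output element that reuses it. A minimal standalone illustration (not Aidge code; shapes and values chosen arbitrarily) for Z = X * Y with X of shape {2,3} and Y broadcast from shape {1,3}:

    #include <array>
    #include <cstddef>
    #include <iostream>

    // Standalone illustration (not Aidge code): Z = X * Y with X of shape
    // {2,3} and Y of shape {1,3}, broadcast along the first axis.
    //   dL/dX[r][c] = dL/dZ[r][c] * Y[c]
    //   dL/dY[c]    = sum over r of dL/dZ[r][c] * X[r][c]
    // The sum over the broadcast axis is why the kernel accumulates with '+='.
    int main() {
        const std::array<std::array<float, 3>, 2> X{{{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}}};
        const std::array<float, 3> Y{10.f, 20.f, 30.f};
        const std::array<std::array<float, 3>, 2> gradZ{{{1.f, 1.f, 1.f}, {1.f, 1.f, 1.f}}};

        std::array<std::array<float, 3>, 2> gradX{};
        std::array<float, 3> gradY{};

        for (std::size_t r = 0; r < 2; ++r) {
            for (std::size_t c = 0; c < 3; ++c) {
                gradX[r][c] += gradZ[r][c] * Y[c];      // broadcast axis of Y maps to index 0
                gradY[c]    += gradZ[r][c] * X[r][c];   // accumulate over the broadcast axis
            }
        }

        std::cout << "gradY = " << gradY[0] << ", " << gradY[1] << ", " << gradY[2] << '\n';
        // Prints: gradY = 5, 7, 9 (column sums of X, since gradZ is all ones)
    }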
@@ -34,6 +34,7 @@ using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
     const std::size_t,
     const std::vector<std::size_t>,
     const std::vector<std::size_t>,
+    const std::vector<std::size_t>,
     const void*,
     const void*,
     const void*,
...
@@ -149,61 +149,53 @@ void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
 template <class I1, class I2, class O>
 void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
                                  const std::size_t input1Length,
-                                 const std::size_t grad0Length,
-                                 const std::vector<std::size_t> input0Dims,
-                                 const std::vector<std::size_t> input1Dims,
+                                 const std::size_t gradOutputLength,
+                                 const std::vector<std::size_t>& dims0,
+                                 const std::vector<std::size_t>& dims1,
+                                 const std::vector<std::size_t>& outputDims,
                                  const void* input0_,
                                  const void* input1_,
                                  const void* grad_output_,
-                                 void* gradientInput0,
-                                 void* gradientInput1)
+                                 void* gradientInput0_,
+                                 void* gradientInput1_)
 {
-    const auto* input0 = static_cast<const I1*>(input0_);
-    const auto* input1 = static_cast<const I1*>(input1_);
-    const auto* grad_output = static_cast<const O*>(grad_output_);
-    auto* grad_input_0 = static_cast<I1*>(gradientInput0);
-    auto* grad_input_1 = static_cast<I2*>(gradientInput1);
-
-    if(input0Dims.size() >= input1Dims.size())
-    {
-        AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
-
-        for(auto i = 0U; i < input0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input1Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
-
-            grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
-        }
-
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input1Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
-
-            grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
-        }
-    } else {
-        AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
-
-        for(auto i = 0U; i < input1Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input0Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
-
-            grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
-        }
-
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
-            const auto indices = getMultiDimIndices(input0Dims, i);
-            const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
-
-            grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
-        }
-    }
+    const I1* input0 = static_cast<const I1*>(input0_);
+    const I2* input1 = static_cast<const I2*>(input1_);
+    const O* grad_output = static_cast<const O*>(grad_output_);
+    auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
+    auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
+
+    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
+    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
+
+    // Broadcast dims0 and dims1 to match the shape of outputDims
+    auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
+    auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
+
+    for (std::size_t i = 0; i < gradOutputLength; ++i) {
+        auto idxOutputGrad = getMultiDimIndices(outputDims, i);
+        std::vector<std::size_t> idxInput0(broadcastedDims0.size());
+        std::vector<std::size_t> idxInput1(broadcastedDims1.size());
+
+        // Map output indices to input0 indices, considering broadcasting
+        for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
+            // If input0 is broadcast along this dimension (size == 1), the index is 0;
+            // idxInput0 represents the multi-dimensional index of input0
+            // contributing to the output at index i.
+            idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
+        }
+
+        for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
+            idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
+        }
+
+        // We have to access the tensors with a flat index, hence the conversion
+        auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
+        auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
+
+        grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]);
+        grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]);
+    }
 }
...
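As the commit message notes, this structure is meant to transfer to other element-wise backward kernels: the broadcast index mapping stays identical and only the two accumulation lines change. A hypothetical sketch for a Div backward kernel, not part of this commit, reusing the same getBroadcastedDims / getMultiDimIndices / getFlattenedIndex helpers shown above (and assuming <vector> and <algorithm> are available):

    // Hypothetical Div backward sketch (not in this commit), mirroring the
    // reworked Mul kernel: d(x/y)/dx = 1/y and d(x/y)/dy = -x/y^2.
    template <class I1, class I2, class O>
    void DivImpl_cpu_backward_kernel_sketch(const std::size_t input0Length,
                                            const std::size_t input1Length,
                                            const std::size_t gradOutputLength,
                                            const std::vector<std::size_t>& dims0,
                                            const std::vector<std::size_t>& dims1,
                                            const std::vector<std::size_t>& outputDims,
                                            const void* input0_, const void* input1_,
                                            const void* grad_output_,
                                            void* gradientInput0_, void* gradientInput1_)
    {
        const I1* input0 = static_cast<const I1*>(input0_);
        const I2* input1 = static_cast<const I2*>(input1_);
        const O* grad_output = static_cast<const O*>(grad_output_);
        auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
        auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
        std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
        std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));

        // Same broadcast handling as the Mul kernel above
        const auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
        const auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
        for (std::size_t i = 0; i < gradOutputLength; ++i) {
            const auto idxOutputGrad = getMultiDimIndices(outputDims, i);
            std::vector<std::size_t> idxInput0(broadcastedDims0.size());
            std::vector<std::size_t> idxInput1(broadcastedDims1.size());
            for (std::size_t d = 0; d < broadcastedDims0.size(); ++d) {
                idxInput0[d] = (broadcastedDims0[d] == 1) ? 0 : idxOutputGrad[d];
            }
            for (std::size_t d = 0; d < broadcastedDims1.size(); ++d) {
                idxInput1[d] = (broadcastedDims1[d] == 1) ? 0 : idxOutputGrad[d];
            }
            const auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
            const auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);

            // Only these two accumulation lines differ from the Mul kernel
            // (for Add/Sub they would be '+= grad_output[i]' and '+=/-= grad_output[i]').
            grad_input_0[idx0] += static_cast<I1>(grad_output[i] / input1[idx1]);
            grad_input_1[idx1] += static_cast<I2>(-grad_output[i] * input0[idx0]
                                                  / (input1[idx1] * input1[idx1]));
        }
    }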
@@ -58,6 +58,7 @@ void Aidge::MulImpl_cpu::backward() {
         /* grad0Length */ out0grad->size(),
         /* input0Dims */ in0->dims(),
         /* input1Dims */ in1->dims(),
+        out0grad->dims(),
         getCPUPtr(in0),
         getCPUPtr(in1),
         getCPUPtr(out0grad),
...
This diff is collapsed.
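The commit message also mentions a new test with random values. As an illustration only (not the test added by this commit), a random-value check of the broadcast gradient reduction could compare the flat-index accumulation used by the kernel against an explicit per-axis reduction:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <random>
    #include <vector>

    // Illustration only: for X of shape {R,C} and Y of shape {1,C},
    // dL/dY[c] must equal the sum over rows of gradZ[r][c] * X[r][c].
    int main() {
        const std::size_t R = 4, C = 5;
        std::mt19937 gen(42);
        std::uniform_real_distribution<float> dist(-1.f, 1.f);

        std::vector<float> X(R * C), gradZ(R * C);
        for (auto& v : X)     { v = dist(gen); }
        for (auto& v : gradZ) { v = dist(gen); }

        // Flat-index accumulation, as the kernel does it: the broadcast row
        // axis of Y always maps to index 0, hence the '+='.
        std::vector<float> gradY(C, 0.f);
        for (std::size_t i = 0; i < R * C; ++i) {
            gradY[i % C] += gradZ[i] * X[i];
        }

        // Explicit reference: reduce over the broadcast (row) axis.
        for (std::size_t c = 0; c < C; ++c) {
            float expected = 0.f;
            for (std::size_t r = 0; r < R; ++r) {
                expected += gradZ[r * C + c] * X[r * C + c];
            }
            assert(std::abs(gradY[c] - expected) < 1e-5f);
        }
        return 0;
    }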