From b854bc94fe2134997bfb2e25ff2fbc0d645bdb45 Mon Sep 17 00:00:00 2001
From: Antoni Olivier <olivier.antoni@cea.fr>
Date: Wed, 16 Apr 2025 14:10:45 +0200
Subject: [PATCH] Accumulate input gradients in backward kernels instead of overwriting them

Backward kernels wrote each computed gradient straight into the input
gradient tensor (grad_input[i] = ...), discarding any contribution already
stored there. When a tensor feeds several consumers, the chain rule requires
summing the contributions of every consumer's backward pass, so the kernels
now accumulate (grad_input[i] += ...).

Zero-initializing the gradient tensor becomes the caller's responsibility:
the std::fill/std::fill_n calls are dropped from the MaxPooling, Mul, Pow
and Sub kernels, and the Ln kernel no longer writes an explicit GI(0) in its
else branch. The MaxPooling unit test now resets the input gradient with
setGrad(nullptr) in place of the commented-out resetInput(0) call.

---
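Notes (not part of the commit message):

Rationale sketch: when one tensor feeds several consumers, the chain rule
sums the gradient contributions of all consumers, so a backward kernel must
accumulate into grad_input rather than overwrite it, and the gradient buffer
must be zeroed once before the backward pass. The stand-alone C++ example
below illustrates this with a simplified stand-in for the patched ReLU
kernel; relu_backward and the manual caller-side zeroing are illustrative
assumptions, not the Aidge API.

    #include <cstddef>
    #include <vector>

    // Simplified stand-in for a patched backward kernel: it accumulates
    // into grad_input and assumes the caller zero-initialized it.
    template <class T>
    void relu_backward(std::size_t n, const T* input,
                       const T* grad_output, T* grad_input) {
        for (std::size_t i = 0; i < n; ++i) {
            grad_input[i] += (input[i] > 0) ? grad_output[i] : T(0);
        }
    }

    int main() {
        // x feeds two consumers; its gradient is the sum of both
        // contributions.
        std::vector<float> x{1.0f, -2.0f, 3.0f};
        std::vector<float> gy1{0.1f, 0.2f, 0.3f}; // grad from consumer #1
        std::vector<float> gy2{1.0f, 1.0f, 1.0f}; // grad from consumer #2
        std::vector<float> gx(x.size(), 0.0f);    // caller pre-zeroes

        relu_backward(x.size(), x.data(), gy1.data(), gx.data());
        relu_backward(x.size(), x.data(), gy2.data(), gx.data());
        // gx == {1.1, 0.0, 1.3}; with '=' instead of '+=' the first
        // contribution would be lost and gx would end up {1.0, 0.0, 1.0}.
    }

The same convention explains the deletions below: the std::fill/std::fill_n
calls in the MaxPooling, Mul, Pow and Sub kernels and the else branch
writing GI(0) in the Ln kernel are redundant once the caller guarantees a
zeroed gradient buffer.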
 include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp     | 2 +-
 include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp     | 2 +-
 .../aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp    | 2 +-
 .../aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp    | 2 +-
 include/aidge/backend/cpu/operator/LnImpl_kernels.hpp       | 4 +---
 .../aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp   | 6 ------
 include/aidge/backend/cpu/operator/MulImpl_kernels.hpp      | 3 ---
 include/aidge/backend/cpu/operator/PowImpl_kernels.hpp      | 6 ------
 include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp     | 2 +-
 include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp  | 2 +-
 include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp     | 2 +-
 include/aidge/backend/cpu/operator/SubImpl_kernels.hpp      | 3 ---
 include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp     | 2 +-
 unit_tests/operator/Test_MaxPoolingImpl.cpp                 | 2 +-
 14 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp
index 141e5b60..e82f34fc 100644
--- a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp
@@ -43,7 +43,7 @@ void AtanImpl_cpu_backward_kernel(const std::size_t inputLength,
     // Apply the derivative of atan for each element in the input array
     for (size_t i = 0; i < inputLength; ++i) {
         // dx = dy * (1 / (1 + x^2))
-        grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i]));
+        grad_input[i] += grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i]));
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
index f7a64585..65bf5094 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
@@ -48,7 +48,7 @@ void ClipImpl_cpu_backward_kernel(
     GI* grad_input = static_cast<GI*>(grad_input_);
 
     for (std::size_t i = 0; i < length; ++i) {
-        grad_input[i] = ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
+        grad_input[i] += ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp b/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp
index 92f12fbe..c823b294 100644
--- a/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/HeavisideImpl_kernels.hpp
@@ -54,7 +54,7 @@ void HeavisideImplCpuBackwardKernel(std::size_t inputLength,
     GI* grad_input = static_cast<GI*>(grad_input_);
 
     for (size_t i = 0; i < inputLength; ++i) {
-        grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + (output[i] * M_PI) * (output[i] * M_PI)));
+        grad_input[i] += grad_output[i] * static_cast<O>(1.0 / (1.0 + (output[i] * M_PI) * (output[i] * M_PI)));
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp
index 1b4c3053..236038c6 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp
@@ -45,7 +45,7 @@ void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
     const I negativeSlope = static_cast<const I>(negativeSlope_);
 
     for (std::size_t i = 0; i < inputLength; ++i) {
-        grad_input[i] = (input[i] > 0) ? grad_output[i] : negativeSlope*grad_output[i];
+        grad_input[i] += (input[i] > 0) ? grad_output[i] : negativeSlope*grad_output[i];
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp
index ee2864b6..8b57b417 100755
--- a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp
@@ -48,9 +48,7 @@ void LnImpl_cpu_backward_kernel(const std::size_t inputLength,
 	
     for (std::size_t i = 0; i < inputLength; ++i) {
 		if (input[i] > I(eps)) {
-			grad_input[i] = grad_output[i] / input[i];
-		} else {
-			grad_input[i] = GI(0);
+			grad_input[i] += grad_output[i] / input[i];
 		}
     }
 }
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
index 7fe272d5..3057878d 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
@@ -149,12 +149,6 @@ void MaxPoolingImpl2D_cpu_backward_kernel(
   const I *input = static_cast<const I *>(input_);
   I *grad = static_cast<I *>(grad_);
 
-  // Fill the gradient with 0 to avoid garbage data
-  std::fill(grad,
-	  grad + (dims[0] * dims[1] * dims[2] * dims[3]),
-	  static_cast<I>(0)
-  );
-
   // output H size
   auto hOut = static_cast<float>(
     dims[2] - (kernelDims[0] - 1) * dilations[0] - 1 + strideDims[0]
diff --git a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
index 36acb919..a88923fd 100644
--- a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
@@ -166,9 +166,6 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
     auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
     auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
 
-    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
-    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
-
     // Broadcast dims0 and dims1 to match the shape of outputDims
     auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
     auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
index cae10663..51fd1bb6 100644
--- a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
@@ -163,12 +163,6 @@ void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims,
     I2* grad1 = static_cast<I2*>(gradientInput1_);
     const O* gradOut = static_cast<const O*>(gradOutput_);
 
-    // Fill input grads with zeros
-	std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-	std::fill(grad0, grad0 + input0Elements, I1(0));
-	std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-	std::fill(grad1, grad1 + input1Elements, I2(0));
-
 	std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
     for (size_t oIndex = 0; oIndex < totalElements; ++oIndex)
     {
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
index 6b7c3c9c..3789052c 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
@@ -47,7 +47,7 @@ void ReLUImpl_cpu_backward_kernel(const std::size_t inputLength,
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
     for (std::size_t i = 0; i < inputLength; ++i) {
-        grad_input[i] = (input[i] > 0) ? grad_output[i] : 0;
+        grad_input[i] += (input[i] > 0) ? grad_output[i] : 0;
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
index 83ad4575..b3446dba 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
@@ -43,7 +43,7 @@ void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLength,
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
     for (std::size_t i = 0; i < inputLength; ++i) {
-        grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
+        grad_input[i] += output[i] * (O(1) - output[i]) * grad_output[i];
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
index bccc195e..beddc74d 100644
--- a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
@@ -44,7 +44,7 @@ void SqrtImpl_cpu_backward_kernel(const std::size_t inputLength,
     O* grad_input = static_cast<O*>(grad_input_);
 
     for (std::size_t i = 0; i < inputLength; ++i) {
-        grad_input[i] = static_cast<O>(0.5/output[i]) * grad_output[i];
+        grad_input[i] += static_cast<O>(0.5/output[i]) * grad_output[i];
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
index 8d3d80e9..751177a7 100644
--- a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
@@ -165,9 +165,6 @@ void SubImpl_cpu_backward_kernel(const std::size_t input0Length,
     auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
     auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
 
-    std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
-    std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
-
     auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
     auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
 
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
index 49cfe9cb..ca4510d9 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
@@ -39,7 +39,7 @@ void TanhImpl_cpu_backward_kernel(const std::size_t inputLength,
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
     for (std::size_t i = 0; i < inputLength; ++i) {
-        grad_input[i] = (O(1) - output[i] * output[i]) * grad_output[i];
+        grad_input[i] += (O(1) - output[i] * output[i]) * grad_output[i];
     }
 }
 
diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp
index 2bc5e1ee..e4d171c5 100644
--- a/unit_tests/operator/Test_MaxPoolingImpl.cpp
+++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp
@@ -345,7 +345,7 @@ TEST_CASE("[cpu/operator] MaxPooling(backward)", "[MaxPooling][CPU]") {
 			}}}
 		};
 
-		//op2->resetInput(0);
+		myInput4->setGrad(nullptr);
 		op2->associateInput(0, myInput4);
 		op2->setDataType(DataType::Float32);
 		op2->setBackend("cpu");
-- 
GitLab