From 458c440121cb3f418111a3c29f174916f31f8329 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Mon, 1 Jul 2024 00:17:57 +0000
Subject: [PATCH] Comment out Tensor::initGrad() calls

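Comment out the explicit Tensor::initGrad() calls in
Optimizer::setParameters(), in the BCE and MSE loss functions, and in
the Adam and SGD optimizer unit tests. The calls are kept as comments
rather than removed. Trailing whitespace in BCE.cpp is cleaned up along
the way.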
---
 .../aidge/learning/optimizer/Optimizer.hpp    |  6 ++--
 src/loss/classification/BCE.cpp               | 30 +++++++++----------
 src/loss/regression/MSE.cpp                   |  2 +-
 unit_tests/optimizer/Test_Adam.cpp            |  2 +-
 unit_tests/optimizer/Test_SGD.cpp             |  2 +-
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/include/aidge/learning/optimizer/Optimizer.hpp b/include/aidge/learning/optimizer/Optimizer.hpp
index 195d649..83ba3f3 100644
--- a/include/aidge/learning/optimizer/Optimizer.hpp
+++ b/include/aidge/learning/optimizer/Optimizer.hpp
@@ -48,9 +48,9 @@ public:
 
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
         mParameters = parameters;
-        for (const auto& param : parameters) {
-            param->initGrad(); // create gradient and set it to zeros
-        }
+        // for (const auto& param : parameters) {
+        //     param->initGrad(); // create gradient and set it to zeros
+        // }
     }
 
     constexpr float learningRate() const noexcept {
diff --git a/src/loss/classification/BCE.cpp b/src/loss/classification/BCE.cpp
index b11541a..d515607 100644
--- a/src/loss/classification/BCE.cpp
+++ b/src/loss/classification/BCE.cpp
@@ -40,7 +40,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                                const std::shared_ptr<Tensor>& target) {
     /*
 	Binay Cross Entropy (BCE) loss function
-	
+
     Implementation note:
     loss function is computed using a graph in order to not be backend dependant.
     */
@@ -76,7 +76,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(add2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add2_node, 0, 1);
-	
+
     // Define nodes: sub1 = 1 - prediction + eps2 and sub2 = - (1 - target + eps2)
     const std::shared_ptr<Node> sub1_node = Sub("sub1");
     const std::shared_ptr<Node> sub2_node = Sub("sub2");
@@ -86,7 +86,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(sub2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{1.0f + eps2}}))
         ->addChild(sub2_node, 0, 1);
-	
+
     // Define nodes: ln1 = ln(prediction + eps1) and ln2 = ln(1 - prediction + eps2)
     const std::shared_ptr<Node> ln1_node = Ln("ln1");
     const std::shared_ptr<Node> ln2_node = Ln("ln2");
@@ -100,12 +100,12 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     ln1_node->addChild(mul1_node, 0, 1);
     sub2_node->addChild(mul2_node, 0, 0);
     ln2_node->addChild(mul2_node, 0, 1);
-	
+
     // Define node: sub3 = - [(target + eps1) * ln(prediction + eps1) + (1 - target + eps2) * ln(1 - prediction + eps2)]
     const std::shared_ptr<Node> sub3_node = Sub("sub3");
     mul2_node->addChild(sub3_node, 0, 0);
     mul1_node->addChild(sub3_node, 0, 1);
-	
+
     // Define nodes: div1 = (target + eps1) / (prediction + eps1) and div2 = - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> div1_node = Div("div1");
     const std::shared_ptr<Node> div2_node = Div("div2");
@@ -113,27 +113,27 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     add1_node->addChild(div1_node, 0, 1);
     sub2_node->addChild(div2_node, 0, 0);
     sub1_node->addChild(div2_node, 0, 1);
-	
+
     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> add3_node = Add(2, "add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);
 
-    // Define node: loss    
+    // Define node: loss
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto loss_node = ReduceMean(axes_dims, 1, "loss");
     sub3_node->addChild(loss_node, 0, 0);
-		
+
     // Define node: gradient
     const std::shared_ptr<Node> gradient_node = Mul("gradient");
     add3_node->addChild(gradient_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{-1.0f/float(target->dims()[0])}}))
         ->addChild(gradient_node, 0, 1);
-    	
+
     // Create GraphView
     std::shared_ptr<GraphView> gv_loss = std::make_shared<GraphView>("BCE");
-    gv_loss->add({prediction_node, target_node, 
+    gv_loss->add({prediction_node, target_node,
                   add1_node->getParent(1), add1_node,
                   add2_node->getParent(1), add2_node,
                   sub1_node->getParent(0), sub1_node,
@@ -142,16 +142,16 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   sub3_node, loss_node,
                   add3_node, gradient_node->getParent(1), gradient_node});
     gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());
-	
+
     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
-	
-    prediction->initGrad(); // Enable gradient for output
-    std::shared_ptr<Tensor> outputGrad = prediction->grad(); 
+
+    // prediction->initGrad(); // Enable gradient for output
+    std::shared_ptr<Tensor> outputGrad = prediction->grad();
     const std::shared_ptr<OperatorTensor> gradient_op = std::dynamic_pointer_cast<OperatorTensor>(gradient_node->getOperator());
     outputGrad->copyFrom(gradient_op->getOutput(0)->clone()); // Update gradient
-	
+
     const std::shared_ptr<OperatorTensor> loss_op = std::dynamic_pointer_cast<OperatorTensor>(loss_node->getOperator());
     return loss_op->getOutput(0)->clone(); // Return loss
 }
diff --git a/src/loss/regression/MSE.cpp b/src/loss/regression/MSE.cpp
index db45e34..3d7ffe9 100644
--- a/src/loss/regression/MSE.cpp
+++ b/src/loss/regression/MSE.cpp
@@ -45,7 +45,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
     (2/NbBatch)->Mul->Gradient
     */
 
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
 
     // compile_gradient(graph);  // Warning compile gradient here, without
     //                           // it, grad is nullptr. Maybe we can find a better
diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp
index ef2d521..bd29790 100644
--- a/unit_tests/optimizer/Test_Adam.cpp
+++ b/unit_tests/optimizer/Test_Adam.cpp
@@ -83,7 +83,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
             optim_tensors[i] = std::make_shared<Tensor>(dims);
             optim_tensors[i]->setBackend("cpu");
             optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i]->initGrad();
+            // optim_tensors[i]->initGrad();
 
             grad_tensors[i] = std::make_shared<Tensor>(dims);
             grad_tensors[i]->setBackend("cpu");
diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp
index df9924d..6b8edc6 100644
--- a/unit_tests/optimizer/Test_SGD.cpp
+++ b/unit_tests/optimizer/Test_SGD.cpp
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
             optim_tensors[i] = std::make_shared<Tensor>(dims);
             optim_tensors[i]->setBackend("cpu");
             optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i]->initGrad();
+            // optim_tensors[i]->initGrad();
 
             grad_tensors[i] = std::make_shared<Tensor>(dims);
             grad_tensors[i]->setBackend("cpu");
-- 
GitLab