diff --git a/include/aidge/learning/optimizer/Optimizer.hpp b/include/aidge/learning/optimizer/Optimizer.hpp
index 195d64965d3ba4eb89c9c4d0ca2155cb719f76f3..83ba3f37f35f608c416dc8750a25c8b226fac8bf 100644
--- a/include/aidge/learning/optimizer/Optimizer.hpp
+++ b/include/aidge/learning/optimizer/Optimizer.hpp
@@ -48,9 +48,9 @@ public:
 
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
         mParameters = parameters;
-        for (const auto& param : parameters) {
-            param->initGrad(); // create gradient and set it to zeros
-        }
+        // for (const auto& param : parameters) {
+        //     param->initGrad(); // create gradient and set it to zeros
+        // }
     }
 
     constexpr float learningRate() const noexcept {
diff --git a/src/loss/classification/BCE.cpp b/src/loss/classification/BCE.cpp
index b11541a27b5d578b33d484f38281b79a899ed36d..d5156072e9aeff84470fc60a4efb7571de81483b 100644
--- a/src/loss/classification/BCE.cpp
+++ b/src/loss/classification/BCE.cpp
@@ -40,7 +40,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                                const std::shared_ptr<Tensor>& target) {
     /*
 	Binary Cross Entropy (BCE) loss function
-	
+
     Implementation note:
     loss function is computed using a graph in order not to be backend dependent.
     */
@@ -76,7 +76,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(add2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add2_node, 0, 1);
-	
+
     // Define nodes: sub1 = 1 - prediction + eps2 and sub2 = - (1 - target + eps2)
     const std::shared_ptr<Node> sub1_node = Sub("sub1");
     const std::shared_ptr<Node> sub2_node = Sub("sub2");
@@ -86,7 +86,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(sub2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{1.0f + eps2}}))
         ->addChild(sub2_node, 0, 1);
-	
+
     // Define nodes: ln1 = ln(prediction + eps1) and ln2 = ln(1 - prediction + eps2)
     const std::shared_ptr<Node> ln1_node = Ln("ln1");
     const std::shared_ptr<Node> ln2_node = Ln("ln2");
@@ -100,12 +100,12 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     ln1_node->addChild(mul1_node, 0, 1);
     sub2_node->addChild(mul2_node, 0, 0);
     ln2_node->addChild(mul2_node, 0, 1);
-	
+
     // Define node: sub3 = - [(target + eps1) * ln(prediction + eps1) + (1 - target + eps2) * ln(1 - prediction + eps2)]
     const std::shared_ptr<Node> sub3_node = Sub("sub3");
     mul2_node->addChild(sub3_node, 0, 0);
     mul1_node->addChild(sub3_node, 0, 1);
-	
+
     // Define nodes: div1 = (target + eps1) / (prediction + eps1) and div2 = - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> div1_node = Div("div1");
     const std::shared_ptr<Node> div2_node = Div("div2");
@@ -113,27 +113,27 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     add1_node->addChild(div1_node, 0, 1);
     sub2_node->addChild(div2_node, 0, 0);
     sub1_node->addChild(div2_node, 0, 1);
-	
+
     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> add3_node = Add(2, "add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);
 
-    // Define node: loss    
+    // Define node: loss
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto loss_node = ReduceMean(axes_dims, 1, "loss");
     sub3_node->addChild(loss_node, 0, 0);
-		
+
     // Define node: gradient
     const std::shared_ptr<Node> gradient_node = Mul("gradient");
     add3_node->addChild(gradient_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{-1.0f/float(target->dims()[0])}}))
         ->addChild(gradient_node, 0, 1);
-    	
+
     // Create GraphView
     std::shared_ptr<GraphView> gv_loss = std::make_shared<GraphView>("BCE");
-    gv_loss->add({prediction_node, target_node, 
+    gv_loss->add({prediction_node, target_node,
                   add1_node->getParent(1), add1_node,
                   add2_node->getParent(1), add2_node,
                   sub1_node->getParent(0), sub1_node,
@@ -142,16 +142,16 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   sub3_node, loss_node,
                   add3_node, gradient_node->getParent(1), gradient_node});
     gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());
-	
+
     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
-	
-    prediction->initGrad(); // Enable gradient for output
-    std::shared_ptr<Tensor> outputGrad = prediction->grad(); 
+
+    // prediction->initGrad(); // Enable gradient for output
+    std::shared_ptr<Tensor> outputGrad = prediction->grad();
     const std::shared_ptr<OperatorTensor> gradient_op = std::dynamic_pointer_cast<OperatorTensor>(gradient_node->getOperator());
     outputGrad->copyFrom(gradient_op->getOutput(0)->clone()); // Update gradient
-	
+
     const std::shared_ptr<OperatorTensor> loss_op = std::dynamic_pointer_cast<OperatorTensor>(loss_node->getOperator());
     return loss_op->getOutput(0)->clone(); // Return loss
 }
diff --git a/src/loss/regression/MSE.cpp b/src/loss/regression/MSE.cpp
index db45e349dce9084ca596ff4d2925d0341aaf0655..3d7ffe923bfa957c43fa93ef7c234ef1bdf63f06 100644
--- a/src/loss/regression/MSE.cpp
+++ b/src/loss/regression/MSE.cpp
@@ -45,7 +45,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
     (2/NbBatch)->Mul->Gradient
     */
 
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
 
     // compile_gradient(graph);  // Warning compile gradient here, without
     //                           // it, grad is nullptr. Maybe we can find a better
diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp
index ef2d52106140507e173cb11a4d6cfc974125e715..bd297903d47b90b755ff59ace0e052aa62c309d7 100644
--- a/unit_tests/optimizer/Test_Adam.cpp
+++ b/unit_tests/optimizer/Test_Adam.cpp
@@ -83,7 +83,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
             optim_tensors[i] = std::make_shared<Tensor>(dims);
             optim_tensors[i]->setBackend("cpu");
             optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i]->initGrad();
+            // optim_tensors[i]->initGrad();
 
             grad_tensors[i] = std::make_shared<Tensor>(dims);
             grad_tensors[i]->setBackend("cpu");
diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp
index df9924d557d89d0483d018ce08951cf573e233d7..6b8edc60a6f1583d1241552442558bff5f2ce52e 100644
--- a/unit_tests/optimizer/Test_SGD.cpp
+++ b/unit_tests/optimizer/Test_SGD.cpp
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
             optim_tensors[i] = std::make_shared<Tensor>(dims);
             optim_tensors[i]->setBackend("cpu");
             optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i]->initGrad();
+            // optim_tensors[i]->initGrad();
 
             grad_tensors[i] = std::make_shared<Tensor>(dims);
             grad_tensors[i]->setBackend("cpu");
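
All of the initGrad() calls commented out above rely on the same behaviour: the gradient tensor no longer has to be created explicitly before it is read, as the BCE/MSE code still calls prediction->grad() directly afterwards. Below is a minimal, non-authoritative sketch of what calling code looks like after this patch, assuming Tensor::grad() lazily allocates the gradient on first access; the include path and the laziness itself are assumptions, not something this diff states.

    #include <cstddef>
    #include <memory>
    #include <vector>
    #include "aidge/data/Tensor.hpp"  // assumed header path for Aidge::Tensor

    int main() {
        // Same construction pattern as in Test_SGD.cpp / Test_Adam.cpp.
        std::vector<std::size_t> dims{2, 3};
        auto t = std::make_shared<Aidge::Tensor>(dims);
        t->setBackend("cpu");

        // No t->initGrad() call anymore: the gradient tensor is assumed to be
        // created (and zeroed) lazily the first time grad() is accessed, which
        // is why the explicit initGrad() calls could be commented out above.
        std::shared_ptr<Aidge::Tensor> g = t->grad();
        return g ? 0 : 1;
    }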