Commit 458c4401 authored by Maxence Naud

comment Tensor::initGrad() calls

parent 51e3cf9e
1 merge request: !10 "version 0.1.2"
Pipeline #49600 passed
@@ -48,9 +48,9 @@ public:
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
         mParameters = parameters;
-        for (const auto& param : parameters) {
-            param->initGrad(); // create gradient and set it to zeros
-        }
+        // for (const auto& param : parameters) {
+        //     param->initGrad(); // create gradient and set it to zeros
+        // }
     }

     constexpr float learningRate() const noexcept {
...
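Note on the hunk above: the removed loop was the optimizer's eager gradient allocation. A minimal sketch of the behaviour this commit drops (the claim about lazy creation is an assumption; the diff only shows the calls being commented out):

    // Old behaviour: setParameters() allocated and zeroed a gradient
    // tensor for every parameter up front.
    for (const auto& param : parameters) {
        param->initGrad(); // create gradient and set it to zeros
    }
    // New behaviour: setParameters() only stores the parameter list, so
    // gradient tensors must now come from somewhere else -- presumably
    // created lazily on first access through Tensor::grad() (assumption,
    // not confirmed by this diff).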
@@ -40,7 +40,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                                const std::shared_ptr<Tensor>& target) {
     /*
     Binary Cross Entropy (BCE) loss function

     Implementation note:
     loss function is computed using a graph in order not to be backend dependent.
     */
@@ -76,7 +76,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(add2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add2_node, 0, 1);

     // Define nodes: sub1 = 1 - prediction + eps2 and sub2 = - (1 - target + eps2)
     const std::shared_ptr<Node> sub1_node = Sub("sub1");
     const std::shared_ptr<Node> sub2_node = Sub("sub2");
@@ -86,7 +86,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(sub2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{1.0f + eps2}}))
         ->addChild(sub2_node, 0, 1);

     // Define nodes: ln1 = ln(prediction + eps1) and ln2 = ln(1 - prediction + eps2)
     const std::shared_ptr<Node> ln1_node = Ln("ln1");
     const std::shared_ptr<Node> ln2_node = Ln("ln2");
@@ -100,12 +100,12 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     ln1_node->addChild(mul1_node, 0, 1);
     sub2_node->addChild(mul2_node, 0, 0);
     ln2_node->addChild(mul2_node, 0, 1);

     // Define node: sub3 = - [(target + eps1) * ln(prediction + eps1) + (1 - target + eps2) * ln(1 - prediction + eps2)]
     const std::shared_ptr<Node> sub3_node = Sub("sub3");
     mul2_node->addChild(sub3_node, 0, 0);
     mul1_node->addChild(sub3_node, 0, 1);

     // Define nodes: div1 = (target + eps1) / (prediction + eps1) and div2 = - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> div1_node = Div("div1");
     const std::shared_ptr<Node> div2_node = Div("div2");
@@ -113,27 +113,27 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     add1_node->addChild(div1_node, 0, 1);
     sub2_node->addChild(div2_node, 0, 0);
     sub1_node->addChild(div2_node, 0, 1);

     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> add3_node = Add(2, "add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);

     // Define node: loss
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto loss_node = ReduceMean(axes_dims, 1, "loss");
     sub3_node->addChild(loss_node, 0, 0);

     // Define node: gradient
     const std::shared_ptr<Node> gradient_node = Mul("gradient");
     add3_node->addChild(gradient_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{-1.0f/float(target->dims()[0])}}))
         ->addChild(gradient_node, 0, 1);

     // Create GraphView
     std::shared_ptr<GraphView> gv_loss = std::make_shared<GraphView>("BCE");
     gv_loss->add({prediction_node, target_node,
                   add1_node->getParent(1), add1_node,
                   add2_node->getParent(1), add2_node,
                   sub1_node->getParent(0), sub1_node,
@@ -142,16 +142,16 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   sub3_node, loss_node,
                   add3_node, gradient_node->getParent(1), gradient_node});
     gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());

     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
     std::shared_ptr<Tensor> outputGrad = prediction->grad();
     const std::shared_ptr<OperatorTensor> gradient_op = std::dynamic_pointer_cast<OperatorTensor>(gradient_node->getOperator());
     outputGrad->copyFrom(gradient_op->getOutput(0)->clone()); // Update gradient
     const std::shared_ptr<OperatorTensor> loss_op = std::dynamic_pointer_cast<OperatorTensor>(loss_node->getOperator());
     return loss_op->getOutput(0)->clone(); // Return loss
 }
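Read together, the node comments above define the epsilon-stabilised binary cross-entropy this graph evaluates, along with its analytic gradient. Reconstructed from those comments (y = target, ŷ = prediction, N = target->dims()[0]):

    \mathcal{L}_{\mathrm{BCE}} = -\operatorname{mean}\!\left[(y+\varepsilon_1)\ln(\hat{y}+\varepsilon_1) + (1-y+\varepsilon_2)\ln(1-\hat{y}+\varepsilon_2)\right]

    \frac{\partial \mathcal{L}_{\mathrm{BCE}}}{\partial \hat{y}} = -\frac{1}{N}\left[\frac{y+\varepsilon_1}{\hat{y}+\varepsilon_1} - \frac{1-y+\varepsilon_2}{1-\hat{y}+\varepsilon_2}\right]

The forward branch reduces sub3 with ReduceMean, matching the first expression; the gradient branch multiplies add3 by the -1/N Producer, matching the second.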
@@ -45,7 +45,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
         (2/NbBatch)->Mul->Gradient
     */
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
     // compile_gradient(graph); // Warning compile gradient here, without
     //                          // it, grad is nullptr. Maybe we can find a better
...
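For the MSE hunk, the schematic comment kept as context ((2/NbBatch)->Mul->Gradient) corresponds to the standard mean-squared-error gradient; under that reading, with N the batch size:

    \frac{\partial \mathcal{L}_{\mathrm{MSE}}}{\partial \hat{y}} = \frac{2}{N}\,(\hat{y}-y)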
@@ -83,7 +83,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGrad();
+        // optim_tensors[i]->initGrad();

         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
...
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGrad();
+        // optim_tensors[i]->initGrad();

         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
...
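In both optimizer tests the commented-out initGrad() sits directly before the test builds its own grad_tensors, which suggests the gradients are supplied explicitly rather than allocated eagerly. A hypothetical sketch of that pattern (setGrad() and grad_values are assumptions about the test and Tensor API, not shown in this diff):

    // Build the gradient tensor by hand and attach it to the parameter,
    // which would make the eager initGrad() call redundant.
    grad_tensors[i] = std::make_shared<Tensor>(dims);
    grad_tensors[i]->setBackend("cpu");
    grad_tensors[i]->getImpl()->copy(grad_values[i].get(), size_tensors[i]); // grad_values: assumed test data
    optim_tensors[i]->setGrad(grad_tensors[i]); // assumed attachment API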