From 458c440121cb3f418111a3c29f174916f31f8329 Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Mon, 1 Jul 2024 00:17:57 +0000
Subject: [PATCH] comment Tensor::initGrad() calls

---
 .../aidge/learning/optimizer/Optimizer.hpp |  6 ++--
 src/loss/classification/BCE.cpp            | 30 +++++++++----------
 src/loss/regression/MSE.cpp                |  2 +-
 unit_tests/optimizer/Test_Adam.cpp         |  2 +-
 unit_tests/optimizer/Test_SGD.cpp          |  2 +-
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/include/aidge/learning/optimizer/Optimizer.hpp b/include/aidge/learning/optimizer/Optimizer.hpp
index 195d649..83ba3f3 100644
--- a/include/aidge/learning/optimizer/Optimizer.hpp
+++ b/include/aidge/learning/optimizer/Optimizer.hpp
@@ -48,9 +48,9 @@ public:
 
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
         mParameters = parameters;
-        for (const auto& param : parameters) {
-            param->initGrad(); // create gradient and set it to zeros
-        }
+        // for (const auto& param : parameters) {
+        //     param->initGrad(); // create gradient and set it to zeros
+        // }
     }
 
     constexpr float learningRate() const noexcept {
diff --git a/src/loss/classification/BCE.cpp b/src/loss/classification/BCE.cpp
index b11541a..d515607 100644
--- a/src/loss/classification/BCE.cpp
+++ b/src/loss/classification/BCE.cpp
@@ -40,7 +40,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                                const std::shared_ptr<Tensor>& target) {
     /*
     Binay Cross Entropy (BCE) loss function
-    
+
     Implementation note: loss function is computed using a graph in order to not
     be backend dependant.
     */
@@ -76,7 +76,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(add2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add2_node, 0, 1);
-    
+
     // Define nodes: sub1 = 1 - prediction + eps2 and sub2 = - (1 - target + eps2)
     const std::shared_ptr<Node> sub1_node = Sub("sub1");
     const std::shared_ptr<Node> sub2_node = Sub("sub2");
@@ -86,7 +86,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(sub2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{1.0f + eps2}}))
         ->addChild(sub2_node, 0, 1);
-    
+
     // Define nodes: ln1 = ln(prediction + eps1) and ln2 = ln(1 - prediction + eps2)
     const std::shared_ptr<Node> ln1_node = Ln("ln1");
     const std::shared_ptr<Node> ln2_node = Ln("ln2");
@@ -100,12 +100,12 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     ln1_node->addChild(mul1_node, 0, 1);
     sub2_node->addChild(mul2_node, 0, 0);
     ln2_node->addChild(mul2_node, 0, 1);
-    
+
     // Define node: sub3 = - [(target + eps1) * ln(prediction + eps1) + (1 - target + eps2) * ln(1 - prediction + eps2)]
     const std::shared_ptr<Node> sub3_node = Sub("sub3");
     mul2_node->addChild(sub3_node, 0, 0);
     mul1_node->addChild(sub3_node, 0, 1);
-    
+
     // Define nodes: div1 = (target + eps1) / (prediction + eps1) and div2 = - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> div1_node = Div("div1");
     const std::shared_ptr<Node> div2_node = Div("div2");
@@ -113,27 +113,27 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     add1_node->addChild(div1_node, 0, 1);
     sub2_node->addChild(div2_node, 0, 0);
     sub1_node->addChild(div2_node, 0, 1);
-    
+
     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> add3_node = Add(2, "add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);
 
-    // Define node: loss 
+    // Define node: loss
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto loss_node = ReduceMean(axes_dims, 1, "loss");
     sub3_node->addChild(loss_node, 0, 0);
-    
+
     // Define node: gradient
     const std::shared_ptr<Node> gradient_node = Mul("gradient");
     add3_node->addChild(gradient_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{-1.0f/float(target->dims()[0])}}))
         ->addChild(gradient_node, 0, 1);
-    
+
     // Create GraphView
     std::shared_ptr<GraphView> gv_loss = std::make_shared<GraphView>("BCE");
-    gv_loss->add({prediction_node, target_node, 
+    gv_loss->add({prediction_node, target_node,
                   add1_node->getParent(1), add1_node,
                   add2_node->getParent(1), add2_node,
                   sub1_node->getParent(0), sub1_node,
@@ -142,16 +142,16 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   sub3_node, loss_node,
                   add3_node, gradient_node->getParent(1), gradient_node});
     gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());
-    
+
     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
-    
-    prediction->initGrad(); // Enable gradient for output
-    std::shared_ptr<Tensor> outputGrad = prediction->grad(); 
+
+    // prediction->initGrad(); // Enable gradient for output
+    std::shared_ptr<Tensor> outputGrad = prediction->grad();
     const std::shared_ptr<OperatorTensor> gradient_op = std::dynamic_pointer_cast<OperatorTensor>(gradient_node->getOperator());
     outputGrad->copyFrom(gradient_op->getOutput(0)->clone()); // Update gradient
-    
+
     const std::shared_ptr<OperatorTensor> loss_op = std::dynamic_pointer_cast<OperatorTensor>(loss_node->getOperator());
     return loss_op->getOutput(0)->clone(); // Return loss
 }
diff --git a/src/loss/regression/MSE.cpp b/src/loss/regression/MSE.cpp
index db45e34..3d7ffe9 100644
--- a/src/loss/regression/MSE.cpp
+++ b/src/loss/regression/MSE.cpp
@@ -45,7 +45,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
         (2/NbBatch)->Mul->Gradient
     */
 
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
 
     // compile_gradient(graph); // Warning compile gradient here, without
     //                          // it, grad is nullptr. Maybe we can find a better
diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp
index ef2d521..bd29790 100644
--- a/unit_tests/optimizer/Test_Adam.cpp
+++ b/unit_tests/optimizer/Test_Adam.cpp
@@ -83,7 +83,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGrad();
+        // optim_tensors[i]->initGrad();
 
         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp
index df9924d..6b8edc6 100644
--- a/unit_tests/optimizer/Test_SGD.cpp
+++ b/unit_tests/optimizer/Test_SGD.cpp
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGrad();
+        // optim_tensors[i]->initGrad();
 
         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
-- 
GitLab
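
Note on the BCE.cpp hunks above (an editorial sketch of the arithmetic the node chain encodes, not part of the patch): writing p_i for the prediction, t_i for the target, eps1/eps2 for the stabilizing constants fed in by the Producer nodes, and N = target->dims()[0] for the batch size, the loss_node and gradient_node evaluate approximately

\mathcal{L}_{\mathrm{BCE}} \approx -\operatorname{mean}_i\big[(t_i+\varepsilon_1)\,\ln(p_i+\varepsilon_1) + (1-t_i+\varepsilon_2)\,\ln(1-p_i+\varepsilon_2)\big]

\frac{\partial\mathcal{L}_{\mathrm{BCE}}}{\partial p_i} \approx -\frac{1}{N}\left[\frac{t_i+\varepsilon_1}{p_i+\varepsilon_1} - \frac{1-t_i+\varepsilon_2}{1-p_i+\varepsilon_2}\right]

i.e. the standard binary cross-entropy and its analytic gradient, up to the epsilon terms. The gradient is copied into prediction->grad(), which is why the patch relies on grad() existing without the explicit initGrad() calls it comments out.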