diff --git a/include/aidge/learning/optimizer/Optimizer.hpp b/include/aidge/learning/optimizer/Optimizer.hpp
index 195d64965d3ba4eb89c9c4d0ca2155cb719f76f3..83ba3f37f35f608c416dc8750a25c8b226fac8bf 100644
--- a/include/aidge/learning/optimizer/Optimizer.hpp
+++ b/include/aidge/learning/optimizer/Optimizer.hpp
@@ -48,9 +48,9 @@ public:
 
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
        mParameters = parameters;
-        for (const auto& param : parameters) {
-            param->initGrad(); // create gradient and set it to zeros
-        }
+        // for (const auto& param : parameters) {
+        //     param->initGrad(); // create gradient and set it to zeros
+        // }
     }
 
     constexpr float learningRate() const noexcept {
diff --git a/src/loss/classification/BCE.cpp b/src/loss/classification/BCE.cpp
index b11541a27b5d578b33d484f38281b79a899ed36d..d5156072e9aeff84470fc60a4efb7571de81483b 100644
--- a/src/loss/classification/BCE.cpp
+++ b/src/loss/classification/BCE.cpp
@@ -40,7 +40,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                                const std::shared_ptr<Tensor>& target) {
     /*
     Binay Cross Entropy (BCE) loss function
-    
+
     Implementation note: loss function is computed using a graph in order to
     not be backend dependant.
     */
@@ -76,7 +76,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(add2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add2_node, 0, 1);
-    
+
     // Define nodes: sub1 = 1 - prediction + eps2 and sub2 = - (1 - target + eps2)
     const std::shared_ptr<Node> sub1_node = Sub("sub1");
     const std::shared_ptr<Node> sub2_node = Sub("sub2");
@@ -86,7 +86,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(sub2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{1.0f + eps2}}))
         ->addChild(sub2_node, 0, 1);
-    
+
     // Define nodes: ln1 = ln(prediction + eps1) and ln2 = ln(1 - prediction + eps2)
     const std::shared_ptr<Node> ln1_node = Ln("ln1");
     const std::shared_ptr<Node> ln2_node = Ln("ln2");
@@ -100,12 +100,12 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     ln1_node->addChild(mul1_node, 0, 1);
     sub2_node->addChild(mul2_node, 0, 0);
     ln2_node->addChild(mul2_node, 0, 1);
-    
+
     // Define node: sub3 = - [(target + eps1) * ln(prediction + eps1) + (1 - target + eps2) * ln(1 - prediction + eps2)]
     const std::shared_ptr<Node> sub3_node = Sub("sub3");
     mul2_node->addChild(sub3_node, 0, 0);
     mul1_node->addChild(sub3_node, 0, 1);
-    
+
     // Define nodes: div1 = (target + eps1) / (prediction + eps1) and div2 = - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> div1_node = Div("div1");
     const std::shared_ptr<Node> div2_node = Div("div2");
@@ -113,27 +113,27 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     add1_node->addChild(div1_node, 0, 1);
     sub2_node->addChild(div2_node, 0, 0);
     sub1_node->addChild(div2_node, 0, 1);
-    
+
     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> add3_node = Add(2, "add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);
 
-    // Define node: loss 
+    // Define node: loss
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto loss_node = ReduceMean(axes_dims, 1, "loss");
     sub3_node->addChild(loss_node, 0, 0);
-    
+
     // Define node: gradient
     const std::shared_ptr<Node> gradient_node = Mul("gradient");
     add3_node->addChild(gradient_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{-1.0f/float(target->dims()[0])}}))
         ->addChild(gradient_node, 0, 1);
-    
+
     // Create GraphView
     std::shared_ptr<GraphView> gv_loss = std::make_shared<GraphView>("BCE");
-    gv_loss->add({prediction_node, target_node, 
+    gv_loss->add({prediction_node, target_node,
                   add1_node->getParent(1), add1_node,
                   add2_node->getParent(1), add2_node,
                   sub1_node->getParent(0), sub1_node,
@@ -142,16 +142,16 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   sub3_node, loss_node, add3_node,
                   gradient_node->getParent(1), gradient_node});
     gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());
-    
+
     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
-    
-    prediction->initGrad(); // Enable gradient for output
-    std::shared_ptr<Tensor> outputGrad = prediction->grad(); 
+
+    // prediction->initGrad(); // Enable gradient for output
+    std::shared_ptr<Tensor> outputGrad = prediction->grad();
     const std::shared_ptr<OperatorTensor> gradient_op = std::dynamic_pointer_cast<OperatorTensor>(gradient_node->getOperator());
     outputGrad->copyFrom(gradient_op->getOutput(0)->clone()); // Update gradient
-    
+
     const std::shared_ptr<OperatorTensor> loss_op = std::dynamic_pointer_cast<OperatorTensor>(loss_node->getOperator());
     return loss_op->getOutput(0)->clone(); // Return loss
 }
diff --git a/src/loss/regression/MSE.cpp b/src/loss/regression/MSE.cpp
index db45e349dce9084ca596ff4d2925d0341aaf0655..3d7ffe923bfa957c43fa93ef7c234ef1bdf63f06 100644
--- a/src/loss/regression/MSE.cpp
+++ b/src/loss/regression/MSE.cpp
@@ -45,7 +45,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
         (2/NbBatch)->Mul->Gradient
     */
 
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
 
     // compile_gradient(graph); // Warning compile gradient here, without
     //                          // it, grad is nullptr. Maybe we can find a better
diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp
index ef2d52106140507e173cb11a4d6cfc974125e715..bd297903d47b90b755ff59ace0e052aa62c309d7 100644
--- a/unit_tests/optimizer/Test_Adam.cpp
+++ b/unit_tests/optimizer/Test_Adam.cpp
@@ -83,7 +83,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
             optim_tensors[i] = std::make_shared<Tensor>(dims);
             optim_tensors[i]->setBackend("cpu");
             optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i]->initGrad();
+            // optim_tensors[i]->initGrad();
 
             grad_tensors[i] = std::make_shared<Tensor>(dims);
             grad_tensors[i]->setBackend("cpu");
diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp
index df9924d557d89d0483d018ce08951cf573e233d7..6b8edc60a6f1583d1241552442558bff5f2ce52e 100644
--- a/unit_tests/optimizer/Test_SGD.cpp
+++ b/unit_tests/optimizer/Test_SGD.cpp
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
            optim_tensors[i] = std::make_shared<Tensor>(dims);
            optim_tensors[i]->setBackend("cpu");
            optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-           optim_tensors[i]->initGrad();
+           // optim_tensors[i]->initGrad();
 
            grad_tensors[i] = std::make_shared<Tensor>(dims);
            grad_tensors[i]->setBackend("cpu");