Commit 458c4401 authored by Maxence Naud

comment Tensor::initGrad() calls

parent 51e3cf9e
1 merge request: !10 "version 0.1.2"
Pipeline #49600 passed
@@ -48,9 +48,9 @@ public:
     virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
         mParameters = parameters;
-        for (const auto& param : parameters) {
-            param->initGrad(); // create gradient and set it to zeros
-        }
+        // for (const auto& param : parameters) {
+        //     param->initGrad(); // create gradient and set it to zeros
+        // }
     }

     constexpr float learningRate() const noexcept {
...
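Note on the hunk above: the removed loop was the optimizer's eager gradient allocation. A minimal sketch of the behaviour this commit drops (the claim about lazy creation is an assumption; the diff only shows the calls being commented out):

    // Old behaviour: setParameters() allocated and zeroed a gradient
    // tensor for every parameter up front.
    for (const auto& param : parameters) {
        param->initGrad(); // create gradient and set it to zeros
    }
    // New behaviour: setParameters() only stores the parameter list, so
    // gradient tensors must now come from somewhere else -- presumably
    // created lazily on first access through Tensor::grad() (assumption,
    // not confirmed by this diff).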
@@ -40,7 +40,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                                const std::shared_ptr<Tensor>& target) {
     /*
     Binary Cross Entropy (BCE) loss function

     Implementation note:
     loss function is computed using a graph in order not to be backend dependent.
     */
@@ -76,7 +76,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(add2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add2_node, 0, 1);

     // Define nodes: sub1 = 1 - prediction + eps2 and sub2 = - (1 - target + eps2)
     const std::shared_ptr<Node> sub1_node = Sub("sub1");
     const std::shared_ptr<Node> sub2_node = Sub("sub2");
@@ -86,7 +86,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     target_node->addChild(sub2_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{1.0f + eps2}}))
         ->addChild(sub2_node, 0, 1);

     // Define nodes: ln1 = ln(prediction + eps1) and ln2 = ln(1 - prediction + eps2)
     const std::shared_ptr<Node> ln1_node = Ln("ln1");
     const std::shared_ptr<Node> ln2_node = Ln("ln2");
@@ -100,12 +100,12 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     ln1_node->addChild(mul1_node, 0, 1);
     sub2_node->addChild(mul2_node, 0, 0);
     ln2_node->addChild(mul2_node, 0, 1);

     // Define node: sub3 = - [(target + eps1) * ln(prediction + eps1) + (1 - target + eps2) * ln(1 - prediction + eps2)]
     const std::shared_ptr<Node> sub3_node = Sub("sub3");
     mul2_node->addChild(sub3_node, 0, 0);
     mul1_node->addChild(sub3_node, 0, 1);

     // Define nodes: div1 = (target + eps1) / (prediction + eps1) and div2 = - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> div1_node = Div("div1");
     const std::shared_ptr<Node> div2_node = Div("div2");
@@ -113,27 +113,27 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     add1_node->addChild(div1_node, 0, 1);
     sub2_node->addChild(div2_node, 0, 0);
     sub1_node->addChild(div2_node, 0, 1);

     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
     const std::shared_ptr<Node> add3_node = Add(2, "add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);

     // Define node: loss
     std::vector<int> axes_dims(prediction->nbDims());
     std::iota(std::begin(axes_dims), std::end(axes_dims), 0);
     auto loss_node = ReduceMean(axes_dims, 1, "loss");
     sub3_node->addChild(loss_node, 0, 0);

     // Define node: gradient
     const std::shared_ptr<Node> gradient_node = Mul("gradient");
     add3_node->addChild(gradient_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{-1.0f/float(target->dims()[0])}}))
         ->addChild(gradient_node, 0, 1);

     // Create GraphView
     std::shared_ptr<GraphView> gv_loss = std::make_shared<GraphView>("BCE");
     gv_loss->add({prediction_node, target_node,
                   add1_node->getParent(1), add1_node,
                   add2_node->getParent(1), add2_node,
                   sub1_node->getParent(0), sub1_node,
@@ -142,16 +142,16 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   sub3_node, loss_node,
                   add3_node, gradient_node->getParent(1), gradient_node});
     gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());

     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
     std::shared_ptr<Tensor> outputGrad = prediction->grad();
     const std::shared_ptr<OperatorTensor> gradient_op = std::dynamic_pointer_cast<OperatorTensor>(gradient_node->getOperator());
     outputGrad->copyFrom(gradient_op->getOutput(0)->clone()); // Update gradient
     const std::shared_ptr<OperatorTensor> loss_op = std::dynamic_pointer_cast<OperatorTensor>(loss_node->getOperator());
     return loss_op->getOutput(0)->clone(); // Return loss
 }
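Read together, the node comments above define the epsilon-stabilised binary cross-entropy this graph evaluates, along with its analytic gradient. Reconstructed from those comments (y = target, ŷ = prediction, N = target->dims()[0]):

    \mathcal{L}_{\mathrm{BCE}} = -\operatorname{mean}\!\left[(y+\varepsilon_1)\ln(\hat{y}+\varepsilon_1) + (1-y+\varepsilon_2)\ln(1-\hat{y}+\varepsilon_2)\right]

    \frac{\partial \mathcal{L}_{\mathrm{BCE}}}{\partial \hat{y}} = -\frac{1}{N}\left[\frac{y+\varepsilon_1}{\hat{y}+\varepsilon_1} - \frac{1-y+\varepsilon_2}{1-\hat{y}+\varepsilon_2}\right]

The forward branch reduces sub3 with ReduceMean, matching the first expression; the gradient branch multiplies add3 by the -1/N Producer, matching the second.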
@@ -45,7 +45,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
         (2/NbBatch)->Mul->Gradient
     */
-    prediction->initGrad(); // Enable gradient for output
+    // prediction->initGrad(); // Enable gradient for output
     // compile_gradient(graph); // Warning compile gradient here, without
     //                          // it, grad is nullptr. Maybe we can find a better
...
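For the MSE hunk, the schematic comment kept as context ((2/NbBatch)->Mul->Gradient) corresponds to the standard mean-squared-error gradient; under that reading, with N the batch size:

    \frac{\partial \mathcal{L}_{\mathrm{MSE}}}{\partial \hat{y}} = \frac{2}{N}\,(\hat{y}-y)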
@@ -83,7 +83,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGrad();
+        // optim_tensors[i]->initGrad();

         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
...
@@ -77,7 +77,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
         optim_tensors[i] = std::make_shared<Tensor>(dims);
         optim_tensors[i]->setBackend("cpu");
         optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
-        optim_tensors[i]->initGrad();
+        // optim_tensors[i]->initGrad();

         grad_tensors[i] = std::make_shared<Tensor>(dims);
         grad_tensors[i]->setBackend("cpu");
...
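In both optimizer tests the commented-out initGrad() sits directly before the test builds its own grad_tensors, which suggests the gradients are supplied explicitly rather than allocated eagerly. A hypothetical sketch of that pattern (setGrad() and grad_values are assumptions about the test and Tensor API, not shown in this diff):

    // Build the gradient tensor by hand and attach it to the parameter,
    // which would make the eager initGrad() call redundant.
    grad_tensors[i] = std::make_shared<Tensor>(dims);
    grad_tensors[i]->setBackend("cpu");
    grad_tensors[i]->getImpl()->copy(grad_values[i].get(), size_tensors[i]); // grad_values: assumed test data
    optim_tensors[i]->setGrad(grad_tensors[i]); // assumed attachment API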