diff --git a/include/aidge/learning/optimizer/Adam.hpp b/include/aidge/learning/optimizer/Adam.hpp
index 8c89e53f94dade675d3e7da139554561929f5e3d..da2146932d3a09b9d38ea744b6e685ac0af46aec 100644
--- a/include/aidge/learning/optimizer/Adam.hpp
+++ b/include/aidge/learning/optimizer/Adam.hpp
@@ -58,23 +58,43 @@ public:
     }
 
     void update() override final {
+
+        auto backend = mParameters[0]->backend();
+        auto device = mParameters[0]->device();
+        auto dataType = mParameters[0]->dataType();
+
         float mBeta1Power = std::pow(this->getAttr<AdamAttr::Beta1>(), static_cast<float>(mLRScheduler.step() + 1));
         float mBeta2Power = std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1));
+
         float mReversedBeta1Power = 1.0f - mBeta1Power;
         float mSqrtReversedBeta2Power = std::sqrt(1.0f - mBeta2Power);
 
         Tensor alpha = Tensor(learningRate() * mSqrtReversedBeta2Power / mReversedBeta1Power);
-        alpha.setBackend(mParameters[0]->getImpl()->backend());
-        alpha.setDataType(mParameters[0]->dataType());
+        alpha.setBackend(backend, device);
+        alpha.setDataType(dataType);
 
         Tensor epsilon_hat = Tensor(this->getAttr<AdamAttr::Epsilon>() * mSqrtReversedBeta2Power);
-        epsilon_hat.setBackend(mParameters[0]->getImpl()->backend());
-        epsilon_hat.setDataType(mParameters[0]->dataType());
+        epsilon_hat.setBackend(backend, device);
+        epsilon_hat.setDataType(dataType);
+
+        mBeta1.setBackend(backend, device);
+        mBeta1.setDataType(dataType);
+        mReversedBeta1.setBackend(backend, device);
+        mReversedBeta1.setDataType(dataType);
+
+        mBeta2.setBackend(backend, device);
+        mBeta2.setDataType(dataType);
+        mReversedBeta2.setBackend(backend, device);
+        mReversedBeta2.setDataType(dataType);
 
         if (mLRScheduler.step() == 0) {
             for (std::size_t i = 0; i < mParameters.size(); ++i) {
                 mMomentum1[i].zeros();
-                mMomentum2[i].zeros();
+                mMomentum1[i].setBackend(backend, device);
+                mMomentum1[i].setDataType(dataType);
+                mMomentum2[i].zeros();
+                mMomentum2[i].setBackend(backend, device);
+                mMomentum2[i].setDataType(dataType);
             }
         }
 
@@ -88,25 +108,33 @@ public:
     }
 
     void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) override final {
+
         Optimizer::setParameters(parameters);
         mMomentum1 = std::vector<Tensor>(parameters.size());
         mMomentum2 = std::vector<Tensor>(parameters.size());
+
         for (std::size_t i = 0; i < parameters.size(); ++i) {
+
             mMomentum1[i] = Tensor(parameters[i]->dims());
-            mMomentum1[i].setBackend(parameters[i]->getImpl()->backend());
+            mMomentum1[i].setBackend(parameters[i]->backend(), parameters[i]->device());
             mMomentum1[i].setDataType(parameters[i]->dataType());
+
             mMomentum2[i] = Tensor(parameters[i]->dims());
-            mMomentum2[i].setBackend(parameters[i]->getImpl()->backend());
+            mMomentum2[i].setBackend(parameters[i]->backend(), parameters[i]->device());
             mMomentum2[i].setDataType(parameters[i]->dataType());
         }
         if (parameters.size() > 0) {
-            mBeta1.setBackend(mParameters[0]->getImpl()->backend());
+
+            mBeta1.setBackend(mParameters[0]->backend(), mParameters[0]->device());
             mBeta1.setDataType(parameters[0]->dataType());
-            mReversedBeta1.setBackend(mParameters[0]->getImpl()->backend());
+
+            mReversedBeta1.setBackend(mParameters[0]->backend(), mParameters[0]->device());
             mReversedBeta1.setDataType(parameters[0]->dataType());
-            mBeta2.setBackend(mParameters[0]->getImpl()->backend());
+
+            mBeta2.setBackend(mParameters[0]->backend(), mParameters[0]->device());
             mBeta2.setDataType(parameters[0]->dataType());
-            mReversedBeta2.setBackend(mParameters[0]->getImpl()->backend());
+
+            mReversedBeta2.setBackend(mParameters[0]->backend(), mParameters[0]->device());
             mReversedBeta2.setDataType(parameters[0]->dataType());
         }
     }
diff --git a/include/aidge/learning/optimizer/SGD.hpp b/include/aidge/learning/optimizer/SGD.hpp
index da029b36fae81af32aad79e668fab1e98e1a0076..1c5f1a625dc777e44b367fe8b3851a4f0f75c483 100644
--- a/include/aidge/learning/optimizer/SGD.hpp
+++ b/include/aidge/learning/optimizer/SGD.hpp
@@ -56,16 +56,33 @@ public:
     }
 
     void update() override final {
+
+        auto backend = mParameters[0]->backend();
+        auto device = mParameters[0]->device();
+        auto dataType = mParameters[0]->dataType();
+
         mLR = Tensor(learningRate());
-        mLR.setBackend(mParameters[0]->getImpl()->backend());
-        mLR.setDataType(mParameters[0]->dataType());
-        mWeightDecay.setBackend(mParameters[0]->getImpl()->backend());
-        mWeightDecay.setDataType(mParameters[0]->dataType());
+
+        // Set backends / devices
+
+        mLR.setDataType(dataType);
+        mLR.setBackend(backend, device);
+
+        mWeightDecay.setDataType(dataType);
+        mWeightDecay.setBackend(backend, device);
+
+        mReversedDampening.setDataType(dataType);
+        mReversedDampening.setBackend(backend, device);
+
+        mMomentum.setDataType(dataType);
+        mMomentum.setBackend(backend, device);
+
+        // update loop
 
         if (mLRScheduler.step() == 0) {
             for (std::size_t i = 0; i < mParameters.size(); ++i) {
                 mGradientInertia[i] = mParameters[i]->grad()->clone();
-                *mParameters[i] -= mLR*mGradientInertia[i];
+                *mParameters[i] -= mLR * mGradientInertia[i];
             }
         } else {
             for (std::size_t i = 0; i < mParameters.size(); ++i) {
@@ -82,13 +99,13 @@ public:
         mGradientInertia = std::vector<Tensor>(parameters.size());
         for (std::size_t i = 0; i < parameters.size(); ++i) {
             mGradientInertia[i] = Tensor(parameters[i]->dims());
-            mGradientInertia[i].setBackend(parameters[i]->backend());
+            mGradientInertia[i].setBackend(parameters[i]->backend(), parameters[i]->device());
             mGradientInertia[i].setDataType(parameters[i]->dataType());
         }
         if (parameters.size() > 0) {
-            mReversedDampening.setBackend(mParameters[0]->getImpl()->backend());
+            mReversedDampening.setBackend(mParameters[0]->backend(), mParameters[0]->device());
             mReversedDampening.setDataType(parameters[0]->dataType());
-            mMomentum.setBackend(mParameters[0]->getImpl()->backend());
+            mMomentum.setBackend(mParameters[0]->backend(), mParameters[0]->device());
             mMomentum.setDataType(parameters[0]->dataType());
         }
     }
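Both optimizer changes follow the same pattern: update() now reads the backend, the device index, and the data type once from the first parameter tensor and applies them to every internal scalar and state tensor (learning rate, betas, momenta, weight decay, dampening) before any arithmetic runs, while setParameters() propagates the device alongside the backend. A minimal sketch of that pattern, using only what the patch itself relies on (the Tensor accessors backend(), device(), dataType() and the two-argument setBackend(backend, device) overload); the helper name configureLike and the header path are illustrative assumptions, not part of the patch:

    // Hypothetical helper showing the pattern now used by Adam::update() and
    // SGD::update(): make "t" live on the same backend/device, with the same
    // data type, as a reference parameter tensor.
    #include "aidge/data/Tensor.hpp"  // header path assumed

    static void configureLike(Aidge::Tensor& t, const Aidge::Tensor& ref) {
        t.setDataType(ref.dataType());
        t.setBackend(ref.backend(), ref.device());
    }

One small nit: SGD::update() now calls setDataType() before setBackend(), while Adam::update() keeps the opposite order; presumably both orders behave the same, but aligning them would make the intent easier to follow.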
diff --git a/src/loss/classification/BCE.cpp b/src/loss/classification/BCE.cpp
index 4d5ce38bec984b9cf4faf4d80a494f16a48d72b1..c277658a44e11b7e80a1d6fd7836b3ce1a335d7d 100644
--- a/src/loss/classification/BCE.cpp
+++ b/src/loss/classification/BCE.cpp
@@ -43,14 +43,22 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
 
     AIDGE_ASSERT(target->dims().size() == 2,
                  "Label must have two dims: [BatchSize, NbChannel]");
+
     AIDGE_ASSERT(prediction->backend() == target->backend(),
                  "'prediction' and 'target' Tensors must be on the "
                  "same backend. Found {} and {}.\n",
                  prediction->backend(), target->backend());
+
+    AIDGE_ASSERT(prediction->device() == target->device(),
+                 "'prediction' and 'target' Tensors must be on the "
+                 "same device. Found {} and {}.\n",
+                 prediction->device(), target->device());
+
     AIDGE_ASSERT(prediction->dims() == target->dims(),
                  "'prediction' (shape {}) and 'target' (shape {}) Tensors must "
                  "have the same dimensions.\n",
                  prediction->dims(), target->dims());
+
     AIDGE_ASSERT(prediction->dataType() == target->dataType(),
                  "'prediction' (data type {}) and 'target' (data type {}) "
                  "Tensors must have the same data type.\n",
@@ -135,7 +143,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
                   ln1_node, ln2_node, sub2_node, mul1_node, mul2_node, sub3_node,
                   loss_node, sub4_node, mul3_node, div1_node,
                   gradient_node->getParent(1), gradient_node});
-    gv_loss->compile(prediction->getImpl()->backend(), prediction->dataType());
+    gv_loss->compile(prediction->backend(), prediction->dataType(), prediction->device());
 
     // Compute loss and gradient
     SequentialScheduler ss_loss{gv_loss};
diff --git a/src/loss/classification/CELoss.cpp b/src/loss/classification/CELoss.cpp
index 2d4b7f0a818abc524c8196dfa14810da7f19b523..2f2000585790a4781f91aa02236d9e3609f73680 100644
--- a/src/loss/classification/CELoss.cpp
+++ b/src/loss/classification/CELoss.cpp
@@ -37,14 +37,22 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
 {
     AIDGE_ASSERT(prediction->nbDims() == 2,
                  "Label must have two dims: [BatchSize, NbChannel]");
+
     AIDGE_ASSERT(prediction->backend() == target->backend(),
                  "'prediction' and 'target' Tensors must be on the "
                  "same backend. Found {} and {}.\n",
                  prediction->backend(), target->backend());
+
+    AIDGE_ASSERT(prediction->device() == target->device(),
+                 "'prediction' and 'target' Tensors must be on the "
+                 "same device. Found {} and {}.\n",
+                 prediction->device(), target->device());
+
     AIDGE_ASSERT(prediction->dims() == target->dims(),
                  "'prediction' (shape {}) and 'target' (shape {}) Tensors must "
                  "have the same dimensions.\n",
                  prediction->dims(), target->dims());
+
     AIDGE_ASSERT(prediction->dataType() == target->dataType(),
                  "'prediction' (data type {}) and 'target' (data type {}) "
                  "Tensors must have the same data type.\n",
@@ -52,12 +60,13 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
 
     auto backend = prediction->backend();
     auto dataType = prediction->dataType();
+    auto device = prediction->device();
 
     // Compute the predicition SoftMax
 
     auto softmaxOp = Softmax_Op(1);
     softmaxOp.setDataType(dataType);
-    softmaxOp.setBackend(backend);
+    softmaxOp.setBackend(backend, device);
     softmaxOp.associateInput(0, prediction);
     softmaxOp.forward();
 
@@ -80,7 +89,7 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
     std::shared_ptr<GraphView> lossGraphView = std::make_shared<GraphView>("CELoss");
     lossGraphView->add({targetNode, softmaxNode, logNode, mulNode, sumNode, meanNode});
-    lossGraphView->compile(backend, dataType);
+    lossGraphView->compile(backend, dataType, device);
 
     SequentialScheduler scheduler(lossGraphView);
     scheduler.forward(true);
@@ -89,7 +98,7 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
     auto lossTensor = meanOp->getOutput(0);
 
     auto scalar = Tensor(-1.0f);
-    scalar.setBackend(backend);
+    scalar.setBackend(backend, device);
     scalar.setDataType(dataType);
 
     (*lossTensor) = (*lossTensor) * scalar;
@@ -100,7 +109,7 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
 
     auto subOp = Sub_Op();
     subOp.setDataType(dataType);
-    subOp.setBackend(backend);
+    subOp.setBackend(backend, device);
 
     subOp.associateInput(0, softmax);
     subOp.associateInput(1, target);
@@ -110,13 +119,17 @@ Aidge::Tensor Aidge::loss::CELoss(std::shared_ptr<Tensor>& prediction,
 
     const float batchSize = static_cast<float>((target->dims())[0]);
 
+    // Compute the rescaled error
+
     scalar = Tensor(1.0f / batchSize);
-    scalar.setBackend(backend);
+    scalar.setBackend(backend, device);
     scalar.setDataType(dataType);
 
     (*err) = (*err) * scalar;
 
+    // Set the error signal
+
     prediction->setGrad(err);
 
     // Return the loss value
diff --git a/src/loss/distillation/KD.cpp b/src/loss/distillation/KD.cpp
index 4283a28fa05c91cdb58f3a6677210b414e18f44a..411be45803069b32b94a18a46b196f82ac91ce69 100644
--- a/src/loss/distillation/KD.cpp
+++ b/src/loss/distillation/KD.cpp
@@ -59,10 +59,17 @@ Aidge::Tensor Aidge::loss::KD(std::shared_ptr<Tensor>& student_prediction,
                  "'prediction' and 'target' Tensors must be on the "
                  "same backend. Found {} and {}.\n",
                  student_prediction->backend(), teacher_prediction->backend());
+
+    AIDGE_ASSERT(student_prediction->device() == teacher_prediction->device(),
+                 "'prediction' and 'target' Tensors must be on the "
+                 "same device. Found {} and {}.\n",
+                 student_prediction->device(), teacher_prediction->device());
+
     AIDGE_ASSERT(student_prediction->dims() == teacher_prediction->dims(),
                  "'prediction' (shape {}) and 'target' (shape {}) Tensors must "
                  "have the same dimensions.\n",
                  student_prediction->dims(), teacher_prediction->dims());
+
     AIDGE_ASSERT(student_prediction->dataType() == teacher_prediction->dataType(),
                  "'prediction' (data type {}) and 'target' (data type {}) "
                  "Tensors must have the same data type.\n",
@@ -134,7 +141,7 @@ Aidge::Tensor Aidge::loss::KD(std::shared_ptr<Tensor>& student_prediction,
                   soft_teacher_node, mul_node, mul2_node->getParent(1), mul2_node,
                   rm_node, sub_node});
-    gv_loss->compile(student_prediction->getImpl()->backend(), student_prediction->dataType());
+    gv_loss->compile(student_prediction->backend(), student_prediction->dataType(), student_prediction->device());
 
     SequentialScheduler ss_loss{gv_loss};
     ss_loss.forward(false);
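BCE, CELoss and KD all compute the loss the same way: build the expression as a small GraphView (or a single operator), configure it with the prediction tensor's backend, data type and, with this patch, device, then run a SequentialScheduler forward pass. A reduced sketch of the single-operator variant from the top of CELoss; Softmax_Op, setDataType, setBackend, associateInput, forward and getOutput are used exactly as they appear above, while the function name and header paths are assumptions:

    // Sketch: run a softmax on whatever backend/device the prediction tensor
    // already uses, mirroring the start of Aidge::loss::CELoss in this patch.
    #include <memory>
    #include "aidge/data/Tensor.hpp"        // header paths assumed
    #include "aidge/operator/Softmax.hpp"

    std::shared_ptr<Aidge::Tensor> softmaxLike(const std::shared_ptr<Aidge::Tensor>& prediction) {
        auto softmaxOp = Aidge::Softmax_Op(1);  // softmax over the channel axis
        softmaxOp.setDataType(prediction->dataType());
        softmaxOp.setBackend(prediction->backend(), prediction->device());
        softmaxOp.associateInput(0, prediction);
        softmaxOp.forward();
        return softmaxOp.getOutput(0);          // output stays on that backend/device
    }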
diff --git a/src/loss/regression/MSE.cpp b/src/loss/regression/MSE.cpp
index 8234aa9875163f9a6223327fcaa0780b74bd630c..67d37d3d11551a72fcc4089f7daeaee5510fb017 100644
--- a/src/loss/regression/MSE.cpp
+++ b/src/loss/regression/MSE.cpp
@@ -56,10 +56,17 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
                  "'prediction' and 'target' Tensors must be on the "
                  "same backend. Found {} and {}.\n",
                  prediction->backend(), target->backend());
+
+    AIDGE_ASSERT(prediction->device() == target->device(),
+                 "'prediction' and 'target' Tensors must be on the "
+                 "same device. Found {} and {}.\n",
+                 prediction->device(), target->device());
+
     AIDGE_ASSERT(prediction->dims() == target->dims(),
                  "'prediction' (shape {}) and 'target' (shape {}) Tensors must "
                  "have the same dimensions.\n",
                  prediction->dims(), target->dims());
+
     AIDGE_ASSERT(prediction->dataType() == target->dataType(),
                  "'prediction' (data type {}) and 'target' (data type {}) "
                  "Tensors must have the same data type.\n",
@@ -93,7 +100,7 @@ Aidge::Tensor Aidge::loss::MSE(std::shared_ptr<Tensor>& prediction,
         Sequential({sub_node, pow_node, rm_node});
     gv_local->add({sub_node->getParent(0), sub_node->getParent(1), pow_exp_node,
                    mul_node->getParent(1), mul_node});
-    gv_local->compile(prediction->getImpl()->backend(), prediction->dataType());
+    gv_local->compile(prediction->backend(), prediction->dataType(), prediction->device());
 
     SequentialScheduler ss_local{gv_local};
     ss_local.forward(false);
diff --git a/src/metrics/Accuracy.cpp b/src/metrics/Accuracy.cpp
index 5462505fd67786598f6ad09793ddcf66c4f15c69..7181febb65e948436746549bff38086b1b2196c5 100644
--- a/src/metrics/Accuracy.cpp
+++ b/src/metrics/Accuracy.cpp
@@ -29,8 +29,9 @@ Aidge::Tensor Aidge::metrics::Accuracy(std::shared_ptr<Tensor>& prediction,
                                        const std::shared_ptr<Tensor>& target,
                                        std::int32_t axis) {
     /*
-    Implementation note:
-    Accuracy is computed using a graph in order to not be backend dependant.
+    Implementation note:
+    Accuracy is computed using a graph in order to not be backend dependant.
+    The graph used is the following:
 
     The graph used is the following:
 
@@ -42,23 +43,29 @@ Aidge::Tensor Aidge::metrics::Accuracy(std::shared_ptr<Tensor>& prediction,
     AIDGE_ASSERT(target->dims().size() == 2,
                  "Label must have two dims: [BatchSize, NbChannel]");
 
-    std::shared_ptr<Tensor> outputGrad = prediction->grad();
-
     AIDGE_ASSERT(prediction->backend() == target->backend(),
                  "'prediction' and 'target' Tensors must be on the "
                  "same backend. Found {} and {}.\n",
                  prediction->backend(), target->backend());
+
+    AIDGE_ASSERT(prediction->device() == target->device(),
+                 "'prediction' and 'target' Tensors must be on the "
+                 "same device. Found {} and {}.\n",
+                 prediction->device(), target->device());
+
     AIDGE_ASSERT(prediction->dims() == target->dims(),
                  "'prediction' (shape {}) and 'target' (shape {}) Tensors must "
                  "have the same dimensions.\n",
                  prediction->dims(), target->dims());
+
     AIDGE_ASSERT(prediction->dataType() == target->dataType(),
                  "'prediction' (data type {}) and 'target' (data type {}) "
                  "Tensors must have the same data type.\n",
                  prediction->dataType(), target->dataType());
 
     // Create graph nodes and connections
-    const std::shared_ptr<Node> argmax_perd_node = ArgMax(axis);
+
+    const std::shared_ptr<Node> argmax_pred_node = ArgMax(axis);
     const std::shared_ptr<Node> argmax_target_node = ArgMax(axis);
 
     const std::shared_ptr<Node> equal_node = Equal();
@@ -66,29 +73,30 @@ Aidge::Tensor Aidge::metrics::Accuracy(std::shared_ptr<Tensor>& prediction,
     const std::shared_ptr<Node> rs_node = ReduceSum();
 
     const std::shared_ptr<Node> pred_node = Producer(prediction, "pred");
-    pred_node->addChild(argmax_perd_node);
+    pred_node->addChild(argmax_pred_node);
+
     const std::shared_ptr<Node> label_node = Producer(target, "label");
     label_node->addChild(argmax_target_node);
 
-    argmax_perd_node->addChild(equal_node,0,0);
-    argmax_target_node->addChild(equal_node,0,1);
-
-    // equal_node->addChild(rs_node,0,0);
+    argmax_pred_node->addChild(equal_node, 0, 0);
+    argmax_target_node->addChild(equal_node, 0, 1);
 
     // Create the graph
-    std::shared_ptr<GraphView> gv_local =
-        Sequential({equal_node, cast_node, rs_node});
-    gv_local->add({pred_node,argmax_perd_node, label_node,argmax_target_node});
-    gv_local->compile(prediction->getImpl()->backend(), prediction->dataType());
+    std::shared_ptr<GraphView> gv_local = Sequential({equal_node, cast_node, rs_node});
+
+    gv_local->add({pred_node, argmax_pred_node, label_node, argmax_target_node});
+    gv_local->compile(prediction->backend(), prediction->dataType(), prediction->device());
+
+    // Execute the graph and retrieve the result
     SequentialScheduler ss_local{gv_local};
+
    ss_local.forward(false);
 
-    // TODO: way too complicated to access
-    const std::shared_ptr<OperatorTensor> res =
-        std::dynamic_pointer_cast<OperatorTensor>(rs_node->getOperator());
+    const std::shared_ptr<OperatorTensor> op = std::dynamic_pointer_cast<OperatorTensor>(rs_node->getOperator());
+
     std::shared_ptr<Tensor> fallback;
-    return res->getOutput(0)->refFrom(fallback, "cpu");
+    return op->getOutput(0)->refFrom(fallback, "cpu");
 }
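Taken together, the patch threads an explicit device index through every place that previously only knew a backend name: tensors created inside the optimizers and losses are placed with setBackend(backend, device), graphs are compiled with compile(backend, dataType, device), and the new assertions reject prediction/target pairs that sit on different devices. A hedged sketch of what a caller is now expected to guarantee; the "cuda" backend name, the device index and the header paths are illustrative assumptions rather than anything mandated by the diff:

    // Illustrative only: place both tensors on the same backend and device,
    // then call the loss and the metric touched by this patch.
    #include <memory>
    #include "aidge/data/Tensor.hpp"               // header paths assumed
    #include "aidge/learning/loss/LossList.hpp"
    #include "aidge/learning/metrics/Accuracy.hpp"

    void evaluate(std::shared_ptr<Aidge::Tensor> prediction,
                  std::shared_ptr<Aidge::Tensor> target) {
        // Same backend, device, dims and data type, otherwise the AIDGE_ASSERTs
        // added in this patch will fire.
        prediction->setBackend("cuda", 1);
        target->setBackend("cuda", 1);

        // MSE returns the loss value; Accuracy returns the number of correct
        // predictions, brought back to "cpu" via refFrom(fallback, "cpu") above.
        Aidge::Tensor loss = Aidge::loss::MSE(prediction, target);
        Aidge::Tensor accuracy = Aidge::metrics::Accuracy(prediction, target, 1);
        (void)loss;
        (void)accuracy;
    }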