From 7844f1ae76ef10bbfb827d3d929f2f41e93bc752 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 5 Dec 2024 14:26:41 +0000 Subject: [PATCH 1/5] Change 1D attribute Tensors for scalar Tensors and use compound assignment operators on 'Parameter' to avoid reset of 'mGrad' attribute --- include/aidge/learning/optimizer/Adam.hpp | 40 +++++++++++------------ include/aidge/learning/optimizer/SGD.hpp | 10 +++--- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/include/aidge/learning/optimizer/Adam.hpp b/include/aidge/learning/optimizer/Adam.hpp index 125cfd7..a018d6e 100644 --- a/include/aidge/learning/optimizer/Adam.hpp +++ b/include/aidge/learning/optimizer/Adam.hpp @@ -35,12 +35,12 @@ class Adam: public Optimizer, public StaticAttributes<AdamAttr, float, float, fl private: std::vector<Tensor> mMomentum1; std::vector<Tensor> mMomentum2; - Tensor mLR{std::vector<std::size_t>({1})}; - Tensor mBeta1{std::vector<std::size_t>({1})}; - Tensor mReversedBeta1{std::vector<std::size_t>({1})}; - Tensor mBeta2{std::vector<std::size_t>({1})}; - Tensor mReversedBeta2{std::vector<std::size_t>({1})}; - Tensor mEpsilon{std::vector<std::size_t>({1})}; + Tensor mLR{1.0f}; + Tensor mBeta1; + Tensor mReversedBeta1; + Tensor mBeta2; + Tensor mReversedBeta2; + Tensor mEpsilon; public: using Attributes_ = StaticAttributes<AdamAttr, float, float, float>; @@ -51,19 +51,17 @@ public: : Optimizer(), Attributes_(attr<AdamAttr::Beta1>(beta1), attr<AdamAttr::Beta2>(beta2), - attr<AdamAttr::Epsilon>(epsilon)) + attr<AdamAttr::Epsilon>(epsilon)), + mBeta1(beta1), + mReversedBeta1(1.0f - beta1), + mBeta2(beta2), + mReversedBeta2(1.0f - beta2), + mEpsilon(epsilon) { - mBeta1 = Tensor(Array1D<float, 1>{{beta1}}); - mReversedBeta1 = Tensor(Array1D<float, 1>{{1.0f - beta1}}); - - mBeta2 = Tensor(Array1D<float, 1>{{beta2}}); - mReversedBeta2 = Tensor(Array1D<float, 1>{{1.0f - beta2}}); - - mEpsilon = Tensor(Array1D<float, 1>{{epsilon}}); } void update() override final { - mLR = Tensor(Array1D<float, 1>{{learningRate()}}); + mLR = Tensor(learningRate()); mLR.setBackend(mParameters[0]->getImpl()->backend()); if (mParameters[0]->getImpl()->backend() != mBeta1.getImpl()->backend()) { @@ -73,11 +71,11 @@ public: mReversedBeta2.setBackend(mParameters[0]->getImpl()->backend()); } - Tensor alpha = Tensor(Array1D<float, 1>{{ static_cast<float>(learningRate() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), mLRScheduler.step() + 1)) - / (1.0f - std::pow(this->getAttr<AdamAttr::Beta1>(), mLRScheduler.step() + 1))) }}); + Tensor alpha = Tensor(learningRate() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1))) + / (1.0f - std::pow(this->getAttr<AdamAttr::Beta1>(), static_cast<float>(mLRScheduler.step() + 1)))); alpha.setBackend(mParameters[0]->getImpl()->backend()); - Tensor epsilon = Tensor(Array1D<float, 1>{{ static_cast<float>(this->getAttr<AdamAttr::Epsilon>() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), mLRScheduler.step() + 1))) }}); + Tensor epsilon = Tensor(this->getAttr<AdamAttr::Epsilon>() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1)))); epsilon.setBackend(mParameters[0]->getImpl()->backend()); if (mLRScheduler.step() == 0) { @@ -90,13 +88,13 @@ public: mMomentum2[i].zeros(); } } - + for (std::size_t i = 0; i < mParameters.size(); ++i) { mMomentum1[i] = mBeta1 * mMomentum1[i] + mReversedBeta1 * (*mParameters[i]->grad()); mMomentum2[i] = mBeta2 * mMomentum2[i] + mReversedBeta2 * (*mParameters[i]->grad()) * (*mParameters[i]->grad()); - *mParameters[i] = *mParameters[i] - alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon); + *mParameters[i] -= alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon); } - + mLRScheduler.update(); } diff --git a/include/aidge/learning/optimizer/SGD.hpp b/include/aidge/learning/optimizer/SGD.hpp index 2ce6572..768a3d0 100644 --- a/include/aidge/learning/optimizer/SGD.hpp +++ b/include/aidge/learning/optimizer/SGD.hpp @@ -47,23 +47,23 @@ public: Attributes_(attr<SGDAttr::Momentum>(momentum), attr<SGDAttr::Dampening>(dampening)) { - mMomentum = Tensor(Array1D<float, 1>{{momentum}}); - mReversedDampening = Tensor(Array1D<float, 1>{{1.0f - dampening}}); + mMomentum = Tensor(momentum); + mReversedDampening = Tensor(1.0f - dampening); } void update() override final { - mLR = Tensor(Array1D<float, 1>{{learningRate()}}); + mLR = Tensor(learningRate()); mLR.setBackend(mParameters[0]->getImpl()->backend()); if (mLRScheduler.step() == 0) { for (std::size_t i = 0; i < mParameters.size(); ++i) { mGradientInertia[i] = mParameters[i]->grad()->clone(); - *mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i]; + *mParameters[i] -= mLR*mGradientInertia[i]; } } else { for (std::size_t i = 0; i < mParameters.size(); ++i) { mGradientInertia[i] = mMomentum*mGradientInertia[i] + mReversedDampening*(*mParameters[i]->grad()); - *mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i]; + *mParameters[i] -= mLR*mGradientInertia[i]; } } mLRScheduler.update(); -- GitLab From 69abe5691d56e673e60d29ca8e069ec6889efecd Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 5 Dec 2024 14:27:13 +0000 Subject: [PATCH 2/5] upd Catch2 version 3.0.1 -> 3.7.1 --- unit_tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 07d35e3..27dab59 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -4,7 +4,7 @@ include(FetchContent) FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.0.1 # or a later release + GIT_TAG v3.7.1 # or a later release ) FetchContent_MakeAvailable(Catch2) -- GitLab From 0a8c74f958d3cc49623babd0beff76144a4a2d47 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 5 Dec 2024 14:30:18 +0000 Subject: [PATCH 3/5] small changes in 'Test_SGD.cpp' --- unit_tests/optimizer/Test_SGD.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp index 3f13807..14986a7 100644 --- a/unit_tests/optimizer/Test_SGD.cpp +++ b/unit_tests/optimizer/Test_SGD.cpp @@ -9,13 +9,15 @@ * ********************************************************************************/ -#include <catch2/catch_test_macros.hpp> #include <cstddef> // std::size_t #include <memory> #include <random> // std::random_device, std::mt19937, std::uniform_int_distribution #include <set> #include <vector> +#include <catch2/catch_test_macros.hpp> +#include <fmt/core.h> + #include "aidge/data/Tensor.hpp" #include "aidge/backend/cpu/data/TensorImpl.hpp" #include "aidge/learning/learningRate/LRScheduler.hpp" @@ -81,9 +83,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") { tensors[i] = std::make_shared<Tensor>(dims); tensors[i]->setBackend("cpu"); tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]); - optim_tensors[i] = std::make_shared<Tensor>(dims); - optim_tensors[i]->setBackend("cpu"); - optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]); + optim_tensors[i] = std::make_shared<Tensor>(tensors[i]->clone()); // optim_tensors[i]->initGrad(); grad_tensors[i] = std::make_shared<Tensor>(dims); -- GitLab From bb116c6a9e1d421ae6b6e91bee5a3f19f93691a4 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 5 Dec 2024 23:00:35 +0000 Subject: [PATCH 4/5] Reduce absolute precision requirement in Test_Adam.cpp --- unit_tests/optimizer/Test_Adam.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp index a3d7c4b..caacb9c 100644 --- a/unit_tests/optimizer/Test_Adam.cpp +++ b/unit_tests/optimizer/Test_Adam.cpp @@ -146,7 +146,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { for (std::size_t t = 0; t < nb_tensors; ++t) { const Tensor tmpt1= *(opt.parameters().at(t)); const Tensor tmpt2= *tensors[t]; - REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f)); + REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-7f)); } } } -- GitLab From e4c01874a3f92256b52362fd0be9a3123cbf19b7 Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 5 Dec 2024 23:37:17 +0000 Subject: [PATCH 5/5] restore absolute precision value in test, change std::pow values to get float in Test_Adam --- unit_tests/optimizer/Test_Adam.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp index caacb9c..cd171e3 100644 --- a/unit_tests/optimizer/Test_Adam.cpp +++ b/unit_tests/optimizer/Test_Adam.cpp @@ -130,8 +130,8 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { for (std::size_t step = 0; step < 10; ++step) { // truth - float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1)); - float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1)); + float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, static_cast<float>(step + 1))) / (1.0f - std::pow(beta1, static_cast<float>(step + 1))); + float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, static_cast<float>(step + 1))); for (std::size_t t = 0; t < nb_tensors; ++t) { for (std::size_t i = 0; i < size_tensors[t]; ++i) { val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i]; @@ -146,7 +146,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { for (std::size_t t = 0; t < nb_tensors; ++t) { const Tensor tmpt1= *(opt.parameters().at(t)); const Tensor tmpt2= *tensors[t]; - REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-7f)); + REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f)); } } } -- GitLab