Commit 86852f74 authored by Maxence Naud

Merge branch 'dev' into 'main'

v0.2.2

See merge request eclipse/aidge/aidge_learning!28
parents 75a59b1e aed8f560
+# Version 0.2.2 (December 12, 2024)
 # Version 0.1.1 (May 14, 2024)
 * Fix loss function to return Tensor with gradient
......
@@ -5,7 +5,7 @@ file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
 project(aidge_learning
         VERSION ${version}
         DESCRIPTION "Functions and alogrithms to train models in the AIDGE framework"
         LANGUAGES CXX)
 message(STATUS "Project name: ${CMAKE_PROJECT_NAME}")
......
@@ -35,12 +35,12 @@ class Adam: public Optimizer, public StaticAttributes<AdamAttr, float, float, fl
 private:
     std::vector<Tensor> mMomentum1;
     std::vector<Tensor> mMomentum2;
-    Tensor mLR{std::vector<std::size_t>({1})};
-    Tensor mBeta1{std::vector<std::size_t>({1})};
-    Tensor mReversedBeta1{std::vector<std::size_t>({1})};
-    Tensor mBeta2{std::vector<std::size_t>({1})};
-    Tensor mReversedBeta2{std::vector<std::size_t>({1})};
-    Tensor mEpsilon{std::vector<std::size_t>({1})};
+    Tensor mLR{1.0f};
+    Tensor mBeta1;
+    Tensor mReversedBeta1;
+    Tensor mBeta2;
+    Tensor mReversedBeta2;
+    Tensor mEpsilon;
 public:
     using Attributes_ = StaticAttributes<AdamAttr, float, float, float>;
@@ -51,19 +51,17 @@ public:
         : Optimizer(),
           Attributes_(attr<AdamAttr::Beta1>(beta1),
                       attr<AdamAttr::Beta2>(beta2),
-                      attr<AdamAttr::Epsilon>(epsilon))
+                      attr<AdamAttr::Epsilon>(epsilon)),
+          mBeta1(beta1),
+          mReversedBeta1(1.0f - beta1),
+          mBeta2(beta2),
+          mReversedBeta2(1.0f - beta2),
+          mEpsilon(epsilon)
     {
-        mBeta1 = Tensor(Array1D<float, 1>{{beta1}});
-        mReversedBeta1 = Tensor(Array1D<float, 1>{{1.0f - beta1}});
-        mBeta2 = Tensor(Array1D<float, 1>{{beta2}});
-        mReversedBeta2 = Tensor(Array1D<float, 1>{{1.0f - beta2}});
-        mEpsilon = Tensor(Array1D<float, 1>{{epsilon}});
     }
     void update() override final {
-        mLR = Tensor(Array1D<float, 1>{{learningRate()}});
+        mLR = Tensor(learningRate());
         mLR.setBackend(mParameters[0]->getImpl()->backend());
         if (mParameters[0]->getImpl()->backend() != mBeta1.getImpl()->backend()) {
@@ -73,11 +71,11 @@ public:
             mReversedBeta2.setBackend(mParameters[0]->getImpl()->backend());
         }
-        Tensor alpha = Tensor(Array1D<float, 1>{{ static_cast<float>(learningRate() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), mLRScheduler.step() + 1))
-                                        / (1.0f - std::pow(this->getAttr<AdamAttr::Beta1>(), mLRScheduler.step() + 1))) }});
+        Tensor alpha = Tensor(learningRate() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1)))
+                            / (1.0f - std::pow(this->getAttr<AdamAttr::Beta1>(), static_cast<float>(mLRScheduler.step() + 1))));
         alpha.setBackend(mParameters[0]->getImpl()->backend());
-        Tensor epsilon = Tensor(Array1D<float, 1>{{ static_cast<float>(this->getAttr<AdamAttr::Epsilon>() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), mLRScheduler.step() + 1))) }});
+        Tensor epsilon = Tensor(this->getAttr<AdamAttr::Epsilon>() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1))));
         epsilon.setBackend(mParameters[0]->getImpl()->backend());
         if (mLRScheduler.step() == 0) {
@@ -90,13 +88,13 @@ public:
                 mMomentum2[i].zeros();
             }
         }
         for (std::size_t i = 0; i < mParameters.size(); ++i) {
             mMomentum1[i] = mBeta1 * mMomentum1[i] + mReversedBeta1 * (*mParameters[i]->grad());
             mMomentum2[i] = mBeta2 * mMomentum2[i] + mReversedBeta2 * (*mParameters[i]->grad()) * (*mParameters[i]->grad());
-            *mParameters[i] = *mParameters[i] - alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon);
+            *mParameters[i] -= alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon);
         }
         mLRScheduler.update();
     }
......
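The rescaled `alpha` and `epsilon` in the hunk above fold Adam's bias correction into the step size instead of dividing the moment estimates explicitly. Below is a minimal scalar sketch of that folded update, for reference only; the function name, parameter names, and default values are illustrative, not part of the Aidge API.

#include <cmath>
#include <cstddef>

// One Adam step on a scalar parameter p with gradient g.
// With alpha_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t) and
//      eps_t   = eps * sqrt(1 - beta2^t),
// p -= alpha_t * m1 / (sqrt(m2) + eps_t) equals the textbook
// p -= lr * m1_hat / (sqrt(m2_hat) + eps) with m_hat = m / (1 - beta^t).
void adam_step(float& p, float g, float& m1, float& m2, std::size_t step,
               float lr = 0.001f, float beta1 = 0.9f,
               float beta2 = 0.999f, float eps = 1e-8f) {
    const float t = static_cast<float>(step + 1);
    const float alpha_t = lr * std::sqrt(1.0f - std::pow(beta2, t))
                             / (1.0f - std::pow(beta1, t));
    const float eps_t = eps * std::sqrt(1.0f - std::pow(beta2, t));
    m1 = beta1 * m1 + (1.0f - beta1) * g;      // first moment estimate
    m2 = beta2 * m2 + (1.0f - beta2) * g * g;  // second moment estimate
    p -= alpha_t * m1 / (std::sqrt(m2) + eps_t);
}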
@@ -47,23 +47,23 @@ public:
           Attributes_(attr<SGDAttr::Momentum>(momentum),
                       attr<SGDAttr::Dampening>(dampening))
     {
-        mMomentum = Tensor(Array1D<float, 1>{{momentum}});
-        mReversedDampening = Tensor(Array1D<float, 1>{{1.0f - dampening}});
+        mMomentum = Tensor(momentum);
+        mReversedDampening = Tensor(1.0f - dampening);
     }
     void update() override final {
-        mLR = Tensor(Array1D<float, 1>{{learningRate()}});
+        mLR = Tensor(learningRate());
         mLR.setBackend(mParameters[0]->getImpl()->backend());
         if (mLRScheduler.step() == 0) {
             for (std::size_t i = 0; i < mParameters.size(); ++i) {
                 mGradientInertia[i] = mParameters[i]->grad()->clone();
-                *mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
+                *mParameters[i] -= mLR*mGradientInertia[i];
             }
         } else {
             for (std::size_t i = 0; i < mParameters.size(); ++i) {
                 mGradientInertia[i] = mMomentum*mGradientInertia[i] + mReversedDampening*(*mParameters[i]->grad());
-                *mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
+                *mParameters[i] -= mLR*mGradientInertia[i];
             }
         }
         mLRScheduler.update();
......
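For comparison, a scalar reference of the SGD update in the hunk above: the first scheduler step seeds the inertia with the raw gradient, and later steps apply momentum with a `(1 - dampening)` weight on the new gradient. Names below are illustrative, not the Aidge API.

#include <cstddef>
#include <vector>

// Reference SGD-with-momentum step over a flat parameter vector p.
void sgd_step(std::vector<float>& p, const std::vector<float>& g,
              std::vector<float>& inertia, std::size_t step,
              float lr, float momentum, float dampening) {
    for (std::size_t i = 0; i < p.size(); ++i) {
        inertia[i] = (step == 0)
                         ? g[i]  // first step: inertia is the gradient itself
                         : momentum * inertia[i] + (1.0f - dampening) * g[i];
        p[i] -= lr * inertia[i];
    }
}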
@@ -60,8 +60,8 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     const std::shared_ptr<Node> target_node = Producer(target, "label");
     // Define nodes: add1 = prediction + eps1, add2 = target + eps1
-    const std::shared_ptr<Node> add1_node = Add(2, "add1");
-    const std::shared_ptr<Node> add2_node = Add(2, "add2");
+    const std::shared_ptr<Node> add1_node = Add("add1");
+    const std::shared_ptr<Node> add2_node = Add("add2");
     prediction_node->addChild(add1_node, 0, 0);
     Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
         ->addChild(add1_node, 0, 1);
@@ -107,7 +107,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
     sub1_node->addChild(div2_node, 0, 1);
     // Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
-    const std::shared_ptr<Node> add3_node = Add(2, "add3");
+    const std::shared_ptr<Node> add3_node = Add("add3");
     div1_node->addChild(add3_node, 0, 0);
     div2_node->addChild(add3_node, 0, 1);
......
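The only functional change in these hunks tracks the aidge_core API: `Add` no longer takes an explicit input count, the two operands being wired through `addChild` as before. The `add3` expression spelled out in the comment is what the graph computes node by node; here is a scalar cross-check of that expression only, as a sketch: the `eps` defaults are illustrative placeholders, not the values used by `Aidge::loss::BCE`.

// add3 = div1 - div2, with the names used in the graph comments:
// (target + eps1) / (prediction + eps1) - (1 - target + eps2) / (1 - prediction + eps2)
float bce_add3(float prediction, float target,
               float eps1 = 1e-7f, float eps2 = 1e-7f) {
    const float div1 = (target + eps1) / (prediction + eps1);
    const float div2 = (1.0f - target + eps2) / (1.0f - prediction + eps2);
    return div1 - div2;
}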
@@ -4,7 +4,7 @@ include(FetchContent)
 FetchContent_Declare(
     Catch2
     GIT_REPOSITORY https://github.com/catchorg/Catch2.git
-    GIT_TAG v3.0.1 # or a later release
+    GIT_TAG v3.7.1 # or a later release
 )
 FetchContent_MakeAvailable(Catch2)
@@ -45,11 +45,11 @@ if(aidge_backend_cuda_FOUND)
     # Enable CUDA language support and separable compilation for the target
     enable_language(CUDA)
     set_target_properties(${tests_exe} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
     target_include_directories(${tests_exe} PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
     # Link manually specified CUDA libraries if the targets are not available
     target_link_libraries(${tests_exe}
         PUBLIC
             _aidge_backend_cuda
             CUDA::cudart
             CUDA::cublas
......
@@ -130,8 +130,8 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
         for (std::size_t step = 0; step < 10; ++step) {
             // truth
-            float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
-            float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
+            float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, static_cast<float>(step + 1))) / (1.0f - std::pow(beta1, static_cast<float>(step + 1)));
+            float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, static_cast<float>(step + 1)));
             for (std::size_t t = 0; t < nb_tensors; ++t) {
                 for (std::size_t i = 0; i < size_tensors[t]; ++i) {
                     val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
......
@@ -9,13 +9,15 @@
  *
  ********************************************************************************/
-#include <catch2/catch_test_macros.hpp>
 #include <cstddef> // std::size_t
 #include <memory>
 #include <random> // std::random_device, std::mt19937, std::uniform_int_distribution
 #include <set>
 #include <vector>
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
 #include "aidge/data/Tensor.hpp"
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
 #include "aidge/learning/learningRate/LRScheduler.hpp"
@@ -81,9 +83,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
             tensors[i] = std::make_shared<Tensor>(dims);
             tensors[i]->setBackend("cpu");
             tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
-            optim_tensors[i] = std::make_shared<Tensor>(dims);
-            optim_tensors[i]->setBackend("cpu");
-            optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
+            optim_tensors[i] = std::make_shared<Tensor>(tensors[i]->clone());
             // optim_tensors[i]->initGrad();
             grad_tensors[i] = std::make_shared<Tensor>(dims);
......
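The second hunk above only simplifies the test setup: instead of allocating a second tensor, setting its backend, and copying the raw buffer, the already-initialised tensor is cloned. A hedged sketch of that pattern, reusing only calls and header paths visible in this diff (the helper name is illustrative):

#include <memory>

#include "aidge/data/Tensor.hpp"

// Duplicate an initialised tensor: clone() carries over the dims, backend
// and values, so no manual setBackend()/raw copy is needed afterwards.
std::shared_ptr<Aidge::Tensor> duplicate(std::shared_ptr<Aidge::Tensor> src) {
    return std::make_shared<Aidge::Tensor>(src->clone());
}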
-0.2.1
+0.2.2