Commit 86852f74 authored by Maxence Naud

Merge branch 'dev' into 'main'

v0.2.2

See merge request !28
parents 75a59b1e aed8f560
Pipeline #61688 passed with warnings
# Version 0.2.2 (December 12, 2024)
# Version 0.1.1 (May 14, 2024)
* Fix loss function to return Tensor with gradient
......
......@@ -5,7 +5,7 @@ file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
project(aidge_learning
VERSION ${version}
DESCRIPTION "Functions and alogrithms to train models in the AIDGE framework"
DESCRIPTION "Functions and alogrithms to train models in the AIDGE framework"
LANGUAGES CXX)
message(STATUS "Project name: ${CMAKE_PROJECT_NAME}")
......
......@@ -35,12 +35,12 @@ class Adam: public Optimizer, public StaticAttributes<AdamAttr, float, float, fl
private:
std::vector<Tensor> mMomentum1;
std::vector<Tensor> mMomentum2;
Tensor mLR{std::vector<std::size_t>({1})};
Tensor mBeta1{std::vector<std::size_t>({1})};
Tensor mReversedBeta1{std::vector<std::size_t>({1})};
Tensor mBeta2{std::vector<std::size_t>({1})};
Tensor mReversedBeta2{std::vector<std::size_t>({1})};
Tensor mEpsilon{std::vector<std::size_t>({1})};
Tensor mLR{1.0f};
Tensor mBeta1;
Tensor mReversedBeta1;
Tensor mBeta2;
Tensor mReversedBeta2;
Tensor mEpsilon;
public:
using Attributes_ = StaticAttributes<AdamAttr, float, float, float>;
......@@ -51,19 +51,17 @@ public:
: Optimizer(),
Attributes_(attr<AdamAttr::Beta1>(beta1),
attr<AdamAttr::Beta2>(beta2),
attr<AdamAttr::Epsilon>(epsilon))
attr<AdamAttr::Epsilon>(epsilon)),
mBeta1(beta1),
mReversedBeta1(1.0f - beta1),
mBeta2(beta2),
mReversedBeta2(1.0f - beta2),
mEpsilon(epsilon)
{
mBeta1 = Tensor(Array1D<float, 1>{{beta1}});
mReversedBeta1 = Tensor(Array1D<float, 1>{{1.0f - beta1}});
mBeta2 = Tensor(Array1D<float, 1>{{beta2}});
mReversedBeta2 = Tensor(Array1D<float, 1>{{1.0f - beta2}});
mEpsilon = Tensor(Array1D<float, 1>{{epsilon}});
}
void update() override final {
mLR = Tensor(Array1D<float, 1>{{learningRate()}});
mLR = Tensor(learningRate());
mLR.setBackend(mParameters[0]->getImpl()->backend());
if (mParameters[0]->getImpl()->backend() != mBeta1.getImpl()->backend()) {
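For readers less familiar with the Aidge API, the constructor change above swaps the old pattern, building a rank-1 Tensor from a one-element Array1D inside the constructor body, for direct scalar construction in the member-initializer list. A minimal sketch of the two styles, with an illustrative value that is not part of the commit:

#include "aidge/data/Tensor.hpp"

// Old style: wrap the scalar in a one-element Array1D, then build the Tensor.
Aidge::Tensor beta1_old = Aidge::Tensor(Aidge::Array1D<float, 1>{{0.9f}});

// New style (as used in the initializer list above): construct from the scalar.
Aidge::Tensor beta1_new(0.9f);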
......@@ -73,11 +71,11 @@ public:
mReversedBeta2.setBackend(mParameters[0]->getImpl()->backend());
}
Tensor alpha = Tensor(Array1D<float, 1>{{ static_cast<float>(learningRate() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), mLRScheduler.step() + 1))
/ (1.0f - std::pow(this->getAttr<AdamAttr::Beta1>(), mLRScheduler.step() + 1))) }});
Tensor alpha = Tensor(learningRate() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1)))
/ (1.0f - std::pow(this->getAttr<AdamAttr::Beta1>(), static_cast<float>(mLRScheduler.step() + 1))));
alpha.setBackend(mParameters[0]->getImpl()->backend());
Tensor epsilon = Tensor(Array1D<float, 1>{{ static_cast<float>(this->getAttr<AdamAttr::Epsilon>() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), mLRScheduler.step() + 1))) }});
Tensor epsilon = Tensor(this->getAttr<AdamAttr::Epsilon>() * std::sqrt(1.0f - std::pow(this->getAttr<AdamAttr::Beta2>(), static_cast<float>(mLRScheduler.step() + 1))));
epsilon.setBackend(mParameters[0]->getImpl()->backend());
if (mLRScheduler.step() == 0) {
......@@ -90,13 +88,13 @@ public:
mMomentum2[i].zeros();
}
}
for (std::size_t i = 0; i < mParameters.size(); ++i) {
mMomentum1[i] = mBeta1 * mMomentum1[i] + mReversedBeta1 * (*mParameters[i]->grad());
mMomentum2[i] = mBeta2 * mMomentum2[i] + mReversedBeta2 * (*mParameters[i]->grad()) * (*mParameters[i]->grad());
*mParameters[i] = *mParameters[i] - alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon);
*mParameters[i] -= alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon);
}
mLRScheduler.update();
}
......
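For context, the quantities handled in update() above follow the standard Adam rule with the bias correction folded into the step size. With g_t the gradient of a parameter at scheduler step t (0-indexed, hence the t + 1 exponents), \eta the learning rate and \theta the parameter, the loop computes:

m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
\alpha_t = \eta \sqrt{1 - \beta_2^{\,t+1}} / (1 - \beta_1^{\,t+1})
\hat{\epsilon}_t = \epsilon \sqrt{1 - \beta_2^{\,t+1}}
\theta_{t+1} = \theta_t - \alpha_t \, m_t / (\sqrt{v_t} + \hat{\epsilon}_t)

mMomentum1 and mMomentum2 hold m_t and v_t, while alpha and epsilon in the code correspond to \alpha_t and \hat{\epsilon}_t.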
......@@ -47,23 +47,23 @@ public:
Attributes_(attr<SGDAttr::Momentum>(momentum),
attr<SGDAttr::Dampening>(dampening))
{
mMomentum = Tensor(Array1D<float, 1>{{momentum}});
mReversedDampening = Tensor(Array1D<float, 1>{{1.0f - dampening}});
mMomentum = Tensor(momentum);
mReversedDampening = Tensor(1.0f - dampening);
}
void update() override final {
mLR = Tensor(Array1D<float, 1>{{learningRate()}});
mLR = Tensor(learningRate());
mLR.setBackend(mParameters[0]->getImpl()->backend());
if (mLRScheduler.step() == 0) {
for (std::size_t i = 0; i < mParameters.size(); ++i) {
mGradientInertia[i] = mParameters[i]->grad()->clone();
*mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
*mParameters[i] -= mLR*mGradientInertia[i];
}
} else {
for (std::size_t i = 0; i < mParameters.size(); ++i) {
mGradientInertia[i] = mMomentum*mGradientInertia[i] + mReversedDampening*(*mParameters[i]->grad());
*mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
*mParameters[i] -= mLR*mGradientInertia[i];
}
}
mLRScheduler.update();
......
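Likewise, the SGD update shown above is momentum SGD with dampening. With \mu the Momentum attribute, \tau the Dampening attribute, g_t the gradient and v_t the gradient inertia stored in mGradientInertia, the two branches implement:

v_0 = g_0
v_t = \mu v_{t-1} + (1 - \tau) g_t \quad (t > 0)
\theta_{t+1} = \theta_t - \eta \, v_t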
......@@ -60,8 +60,8 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
const std::shared_ptr<Node> target_node = Producer(target, "label");
// Define nodes: add1 = prediction + eps1, add2 = target + eps1
const std::shared_ptr<Node> add1_node = Add(2, "add1");
const std::shared_ptr<Node> add2_node = Add(2, "add2");
const std::shared_ptr<Node> add1_node = Add("add1");
const std::shared_ptr<Node> add2_node = Add("add2");
prediction_node->addChild(add1_node, 0, 0);
Producer(std::make_shared<Tensor>(Array1D<float, 1>{{eps1}}))
->addChild(add1_node, 0, 1);
......@@ -107,7 +107,7 @@ Aidge::Tensor Aidge::loss::BCE(std::shared_ptr<Tensor>& prediction,
sub1_node->addChild(div2_node, 0, 1);
// Define node: add3 = (target + eps1) / (prediction + eps1) - (1 - target + eps2)/(1 - prediction + eps2)
const std::shared_ptr<Node> add3_node = Add(2, "add3");
const std::shared_ptr<Node> add3_node = Add("add3");
div1_node->addChild(add3_node, 0, 0);
div2_node->addChild(add3_node, 0, 1);
......
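The in-code comment above spells out the term that add3 assembles; written out, with p the prediction, t the target and \epsilon_1, \epsilon_2 the stabilisers injected by the Producer nodes:

\mathrm{add3} = \frac{t + \epsilon_1}{p + \epsilon_1} - \frac{1 - t + \epsilon_2}{1 - p + \epsilon_2}

Up to sign and the \epsilon terms added to the numerators, this matches the derivative of the binary cross-entropy -[\,t \ln p + (1 - t)\ln(1 - p)\,] with respect to p, consistent with its role on the gradient path of the loss graph.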
......@@ -4,7 +4,7 @@ include(FetchContent)
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v3.0.1 # or a later release
GIT_TAG v3.7.1 # or a later release
)
FetchContent_MakeAvailable(Catch2)
......@@ -45,11 +45,11 @@ if(aidge_backend_cuda_FOUND)
# Enable CUDA language support and separable compilation for the target
enable_language(CUDA)
set_target_properties(${tests_exe} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(${tests_exe} PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
# Link manually specified CUDA libraries if the targets are not available
target_link_libraries(${tests_exe}
PUBLIC
PUBLIC
_aidge_backend_cuda
CUDA::cudart
CUDA::cublas
......
......@@ -130,8 +130,8 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
for (std::size_t step = 0; step < 10; ++step) {
// truth
float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, static_cast<float>(step + 1))) / (1.0f - std::pow(beta1, static_cast<float>(step + 1)));
float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, static_cast<float>(step + 1)));
for (std::size_t t = 0; t < nb_tensors; ++t) {
for (std::size_t i = 0; i < size_tensors[t]; ++i) {
val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
......
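The static_cast<float> added around step + 1 here (and in Adam::update() above) works around a common std::pow pitfall: with a float base and an integral exponent, the arguments are promoted and the double overload is selected, so storing the result in a float narrows it and typically draws a conversion warning. A small standalone illustration, not part of the commit:

#include <cmath>
#include <cstddef>
#include <cstdio>

int main() {
    const float beta2 = 0.999f;
    const std::size_t step = 4;

    // Integral exponent: promoted to the double overload of std::pow,
    // then implicitly narrowed back to float (conversion warning).
    const float correction_mixed = 1.0f - std::pow(beta2, step + 1);

    // float exponent: the float overload is used and the whole
    // bias-correction term stays in single precision.
    const float correction_float = 1.0f - std::pow(beta2, static_cast<float>(step + 1));

    std::printf("%.9f %.9f\n", correction_mixed, correction_float);
    return 0;
}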
......@@ -9,13 +9,15 @@
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_int_distribution
#include <set>
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <fmt/core.h>
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/learning/learningRate/LRScheduler.hpp"
......@@ -81,9 +83,7 @@ TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
tensors[i] = std::make_shared<Tensor>(dims);
tensors[i]->setBackend("cpu");
tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
optim_tensors[i] = std::make_shared<Tensor>(dims);
optim_tensors[i]->setBackend("cpu");
optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
optim_tensors[i] = std::make_shared<Tensor>(tensors[i]->clone());
// optim_tensors[i]->initGrad();
grad_tensors[i] = std::make_shared<Tensor>(dims);
......
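The test now seeds each optimiser tensor with Tensor::clone() instead of allocating a fresh Tensor, setting its backend and copying the raw buffer by hand; judging from the replacement above, clone() produces a deep copy that carries the dims, backend and values in one call. A minimal sketch of the idiom (the helper name is illustrative, not from the commit):

#include <memory>
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"

// Hypothetical helper: duplicate a source tensor so the optimizer can
// update the copy while the original keeps the reference values.
std::shared_ptr<Aidge::Tensor> duplicateForOptim(const std::shared_ptr<Aidge::Tensor>& src) {
    // clone() replaces the old setBackend() + getImpl()->copy() sequence.
    return std::make_shared<Aidge::Tensor>(src->clone());
}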
0.2.1
0.2.2