Commit 65c2bc33 authored by Maxence Naud
[Add] Optimizer & SGD classes with unit-test

parent c5e67b0e
1 merge request: !3 Dev - learning - v0.1.0
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CORE_OPTIMIZER_OPTIMIZER_H_
#define AIDGE_CORE_OPTIMIZER_OPTIMIZER_H_
#include <memory>
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/learning/learningRate/LRScheduler.hpp"
namespace Aidge {
/**
* @brief Interface for optimization classes.
* Parameters to optimize and the learning-rate scheduler are set through dedicated
* setter functions rather than through the constructor, so that derived classes do
* not need constructors with too many parameters.
*/
class Optimizer {
protected:
/// @brief List of Tensors to update.
std::vector<std::shared_ptr<Tensor>> mParameters{};
/// @brief Learning rate scheduler.
/// @note Initialized with constant learning rate.
LRScheduler mLRScheduler = LRScheduler(1.0e-5f);
public:
Optimizer() = default;
virtual ~Optimizer() noexcept;
public:
// getter & setters
inline const std::vector<std::shared_ptr<Tensor>>& parameters() const noexcept {
return mParameters;
}
virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
mParameters = parameters;
for (const auto& param : parameters) {
param->initGradient(); // create gradient and set it to zeros
}
}
constexpr float learningRate() const noexcept {
return mLRScheduler.learningRate();
}
const LRScheduler& learningRateScheduler() const noexcept {
return mLRScheduler;
}
void setLearningRateScheduler(const LRScheduler& lrscheduler) {
mLRScheduler = lrscheduler;
}
/**
* @brief Update each registered Tensor with respect to the associated update function.
*/
virtual void update() {}
/**
* @brief Reset the gradient of each parameter registered in the Optimizer.
*/
void resetGrad() const {
for (const auto& t_ptr : mParameters) {
t_ptr->grad()->zeros();
}
}
};
} // namespace Aidge
#endif // AIDGE_CORE_OPTIMIZER_OPTIMIZER_H_
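The Optimizer base class only stores the parameter list and the learning-rate scheduler; derived classes provide update(). Below is a minimal usage sketch (illustrative driver code, not part of this commit) of the intended call sequence, assuming the caller already holds trainable Tensors with a backend set and that ConstantLR comes from LRSchedulerList.hpp as in the unit test further down:

// --- Usage sketch (illustrative, not part of the commit) ---
#include <memory>
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/learning/learningRate/LRSchedulerList.hpp"
#include "aidge/learning/optimizer/SGD.hpp"

// `params` stands for whatever list of trainable Tensors the caller has collected;
// the forward/backward passes that fill params[i]->grad() are elided.
void trainStepSketch(const std::vector<std::shared_ptr<Aidge::Tensor>>& params) {
    Aidge::SGD opt(/*momentum=*/0.9f, /*dampening=*/0.0f);
    opt.setParameters(params);  // registers the Tensors and creates their gradients
    opt.setLearningRateScheduler(Aidge::learning::ConstantLR(1.0e-3f));

    // ... forward pass, loss, backward pass filling params[i]->grad() ...

    opt.update();     // apply one optimization step
    opt.resetGrad();  // zero the gradients before the next iteration
}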
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CORE_OPTIMIZER_SGD_H_
#define AIDGE_CORE_OPTIMIZER_SGD_H_
#include <functional>
#include <memory>
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/learning/optimizer/Optimizer.hpp"
#include "aidge/utils/StaticAttributes.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
enum class SGDAttr {
Momentum,
Dampening
};
class SGD: public Optimizer, public StaticAttributes<SGDAttr, float, float> {
private:
std::vector<Tensor> mGradientInertia;
Tensor mLR{std::vector<std::size_t>({1})};
Tensor mMomentum{std::vector<std::size_t>({1})};
Tensor mReversedDampening{std::vector<std::size_t>({1})};
public:
using Attributes_ = StaticAttributes<SGDAttr, float, float>;
template <SGDAttr e>
using attr = typename Attributes_::template attr<e>;
SGD(const float momentum = 0.0f, const float dampening = 0.0f)
: Optimizer(),
Attributes_(attr<SGDAttr::Momentum>(momentum),
attr<SGDAttr::Dampening>(dampening))
{
mMomentum.setBackend("cpu");
mMomentum.set<float>(0, momentum);
mReversedDampening.setBackend("cpu");
mReversedDampening.set<float>(0, 1.0f - dampening);
}
void update() override {
// Move the scalar learning-rate Tensor to the parameters' backend and refresh
// its value from the scheduler before applying the step.
mLR.setBackend(mParameters[0]->getImpl()->backend());
mLR.set<float>(0, learningRate());
if (mParameters[0]->getImpl()->backend() != mMomentum.getImpl()->backend()) {
mMomentum.setBackend(mParameters[0]->getImpl()->backend());
mReversedDampening.setBackend(mParameters[0]->getImpl()->backend());
}
if (mLRScheduler.step() == 0) {
// First step: the inertia (velocity) is initialized with the raw gradient.
for (std::size_t i = 0; i < mParameters.size(); ++i) {
mGradientInertia[i] = mParameters[i]->grad()->clone();
*mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
}
} else {
// Later steps: v = momentum*v + (1 - dampening)*grad, then p = p - lr*v.
for (std::size_t i = 0; i < mParameters.size(); ++i) {
mGradientInertia[i] = mMomentum*mGradientInertia[i] + mReversedDampening*(*mParameters[i]->grad());
*mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
}
}
mLRScheduler.update();
}
void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) override {
Optimizer::setParameters(parameters);
mGradientInertia = std::vector<Tensor>(parameters.size());
for (std::size_t i = 0; i < parameters.size(); ++i) {
mGradientInertia[i] = Tensor(parameters[i]->dims());
}
}
};
} // namespace Aidge
namespace {
template <>
const char *const EnumStrings<Aidge::SGDAttr>::data[] = {
"Momentum",
"Dampening"
};
}
#endif // AIDGE_CORE_OPTIMIZER_SGD_H_
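For reference, SGD::update() above implements the following recurrence, with parameters \theta, gradient g_t, learning rate \eta taken from the scheduler, momentum \mu and dampening d; the same recurrence is used as the ground truth in the unit test below:

v_0 = g_0, \qquad \theta_1 = \theta_0 - \eta\, v_0

v_t = \mu\, v_{t-1} + (1 - d)\, g_t, \qquad \theta_{t+1} = \theta_t - \eta\, v_t \qquad (t \geq 1)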
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/learning/optimizer/Optimizer.hpp"
Aidge::Optimizer::~Optimizer() noexcept = default;
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef>   // std::size_t
#include <cstdint>   // std::uint16_t
#include <memory>
#include <random>    // std::random_device, std::mt19937, std::uniform_int_distribution, std::uniform_real_distribution
#include <set>
#include <vector>
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/learning/learningRate/LRScheduler.hpp"
#include "aidge/learning/learningRate/LRSchedulerList.hpp"
#include "aidge/learning/optimizer/Optimizer.hpp"
#include "aidge/learning/optimizer/SGD.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
#include "aidge/backend/cpu/operator/MulImpl.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.0f); // random tensor values in [0.1, 1.0)
std::uniform_real_distribution<float> paramDist(0.001f, 1.0f); // random hyper-parameters in [0.001, 1.0)
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
// Create a random number of Tensors with random dims and random values,
// together with matching gradient and momentum buffers.
const std::size_t nb_tensors = dimSizeDist(gen);
std::vector<std::size_t> size_tensors(nb_tensors, 1);
std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);
std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);
std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);
std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_momentum_tensors(nb_tensors);
for (std::size_t i = 0; i < nb_tensors; ++i) {
std::vector<std::size_t> dims(nbDimsDist(gen));
for (std::size_t d = 0; d < dims.size(); ++d) {
dims[d] = dimSizeDist(gen);
size_tensors[i] *= dims[d];
}
val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_momentum_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
for (std::size_t j = 0; j < size_tensors[i]; ++j) {
val_tensors[i][j] = valueDist(gen);
val_grad_tensors[i][j] = valueDist(gen);
// val_momentum_tensors is already zero-initialized by std::make_unique<float[]>
}
tensors[i] = std::make_shared<Tensor>(dims);
tensors[i]->setBackend("cpu");
tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
optim_tensors[i] = std::make_shared<Tensor>(dims);
optim_tensors[i]->setBackend("cpu");
optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
optim_tensors[i]->initGradient();
grad_tensors[i] = std::make_shared<Tensor>(dims);
grad_tensors[i]->setBackend("cpu");
grad_tensors[i]->getImpl()->setRawPtr(val_grad_tensors[i].get(), size_tensors[i]);
momentum_tensors[i] = std::make_shared<Tensor>(dims);
momentum_tensors[i]->setBackend("cpu");
momentum_tensors[i]->getImpl()->setRawPtr(val_momentum_tensors[i].get(), size_tensors[i]);
REQUIRE((tensors[i]->hasImpl() &&
optim_tensors[i]->hasImpl() &&
grad_tensors[i]->hasImpl()));
}
// generate parameters
float lr = paramDist(gen);
float momentum = paramDist(gen);
float dampening = paramDist(gen);
// set Optimizer
SGD opt = SGD(momentum, dampening);
opt.setParameters(optim_tensors);
for (std::size_t t = 0; t < nb_tensors; ++t) {
optim_tensors[t]->grad()->getImpl()->setRawPtr(val_grad_tensors[t].get(), size_tensors[t]);
}
opt.setLearningRateScheduler(learning::ConstantLR(lr));
for (std::size_t t = 0; t < nb_tensors; ++t) {
const Tensor tmpt1 = *(opt.parameters().at(t));
const Tensor tmpt2 = *tensors[t];
REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
}
///////// step 0 /////////////
// truth
for (std::size_t t = 0; t < nb_tensors; ++t) {
for (std::size_t i = 0; i < size_tensors[t]; ++i) {
val_momentum_tensors[t][i] = val_grad_tensors[t][i];
val_tensors[t][i] = val_tensors[t][i] - lr*val_momentum_tensors[t][i];
}
}
// optimizer
opt.update();
// tests
for (std::size_t t = 0; t < nb_tensors; ++t) {
const Tensor tmpt1 = *(opt.parameters().at(t));
const Tensor tmpt2 = *tensors[t];
REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
}
///////// step > 0 /////////////
for (std::size_t step = 1; step < 10; ++step) {
// truth
for (std::size_t t = 0; t < nb_tensors; ++t) {
for (std::size_t i = 0; i < size_tensors[t]; ++i) {
val_momentum_tensors[t][i] = momentum*val_momentum_tensors[t][i] + (1 - dampening)*val_grad_tensors[t][i];
val_tensors[t][i] = val_tensors[t][i] - lr*val_momentum_tensors[t][i];
}
}
// optimizer
opt.update();
// test
for (std::size_t t = 0; t < nb_tensors; ++t) {
const Tensor tmpt1 = *(opt.parameters().at(t));
const Tensor tmpt2 = *tensors[t];
REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
}
}
}
}
} // namespace Aidge