diff --git a/include/aidge/learning/optimizer/Optimizer.hpp b/include/aidge/learning/optimizer/Optimizer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e621875beb1cfd58bf8474753c536b8c4e5183c
--- /dev/null
+++ b/include/aidge/learning/optimizer/Optimizer.hpp
@@ -0,0 +1,85 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CORE_OPTIMIZER_OPTIMIZER_H_
+#define AIDGE_CORE_OPTIMIZER_OPTIMIZER_H_
+
+#include <memory>
+#include <vector>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
+#include "aidge/learning/learningRate/LRScheduler.hpp"
+
+namespace Aidge {
+
+/**
+ * @brief Interface for optimization classes.
+ * Parameters to optimize and the learning rate scheduler should be set outside
+ * of the constructor, through their dedicated setter functions, to avoid
+ * constructors with too many parameters in derived classes.
+ */
+class Optimizer {
+protected:
+    /// @brief List of Tensors to update.
+    std::vector<std::shared_ptr<Tensor>> mParameters{};
+    /// @brief Learning rate scheduler.
+    /// @note Initialized with a constant learning rate.
+    LRScheduler mLRScheduler = LRScheduler(1.0e-5f);
+
+public:
+    Optimizer() = default;
+
+    virtual ~Optimizer() noexcept;
+
+public:
+    // getters & setters
+    inline const std::vector<std::shared_ptr<Tensor>>& parameters() const noexcept {
+        return mParameters;
+    }
+
+    virtual void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) {
+        mParameters = parameters;
+        for (const auto& param : parameters) {
+            param->initGradient(); // create the gradient and set it to zero
+        }
+    }
+
+    constexpr float learningRate() const noexcept {
+        return mLRScheduler.learningRate();
+    }
+
+    const LRScheduler& learningRateScheduler() const noexcept {
+        return mLRScheduler;
+    }
+
+    void setLearningRateScheduler(const LRScheduler& lrscheduler) {
+        mLRScheduler = lrscheduler;
+    }
+
+    /**
+     * @brief Update each registered Tensor using the associated update function.
+     */
+    virtual void update() {}
+
+    /**
+     * @brief Reset the gradient of each parameter registered in the Optimizer.
+     */
+    void resetGrad() const {
+        for (const auto& t_ptr : mParameters) {
+            t_ptr->grad()->zeros();
+        }
+    }
+};
+
+} // namespace Aidge
+
+#endif // AIDGE_CORE_OPTIMIZER_OPTIMIZER_H_
diff --git a/include/aidge/learning/optimizer/SGD.hpp b/include/aidge/learning/optimizer/SGD.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..854918a60f255826e156fb443930c8db697afc89
--- /dev/null
+++ b/include/aidge/learning/optimizer/SGD.hpp
@@ -0,0 +1,96 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CORE_OPTIMIZER_SGD_H_
+#define AIDGE_CORE_OPTIMIZER_SGD_H_
+
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/learning/optimizer/Optimizer.hpp"
+#include "aidge/utils/StaticAttributes.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/TensorUtils.hpp"
+
+namespace Aidge {
+
+enum class SGDAttr {
+    Momentum,
+    Dampening
+};
+
+class SGD: public Optimizer, public StaticAttributes<SGDAttr, float, float> {
+private:
+    std::vector<Tensor> mGradientInertia;
+    Tensor mLR{std::vector<std::size_t>({1})};
+    Tensor mMomentum{std::vector<std::size_t>({1})};
+    Tensor mReversedDampening{std::vector<std::size_t>({1})};
+
+public:
+    using Attributes_ = StaticAttributes<SGDAttr, float, float>;
+    template <SGDAttr e>
+    using attr = typename Attributes_::template attr<e>;
+
+    SGD(const float momentum = 0.0f, const float dampening = 0.0f)
+        : Optimizer(),
+          Attributes_(attr<SGDAttr::Momentum>(momentum),
+                      attr<SGDAttr::Dampening>(dampening))
+    {
+        mMomentum.setBackend("cpu");
+        mMomentum.set<float>(0, momentum);
+        mReversedDampening.setBackend("cpu");
+        mReversedDampening.set<float>(0, 1.0f - dampening);
+    }
+
+    void update() override {
+        mLR.setBackend(mParameters[0]->getImpl()->backend());
+        mLR.set<float>(0, learningRate());
+        if (mParameters[0]->getImpl()->backend() != mMomentum.getImpl()->backend()) {
+            mMomentum.setBackend(mParameters[0]->getImpl()->backend());
+            mReversedDampening.setBackend(mParameters[0]->getImpl()->backend());
+        }
+
+        if (mLRScheduler.step() == 0) {
+            for (std::size_t i = 0; i < mParameters.size(); ++i) {
+                mGradientInertia[i] = mParameters[i]->grad()->clone();
+                *mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
+            }
+        } else {
+            for (std::size_t i = 0; i < mParameters.size(); ++i) {
+                mGradientInertia[i] = mMomentum*mGradientInertia[i] + mReversedDampening*(*mParameters[i]->grad());
+                *mParameters[i] = *mParameters[i] - mLR*mGradientInertia[i];
+            }
+        }
+        mLRScheduler.update();
+    }
+
+    void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) override {
+        Optimizer::setParameters(parameters);
+        mGradientInertia = std::vector<Tensor>(parameters.size());
+        for (std::size_t i = 0; i < parameters.size(); ++i) {
+            mGradientInertia[i] = Tensor(parameters[i]->dims());
+        }
+    }
+};
+
+} // namespace Aidge
+
+
+namespace {
+template <>
+const char *const EnumStrings<Aidge::SGDAttr>::data[] = {
+    "Momentum",
+    "Dampening"
+};
+}
+#endif // AIDGE_CORE_OPTIMIZER_SGD_H_
diff --git a/src/optimizer/Optimizer.cpp b/src/optimizer/Optimizer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..367f2e84b5acab55d9458aded76f3a39c7f9e9f5
--- /dev/null
+++ b/src/optimizer/Optimizer.cpp
@@ -0,0 +1,14 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/learning/optimizer/Optimizer.hpp"
+
+Aidge::Optimizer::~Optimizer() noexcept = default;
diff --git a/unit_tests/optimizer/Test_SGD.cpp b/unit_tests/optimizer/Test_SGD.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..17f946ae1630c2423a37f703c7923a40e5fe66bf
--- /dev/null
+++ b/unit_tests/optimizer/Test_SGD.cpp
@@ -0,0 +1,151 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <cstddef>  // std::size_t
+#include <memory>
+#include <random>   // std::random_device, std::mt19937, std::uniform_int_distribution
+#include <set>
+#include <vector>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/learning/learningRate/LRScheduler.hpp"
+#include "aidge/learning/learningRate/LRSchedulerList.hpp"
+#include "aidge/learning/optimizer/Optimizer.hpp"
+#include "aidge/learning/optimizer/SGD.hpp"
+#include "aidge/backend/cpu/operator/AddImpl.hpp"
+#include "aidge/backend/cpu/operator/MulImpl.hpp"
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
+#include "aidge/utils/TensorUtils.hpp"
+
+namespace Aidge {
+TEST_CASE("[learning/SGD] update", "[Optimizer][SGD]") {
+    constexpr std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> valueDist(0.1f, 1.0f);   // random values in [0.1, 1.0]
+    std::uniform_real_distribution<float> paramDist(0.001f, 1.0f); // random hyper-parameters in [0.001, 1.0]
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
+
+
+    for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
+        // create a random number of Tensors with random dims and random values,
+        // plus matching random gradients and momentum buffers
+        const std::size_t nb_tensors = dimSizeDist(gen);
+        std::vector<std::size_t> size_tensors(nb_tensors, 1);
+
+        std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
+        std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);
+
+        std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);
+
+        std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
+        std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);
+
+        std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
+        std::vector<std::unique_ptr<float[]>> val_momentum_tensors(nb_tensors);
+
+        for (std::size_t i = 0; i < nb_tensors; ++i) {
+            std::vector<std::size_t> dims(nbDimsDist(gen));
+            for (std::size_t d = 0; d < dims.size(); ++d) {
+                dims[d] = dimSizeDist(gen);
+                size_tensors[i] *= dims[d];
+            }
+
+            val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+            val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+            val_momentum_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+            for (std::size_t j = 0; j < size_tensors[i]; ++j) {
+                val_tensors[i][j] = valueDist(gen);
+                val_grad_tensors[i][j] = valueDist(gen);
+                // val_momentum_tensors[i][j] = 0.0f;
+            }
+            tensors[i] = std::make_shared<Tensor>(dims);
+            tensors[i]->setBackend("cpu");
+            tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
+            optim_tensors[i] = std::make_shared<Tensor>(dims);
+            optim_tensors[i]->setBackend("cpu");
+            optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
+            optim_tensors[i]->initGradient();
+
+            grad_tensors[i] = std::make_shared<Tensor>(dims);
+            grad_tensors[i]->setBackend("cpu");
+            grad_tensors[i]->getImpl()->setRawPtr(val_grad_tensors[i].get(), size_tensors[i]);
+
+            momentum_tensors[i] = std::make_shared<Tensor>(dims);
+            momentum_tensors[i]->setBackend("cpu");
+            momentum_tensors[i]->getImpl()->setRawPtr(val_momentum_tensors[i].get(), size_tensors[i]);
+
+            REQUIRE((tensors[i]->hasImpl() &&
+                     optim_tensors[i]->hasImpl() &&
+                     grad_tensors[i]->hasImpl()));
+        }
+
+        // generate parameters
+        float lr = paramDist(gen);
+        float momentum = paramDist(gen);
+        float dampening = paramDist(gen);
+
+        // set Optimizer
+        SGD opt = SGD(momentum, dampening);
+        opt.setParameters(optim_tensors);
+        for (std::size_t t = 0; t < nb_tensors; ++t) {
+            optim_tensors[t]->grad()->getImpl()->setRawPtr(val_grad_tensors[t].get(), size_tensors[t]);
+        }
+        opt.setLearningRateScheduler(learning::ConstantLR(lr));
+
+        for (std::size_t t = 0; t < nb_tensors; ++t) {
+            const Tensor tmpt1 = *(opt.parameters().at(t));
+            const Tensor tmpt2 = *tensors[t];
+            REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
+        }
+
+        ///////// step 0 /////////////
+        // truth
+        for (std::size_t t = 0; t < nb_tensors; ++t) {
+            for (std::size_t i = 0; i < size_tensors[t]; ++i) {
+                val_momentum_tensors[t][i] = val_grad_tensors[t][i];
+                val_tensors[t][i] = val_tensors[t][i] - lr*val_momentum_tensors[t][i];
+            }
+        }
+        // optimizer
+        opt.update();
+        // test
+        for (std::size_t t = 0; t < nb_tensors; ++t) {
+            const Tensor tmpt1 = *(opt.parameters().at(t));
+            const Tensor tmpt2 = *tensors[t];
+            REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
+        }
+
+        ///////// step > 0 /////////////
+        for (std::size_t step = 1; step < 10; ++step) {
+            // truth
+            for (std::size_t t = 0; t < nb_tensors; ++t) {
+                for (std::size_t i = 0; i < size_tensors[t]; ++i) {
+                    val_momentum_tensors[t][i] = momentum*val_momentum_tensors[t][i] + (1 - dampening)*val_grad_tensors[t][i];
+                    val_tensors[t][i] = val_tensors[t][i] - lr*val_momentum_tensors[t][i];
+                }
+            }
+            // optimizer
+            opt.update();
+            // test
+            for (std::size_t t = 0; t < nb_tensors; ++t) {
+                const Tensor tmpt1 = *(opt.parameters().at(t));
+                const Tensor tmpt2 = *tensors[t];
+                REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
+            }
+        }
+    }
+}
+} // namespace Aidge
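
For reviewers, a minimal usage sketch of the new SGD optimizer follows, mirroring the calls exercised in Test_SGD.cpp. It assumes the CPU backend and the learning::ConstantLR scheduler from LRSchedulerList.hpp; the trainStep function, the hyper-parameter values, and the surrounding training loop are illustrative only and are not part of this patch.

// Illustrative sketch only -- not part of the patch above.
// Assumes the parameter Tensors already have a backend set (e.g. "cpu") and
// that a backward pass fills params[i]->grad() before update() is called.
#include <memory>
#include <vector>

#include "aidge/data/Tensor.hpp"
#include "aidge/learning/learningRate/LRSchedulerList.hpp"
#include "aidge/learning/optimizer/SGD.hpp"

void trainStep(const std::vector<std::shared_ptr<Aidge::Tensor>>& params) {
    Aidge::SGD opt(/*momentum=*/0.9f, /*dampening=*/0.0f);
    opt.setParameters(params);  // registers the parameters and zero-initializes their gradients
    opt.setLearningRateScheduler(Aidge::learning::ConstantLR(1.0e-3f));

    // ... forward and backward pass filling params[i]->grad() ...

    opt.update();     // one SGD step; also advances the learning rate scheduler
    opt.resetGrad();  // reset gradients to zero before the next iteration
}

Note that setParameters() must be called before update(), since update() queries the backend of mParameters[0] to align mLR, mMomentum and mReversedDampening with the parameters' backend.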