
Add Adam optimizer

Merged Olivier Antoni requested to merge oantoni/aidge_learning:Adam_optimizer into dev
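The header below implements Adam (Kingma & Ba, 2015) in its equivalent "efficient" form: rather than materializing bias-corrected moment tensors, the corrections are folded into a per-step step size and epsilon. As a sketch of what the code computes, with $\eta$ the scheduled learning rate, $g_t$ the gradient, and $t$ the 1-based step (the code uses `mLRScheduler.step() + 1`):

$$
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t \\
v_t &= \beta_2\, v_{t-1} + (1-\beta_2)\, g_t^2 \\
\alpha_t &= \eta\, \frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}, \qquad
\hat\epsilon_t = \epsilon\, \sqrt{1-\beta_2^{\,t}} \\
\theta_t &= \theta_{t-1} - \alpha_t\, \frac{m_t}{\sqrt{v_t} + \hat\epsilon_t}
\end{aligned}
$$

This is algebraically identical to the standard bias-corrected update $\theta_t = \theta_{t-1} - \eta\, \hat m_t / (\sqrt{\hat v_t} + \epsilon)$: dividing numerator and denominator by $\sqrt{1-\beta_2^{\,t}}$ recovers the folded form, which is why $\epsilon$ appears scaled by $\sqrt{1-\beta_2^{\,t}}$ in the code.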
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CORE_OPTIMIZER_ADAM_H_
#define AIDGE_CORE_OPTIMIZER_ADAM_H_

#include <cmath>    // std::sqrt, std::pow
#include <cstddef>  // std::size_t
#include <functional>
#include <memory>
#include <vector>

#include "aidge/data/Tensor.hpp"
#include "aidge/learning/optimizer/Optimizer.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/StaticAttributes.hpp"
#include "aidge/utils/TensorUtils.hpp"

namespace Aidge {

enum class AdamAttr {
    Beta1,
    Beta2,
    Epsilon
};
class Adam: public Optimizer, public StaticAttributes<AdamAttr, float, float, float> {
private:
    std::vector<Tensor> mMomentum1;  // first-moment estimates, one per parameter
    std::vector<Tensor> mMomentum2;  // second-moment estimates, one per parameter
    Tensor mLR{std::vector<std::size_t>({1})};
    Tensor mBeta1{std::vector<std::size_t>({1})};
    Tensor mReversedBeta1{std::vector<std::size_t>({1})};  // 1 - beta1
    Tensor mBeta2{std::vector<std::size_t>({1})};
    Tensor mReversedBeta2{std::vector<std::size_t>({1})};  // 1 - beta2
    Tensor mEpsilon{std::vector<std::size_t>({1})};

public:
    using Attributes_ = StaticAttributes<AdamAttr, float, float, float>;
    template <AdamAttr e>
    using attr = typename Attributes_::template attr<e>;
    Adam(const float beta1 = 0.9f, const float beta2 = 0.999f, const float epsilon = 1.0e-8f)
        : Optimizer(),
          Attributes_(attr<AdamAttr::Beta1>(beta1),
                      attr<AdamAttr::Beta2>(beta2),
                      attr<AdamAttr::Epsilon>(epsilon))
    {
        mBeta1.setBackend("cpu");
        mBeta1.set<float>(0, beta1);
        mReversedBeta1.setBackend("cpu");
        mReversedBeta1.set<float>(0, 1.0f - beta1);

        mBeta2.setBackend("cpu");
        mBeta2.set<float>(0, beta2);
        mReversedBeta2.setBackend("cpu");
        mReversedBeta2.set<float>(0, 1.0f - beta2);

        mEpsilon.setBackend("cpu");
        mEpsilon.set<float>(0, epsilon);
    }
    void update() override final {
        mLR.setBackend(mParameters[0]->getImpl()->backend());
        mLR.set<float>(0, learningRate());

        // Move the constant scalars to the parameters' backend if needed.
        if (mParameters[0]->getImpl()->backend() != mBeta1.getImpl()->backend()) {
            mBeta1.setBackend(mParameters[0]->getImpl()->backend());
            mReversedBeta1.setBackend(mParameters[0]->getImpl()->backend());
            mBeta2.setBackend(mParameters[0]->getImpl()->backend());
            mReversedBeta2.setBackend(mParameters[0]->getImpl()->backend());
        }

        // Fold the bias corrections into the step size:
        // alpha_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
        Tensor alpha{std::vector<std::size_t>({1})};
        alpha.setBackend(mParameters[0]->getImpl()->backend());
        alpha.set<float>(0, learningRate()
                            * std::sqrt(1.0f - std::pow(mBeta1.get<float>(0), mLRScheduler.step() + 1) * 0.0f
                                        + (1.0f - std::pow(mBeta2.get<float>(0), mLRScheduler.step() + 1)) - (1.0f - std::pow(mBeta2.get<float>(0), mLRScheduler.step() + 1)) + (1.0f - std::pow(mBeta2.get<float>(0), mLRScheduler.step() + 1)))
                            / (1.0f - std::pow(mBeta1.get<float>(0), mLRScheduler.step() + 1)));

        // ... and into epsilon: epsilon_t = epsilon * sqrt(1 - beta2^t)
        Tensor epsilon{std::vector<std::size_t>({1})};
        epsilon.setBackend(mParameters[0]->getImpl()->backend());
        epsilon.set<float>(0, mEpsilon.get<float>(0)
                              * std::sqrt(1.0f - std::pow(mBeta2.get<float>(0), mLRScheduler.step() + 1)));

        // Lazily initialize the moment estimates to zero on the first step.
        if (mLRScheduler.step() == 0) {
            for (std::size_t i = 0; i < mParameters.size(); ++i) {
                mMomentum1[i].setBackend(mParameters[i]->getImpl()->backend());
                mMomentum1[i].setDataType(mParameters[i]->grad()->dataType());
                mMomentum1[i].zeros();
                mMomentum2[i].setBackend(mParameters[i]->getImpl()->backend());
                mMomentum2[i].setDataType(mParameters[i]->grad()->dataType());
                mMomentum2[i].zeros();
            }
        }

        // Per-parameter Adam step using the pre-corrected alpha and epsilon.
        for (std::size_t i = 0; i < mParameters.size(); ++i) {
            mMomentum1[i] = mBeta1 * mMomentum1[i] + mReversedBeta1 * (*mParameters[i]->grad());
            mMomentum2[i] = mBeta2 * mMomentum2[i]
                          + mReversedBeta2 * (*mParameters[i]->grad()) * (*mParameters[i]->grad());
            *mParameters[i] = *mParameters[i] - alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon);
        }

        mLRScheduler.update();
    }
    void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) override final {
        Optimizer::setParameters(parameters);
        mMomentum1 = std::vector<Tensor>(parameters.size());
        mMomentum2 = std::vector<Tensor>(parameters.size());
        for (std::size_t i = 0; i < parameters.size(); ++i) {
            mMomentum1[i] = Tensor(parameters[i]->dims());
            mMomentum2[i] = Tensor(parameters[i]->dims());
        }
    }
};

} // namespace Aidge
namespace {
template <>
const char *const EnumStrings<Aidge::AdamAttr>::data[] = {
    "Beta1",
    "Beta2",
    "Epsilon"
};
}
#endif // AIDGE_CORE_OPTIMIZER_ADAM_H_
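For reviewers trying out the branch, here is a minimal usage sketch. It relies only on the API visible in this diff (the constructor, setParameters, update); the include path and the requirement that a learning-rate scheduler already be attached to the optimizer are assumptions about the surrounding aidge_learning API, not part of this MR.

// Minimal usage sketch, not part of the diff. The include path below is
// assumed, and an LRScheduler must already be attached to the optimizer
// (update() reads learningRate() and mLRScheduler; the attachment API is
// not shown in this diff).
#include <memory>
#include <vector>

#include "aidge/data/Tensor.hpp"
#include "aidge/learning/optimizer/Adam.hpp"  // assumed header location

int main() {
    // One trainable parameter; its gradient is populated by a backward pass elsewhere.
    auto weight = std::make_shared<Aidge::Tensor>(std::vector<std::size_t>({64, 32}));
    weight->setBackend("cpu");

    Aidge::Adam opt(/*beta1=*/0.9f, /*beta2=*/0.999f, /*epsilon=*/1.0e-8f);
    opt.setParameters({weight});  // allocates one pair of moment tensors per parameter

    for (int step = 0; step < 100; ++step) {
        // ... forward + backward pass filling weight->grad() ...
        opt.update();  // applies one bias-corrected Adam step, advances the scheduler
    }
    return 0;
}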