Skip to content
Snippets Groups Projects

version 0.1.2

Merged Maxence Naud requested to merge dev into main
13 files
+ 564
6
Compare changes
  • Side-by-side
  • Inline
Files
13
+ 131
0
 
/********************************************************************************
 
* Copyright (c) 2023 CEA-List
 
*
 
* This program and the accompanying materials are made available under the
 
* terms of the Eclipse Public License 2.0 which is available at
 
* http://www.eclipse.org/legal/epl-2.0.
 
*
 
* SPDX-License-Identifier: EPL-2.0
 
*
 
********************************************************************************/
 
 
#ifndef AIDGE_CORE_OPTIMIZER_ADAM_H_
 
#define AIDGE_CORE_OPTIMIZER_ADAM_H_
 
 
#include <functional>
 
#include <memory>
 
#include <vector>
 
#include <cmath> // std::sqrt, std::pow
 
 
#include "aidge/data/Tensor.hpp"
 
#include "aidge/learning/optimizer/Optimizer.hpp"
 
#include "aidge/utils/StaticAttributes.hpp"
 
#include "aidge/utils/Registrar.hpp"
 
#include "aidge/utils/TensorUtils.hpp"
 
 
namespace Aidge {
 
 
/// Identifiers for the Adam optimizer's static attributes
/// (see StaticAttributes<AdamAttr, float, float, float> on the Adam class).
enum class AdamAttr {
    Beta1,   ///< Exponential decay rate of the first-moment (mean) estimates.
    Beta2,   ///< Exponential decay rate of the second-moment (uncentered variance) estimates.
    Epsilon  ///< Small constant for numerical stability in the denominator.
};
 
 
/**
 * @brief Adam optimizer (adaptive moment estimation).
 *
 * Maintains per-parameter first- and second-moment estimates of the gradient
 * and applies the bias-corrected "efficient" Adam update:
 *   alpha_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
 *   m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
 *   v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
 *   p_t = p_{t-1} - alpha_t * m_t / (sqrt(v_t) + epsilon_hat)
 * where t = scheduler step + 1 and epsilon_hat = epsilon * sqrt(1 - beta2^t).
 *
 * The hyper-parameters are stored twice: as StaticAttributes floats (for
 * introspection) and mirrored into 1-element Tensors so they can take part
 * in Tensor arithmetic on whatever backend the parameters live on.
 */
class Adam: public Optimizer, public StaticAttributes<AdamAttr, float, float, float> {
private:
    std::vector<Tensor> mMomentum1;  // first-moment estimate m, one Tensor per parameter
    std::vector<Tensor> mMomentum2;  // second-moment estimate v, one Tensor per parameter
    Tensor mLR{std::vector<std::size_t>({1})};             // scalar learning rate (see NOTE in update())
    Tensor mBeta1{std::vector<std::size_t>({1})};          // scalar beta1
    Tensor mReversedBeta1{std::vector<std::size_t>({1})};  // scalar (1 - beta1)
    Tensor mBeta2{std::vector<std::size_t>({1})};          // scalar beta2
    Tensor mReversedBeta2{std::vector<std::size_t>({1})};  // scalar (1 - beta2)
    Tensor mEpsilon{std::vector<std::size_t>({1})};        // scalar epsilon (kept on cpu; only read host-side)

public:
    using Attributes_ = StaticAttributes<AdamAttr, float, float, float>;
    template <AdamAttr e>
    using attr = typename Attributes_::template attr<e>;

    /**
     * @brief Construct an Adam optimizer.
     * @param beta1   Decay rate of the first-moment estimates (default 0.9).
     * @param beta2   Decay rate of the second-moment estimates (default 0.999).
     * @param epsilon Numerical-stability constant (default 1e-8).
     *
     * The scalar Tensors are initialized on the "cpu" backend; update()
     * migrates the beta tensors to the parameters' backend on first use.
     */
    Adam(const float beta1 = 0.9f, const float beta2 = 0.999f, const float epsilon = 1.0e-8f)
        : Optimizer(),
          Attributes_(attr<AdamAttr::Beta1>(beta1),
                      attr<AdamAttr::Beta2>(beta2),
                      attr<AdamAttr::Epsilon>(epsilon))
    {
        mBeta1.setBackend("cpu");
        mBeta1.set<float>(0, beta1);
        mReversedBeta1.setBackend("cpu");
        mReversedBeta1.set<float>(0, 1.0f - beta1);

        mBeta2.setBackend("cpu");
        mBeta2.set<float>(0, beta2);
        mReversedBeta2.setBackend("cpu");
        mReversedBeta2.set<float>(0, 1.0f - beta2);

        mEpsilon.setBackend("cpu");
        mEpsilon.set<float>(0, epsilon);
    }

    /**
     * @brief Perform one Adam step over all registered parameters, then
     * advance the learning-rate scheduler.
     *
     * Precondition: setParameters() has been called with a non-empty list and
     * the parameters have gradients/backends set — mParameters[0] is
     * dereferenced unconditionally below.
     */
    void update() override final {
        // NOTE(review): mLR is written here but never read in this method
        // (alpha below calls learningRate() directly) — presumably kept for
        // external inspection or backend kernels; verify before removing.
        mLR.setBackend(mParameters[0]->getImpl()->backend());
        mLR.set<float>(0, learningRate());

        // Lazily migrate the beta scalars to the parameters' backend the
        // first time (or whenever) it differs from theirs. mEpsilon is not
        // migrated: only its host-side scalar value is read below.
        if (mParameters[0]->getImpl()->backend() != mBeta1.getImpl()->backend()) {
            mBeta1.setBackend(mParameters[0]->getImpl()->backend());
            mReversedBeta1.setBackend(mParameters[0]->getImpl()->backend());
            mBeta2.setBackend(mParameters[0]->getImpl()->backend());
            mReversedBeta2.setBackend(mParameters[0]->getImpl()->backend());
        }

        // Bias-corrected step size for step t = step() + 1:
        //   alpha = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
        Tensor alpha{std::vector<std::size_t>({1})};
        alpha.setBackend(mParameters[0]->getImpl()->backend());
        alpha.set<float>(0, learningRate() * std::sqrt(1.0f - std::pow(mBeta2.get<float>(0), mLRScheduler.step() + 1))
                            / (1.0f - std::pow(mBeta1.get<float>(0), mLRScheduler.step() + 1)));

        // epsilon_hat = epsilon * sqrt(1 - beta2^t), matching the "efficient"
        // formulation where bias correction is folded into alpha and epsilon.
        Tensor epsilon{std::vector<std::size_t>({1})};
        epsilon.setBackend(mParameters[0]->getImpl()->backend());
        epsilon.set<float>(0, mEpsilon.get<float>(0) * std::sqrt(1.0f - std::pow(mBeta2.get<float>(0), mLRScheduler.step() + 1)));

        // First step: moments were only dims-allocated in setParameters();
        // now that gradients exist, give them the right backend/dtype and
        // zero-initialize them.
        if (mLRScheduler.step() == 0) {
            for (std::size_t i = 0; i < mParameters.size(); ++i) {
                mMomentum1[i].setBackend(mParameters[i]->getImpl()->backend());
                mMomentum1[i].setDataType(mParameters[i]->grad()->dataType());
                mMomentum1[i].zeros();
                mMomentum2[i].setBackend(mParameters[i]->getImpl()->backend());
                mMomentum2[i].setDataType(mParameters[i]->grad()->dataType());
                mMomentum2[i].zeros();
            }
        }

        // Moment updates must precede the parameter step; order matters here.
        for (std::size_t i = 0; i < mParameters.size(); ++i) {
            // m = beta1 * m + (1 - beta1) * grad
            mMomentum1[i] = mBeta1 * mMomentum1[i] + mReversedBeta1 * (*mParameters[i]->grad());
            // v = beta2 * v + (1 - beta2) * grad^2
            mMomentum2[i] = mBeta2 * mMomentum2[i] + mReversedBeta2 * (*mParameters[i]->grad()) * (*mParameters[i]->grad());
            // p = p - alpha * m / (sqrt(v) + epsilon_hat)
            *mParameters[i] = *mParameters[i] - alpha * mMomentum1[i] / (mMomentum2[i].sqrt() + epsilon);
        }

        mLRScheduler.update();
    }

    /**
     * @brief Register the parameters to optimize and allocate one pair of
     * moment Tensors per parameter, sized to the parameter's dims.
     *
     * Backend and data type of the moments are deferred to the first
     * update() call, where the gradients' dtype is known.
     */
    void setParameters(const std::vector<std::shared_ptr<Tensor>>& parameters) override final {
        Optimizer::setParameters(parameters);
        mMomentum1 = std::vector<Tensor>(parameters.size());
        mMomentum2 = std::vector<Tensor>(parameters.size());
        for (std::size_t i = 0; i < parameters.size(); ++i) {
            mMomentum1[i] = Tensor(parameters[i]->dims());
            mMomentum2[i] = Tensor(parameters[i]->dims());
        }
    }
};
 
 
} // namespace Aidge
 
 
 
namespace {
// Human-readable names for AdamAttr values, indexed by the enum's ordinal.
// Must stay in sync with the declaration order of AdamAttr above.
// NOTE(review): an anonymous namespace in a header gives every translation
// unit its own copy of this array — presumably this matches the EnumStrings<>
// convention used throughout Aidge; confirm against the primary template.
template <>
const char *const EnumStrings<Aidge::AdamAttr>::data[] = {
    "Beta1",
    "Beta2",
    "Epsilon"
};
}
 
#endif // AIDGE_CORE_OPTIMIZER_ADAM_H_
Loading