From d298f23dc7d92c6074c3c0a9aded45d8ddecb013 Mon Sep 17 00:00:00 2001
From: Antoni Olivier <olivier.antoni@cea.fr>
Date: Tue, 25 Jun 2024 15:34:49 +0200
Subject: [PATCH] Fix Adam optimizer unit test

---
 unit_tests/optimizer/Test_Adam.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp
index 77d2621..d703c41 100644
--- a/unit_tests/optimizer/Test_Adam.cpp
+++ b/unit_tests/optimizer/Test_Adam.cpp
@@ -70,7 +70,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
         val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
         val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
         val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
-        val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
+        val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
         for (std::size_t j = 0; j < size_tensors[i]; ++j) {
             val_tensors[i][j] = valueDist(gen);
             val_grad_tensors[i][j] = valueDist(gen);
@@ -92,7 +92,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
            momentum_tensors[i] = std::make_shared<Tensor>(dims);
            momentum_tensors[i]->setBackend("cpu");
            momentum_tensors[i]->getImpl()->setRawPtr(val_momentum1_tensors[i].get(), size_tensors[i]);
-           momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]);
+           momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]);

            REQUIRE((tensors[i]->hasImpl() &&
                     optim_tensors[i]->hasImpl() &&
@@ -102,7 +102,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
         // generate parameters
         float lr = paramDist(gen);
         float beta1 = paramDist(gen);
-        float beta2 = paramDist(gen);
+        float beta2 = paramDist(gen);
         float epsilon = paramDist(gen);

         // set Optimizer
@@ -121,13 +121,14 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {

         // truth
         for (std::size_t step = 0; step < 10; ++step) {
+            float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
+            float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
             for (std::size_t t = 0; t < nb_tensors; ++t) {
                 for (std::size_t i = 0; i < size_tensors[t]; ++i) {
                     val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
                     val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i];
                     val_tensors[t][i] = val_tensors[t][i]
-                                      - lr * val_momentum1_tensors[t][i] / (1.0f - std::pow(beta1, step + 1))
-                                      / (std::sqrt(val_momentum2_tensors[t][i] / (1.0f - std::pow(beta2, step + 1))) + epsilon);
+                                      - lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) + epsilon2);
                 }
             }
             // optimizer
-- 
GitLab
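
The patch relies on an algebraic refactoring of the textbook Adam step: with
m_hat = m / (1 - beta1^t) and v_hat = v / (1 - beta2^t), the update
lr * m_hat / (sqrt(v_hat) + epsilon) is identical to
lr2 * m / (sqrt(v) + epsilon2), where lr2 = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
and epsilon2 = epsilon * sqrt(1 - beta2^t). The standalone sketch below (not
part of the patch; the fixed gradient and hyper-parameter values are
illustrative assumptions) checks that the two forms agree numerically:

// Equivalence check for the reformulated Adam step used in the patched test.
// A minimal sketch; constants and variable names are illustrative only.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
    const float lr = 0.001f, beta1 = 0.9f, beta2 = 0.999f, epsilon = 1e-8f;
    const float grad = 0.3f;                          // fixed gradient, for illustration
    float theta_a = 0.5f, m_a = 0.0f, v_a = 0.0f;     // textbook form
    float theta_b = 0.5f, m_b = 0.0f, v_b = 0.0f;     // reformulated form

    for (int step = 0; step < 10; ++step) {
        // Textbook Adam: bias-correct both moments, then update.
        m_a = beta1 * m_a + (1.0f - beta1) * grad;
        v_a = beta2 * v_a + (1.0f - beta2) * grad * grad;
        const float m_hat = m_a / (1.0f - std::pow(beta1, step + 1));
        const float v_hat = v_a / (1.0f - std::pow(beta2, step + 1));
        theta_a -= lr * m_hat / (std::sqrt(v_hat) + epsilon);

        // Patch form: fold both bias corrections into lr2 and epsilon2.
        const float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1))
                             / (1.0f - std::pow(beta1, step + 1));
        const float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
        m_b = beta1 * m_b + (1.0f - beta1) * grad;
        v_b = beta2 * v_b + (1.0f - beta2) * grad * grad;
        theta_b -= lr2 * m_b / (std::sqrt(v_b) + epsilon2);

        // The two updates are algebraically identical, so they should match
        // to within float rounding at every step.
        assert(std::fabs(theta_a - theta_b) < 1e-6f);
    }
    std::printf("final values: %.7f vs %.7f\n", theta_a, theta_b);
    return 0;
}

Folding the corrections into per-step scalars means the inner loop over tensor
elements does no pow/sqrt work on the correction terms, which is presumably why
the test's reference computation was rewritten this way.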