diff --git a/unit_tests/optimizer/Test_Adam.cpp b/unit_tests/optimizer/Test_Adam.cpp index 77d26214bb0e7225975463613200add232210fe9..d703c418445d5a0c778d0e9352fc912cb6b58371 100644 --- a/unit_tests/optimizer/Test_Adam.cpp +++ b/unit_tests/optimizer/Test_Adam.cpp @@ -70,7 +70,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { val_tensors[i] = std::make_unique<float[]>(size_tensors[i]); val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]); val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]); - val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]); + val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]); for (std::size_t j = 0; j < size_tensors[i]; ++j) { val_tensors[i][j] = valueDist(gen); val_grad_tensors[i][j] = valueDist(gen); @@ -92,7 +92,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { momentum_tensors[i] = std::make_shared<Tensor>(dims); momentum_tensors[i]->setBackend("cpu"); momentum_tensors[i]->getImpl()->setRawPtr(val_momentum1_tensors[i].get(), size_tensors[i]); - momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]); + momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]); REQUIRE((tensors[i]->hasImpl() && optim_tensors[i]->hasImpl() && @@ -102,7 +102,7 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { // generate parameters float lr = paramDist(gen); float beta1 = paramDist(gen); - float beta2 = paramDist(gen); + float beta2 = paramDist(gen); float epsilon = paramDist(gen); // set Optimizer @@ -121,13 +121,14 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") { // truth for (std::size_t step = 0; step < 10; ++step) { + float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1)); + float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1)); for (std::size_t t = 0; t < nb_tensors; ++t) { for (std::size_t i = 0; i < 
size_tensors[t]; ++i) { val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i]; val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i]; val_tensors[t][i] = val_tensors[t][i] - - lr * val_momentum1_tensors[t][i] / (1.0f - std::pow(beta1, step + 1)) - / (std::sqrt(val_momentum2_tensors[t][i] / (1.0f - std::pow(beta2, step + 1))) + epsilon); + - lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) + epsilon2); } } // optimizer