Commit eedf81f4 authored by Houssem ROUIS

Add CUDA test for Adam optimizer

parent ce255b8c
Part of 2 merge requests: !17 (version 0.2.0) and !15 (Learning backend cuda)
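For reference, the "truth" loops in both the CPU and CUDA sections below reproduce the standard Adam update with the bias corrections folded into the step size and the epsilon term. Written out per element (notation derived from the test code itself, not taken from the Aidge sources), with t = step + 1:

\[
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t \\
v_t &= \beta_2\, v_{t-1} + (1-\beta_2)\, g_t^2 \\
\theta_t &= \theta_{t-1} - \underbrace{\mathrm{lr}\,\frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}}_{\texttt{lr2}} \cdot \frac{m_t}{\sqrt{v_t} + \underbrace{\epsilon\,\sqrt{1-\beta_2^{\,t}}}_{\texttt{epsilon2}}}
\end{aligned}
\]

lr2 and epsilon2 are the precomputed per-step factors that appear in the test, and the REQUIRE checks compare the optimizer's parameters against theta_t after each opt.update() call.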
@@ -30,6 +30,15 @@
#include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
#include "aidge/utils/TensorUtils.hpp" #include "aidge/utils/TensorUtils.hpp"
#if USE_AIDGE_BACKEND_CUDA
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/AddImpl.hpp"
#include "aidge/backend/cuda/operator/MulImpl.hpp"
#include "aidge/backend/cuda/operator/SubImpl.hpp"
#include "aidge/backend/cuda/operator/DivImpl.hpp"
#include "aidge/backend/cuda/operator/SqrtImpl.hpp"
#endif
namespace Aidge {
TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
constexpr std::uint16_t NBTRIALS = 10;
@@ -41,105 +50,242 @@ TEST_CASE("[learning/Adam] update", "[Optimizer][Adam]") {
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
SECTION("CPU") {
for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
// create a random number of Tensor with random dims and random values
// Create random Tensor, Random Gradient and random
const std::size_t nb_tensors = dimSizeDist(gen);
std::vector<std::size_t> size_tensors(nb_tensors, 1);
std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);

std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);

std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);

std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_momentum1_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_momentum2_tensors(nb_tensors);

for (std::size_t i = 0; i < nb_tensors; ++i) {
std::vector<std::size_t> dims(nbDimsDist(gen));
for (std::size_t d = 0; d < dims.size(); ++d) {
dims[d] = dimSizeDist(gen);
size_tensors[i] *= dims[d];
}

val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
for (std::size_t j = 0; j < size_tensors[i]; ++j) {
val_tensors[i][j] = valueDist(gen);
val_grad_tensors[i][j] = valueDist(gen);
val_momentum1_tensors[i][j] = 0.0f;
val_momentum2_tensors[i][j] = 0.0f;
}

tensors[i] = std::make_shared<Tensor>(dims);
tensors[i]->setBackend("cpu");
tensors[i]->getImpl()->setRawPtr(val_tensors[i].get(), size_tensors[i]);
optim_tensors[i] = std::make_shared<Tensor>(dims);
optim_tensors[i]->setBackend("cpu");
optim_tensors[i]->getImpl()->copy(val_tensors[i].get(), size_tensors[i]);
// optim_tensors[i]->initGrad();

grad_tensors[i] = std::make_shared<Tensor>(dims);
grad_tensors[i]->setBackend("cpu");
grad_tensors[i]->getImpl()->setRawPtr(val_grad_tensors[i].get(), size_tensors[i]);

momentum_tensors[i] = std::make_shared<Tensor>(dims);
momentum_tensors[i]->setBackend("cpu");
momentum_tensors[i]->getImpl()->setRawPtr(val_momentum1_tensors[i].get(), size_tensors[i]);
momentum_tensors[i]->getImpl()->setRawPtr(val_momentum2_tensors[i].get(), size_tensors[i]);

REQUIRE((tensors[i]->hasImpl() &&
optim_tensors[i]->hasImpl() &&
grad_tensors[i]->hasImpl()));
}

// generate parameters
float lr = paramDist(gen);
float beta1 = paramDist(gen);
float beta2 = paramDist(gen);
float epsilon = paramDist(gen);

// set Optimizer
Adam opt = Adam(beta1, beta2, epsilon);
opt.setParameters(optim_tensors);
for (std::size_t t = 0; t < nb_tensors; ++t) {
optim_tensors[t]->grad()->getImpl()->setRawPtr(val_grad_tensors[t].get(), size_tensors[t]);
}
opt.setLearningRateScheduler(learning::ConstantLR(lr));

for (std::size_t t = 0; t < nb_tensors; ++t) {
const Tensor tmpt1= *(opt.parameters().at(t));
const Tensor tmpt2= *tensors[t];
REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
}
for (std::size_t step = 0; step < 10; ++step) {
// truth
float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
for (std::size_t t = 0; t < nb_tensors; ++t) {
for (std::size_t i = 0; i < size_tensors[t]; ++i) {
val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i];
val_tensors[t][i] = val_tensors[t][i]
- lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) + epsilon2);
}
}
// optimizer
opt.update();
// tests
for (std::size_t t = 0; t < nb_tensors; ++t) {
const Tensor tmpt1= *(opt.parameters().at(t));
const Tensor tmpt2= *tensors[t];
REQUIRE(approxEq<float,float>(tmpt2, tmpt1, 1e-5f, 1e-8f));
}
}
}
}
#if USE_AIDGE_BACKEND_CUDA
SECTION("CUDA") {
for (std::size_t trial = 0; trial < NBTRIALS; ++trial) {
// create a random number of Tensor with random dims and random values
// Create random Tensor, Random Gradient and random
const std::size_t nb_tensors = dimSizeDist(gen);
std::vector<std::size_t> size_tensors(nb_tensors, 1);
std::vector<std::shared_ptr<Tensor>> tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_tensors(nb_tensors);
std::vector<std::shared_ptr<Tensor>> optim_tensors(nb_tensors);
std::vector<std::shared_ptr<Tensor>> grad_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_grad_tensors(nb_tensors);
std::vector<std::shared_ptr<Tensor>> momentum_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_momentum1_tensors(nb_tensors);
std::vector<std::unique_ptr<float[]>> val_momentum2_tensors(nb_tensors);
// Device pointers
std::vector<float*> d_val_tensors(nb_tensors);
std::vector<float*> d_val_grad_tensors(nb_tensors);
std::vector<float*> d_val_momentum1_tensors(nb_tensors);
std::vector<float*> d_val_momentum2_tensors(nb_tensors);
for (std::size_t i = 0; i < nb_tensors; ++i) {
std::vector<std::size_t> dims(nbDimsDist(gen));
for (std::size_t d = 0; d < dims.size(); ++d) {
dims[d] = dimSizeDist(gen);
size_tensors[i] *= dims[d];
}
val_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_grad_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_momentum1_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
val_momentum2_tensors[i] = std::make_unique<float[]>(size_tensors[i]);
for (std::size_t j = 0; j < size_tensors[i]; ++j) {
val_tensors[i][j] = valueDist(gen);
val_grad_tensors[i][j] = valueDist(gen);
val_momentum1_tensors[i][j] = 0.0f;
val_momentum2_tensors[i][j] = 0.0f;
}
// Allocate device memory
cudaMalloc(&d_val_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_grad_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_momentum1_tensors[i], size_tensors[i] * sizeof(float));
cudaMalloc(&d_val_momentum2_tensors[i], size_tensors[i] * sizeof(float));
// Copy data to device
cudaMemcpy(d_val_tensors[i], val_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_val_grad_tensors[i], val_grad_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_val_momentum1_tensors[i], val_momentum1_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_val_momentum2_tensors[i], val_momentum2_tensors[i].get(), size_tensors[i] * sizeof(float), cudaMemcpyHostToDevice);
tensors[i] = std::make_shared<Tensor>(dims);
tensors[i]->setBackend("cuda");
tensors[i]->getImpl()->setRawPtr(d_val_tensors[i], size_tensors[i]);
optim_tensors[i] = std::make_shared<Tensor>(dims);
optim_tensors[i]->setBackend("cuda");
optim_tensors[i]->getImpl()->copy(d_val_tensors[i], size_tensors[i]);
// optim_tensors[i]->initGrad();
grad_tensors[i] = std::make_shared<Tensor>(dims);
grad_tensors[i]->setBackend("cuda");
grad_tensors[i]->getImpl()->setRawPtr(d_val_grad_tensors[i], size_tensors[i]);
momentum_tensors[i] = std::make_shared<Tensor>(dims);
momentum_tensors[i]->setBackend("cuda");
momentum_tensors[i]->getImpl()->setRawPtr(d_val_momentum1_tensors[i], size_tensors[i]);
momentum_tensors[i]->getImpl()->setRawPtr(d_val_momentum2_tensors[i], size_tensors[i]);
REQUIRE((tensors[i]->hasImpl() &&
optim_tensors[i]->hasImpl() &&
grad_tensors[i]->hasImpl()));
}
// generate parameters
float lr = paramDist(gen);
float beta1 = paramDist(gen);
float beta2 = paramDist(gen);
float epsilon = paramDist(gen);
// set Optimizer
Adam opt = Adam(beta1, beta2, epsilon);
opt.setParameters(optim_tensors);
for (std::size_t t = 0; t < nb_tensors; ++t) {
optim_tensors[t]->grad()->getImpl()->setRawPtr(d_val_grad_tensors[t], size_tensors[t]);
}
opt.setLearningRateScheduler(learning::ConstantLR(lr));
for (std::size_t t = 0; t < nb_tensors; ++t) {
float* temp1 = new float[opt.parameters().at(t)->size()]();
cudaMemcpy(temp1, opt.parameters().at(t)->getImpl()->rawPtr(), sizeof(float) * opt.parameters().at(t)->size(), cudaMemcpyDeviceToHost);
float* temp2 = new float[tensors[t]->size()]();
cudaMemcpy(temp2, tensors[t]->getImpl()->rawPtr(), sizeof(float) * tensors[t]->size(), cudaMemcpyDeviceToHost);
for (size_t i = 0; i < tensors[t]->size(); ++i) {
// each element must match the reference within the same tolerance as the CPU test
REQUIRE(std::abs(temp1[i] - temp2[i]) <= (1e-8f + 1e-5f * std::abs(temp1[i])));
}
delete[] temp1;
delete[] temp2;
}
for (std::size_t step = 0; step < 10; ++step) {
// truth
float lr2 = lr * std::sqrt(1.0f - std::pow(beta2, step + 1)) / (1.0f - std::pow(beta1, step + 1));
float epsilon2 = epsilon * std::sqrt(1.0f - std::pow(beta2, step + 1));
for (std::size_t t = 0; t < nb_tensors; ++t) {
for (std::size_t i = 0; i < size_tensors[t]; ++i) {
val_momentum1_tensors[t][i] = beta1 * val_momentum1_tensors[t][i] + (1.0f - beta1) * val_grad_tensors[t][i];
val_momentum2_tensors[t][i] = beta2 * val_momentum2_tensors[t][i] + (1.0f - beta2) * val_grad_tensors[t][i] * val_grad_tensors[t][i];
val_tensors[t][i] = val_tensors[t][i]
- lr2 * val_momentum1_tensors[t][i] / (std::sqrt(val_momentum2_tensors[t][i]) + epsilon2);
}
cudaMemcpy(d_val_momentum1_tensors[t], val_momentum1_tensors[t].get(), size_tensors[t] * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_val_momentum2_tensors[t], val_momentum2_tensors[t].get(), size_tensors[t] * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_val_tensors[t], val_tensors[t].get(), size_tensors[t] * sizeof(float), cudaMemcpyHostToDevice);
}
// optimizer
opt.update();
// tests
for (std::size_t t = 0; t < nb_tensors; ++t) {
float* temp1 = new float[opt.parameters().at(t)->size()]();
cudaMemcpy(temp1, opt.parameters().at(t)->getImpl()->rawPtr(), sizeof(float) * opt.parameters().at(t)->size(), cudaMemcpyDeviceToHost);
float* temp2 = new float[tensors[t]->size()]();
cudaMemcpy(temp2, tensors[t]->getImpl()->rawPtr(), sizeof(float) * tensors[t]->size(), cudaMemcpyDeviceToHost);
for (size_t i = 0; i < tensors[t]->size(); ++i) {
// each element must match the reference within the same tolerance as the CPU test
REQUIRE(std::abs(temp1[i] - temp2[i]) <= (1e-8f + 1e-5f * std::abs(temp1[i])));
}
delete[] temp1;
delete[] temp2;
}
}
}
}
#endif
}
} // namespace Aidge
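One caveat with the CUDA section above: each trial allocates four device buffers per tensor with cudaMalloc and never releases them. A minimal cleanup sketch, to run at the end of each CUDA trial, assuming the tensor implementations do not take ownership of the raw device pointers passed through setRawPtr (an assumption about the backend, not verified here):

// Hypothetical per-trial cleanup; assumes the TensorImpl objects do not free
// the raw device pointers themselves when destroyed.
for (std::size_t i = 0; i < nb_tensors; ++i) {
    cudaFree(d_val_tensors[i]);
    cudaFree(d_val_grad_tensors[i]);
    cudaFree(d_val_momentum1_tensors[i]);
    cudaFree(d_val_momentum2_tensors[i]);
}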