diff --git a/unit_tests/loss/regression/Test_MSE.cpp b/unit_tests/loss/regression/Test_MSE.cpp
index 2b0e6d1edfaa1d452c714a08c4998725331df2c3..4e30fd00e2d8136a9da8bcb38875e2a4eafa2c98 100644
--- a/unit_tests/loss/regression/Test_MSE.cpp
+++ b/unit_tests/loss/regression/Test_MSE.cpp
@@ -22,9 +22,16 @@
 #include "aidge/loss/LossList.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
-
+#if USE_AIDGE_BACKEND_CUDA
+#include <cuda_runtime.h>
+#include "aidge/backend/cuda/operator/PowImpl.hpp"
+#include "aidge/backend/cuda/operator/ReduceMeanImpl.hpp"
+#include "aidge/backend/cuda/operator/SubImpl.hpp"
+#include "aidge/backend/cuda/operator/MulImpl.hpp"
+#endif
 namespace Aidge {
 TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
+
     constexpr std::uint16_t NBTRIALS = 10;
 
     // set random variables
@@ -34,54 +41,120 @@ TEST_CASE("[loss/regression] MSE", "[loss][regression][MSE]") {
     std::uniform_int_distribution<std::size_t> nbDimsDist(1, 2);
     std::uniform_real_distribution<float> valueDist(0.0f, 1.0f);
 
-    for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
-        const std::size_t nb_dims = 2; // For MSE test, nb_dims is fixed as 2: NbBatch, NbChan
-        std::vector<std::size_t> dims(2);
+    SECTION("CPU") {
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+            const std::size_t nb_dims = 2; // For MSE test, nb_dims is fixed as 2: NbBatch, NbChan
+            std::vector<std::size_t> dims(2);
 
-        for (std::size_t i = 0; i < nb_dims; ++i) { dims[i] = dimsDist(gen); }
-        const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+            for (std::size_t i = 0; i < nb_dims; ++i) { dims[i] = dimsDist(gen); }
+            const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
 
-        // create random predictions
-        std::unique_ptr<float[]> pred = std::make_unique<float[]>(nb_elements);
-        for (std::size_t i = 0; i < nb_elements; ++i) {
-            pred[i] = valueDist(gen);
-        }
+            // create random predictions
+            std::unique_ptr<float[]> pred = std::make_unique<float[]>(nb_elements);
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                pred[i] = valueDist(gen);
+            }
 
-        // create random targets
-        std::unique_ptr<float[]> targ = std::make_unique<float[]>(nb_elements);
-        for (std::size_t i = 0; i < nb_elements; ++i) {
-            targ[i] = valueDist(gen);
-        }
+            // create random targets
+            std::unique_ptr<float[]> targ = std::make_unique<float[]>(nb_elements);
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                targ[i] = valueDist(gen);
+            }
+
+            // compute the MSE manually
+            std::unique_ptr<float[]> tmp_res_manual = std::make_unique<float[]>(nb_elements);
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                tmp_res_manual[i] = std::pow(pred[i] - targ[i],2);
+            }
+            std::cout << "Pow output manual:" << std::endl;
+            std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
+            tmp_tensor->setBackend("cpu");
+            tmp_tensor->getImpl()->setRawPtr(tmp_res_manual.get(), nb_elements);
+            tmp_tensor->print();
+            const float res_manual = std::accumulate(&tmp_res_manual[0], &tmp_res_manual[nb_elements], 0.0f, std::plus<float>()) / static_cast<float>(nb_elements);
 
-        // compute the MSE manually
-        std::unique_ptr<float[]> tmp_res_manual = std::make_unique<float[]>(nb_elements);
-        for (std::size_t i = 0; i < nb_elements; ++i) {
-            tmp_res_manual[i] = std::pow(pred[i] - targ[i],2);
+            // compute the MSE using Aidge::loss::MSE function
+            std::cout << "Sub input 0 manual:" << std::endl;
+            std::shared_ptr<Tensor> pred_tensor = std::make_shared<Tensor>(dims);
+            pred_tensor->setBackend("cpu");
+            pred_tensor->getImpl()->setRawPtr(pred.get(), nb_elements);
+            pred_tensor->print();
+
+            std::cout << "Sub input 1 manual:" << std::endl;
+            std::shared_ptr<Tensor> targ_tensor = std::make_shared<Tensor>(dims);
+            targ_tensor->setBackend("cpu");
+            targ_tensor->getImpl()->setRawPtr(targ.get(), nb_elements);
+            targ_tensor->print();
+            const Tensor res_function = loss::MSE(pred_tensor, targ_tensor);
+
+            // compare results
+            Tensor res_manual_tensor = Tensor(res_manual);
+            REQUIRE(approxEq<float>(res_manual_tensor, res_function));
         }
-        std::cout << "Pow output manual:" << std::endl;
-        std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
-        tmp_tensor->setBackend("cpu");
-        tmp_tensor->getImpl()->setRawPtr(tmp_res_manual.get(), nb_elements);
-        tmp_tensor->print();
-        const float res_manual = std::accumulate(&tmp_res_manual[0], &tmp_res_manual[nb_elements], 0.0f, std::plus<float>()) / static_cast<float>(nb_elements);
-
-        // compute the MSE using Aidge::loss::MSE function
-        std::cout << "Sub input 0 manual:" << std::endl;
-        std::shared_ptr<Tensor> pred_tensor = std::make_shared<Tensor>(dims);
-        pred_tensor->setBackend("cpu");
-        pred_tensor->getImpl()->setRawPtr(pred.get(), nb_elements);
-        pred_tensor->print();
-
-        std::cout << "Sub input 1 manual:" << std::endl;
-        std::shared_ptr<Tensor> targ_tensor = std::make_shared<Tensor>(dims);
-        targ_tensor->setBackend("cpu");
-        targ_tensor->getImpl()->setRawPtr(targ.get(), nb_elements);
-        targ_tensor->print();
-        const Tensor res_function = loss::MSE(pred_tensor, targ_tensor);
+    }
+#if USE_AIDGE_BACKEND_CUDA
+    SECTION("CUDA") {
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+            const std::size_t nb_dims = 2; // For MSE test, nb_dims is fixed as 2: NbBatch, NbChan
+            std::vector<std::size_t> dims(2);
+
+            for (std::size_t i = 0; i < nb_dims; ++i) { dims[i] = dimsDist(gen); }
+            const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+
+            // create random predictions
+            std::unique_ptr<float[]> pred = std::make_unique<float[]>(nb_elements);
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                pred[i] = valueDist(gen);
+            }
+            float * d_pred;
+            cudaMalloc(&d_pred, nb_elements * sizeof(float));
+            cudaMemcpy(d_pred, pred.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
+
+            // create random targets
+            std::unique_ptr<float[]> targ = std::make_unique<float[]>(nb_elements);
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                targ[i] = valueDist(gen);
+            }
+            float * d_targ;
+            cudaMalloc(&d_targ, nb_elements * sizeof(float));
+            cudaMemcpy(d_targ, targ.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
+
+            // compute the MSE manually
+            std::unique_ptr<float[]> tmp_res_manual = std::make_unique<float[]>(nb_elements);
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                tmp_res_manual[i] = std::pow(pred[i] - targ[i],2);
+            }
+            float * d_tmp_res_manual;
+            cudaMalloc(&d_tmp_res_manual, nb_elements * sizeof(float));
+            cudaMemcpy(d_tmp_res_manual, tmp_res_manual.get(), nb_elements * sizeof(float), cudaMemcpyHostToDevice);
+
+            std::shared_ptr<Tensor> tmp_tensor = std::make_shared<Tensor>(dims);
+            tmp_tensor->setBackend("cuda");
+            tmp_tensor->getImpl()->setRawPtr(d_tmp_res_manual, nb_elements);
+
+            const float res_manual = std::accumulate(&tmp_res_manual[0], &tmp_res_manual[nb_elements], 0.0f, std::plus<float>()) / static_cast<float>(nb_elements);
+
+            // compute the MSE using Aidge::loss::MSE function
+            std::shared_ptr<Tensor> pred_tensor = std::make_shared<Tensor>(dims);
+            pred_tensor->setBackend("cuda");
+            pred_tensor->getImpl()->setRawPtr(d_pred, nb_elements);
+
+
+            std::shared_ptr<Tensor> targ_tensor = std::make_shared<Tensor>(dims);
+            targ_tensor->setBackend("cuda");
+            targ_tensor->getImpl()->setRawPtr(d_targ, nb_elements);
+
+            const Tensor res_function = loss::MSE(pred_tensor, targ_tensor);
 
-        // compare results
-        Tensor res_manual_tensor = Tensor(res_manual);
-        REQUIRE(approxEq<float>(res_manual, res_function));
+            // compare results
+            Tensor res_manual_tensor = Tensor(res_manual);
+            REQUIRE(approxEq<float>(res_manual_tensor, res_function));
+
+            cudaFree(d_pred);
+            cudaFree(d_targ);
+            cudaFree(d_tmp_res_manual);
+        }
     }
+#endif
 }
 } // namespace Aidge
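
Note on the CUDA section: the cudaMalloc/cudaMemcpy calls are unchecked, and a failing REQUIRE throws, so the trailing cudaFree calls are skipped on failure and the device buffers leak for the rest of the test run. A minimal sketch of the kind of guard that would address both follows; the CUDA_CHECK macro and DeviceBuffer type are illustrative names chosen here, not part of the patch or of Aidge, and only the standard CUDA runtime API is assumed.

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdio>
#include <cstdlib>

// Abort with a readable message when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                \
    do {                                                                \
        const cudaError_t err_ = (call);                                \
        if (err_ != cudaSuccess) {                                      \
            std::fprintf(stderr, "CUDA error '%s' at %s:%d\n",          \
                         cudaGetErrorString(err_), __FILE__, __LINE__); \
            std::abort();                                               \
        }                                                               \
    } while (0)

// RAII holder for a device-side float buffer: the memory is released even
// if a Catch2 REQUIRE throws before the explicit cudaFree calls are reached.
struct DeviceBuffer {
    float* ptr = nullptr;
    explicit DeviceBuffer(std::size_t n) {
        CUDA_CHECK(cudaMalloc(&ptr, n * sizeof(float)));
    }
    ~DeviceBuffer() { cudaFree(ptr); }
    DeviceBuffer(const DeviceBuffer&) = delete;
    DeviceBuffer& operator=(const DeviceBuffer&) = delete;
};

// Possible usage in the test body, replacing the raw d_pred allocation:
//   DeviceBuffer d_pred(nb_elements);
//   CUDA_CHECK(cudaMemcpy(d_pred.ptr, pred.get(),
//                         nb_elements * sizeof(float),
//                         cudaMemcpyHostToDevice));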