Skip to content
Snippets Groups Projects
Commit 259824c8 authored by Benjamin Halimi
Browse files

add the unit test

parent f06d057b
No related branches found
No related tags found
1 merge request: !64 "Add the CUDA backend for the Abs operator"
Pipeline #63934 passed
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono> // std::micro, std::chrono::time_point,
// std::chrono::system_clock
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <cuda.h>
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/AbsImpl.hpp"
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/AbsImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/operator/Abs.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace std::chrono;
namespace Aidge {

/**
 * Compares the CUDA backend of the Abs operator against the CPU backend.
 *
 * For NB_TRIALS random tensors (1 to 6 dimensions, each of size 1 to 20,
 * values drawn uniformly in [-1, 1)), the same input is fed to an Abs_Op on
 * the "cpu" backend and to an Abs_Op on the "cuda" backend. The CUDA output is
 * brought back to the host and must be approximately equal to the CPU output.
 * Wall-clock forward times of both backends are printed for information only.
 */
TEST_CASE("[gpu/operator] Abs", "[Abs][GPU]")
{
    // CONSTANTS
    constexpr std::uint16_t NB_TRIALS = 10;

    // Fractional milliseconds: small tensors run in well under 1 ms, which an
    // integer millisecond count would truncate to 0.
    using Milliseconds = std::chrono::duration<double, std::milli>;

    // Releases a cudaMalloc'ed buffer when its owner goes out of scope, so the
    // device memory is freed even if a REQUIRE below aborts the test case.
    struct CudaFreeDeleter {
        void operator()(float* ptr) const noexcept { cudaFree(ptr); }
    };

    // SETUP RNGS
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(-1, 1);
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1), std::size_t(20));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(6));

    for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial)
    {
        // PREPARE TEST DATA
        const std::size_t nbDims = nbDimsDist(gen);
        std::vector<std::size_t> dims;
        dims.reserve(nbDims);
        for (std::size_t i = 0; i < nbDims; ++i) {
            dims.push_back(dimSizeDist(gen));
        }
        const std::size_t nbElements = std::accumulate(
            dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());

        // Host input owned by a vector (instead of new[]/delete[]) so it is
        // released on every exit path. It is declared before the tensors that
        // point to it and therefore outlives them.
        std::vector<float> hostData(nbElements);
        for (float& value : hostData) {
            value = valueDist(gen);
        }

        // CPU FORWARD
        std::shared_ptr<Abs_Op> cpuOp = std::make_shared<Abs_Op>();
        cpuOp->setDataType(DataType::Float32);
        cpuOp->setBackend("cpu");

        std::shared_ptr<Tensor> cpuTensor = std::make_shared<Tensor>();
        cpuOp->associateInput(0, cpuTensor);
        cpuTensor->setDataType(DataType::Float32);
        cpuTensor->setBackend("cpu");
        cpuTensor->resize(dims);
        // NOTE(review): setRawPtr is treated as non-owning here, as in the
        // original test, which freed the buffer itself — confirm.
        cpuTensor->getImpl()->setRawPtr(hostData.data(), nbElements);

        // steady_clock is monotonic, which is what interval timing needs.
        auto startTime = std::chrono::steady_clock::now();
        cpuOp->forward();
        auto endTime = std::chrono::steady_clock::now();
        const double cpuElapsedTime =
            std::chrono::duration_cast<Milliseconds>(endTime - startTime).count();

        Tensor cpuResult = *(cpuOp->getOutput(0));

        // CUDA FORWARD
        std::shared_ptr<Abs_Op> cudaOp = std::make_shared<Abs_Op>();
        cudaOp->setDataType(DataType::Float32);
        cudaOp->setBackend("cuda");

        std::shared_ptr<Tensor> cudaTensor = std::make_shared<Tensor>();
        cudaTensor->setDataType(DataType::Float32);
        cudaTensor->setBackend("cuda");
        cudaTensor->resize(dims);
        cudaOp->associateInput(0, cudaTensor);

        // Allocate and fill the device input; every CUDA status is checked so
        // that an allocation/copy failure is reported where it happens.
        float* rawDataDevice = nullptr;
        const cudaError_t mallocStatus =
            cudaMalloc(reinterpret_cast<void**>(&rawDataDevice), sizeof(float) * nbElements);
        REQUIRE(mallocStatus == cudaSuccess);
        std::unique_ptr<float, CudaFreeDeleter> deviceData(rawDataDevice);

        const cudaError_t copyStatus = cudaMemcpy(
            deviceData.get(), hostData.data(), sizeof(float) * nbElements, cudaMemcpyHostToDevice);
        REQUIRE(copyStatus == cudaSuccess);

        cudaTensor->getImpl()->setRawPtr(deviceData.get(), nbElements);

        startTime = std::chrono::steady_clock::now();
        cudaOp->forward();
        // The device work launched by forward() is presumably asynchronous:
        // wait for it so the measurement covers execution (not only the launch)
        // and so that execution errors surface here.
        const cudaError_t syncStatus = cudaDeviceSynchronize();
        endTime = std::chrono::steady_clock::now();
        REQUIRE(syncStatus == cudaSuccess);
        const double cudaElapsedTime =
            std::chrono::duration_cast<Milliseconds>(endTime - startTime).count();

        // Bring the CUDA output back to a host Float32 tensor for comparison.
        std::shared_ptr<Tensor> fallback;
        Tensor& cudaResult = cudaOp->getOutput(0)->refCastFrom(fallback, DataType::Float32, "cpu");

        // COMPARE
        REQUIRE(approxEq<float>(cudaResult, cpuResult));

        // LOG INFOS
        fmt::print(" Execution time on CPU : {} ms\n", cpuElapsedTime);
        fmt::print(" Execution time on CUDA : {} ms\n", cudaElapsedTime);

        // Host and device buffers are released automatically at end of scope.
    }
}

} // namespace Aidge
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment