Commit 259824c8 authored by Benjamin Halimi

add the unit test

parent f06d057b
Branch: add_abs
Merge request !64: Add the CUDA backend for the Abs operator
Pipeline #63934 passed
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <chrono> // std::chrono::time_point, std::chrono::system_clock,
// std::chrono::duration_cast, std::chrono::milliseconds
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <functional> // std::multiplies
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937
// std::uniform_int_distribution, std::uniform_real_distribution
#include <vector>
#include <catch2/catch_test_macros.hpp>
#include <cuda.h>
#include <cuda_runtime.h> // cudaMalloc, cudaMemcpy, cudaFree
#include <fmt/core.h>
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/backend/cpu/operator/AbsImpl.hpp"
#include "aidge/backend/cuda/data/TensorImpl.hpp"
#include "aidge/backend/cuda/operator/AbsImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/graph/Node.hpp"
#include "aidge/operator/Abs.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace std::chrono;
namespace Aidge {
TEST_CASE("[gpu/operator] Abs", "[Abs][GPU]")
{
    // CONSTANTS
    constexpr std::uint16_t NB_TRIALS = 10;

    // SETUP RNGS
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(-1, 1);
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1), std::size_t(20));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(6));
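
    // Each trial draws a fresh random shape (1 to 6 dimensions, each of size 1 to 20) and random
    // input values in [-1, 1], runs Abs on both backends, and compares the outputs.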
    for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial)
    {
        // PREPARE TEST DATA
        const std::size_t nbDims = nbDimsDist(gen);
        std::vector<std::size_t> dims;
        for (std::size_t i = 0; i < nbDims; ++i)
            dims.push_back(dimSizeDist(gen));
        const std::size_t nbElements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());

        float* rawData = new float[nbElements];
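        // Fill the host buffer with uniformly distributed values in [-1, 1]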
        for (std::size_t i = 0; i < nbElements; ++i)
            rawData[i] = valueDist(gen);

        // CPU FORWARD
        std::shared_ptr<Abs_Op> cpuOp = std::make_shared<Abs_Op>();
        cpuOp->setDataType(DataType::Float32);
        cpuOp->setBackend("cpu");

        std::shared_ptr<Tensor> cpuTensor = std::make_shared<Tensor>();
        cpuOp->associateInput(0, cpuTensor);
        cpuTensor->setDataType(DataType::Float32);
        cpuTensor->setBackend("cpu");
        cpuTensor->resize(dims);
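        // Point the CPU tensor at the pre-filled host buffer; the buffer stays owned by the test
        // and is freed at the end of the trial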
        cpuTensor->getImpl()->setRawPtr(rawData, nbElements);

        auto startTime = std::chrono::system_clock::now();
        cpuOp->forward();
        auto endTime = std::chrono::system_clock::now();
        auto cpuElapsedTime = duration_cast<milliseconds>(endTime - startTime).count();
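        // Keep the CPU output as the reference result for the comparison below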
        Tensor cpuResult = *(cpuOp->getOutput(0));

        // CUDA FORWARD
        std::shared_ptr<Abs_Op> cudaOp = std::make_shared<Abs_Op>();
        cudaOp->setDataType(DataType::Float32);
        cudaOp->setBackend("cuda");

        std::shared_ptr<Tensor> cudaTensor = std::make_shared<Tensor>();
        cudaTensor->setDataType(DataType::Float32);
        cudaTensor->setBackend("cuda");
        cudaTensor->resize(dims);
        cudaOp->associateInput(0, cudaTensor);
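
        // Allocate device memory, copy the same input data host -> device, and point the CUDA
        // tensor at the device buffer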
        float* rawDataDevice;
        cudaMalloc(reinterpret_cast<void**>(&rawDataDevice), sizeof(float) * nbElements);
        cudaMemcpy(rawDataDevice, rawData, sizeof(float) * nbElements, cudaMemcpyHostToDevice);
        cudaTensor->getImpl()->setRawPtr(rawDataDevice, nbElements);

        startTime = std::chrono::system_clock::now();
        cudaOp->forward();
        endTime = std::chrono::system_clock::now();
        auto cudaElapsedTime = duration_cast<milliseconds>(endTime - startTime).count();
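
        // Bring the CUDA output back to a Float32 tensor on the "cpu" backend ('fallback' holds
        // the converted copy when a cast/copy is needed) so it can be compared element-wise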
        std::shared_ptr<Tensor> fallback;
        Tensor& cudaResult = cudaOp->getOutput(0)->refCastFrom(fallback, DataType::Float32, "cpu");

        // COMPARE
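        // The CUDA result must match the CPU reference within approxEq's default tolerances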
        REQUIRE(approxEq<float>(cudaResult, cpuResult));

        // FREE MEMORY
        delete[] rawData;
        cudaFree(rawDataDevice);

        // LOG INFOS
        fmt::print(" Execution time on CPU : {} ms\n", cpuElapsedTime);
        fmt::print(" Execution time on CUDA : {} ms\n", cudaElapsedTime);
    }
}
} // namespace Aidge