diff --git a/unit_tests/Test_AbsImpl.cpp b/unit_tests/Test_AbsImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9daea18d8cf147337d18d7622233821938f9fae6
--- /dev/null
+++ b/unit_tests/Test_AbsImpl.cpp
@@ -0,0 +1,133 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <chrono>      // std::chrono::system_clock, std::chrono::duration_cast,
+                       // std::chrono::milliseconds
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <functional>  // std::multiplies
+#include <memory>
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937,
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <cuda_runtime.h>  // cudaMalloc, cudaMemcpy, cudaFree, cudaDeviceSynchronize
+#include <fmt/core.h>
+
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/AbsImpl.hpp"
+#include "aidge/backend/cuda/data/TensorImpl.hpp"
+#include "aidge/backend/cuda/operator/AbsImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/graph/Node.hpp"
+#include "aidge/operator/Abs.hpp"
+#include "aidge/utils/TensorUtils.hpp"
+
+using namespace std::chrono;
+namespace Aidge {
+
+TEST_CASE("[gpu/operator] Abs", "[Abs][GPU]")
+{
+    // CONSTANTS
+
+    constexpr std::uint16_t NB_TRIALS = 10;
+
+    // SETUP RNGS
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> valueDist(-1.0f, 1.0f);
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1), std::size_t(20));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(6));
+
+    for (std::uint16_t trial = 0; trial < NB_TRIALS; ++trial)
+    {
+        // PREPARE TEST DATA
+
+        const std::size_t nbDims = nbDimsDist(gen);
+
+        std::vector<std::size_t> dims;
+        for (std::size_t i = 0; i < nbDims; ++i)
+            dims.push_back(dimSizeDist(gen));
+
+        const std::size_t nbElements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+        float* rawData = new float[nbElements];
+        for (std::size_t i = 0; i < nbElements; ++i)
+            rawData[i] = valueDist(gen);
+
+        // CPU FORWARD
+
+        std::shared_ptr<Abs_Op> cpuOp = std::make_shared<Abs_Op>();
+        cpuOp->setDataType(DataType::Float32);
+        cpuOp->setBackend("cpu");
+
+        std::shared_ptr<Tensor> cpuTensor = std::make_shared<Tensor>();
+        cpuOp->associateInput(0, cpuTensor);
+        cpuTensor->setDataType(DataType::Float32);
+        cpuTensor->setBackend("cpu");
+        cpuTensor->resize(dims);
+        cpuTensor->getImpl()->setRawPtr(rawData, nbElements);
+
+        auto startTime = std::chrono::system_clock::now();
+        cpuOp->forward();
+        auto endTime = std::chrono::system_clock::now();
+        auto cpuElapsedTime = duration_cast<milliseconds>(endTime - startTime).count();
+
+        Tensor cpuResult = *(cpuOp->getOutput(0));
+
+        // CUDA FORWARD
+
+        std::shared_ptr<Abs_Op> cudaOp = std::make_shared<Abs_Op>();
+        cudaOp->setDataType(DataType::Float32);
+        cudaOp->setBackend("cuda");
+
+        std::shared_ptr<Tensor> cudaTensor = std::make_shared<Tensor>();
+        cudaTensor->setDataType(DataType::Float32);
+        cudaTensor->setBackend("cuda");
+        cudaTensor->resize(dims);
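+        // NOTE: the "cuda" backend stores tensor data on the GPU, so the raw
+        // pointer handed to setRawPtr() below must be a *device* pointer;
+        // the host buffer is therefore copied over with cudaMemcpy first.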
+        cudaOp->associateInput(0, cudaTensor);
+
+        float* rawDataDevice;
+        cudaMalloc(reinterpret_cast<void**>(&rawDataDevice), sizeof(float) * nbElements);
+        cudaMemcpy(rawDataDevice, rawData, sizeof(float) * nbElements, cudaMemcpyHostToDevice);
+        cudaTensor->getImpl()->setRawPtr(rawDataDevice, nbElements);
+
+        startTime = std::chrono::system_clock::now();
+        cudaOp->forward();
+        cudaDeviceSynchronize();  // kernels launch asynchronously: wait before stopping the clock
+        endTime = std::chrono::system_clock::now();
+        auto cudaElapsedTime = duration_cast<milliseconds>(endTime - startTime).count();
+
+        std::shared_ptr<Tensor> fallback;
+        Tensor& cudaResult = cudaOp->getOutput(0)->refCastFrom(fallback, DataType::Float32, "cpu");
+
+        // COMPARE
+
+        REQUIRE(approxEq<float>(cudaResult, cpuResult));
+
+        // FREE MEMORY
+
+        delete[] rawData;
+        cudaFree(rawDataDevice);
+
+        // LOG INFOS
+
+        fmt::print(" Execution time on CPU : {} ms\n", cpuElapsedTime);
+        fmt::print(" Execution time on CUDA : {} ms\n", cudaElapsedTime);
+    }
+}
+} // namespace Aidge
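
Note on the GPU timing: wrapping an asynchronous kernel launch in std::chrono
only measures host-side launch overhead unless the device is synchronized
before the stop timestamp. A common alternative is to time the device side
directly with CUDA events. The sketch below is illustrative, not part of the
patch; it reuses the test's cudaOp and only assumes the standard CUDA runtime
API (evStart, evStop and gpuMs are hypothetical names):

    cudaEvent_t evStart, evStop;
    cudaEventCreate(&evStart);
    cudaEventCreate(&evStop);
    cudaEventRecord(evStart);                       // mark the start on the device timeline
    cudaOp->forward();                              // enqueue the Abs kernel
    cudaEventRecord(evStop);                        // mark the end on the device timeline
    cudaEventSynchronize(evStop);                   // block until the recorded work has finished
    float gpuMs = 0.0f;
    cudaEventElapsedTime(&gpuMs, evStart, evStop);  // elapsed device time, in milliseconds
    cudaEventDestroy(evStart);
    cudaEventDestroy(evStop);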