Skip to content
Snippets Groups Projects
Commit 427f210c authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

add Pad random input test

parent adf53684
No related branches found
No related tags found
2 merge requests!32version 0.2.1,!25Add backward implementations
......@@ -10,17 +10,15 @@
********************************************************************************/
#include <array>
#include <chrono>     // std::chrono::system_clock, std::chrono::duration
#include <cmath>      // std::fabs
#include <cstddef>    // std::size_t
#include <cstdint>    // std::uint16_t
#include <functional> // std::multiplies
#include <iostream>
#include <memory>     // std::shared_ptr, std::make_shared
#include <numeric>    // std::accumulate
#include <random>     // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector>     // std::vector

#include <catch2/catch_test_macros.hpp>

#include "Test_cuda.hpp"

#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
......@@ -367,11 +365,6 @@ TEST_CASE("[gpu/operator] Pad(forward)", "[Pad][GPU]") {
float* computedOutput = new float[myOutput->size()]();
cudaMemcpy(computedOutput, op->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * myOutput->size(), cudaMemcpyDeviceToHost);
for(int i = 0; i < myOutput->size(); i++){
const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
std::cout << "target: " << targetOutput << " computed: " << computedOutput[i]<< std::endl;
}
for(int i = 0; i < myOutput->size(); i++){
const float targetOutput = *(static_cast<float*>(myOutput->getImpl()->rawPtr()) + i);
REQUIRE(fabs(computedOutput[i] - targetOutput) < 1e-6);
......@@ -621,4 +614,88 @@ TEST_CASE("[gpu/operator] Pad(forward)", "[Pad][GPU]") {
delete[] computedOutput;
}
SECTION("Random Input") {
    // Cross-check the CUDA Pad<2> forward implementation against the CPU
    // reference on random inputs, random symmetric padding sizes and random
    // border types, over NBTRIALS independent trials.
    constexpr std::uint16_t NBTRIALS = 10;

    // Create a random number generator.
    std::random_device rd;
    std::mt19937 gen(rd());
    // Random float distribution between 0.1 and 1.1 (strictly positive, so
    // padded border values cannot be confused with input data).
    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f);
    std::uniform_int_distribution<std::size_t> padTypeDist(std::size_t(0), std::size_t(1));
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1), std::size_t(10));
    std::uniform_int_distribution<std::size_t> padSizeDist(std::size_t(0), std::size_t(5));

    // To measure execution time of the CUDA 'forward()'.
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};

    for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
        // Random 4D input shape (Pad<2> pads the two trailing spatial dims).
        const std::size_t nbDims = 4;
        std::vector<std::size_t> dims;
        for (std::size_t i = 0; i < nbDims; ++i) {
            dims.push_back(dimSizeDist(gen));
        }
        const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
        const std::size_t borderType = padTypeDist(gen);
        const std::size_t padding = padSizeDist(gen);

        // Create Pad Operator, CUDA backend.
        std::shared_ptr<Node> myPadCUDA = Pad<2>({padding, padding, padding, padding}, "mypadcuda", static_cast<PadBorderType>(borderType));
        auto op_cuda = std::static_pointer_cast<OperatorTensor>(myPadCUDA->getOperator());
        op_cuda->setDataType(DataType::Float32);
        op_cuda->setBackend("cuda");

        // Create an identical Pad Operator, CPU backend (reference).
        std::shared_ptr<Node> myPadCPU = Pad<2>({padding, padding, padding, padding}, "mypadcpu", static_cast<PadBorderType>(borderType));
        auto op_cpu = std::static_pointer_cast<OperatorTensor>(myPadCPU->getOperator());
        op_cpu->setDataType(DataType::Float32);
        op_cpu->setBackend("cpu");

        // Shared random input data, generated once on the host.
        float* array0 = new float[nb_elements];
        for (std::size_t i = 0; i < nb_elements; ++i) {
            array0[i] = valueDist(gen);
        }

        // Input tensor, CUDA backend: copy host data to device memory.
        float* array0_d;
        std::shared_ptr<Tensor> T0_cuda = std::make_shared<Tensor>();
        T0_cuda->setDataType(DataType::Float32);
        T0_cuda->setBackend("cuda");
        T0_cuda->resize(dims);
        op_cuda->associateInput(0, T0_cuda);
        cudaMalloc(reinterpret_cast<void **>(&array0_d), sizeof(float) * nb_elements);
        cudaMemcpy(array0_d, array0, sizeof(float) * nb_elements, cudaMemcpyHostToDevice);
        T0_cuda->getImpl()->setRawPtr(array0_d, nb_elements);

        // Input tensor, CPU backend: use the host buffer directly.
        std::shared_ptr<Tensor> T0_cpu = std::make_shared<Tensor>();
        op_cpu->associateInput(0, T0_cpu);
        T0_cpu->setDataType(DataType::Float32);
        T0_cpu->setBackend("cpu");
        T0_cpu->resize(dims);
        T0_cpu->getImpl()->setRawPtr(array0, nb_elements);

        // Forward on CUDA (timed), then fetch the result back to the host.
        start = std::chrono::system_clock::now();
        op_cuda->forward();
        end = std::chrono::system_clock::now();
        duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
        const std::size_t outSize = op_cuda->getOutput(0)->size();
        float *computed_cuda = new float[outSize]();
        cudaMemcpy(computed_cuda, op_cuda->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * outSize, cudaMemcpyDeviceToHost);

        // Forward on CPU (reference result).
        op_cpu->forward();
        float *computed_cpu = static_cast<float*>(op_cpu->getOutput(0)->getImpl()->rawPtr());

        // Compare the FULL outputs element-wise. The previous check only
        // compared the first element of each buffer (*computed_cuda vs
        // *computed_cpu), so any mismatch past element 0 went undetected.
        REQUIRE(outSize == op_cpu->getOutput(0)->size());
        for (std::size_t i = 0; i < outSize; ++i) {
            REQUIRE(std::fabs(computed_cuda[i] - computed_cpu[i]) < 1e-6);
        }

        delete[] array0;          // also backs T0_cpu (setRawPtr does not own)
        delete[] computed_cuda;
        cudaFree(array0_d);
    }
    std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment