Skip to content
Snippets Groups Projects
Commit 4d6335c4 authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

add reshape random input test

parent 427f210c
No related branches found
No related tags found
2 merge requests!32version 0.2.1,!25Add backward implementations
......@@ -10,15 +10,15 @@
********************************************************************************/
#include <algorithm> // std::shuffle, std::transform
#include <array>
#include <chrono>    // std::chrono::time_point, std::chrono::duration
#include <cmath>     // std::fabs
#include <cstddef>   // std::size_t
#include <cstdint>   // std::uint16_t, std::int64_t
#include <iostream>
#include <memory>
#include <numeric>   // std::accumulate
#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector>

#include <catch2/catch_test_macros.hpp>

#include "Test_cuda.hpp"

#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
......@@ -85,4 +85,99 @@ TEST_CASE("[gpu/operator] Reshape(forward)") {
delete[] computedOutput;
}
SECTION("Random Input")
{
    constexpr std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    // Random float distribution over [0.1, 1.1) (strictly positive values)
    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f);
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1),
                                                           std::size_t(10));
    // Max nbDims supported by cuDNN is CUDNN_DIM_MAX (8)
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(CUDNN_DIM_MAX));

    // To measure execution time of 'forward()'
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};
    std::size_t number_of_operation = 0;

    for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial)
    {
        // Generate a random tensor shape, then a shuffled permutation of it
        // so the reshape target has the same total number of elements.
        const std::size_t nbDims = nbDimsDist(gen);
        std::vector<std::size_t> dims;
        dims.reserve(nbDims);
        for (std::size_t i = 0; i < nbDims; ++i)
        {
            dims.push_back(dimSizeDist(gen));
        }
        std::vector<std::size_t> shuffledDims = dims;
        std::shuffle(shuffledDims.begin(), shuffledDims.end(), gen);
        // Reshape expects signed 64-bit dims; convert without narrowing
        std::vector<std::int64_t> shuffledIntDims(shuffledDims.size());
        std::transform(shuffledDims.begin(), shuffledDims.end(), shuffledIntDims.begin(),
                       [](std::size_t value) { return static_cast<std::int64_t>(value); });

        // Create Reshape Operator CUDA
        std::shared_ptr<Node> myReshapeCuda = Reshape(shuffledIntDims, false, "myreshapecuda");
        auto op_cuda = std::static_pointer_cast<OperatorTensor>(myReshapeCuda->getOperator());
        op_cuda->setDataType(DataType::Float32);
        op_cuda->setBackend("cuda");

        // Create Reshape Operator CPU (reference implementation)
        std::shared_ptr<Node> myReshapeCpu = Reshape(shuffledIntDims, false, "myreshapecpu");
        auto op_cpu = std::static_pointer_cast<OperatorTensor>(myReshapeCpu->getOperator());
        op_cpu->setDataType(DataType::Float32);
        op_cpu->setBackend("cpu");

        const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
        number_of_operation += nb_elements;

        // Fill input tensor with random values
        float *array0 = new float[nb_elements];
        for (std::size_t i = 0; i < nb_elements; ++i)
        {
            array0[i] = valueDist(gen);
        }

        // input0 CUDA: upload the host buffer to device memory
        float* array0_d = nullptr;
        std::shared_ptr<Tensor> T0_cuda = std::make_shared<Tensor>();
        T0_cuda->setDataType(DataType::Float32);
        T0_cuda->setBackend("cuda");
        T0_cuda->resize(dims);
        op_cuda->associateInput(0, T0_cuda);
        // Check CUDA return codes instead of silently ignoring failures
        REQUIRE(cudaMalloc(reinterpret_cast<void **>(&array0_d), sizeof(float) * nb_elements) == cudaSuccess);
        REQUIRE(cudaMemcpy(array0_d, array0, sizeof(float) * nb_elements, cudaMemcpyHostToDevice) == cudaSuccess);
        T0_cuda->getImpl()->setRawPtr(array0_d, nb_elements);

        // input0 CPU: shares the host buffer (setRawPtr is non-owning here)
        std::shared_ptr<Tensor> T0_cpu = std::make_shared<Tensor>();
        op_cpu->associateInput(0, T0_cpu);
        T0_cpu->setDataType(DataType::Float32);
        T0_cpu->setBackend("cpu");
        T0_cpu->resize(dims);
        T0_cpu->getImpl()->setRawPtr(array0, nb_elements);

        // Run and time the CUDA inference
        start = std::chrono::system_clock::now();
        op_cuda->forward();
        end = std::chrono::system_clock::now();
        // Accumulate directly into the double-precision duration: the previous
        // duration_cast<microseconds> truncated sub-microsecond precision.
        duration += end - start;

        float *computed_cuda = new float[nb_elements];
        REQUIRE(cudaMemcpy(computed_cuda, op_cuda->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * nb_elements, cudaMemcpyDeviceToHost) == cudaSuccess);

        // forward CPU
        op_cpu->forward();
        const float *computed_cpu = static_cast<float*>(op_cpu->getOutput(0)->getImpl()->rawPtr());

        // Compare EVERY element. The previous check dereferenced the raw
        // pointers (approxEq on *computed_cuda / *computed_cpu), which only
        // compared the first element of the output.
        for (std::size_t i = 0; i < nb_elements; ++i)
        {
            REQUIRE(std::fabs(computed_cuda[i] - computed_cpu[i]) <= 1e-6f);
        }

        delete[] computed_cuda;
        delete[] array0;
        cudaFree(array0_d);
    }
    std::cout << "number of elements over time spent: " << (number_of_operation / duration.count()) << std::endl;
    std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment