Skip to content
Snippets Groups Projects
Commit 658c3fdc authored by Houssem ROUIS's avatar Houssem ROUIS
Browse files

add random test for Add

parent 4353e4e0
No related branches found
No related tags found
2 merge requests!32version 0.2.1,!25Add backward implementations
......@@ -9,12 +9,15 @@
*
********************************************************************************/
#include <algorithm>  // std::max
#include <chrono>     // std::chrono::time_point, std::chrono::duration_cast
#include <cmath>      // std::fabs
#include <cstddef>    // std::size_t
#include <cstdint>    // std::uint16_t
#include <memory>     // std::shared_ptr, std::make_shared
#include <numeric>    // std::accumulate
#include <random>     // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector>     // std::vector

#include <catch2/catch_test_macros.hpp>

#include "aidge/backend/cpu.hpp"
#include "aidge/backend/cuda.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Add.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
......@@ -207,4 +210,128 @@ TEST_CASE("[gpu/operator] Add(forward)", "[Add][GPU]") {
delete[] computedOutput;
}
SECTION("Random Input") {
    constexpr std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    // Random float distribution between 0.1 and 1.1 (strictly positive values)
    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f);
    // Axis sizes between 1 and 10; tensors are 4-D or 5-D
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(1), std::size_t(10));
    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(4), std::size_t(5));
    std::uniform_int_distribution<int> boolDist(0, 1);
    // To measure execution time of the CUDA 'forward()'
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};
    std::size_t number_of_operation = 0;
    for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial)
    {
        // Create Add operator on the CUDA backend (operator under test)
        std::shared_ptr<Node> myAddCUDA = Add(2, "myaddcuda");
        auto op_cuda = std::static_pointer_cast<OperatorTensor>(myAddCUDA->getOperator());
        // Create Add operator on the CPU backend (reference implementation)
        std::shared_ptr<Node> myAddCPU = Add(2, "myaddcpu");
        auto op_cpu = std::static_pointer_cast<OperatorTensor>(myAddCPU->getOperator());
        op_cpu->setDataType(DataType::Float32);
        op_cpu->setBackend("cpu");
        // Draw random, possibly broadcastable, input shapes: on each axis,
        // each input independently keeps the drawn size or is set to 1.
        const std::size_t nbDims = nbDimsDist(gen);
        std::vector<std::size_t> dims0, dims1, dims;
        for (std::size_t i = 0; i < nbDims; ++i) {
            const std::size_t dim = dimSizeDist(gen);
            dims0.push_back(boolDist(gen) ? std::size_t(1) : dim);
            dims1.push_back(boolDist(gen) ? std::size_t(1) : dim);
            // Broadcast output size of an axis is the larger of the two inputs.
            dims.push_back(std::max(dims0[i], dims1[i]));
        }
        const std::size_t nb_elements0 = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
        const std::size_t nb_elements1 = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
        const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
        number_of_operation += nb_elements;
        // Host input buffers. RAII: released automatically, even if a REQUIRE
        // below throws (the previous raw new[]/delete[] leaked on failure).
        // Safe because setRawPtr does not take ownership (the original code
        // delete[]'d the buffers itself).
        std::vector<float> array0(nb_elements0);
        std::vector<float> array1(nb_elements1);
        for (std::size_t i = 0; i < nb_elements0; ++i) {
            array0[i] = valueDist(gen);
        }
        for (std::size_t i = 0; i < nb_elements1; ++i) {
            array1[i] = valueDist(gen);
        }
        // input0 CUDA
        float *array0_d = nullptr, *array1_d = nullptr;
        std::shared_ptr<Tensor> T0_cuda = std::make_shared<Tensor>();
        T0_cuda->setDataType(DataType::Float32);
        T0_cuda->setBackend("cuda");
        T0_cuda->resize(dims0);
        op_cuda->associateInput(0, T0_cuda);
        cudaMalloc(reinterpret_cast<void **>(&array0_d), sizeof(float) * nb_elements0);
        cudaMemcpy(array0_d, array0.data(), sizeof(float) * nb_elements0, cudaMemcpyHostToDevice);
        T0_cuda->getImpl()->setRawPtr(array0_d, nb_elements0);
        // input0 CPU
        std::shared_ptr<Tensor> T0_cpu = std::make_shared<Tensor>();
        op_cpu->associateInput(0, T0_cpu);
        T0_cpu->setDataType(DataType::Float32);
        T0_cpu->setBackend("cpu");
        T0_cpu->resize(dims0);
        T0_cpu->getImpl()->setRawPtr(array0.data(), nb_elements0);
        // input1 CUDA
        std::shared_ptr<Tensor> T1_cuda = std::make_shared<Tensor>();
        T1_cuda->setDataType(DataType::Float32);
        T1_cuda->setBackend("cuda");
        T1_cuda->resize(dims1);
        op_cuda->associateInput(1, T1_cuda);
        cudaMalloc(reinterpret_cast<void **>(&array1_d), sizeof(float) * nb_elements1);
        cudaMemcpy(array1_d, array1.data(), sizeof(float) * nb_elements1, cudaMemcpyHostToDevice);
        T1_cuda->getImpl()->setRawPtr(array1_d, nb_elements1);
        // input1 CPU
        std::shared_ptr<Tensor> T1_cpu = std::make_shared<Tensor>();
        op_cpu->associateInput(1, T1_cpu);
        T1_cpu->setDataType(DataType::Float32);
        T1_cpu->setBackend("cpu");
        T1_cpu->resize(dims1);
        T1_cpu->getImpl()->setRawPtr(array1.data(), nb_elements1);
        // forward CUDA (timed)
        op_cuda->setDataType(DataType::Float32);
        op_cuda->setBackend("cuda");
        start = std::chrono::system_clock::now();
        op_cuda->forward();
        end = std::chrono::system_clock::now();
        duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
        // Copy the CUDA result back to the host
        std::vector<float> computedOutput(nb_elements, 0.0f);
        cudaMemcpy(computedOutput.data(), op_cuda->getOutput(0)->getImpl()->rawPtr(), sizeof(float) * nb_elements, cudaMemcpyDeviceToHost);
        // forward CPU (reference)
        op_cpu->forward();
        const float *computedCPU = static_cast<float*>(op_cpu->getOutput(0)->getImpl()->rawPtr());
        // Compare EVERY output element. The previous check dereferenced the
        // buffers ('*computedOutput', '*computedCPU'), so only element 0 was
        // ever compared and mismatches elsewhere went undetected.
        for (std::size_t i = 0; i < nb_elements; ++i) {
            REQUIRE(std::fabs(computedOutput[i] - computedCPU[i]) <= (1e-8f + 1e-5f * std::fabs(computedCPU[i])));
        }
        cudaFree(array0_d);
        cudaFree(array1_d);
    }
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment