Skip to content
Snippets Groups Projects
Commit f94b095a authored by Maxence Naud's avatar Maxence Naud
Browse files

Improve tests for arithmetic operators

parent e9fd4827
No related branches found
No related tags found
2 merge requests!50version 0.2.0,!30add broadcasting for Arithmetic operators
This diff is collapsed.
...@@ -10,123 +10,305 @@ ...@@ -10,123 +10,305 @@
********************************************************************************/ ********************************************************************************/
#include <catch2/catch_test_macros.hpp> #include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp" #include "aidge/data/Tensor.hpp"
#include "aidge/operator/Mul.hpp" #include "aidge/operator/Mul.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp" namespace Aidge {
#include <memory> TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
using namespace Aidge; // Create MatMul Operator
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'MatMul_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("MulImpl_cpu::forward()") {
SECTION("Scalar / Scalar") {
TEST_CASE("[cpu/operator] Mul(forward)", "[Mul][CPU]") {
SECTION("2D Tensor by Singleton") {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{0.38977361, 0.34064174},
{0.00427264, 0.90872520}
}
});
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{3.0}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{1.16932082, 1.02192521},
{0.01281792, 2.72617555}
}
});
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul -> getOperator());
myMul->getOperator()->associateInput(0, input_1);
myMul->getOperator()->associateInput(1, input_2);
myMul->getOperator()->setDataType(DataType::Float32);
myMul->getOperator()->setBackend("cpu");
op->computeOutputDims();
myMul->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 4; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
} }
SECTION("Scalar / +1-D Tensor") {
} }
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
SECTION("2D Tensors") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { // generate 2 random Tensors
{ const std::size_t nbDims = nbDimsDist(gen);
{0.38977361, 0.34064174}, std::vector<std::size_t> dims;
{0.00427264, 0.90872520} for (std::size_t i = 0; i < nbDims; ++i) {
} dims.push_back(dimSizeDist(gen));
}); }
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
{ number_of_operation += nb_elements;
{0.02362096, 0.24084556},
{0.94690859, 0.13512510} // without broadcasting
} float* array0 = new float[nb_elements];
}); float* array1 = new float[nb_elements];
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { float* result = new float[nb_elements];
{
{0.00920683, 0.08204205}, for (std::size_t i = 0; i < nb_elements; ++i) {
{0.00404580, 0.12279158} array0[i] = valueDist(gen);
array1[i] = valueDist(gen);
result[i] = array0[i] * array1[i];
}
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
// with broadcasting
} }
}); std::cout << "multiplications over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul -> getOperator());
myMul->getOperator()->associateInput(0, input_1);
myMul->getOperator()->associateInput(1, input_2);
myMul->getOperator()->setDataType(DataType::Float32);
myMul->getOperator()->setBackend("cpu");
op->computeOutputDims();
myMul->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 4; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
} }
} SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
SECTION("3D Tensor by 1D Tensor") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> { // generate 2 random Tensors
{ // handle dimensions, replace some dimensions with '1' to get broadcasting
{{0.33647752, 0.89360154, 0.46586215}, constexpr std::size_t nbDims = 4;
{0.71518236, 0.71481097, 0.97991812}}, std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
{{0.17393428, 0.56849813, 0.18489265}, // create arrays and fill them with random values
{0.78397650, 0.00348300, 0.65758008}} float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
} float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
}); float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{
{0.15380561, 0.51063120, 0.93031412} for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
}); array0[i] = valueDist(gen);
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { }
{ for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
{{0.05175213, 0.45630082, 0.43339813}, array1[i] = valueDist(gen);
{0.10999906, 0.36500478, 0.91163164}}, }
{{0.02675207, 0.29029289, 0.17200825}, // compute true result
{0.12057999, 0.00177853, 0.61175603}} const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
} }
});
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul -> getOperator());
myMul->getOperator()->associateInput(0, input_1);
myMul->getOperator()->associateInput(1, input_2);
myMul->getOperator()->setDataType(DataType::Float32);
myMul->getOperator()->setBackend("cpu");
op->computeOutputDims();
myMul->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 12; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
} }
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims0(4);
for (std::size_t i = 0; i < nbDims; ++i) {
dims0[i] = dimSizeDist(gen);
}
std::vector<std::size_t> dimsOut = dims0;
std::vector<std::size_t> dims1 = dims0;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims1[i] = 1;
}
}
dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
float* array1 = new float[array1_size];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
+ strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
+ ((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
std::cout << "multiplications over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
} }
} }
\ No newline at end of file } // namespace Aidge
This diff is collapsed.
...@@ -10,123 +10,305 @@ ...@@ -10,123 +10,305 @@
********************************************************************************/ ********************************************************************************/
#include <catch2/catch_test_macros.hpp> #include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp" #include "aidge/data/Tensor.hpp"
#include "aidge/operator/Sub.hpp" #include "aidge/operator/Sub.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu.hpp" namespace Aidge {
#include <memory> TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
constexpr std::uint16_t NBTRIALS = 1000;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<int> boolDist(0,1);
using namespace Aidge; // Create MatMul Operator
std::shared_ptr<Node> mySub = Sub();
auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create 2 input Tensors
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'MatMul_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("SubImpl_cpu::forward()") {
SECTION("Scalar / Scalar") {
TEST_CASE("[cpu/operator] Sub(forward)", "[Sub][CPU]") {
SECTION("2D Tensor by Singleton") {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{0.34234560, 0.92812711},
{0.73706615, 0.69953883}
}
});
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{2.5}});
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
{
{-2.15765429, -1.57187295},
{-1.76293385, -1.80046117}
}
});
std::shared_ptr<Node> mySub = Sub();
auto op = std::static_pointer_cast<OperatorTensor>(mySub -> getOperator());
mySub->getOperator()->associateInput(0, input_1);
mySub->getOperator()->associateInput(1, input_2);
mySub->getOperator()->setDataType(DataType::Float32);
mySub->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySub->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 4; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
} }
SECTION("Scalar / +1-D Tensor") {
} }
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
SECTION("2D Tensors") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { // generate 2 random Tensors
{ const std::size_t nbDims = nbDimsDist(gen);
{0.34234560, 0.92812711}, std::vector<std::size_t> dims;
{0.73706615, 0.69953883} for (std::size_t i = 0; i < nbDims; ++i) {
} dims.push_back(dimSizeDist(gen));
}); }
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
{ number_of_operation += nb_elements;
{0.61729127, 0.83004373},
{0.72002089, 0.52473849} // without broadcasting
} float* array0 = new float[nb_elements];
}); float* array1 = new float[nb_elements];
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { float* result = new float[nb_elements];
{
{-0.27494568, 0.09808338}, for (std::size_t i = 0; i < nb_elements; ++i) {
{0.01704526, 0.17480034} array0[i] = valueDist(gen);
array1[i] = valueDist(gen);
result[i] = array0[i] - array1[i];
}
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// input1
T1->resize(dims);
T1 -> getImpl() -> setRawPtr(array1, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->computeOutputDims();
start = std::chrono::system_clock::now();
mySub->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
// with broadcasting
} }
}); std::cout << "multiplications over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
std::shared_ptr<Node> mySub = Sub();
auto op = std::static_pointer_cast<OperatorTensor>(mySub -> getOperator());
mySub->getOperator()->associateInput(0, input_1);
mySub->getOperator()->associateInput(1, input_2);
mySub->getOperator()->setDataType(DataType::Float32);
mySub->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySub->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 4; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
} }
} SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
SECTION("3D Tensor by 1D Tensor") { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> { // generate 2 random Tensors
{ // handle dimensions, replace some dimensions with '1' to get broadcasting
{{0.84181279, 0.20655948, 0.09750116}, constexpr std::size_t nbDims = 4;
{0.37723488, 0.73120135, 0.04666907}}, std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims0[i] = 1;
}
if (boolDist(gen)) {
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
{{0.91483921, 0.93985939, 0.58823180}, // create arrays and fill them with random values
{0.39963132, 0.67879969, 0.33209187}} float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
} float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
}); float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{
{0.04784805, 0.91903114, 0.38606840} for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
}); array0[i] = valueDist(gen);
std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { }
{ for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
{{0.79396474, -0.71247166, -0.28856725}, array1[i] = valueDist(gen);
{0.32938683, -0.18782979, -0.33939934}}, }
{{0.86699116, 0.02082825, 0.20216340}, // compute true result
{0.35178328, -0.24023145, -0.05397654}} const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] - array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
mySub->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
} }
});
std::shared_ptr<Node> mySub = Sub();
auto op = std::static_pointer_cast<OperatorTensor>(mySub -> getOperator());
mySub->getOperator()->associateInput(0, input_1);
mySub->getOperator()->associateInput(1, input_2);
mySub->getOperator()->setDataType(DataType::Float32);
mySub->getOperator()->setBackend("cpu");
op->computeOutputDims();
mySub->forward();
float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
for (std::size_t i = 0; i< 12; ++i) {
REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
} }
SECTION("+1-D Tensor / 1-D Tensor") {
std::size_t number_of_operation = 0;
std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims0(4);
for (std::size_t i = 0; i < nbDims; ++i) {
dims0[i] = dimSizeDist(gen);
}
std::vector<std::size_t> dimsOut = dims0;
std::vector<std::size_t> dims1 = dims0;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
dims1[i] = 1;
}
}
dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
float* array1 = new float[array1_size];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < array1_size; ++i) {
array1[i] = valueDist(gen);
}
// compute true result
auto dims1_tmp = dims1;
dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
+ strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
+ ((dims1_tmp[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] - array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
}
}
}
// conversion to Aidge::Tensors
// input0
T0->resize(dims0);
T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
// input1
T1->resize(dims1);
T1 -> getImpl() -> setRawPtr(array1, array1_size);
// results
Tres->resize(dimsOut);
Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
// compute result
op->computeOutputDims();
start = std::chrono::system_clock::now();
mySub->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
// comparison between truth and computed result
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
delete[] array0;
delete[] array1;
delete[] result;
const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
}
std::cout << "multiplications over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
} }
} }
\ No newline at end of file } // namespace Aidge
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment