Skip to content
Snippets Groups Projects
Commit 412c2859 authored by Maxence Naud's avatar Maxence Naud
Browse files

[Upd] MatMul kernel test to handle more cases and add random matrices multiplications tests

parent 2f459838
No related branches found
No related tags found
2 merge requests: !50 "version 0.2.0", !34 "Matmul rework"
......@@ -10,170 +10,281 @@
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <vector> // std::vector
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
using namespace Aidge;
namespace Aidge {

/**
 * Randomized checks of the CPU MatMul operator's forward pass.
 *
 * For 2-D, 3-D and 4-D inputs, each section runs NBTRIALS trials. A trial
 * draws random dimensions, fills two row-major float buffers with random
 * values, computes a naive triple-loop reference product, and requires the
 * operator output to be approximately equal to that reference (approxEq).
 * The accumulated time spent in forward() is printed at the end of each
 * section.
 *
 * The last section only runs forward() on a 4-D tensor multiplied by a 1-D
 * tensor; it does not compare values.
 */
TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
    const std::uint16_t NBTRIALS = 10;

    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
    std::uniform_int_distribution<std::size_t> distDims(10, 100);
    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);

    // Create MatMul Operator
    std::shared_ptr<Node> myMatMul = MatMul();
    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());

    // To measure execution time of 'MatMul_Op::forward()' member function call.
    // steady_clock is monotonic, which is what interval measurement needs.
    std::chrono::time_point<std::chrono::steady_clock> start;
    std::chrono::time_point<std::chrono::steady_clock> end;
    // Value-initialized so that accumulating with += never reads an
    // indeterminate value.
    std::chrono::duration<double, std::micro> duration{};

    // Returns a buffer of `count` random floats drawn from `dis`.
    // Heap storage replaces the former runtime-sized stack arrays, which are
    // not standard C++.
    auto randomBuffer = [&dis, &gen](const std::size_t count) -> std::vector<float> {
        std::vector<float> buffer(count);
        for (float& value : buffer) {
            value = dis(gen); // Generate random float value
        }
        return buffer;
    };

    // Naive reference product of `nbMatrices` independent pairs of row-major
    // matrices: (rows x inner) * (inner x cols) -> (rows x cols).
    // The sum over `k` is accumulated in a float, in increasing k order.
    auto referenceMatMul = [](const std::vector<float>& lhs,
                              const std::vector<float>& rhs,
                              const std::size_t nbMatrices,
                              const std::size_t rows,
                              const std::size_t inner,
                              const std::size_t cols) -> std::vector<float> {
        std::vector<float> product(nbMatrices * rows * cols);
        for (std::size_t n = 0; n < nbMatrices; ++n) {
            for (std::size_t i = 0; i < rows; ++i) {
                for (std::size_t j = 0; j < cols; ++j) {
                    float sum = 0.0f;
                    for (std::size_t k = 0; k < inner; ++k) {
                        sum += lhs[(n*rows + i)*inner + k] * rhs[(n*inner + k)*cols + j];
                    }
                    product[(n*rows + i)*cols + j] = sum;
                }
            }
        }
        return product;
    };

    SECTION("2-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2;

            // Create and populate the arrays with random float values
            std::vector<float> bigArray1 = randomBuffer(dim0*dim1);
            std::vector<float> bigArray2 = randomBuffer(dim1*dim2);
            // Expected result
            std::vector<float> res = referenceMatMul(bigArray1, bigArray2, 1, dim0, dim1, dim2);

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(bigArray1.data(), dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(bigArray2.data(), dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(res.data(), dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->computeOutputDims();
            start = std::chrono::steady_clock::now();
            myMatMul->forward();
            end = std::chrono::steady_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("3-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dimNb = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2*dimNb;

            // Create and populate the arrays with random float values
            std::vector<float> bigArray1 = randomBuffer(dimNb*dim0*dim1);
            std::vector<float> bigArray2 = randomBuffer(dimNb*dim1*dim2);
            // Expected result: one product per matrix of the batch
            std::vector<float> res = referenceMatMul(bigArray1, bigArray2, dimNb, dim0, dim1, dim2);

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dimNb,dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(bigArray1.data(), dimNb*dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dimNb,dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(bigArray2.data(), dimNb*dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dimNb,dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(res.data(), dimNb*dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->computeOutputDims();
            start = std::chrono::steady_clock::now();
            myMatMul->forward();
            end = std::chrono::steady_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("4-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dimNb1 = distNbMatrix(gen);
            const std::size_t dimNb2 = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;

            // The two leading dimensions are flattened into a single batch
            // count; this matches the row-major layout of a 4-D tensor.
            const std::size_t nbMatrices = dimNb1*dimNb2;

            // Create and populate the arrays with random float values
            std::vector<float> bigArray1 = randomBuffer(nbMatrices*dim0*dim1);
            std::vector<float> bigArray2 = randomBuffer(nbMatrices*dim1*dim2);
            // Expected result: one product per matrix of the batch
            std::vector<float> res = referenceMatMul(bigArray1, bigArray2, nbMatrices, dim0, dim1, dim2);

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dimNb1,dimNb2,dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(bigArray1.data(), dimNb1*dimNb2*dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dimNb1,dimNb2,dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(bigArray2.data(), dimNb1*dimNb2*dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dimNb1,dimNb2,dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(res.data(), dimNb1*dimNb2*dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->computeOutputDims();
            start = std::chrono::steady_clock::now();
            myMatMul->forward();
            end = std::chrono::steady_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("+2-D / 1-D") {
        // allows to test both computation with a 1-D Tensor and broadcasting
        // NOTE(review): this section only runs forward(); no output value is
        // checked and the input tensors are never filled with data.
        // input_0
        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
        op->associateInput(0,T0);
        const std::size_t dim0 = distNbMatrix(gen);
        const std::size_t dim1 = distNbMatrix(gen) + 1;
        const std::size_t dim2 = distNbMatrix(gen);
        const std::size_t dim3 = distNbMatrix(gen);
        T0->resize({dim0,dim1,dim2,dim3});
        T0->setDataType(DataType::Float32);
        T0->setBackend("cpu");

        // input_1
        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
        op -> associateInput(1,T1);
        T1->resize({dim3});
        T1->setDataType(DataType::Float32);
        T1->setBackend("cpu");

        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->computeOutputDims();
        myMatMul->forward();
    }
}
} // namespace Aidge
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment