From ca4727840a6e479df0314dc7d29fa6f31e001ada Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Tue, 13 Feb 2024 15:01:30 +0000 Subject: [PATCH] [Upd] MatMul kernel test to handle more cases and add random matrices multiplications tests --- unit_tests/operator/Test_MatMulImpl.cpp | 367 +++++++++++++++--------- 1 file changed, 239 insertions(+), 128 deletions(-) diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp index abb9227a..5df0528b 100644 --- a/unit_tests/operator/Test_MatMulImpl.cpp +++ b/unit_tests/operator/Test_MatMulImpl.cpp @@ -10,170 +10,281 @@ ********************************************************************************/ #include <catch2/catch_test_macros.hpp> +#include <cstddef> // std::size_t +#include <cstdint> // std::uint16_t +#include <chrono> +#include <iostream> #include <memory> +#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution #include "aidge/data/Tensor.hpp" #include "aidge/operator/MatMul.hpp" +#include "aidge/operator/OperatorTensor.hpp" +#include "aidge/utils/TensorUtils.hpp" #include "aidge/backend/cpu/operator/MatMulImpl.hpp" -using namespace Aidge; +namespace Aidge { TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { - SECTION("2D Tensors") { - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { - { - {0.16672266, 0.39773488}, - {0.83746278, 0.54205710} + const std::uint16_t NBTRIALS = 10; + // Create a random number generator + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1 + std::uniform_int_distribution<std::size_t> distDims(10, 100); + std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5); + + // Create MatMul Operator + std::shared_ptr<Node> myMatMul = MatMul(); + auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); + + // To measure execution time of 'MatMul_Op::forward()' member function call + std::chrono::time_point<std::chrono::system_clock> start; + std::chrono::time_point<std::chrono::system_clock> end; + std::chrono::duration<double, std::micro> duration; + + SECTION("2-D Tensors") { + std::size_t totalComputation = 0; + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate Tensors dimensions + const std::size_t dim0 = distDims(gen); + const std::size_t dim1 = distDims(gen); + const std::size_t dim2 = distDims(gen); + totalComputation += dim0*dim1*dim2; + + // Create and populate the array with random float values + float bigArray1[dim0][dim1]; + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + bigArray1[i][j] = dis(gen); // Generate random float value + } } - }); - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ - { - {0.50658345, 0.04777747}, - {0.22279310, 0.41348755} + float bigArray2[dim1][dim2]; + for (int i = 0; i < dim1; ++i) { + for (int j = 0; j < dim2; ++j) { + bigArray2[i][j] = dis(gen); // Generate random float value + } } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { - { - {0.17307153, 0.17242400}, - {0.54501140, 0.26414573} + float res[dim0][dim2]; + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim2; ++j) { + float sum = 0.0; + for (int k = 0; k < dim1; ++k) { + sum += bigArray1[i][k] * bigArray2[k][j]; + } + res[i][j] = sum; + } } - }); - std::shared_ptr<Node> myMatMul = MatMul(); - auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); - op->associateInput(0, input_1); - op->associateInput(1, input_2); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - op->computeOutputDims(); - myMatMul->forward(); - expectedOutput->print(); - op->getOutput(0)->print(); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } + // Convert bigArray1 to Tensor + std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32); + T1 -> resize({dim0,dim1}); + T1 -> setBackend("cpu"); + T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dim0*dim1); + // Convert bigArray2 to Tensor + std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32); + T2 -> resize({dim1,dim2}); + T2 -> setBackend("cpu"); + T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dim1*dim2); + // convert res to Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); + Tres -> resize({dim0,dim2}); + Tres -> setBackend("cpu"); + Tres -> getImpl() -> setRawPtr(&res[0][0], dim0*dim2); + op->associateInput(0, T1); + op->associateInput(1, T2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + myMatMul->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + } + std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "total time: " << duration.count() << std::endl; } - SECTION("3D Tensor by 2D Tensor") { - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,1,2,3> { - { - { - {0.53427607, 0.69181818, 0.30088913}, - {0.20866227, 0.67821276, 0.25695610} - } + SECTION("3-D Tensors") { + std::size_t totalComputation = 0; + duration = std::chrono::duration<double, std::micro>::zero(); + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate Tensors dimensions + const std::size_t dimNb = distNbMatrix(gen); + const std::size_t dim0 = distDims(gen); + const std::size_t dim1 = distDims(gen); + const std::size_t dim2 = distDims(gen); + totalComputation += dim0*dim1*dim2*dimNb; + + // Create and populate the array with random float values + float bigArray1[dimNb][dim0][dim1]; + for (std::size_t n = 0; n < dimNb; ++n) { + for (std::size_t i = 0; i < dim0; ++i) { + for (std::size_t j = 0; j < dim1; ++j) { + bigArray1[n][i][j] = dis(gen); // Generate random float value + } + } } - }); - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,3,4>{ - { - {0.03158629, 0.21031839, 0.95692378, 0.05287921}, - {0.66182911, 0.91662365, 0.07928377, 0.86983263}, - {0.12386280, 0.63736272, 0.15963674, 0.465079722} - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,1,2,4> { - { - { - {0.51201022, 0.93828046, 0.61414438, 0.76995558}, - {0.48727912, 0.82932562, 0.29446477, 0.72047055} - } + float bigArray2[dimNb][dim1][dim2]; + for (std::size_t n = 0; n < dimNb; ++n) { + for (int i = 0; i < dim1; ++i) { + for (int j = 0; j < dim2; ++j) { + bigArray2[n][i][j] = dis(gen); // Generate random float value + } + } } - }); + float res[dimNb][dim0][dim2]; + for (std::size_t n = 0; n < dimNb; ++n) { + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim2; ++j) { + float sum = 0.0; + for (int k = 0; k < dim1; ++k) { + sum += bigArray1[n][i][k] * bigArray2[n][k][j]; + } + res[n][i][j] = sum; + } + } + } + // Convert bigArray1 to Tensor + std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32); + T1 -> resize({dimNb,dim0,dim1}); + T1 -> setBackend("cpu"); + T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb*dim0*dim1); + // Convert bigArray2 to Tensor + std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32); + T2 -> resize({dimNb,dim1,dim2}); + T2 -> setBackend("cpu"); + T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb*dim1*dim2); + // convert res to Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); + Tres -> resize({dimNb,dim0,dim2}); + Tres -> setBackend("cpu"); + Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb*dim0*dim2); - std::shared_ptr<Node> myMatMul = MatMul(); - auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); - op->associateInput(0, input_1); - op->associateInput(1, input_2); - op->setDataType(DataType::Float32); - op->setBackend("cpu"); - op->computeOutputDims(); - myMatMul->forward(); - expectedOutput->print(); - op->getOutput(0)->print(); + op->associateInput(0, T1); + op->associateInput(1, T2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + myMatMul->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); } - + std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "total time: " << duration.count() << std::endl; } + SECTION("4-D Tensors") { + std::size_t totalComputation = 0; + duration = std::chrono::duration<double, std::micro>::zero(); + for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { + // generate Tensors dimensions + const std::size_t dimNb1 = distNbMatrix(gen); + const std::size_t dimNb2 = distNbMatrix(gen); + const std::size_t dim0 = distDims(gen); + const std::size_t dim1 = distDims(gen); + const std::size_t dim2 = distDims(gen); + totalComputation += dim0*dim1*dim2*dimNb1*dimNb2; - SECTION("4D Tensors") { - std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float,1,2,4,3> { - { - { - { - {0.78191108, 0.79929698, 0.45473319}, - {0.35713595, 0.45651042, 0.40217435}, - {0.15343380, 0.30024308, 0.78940034}, - {0.53266525, 0.16684306, 0.22095734} - }, - { - {0.89860427, 0.75139457, 0.34270161}, - {0.53609246, 0.62800729, 0.68399906}, - {0.57119054, 0.96259099, 0.71879345}, - {0.73910689, 0.62526798, 0.77325356} + // Create and populate the array with random float values + float bigArray1[dimNb1][dimNb2][dim0][dim1]; + for (std::size_t n1 = 0; n1 < dimNb1; ++n1) { + for (std::size_t n2 = 0; n2 < dimNb2; ++n2) { + for (std::size_t i = 0; i < dim0; ++i) { + for (std::size_t j = 0; j < dim1; ++j) { + bigArray1[n1][n2][i][j] = dis(gen); // Generate random float value + } } } } - }); - std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array4D<float,1,2,3,4>{ - { - { - { - {0.36525106, 0.47606337, 0.58315367, 0.33944082}, - {0.56211257, 0.64100796, 0.28841895, 0.11285251}, - {0.04657018, 0.21112120, 0.88220179, 0.23004770} - }, - { - {0.33073467, 0.45434207, 0.92689610, 0.02250439}, - {0.57044137, 0.88543379, 0.23575044, 0.57311541}, - {0.21721125, 0.16826588, 0.45728493, 0.81760287} + float bigArray2[dimNb1][dimNb2][dim1][dim2]; + for (std::size_t n1 = 0; n1 < dimNb1; ++n1) { + for (std::size_t n2 = 0; n2 < dimNb2; ++n2) { + for (std::size_t i = 0; i < dim1; ++i) { + for (std::size_t j = 0; j < dim2; ++j) { + bigArray2[n1][n2][i][j] = dis(gen); // Generate random float value + } } } - } - }); - std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,1,2,4,4> { - { - { - { - {0.75606567, 0.98059881, 1.08767319, 0.46022552}, - {0.40578386, 0.54755372, 0.69473034, 0.26526415}, - {0.26157477, 0.43216154, 0.87248170, 0.26756462}, - {0.29863116, 0.40717891, 0.55367535, 0.25046772} - }, - { - {0.80026478, 1.13124883, 1.16676664, 0.73105216}, - {0.68411803, 0.91472197, 0.95773751, 0.93122470}, - {0.89414424, 1.23277485, 1.08505893, 1.15221763}, - {0.76908636, 1.01955295, 1.18607962, 1.00719821} + } + float res[dimNb1][dimNb2][dim0][dim2]; + for (std::size_t n1 = 0; n1 < dimNb1; ++n1) { + for (std::size_t n2 = 0; n2 < dimNb2; ++n2) { + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim2; ++j) { + float sum = 0.0; + for (int k = 0; k < dim1; ++k) { + sum += bigArray1[n1][n2][i][k] * bigArray2[n1][n2][k][j]; + } + res[n1][n2][i][j] = sum; + } } - } + } } - }); + // Convert bigArray1 to Tensor + std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32); + T1 -> resize({dimNb1,dimNb2,dim0,dim1}); + T1 -> setBackend("cpu"); + T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb1*dimNb2*dim0*dim1); + // Convert bigArray2 to Tensor + std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32); + T2 -> resize({dimNb1,dimNb2,dim1,dim2}); + T2 -> setBackend("cpu"); + T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb1*dimNb2*dim1*dim2); + // convert res to Tensor + std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32); + Tres -> resize({dimNb1,dimNb2,dim0,dim2}); + Tres -> setBackend("cpu"); + Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb1*dimNb2*dim0*dim2); + + op->associateInput(0, T1); + op->associateInput(1, T2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->computeOutputDims(); + start = std::chrono::system_clock::now(); + myMatMul->forward(); + end = std::chrono::system_clock::now(); + duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + } + std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl; + std::cout << "total time: " << duration.count() << std::endl; + } + + SECTION("+2-D / 1-D") { + // allows to test both computation with a 1-D Tensor and broadcasting + // input_0 + std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>(); + op->associateInput(0,T0); + const std::size_t dim0 = distNbMatrix(gen); + const std::size_t dim1 = distNbMatrix(gen) + 1; + const std::size_t dim2 = distNbMatrix(gen); + const std::size_t dim3 = distNbMatrix(gen); + T0->resize({dim0,dim1,dim2,dim3}); + T0->setDataType(DataType::Float32); + T0->setBackend("cpu"); + + // input_1 + std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(); + op -> associateInput(1,T1); + T1->resize({dim3}); + T1->setDataType(DataType::Float32); + T1->setBackend("cpu"); - std::shared_ptr<Node> myMatMul = MatMul(); - auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); - op->associateInput(0, input_1); - op->associateInput(1, input_2); op->setDataType(DataType::Float32); op->setBackend("cpu"); op->computeOutputDims(); myMatMul->forward(); - expectedOutput->print(); - op->getOutput(0)->print(); - - float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); - float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); - for (std::size_t i = 0; i< expectedOutput->size(); ++i) { - REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); - } } -} \ No newline at end of file +} +} // namespace Aidge \ No newline at end of file -- GitLab