From ca4727840a6e479df0314dc7d29fa6f31e001ada Mon Sep 17 00:00:00 2001
From: NAUD Maxence <maxence.naud@cea.fr>
Date: Tue, 13 Feb 2024 15:01:30 +0000
Subject: [PATCH] [Upd] MatMul kernel test to handle more cases and add random
 matrix multiplication tests

---
 unit_tests/operator/Test_MatMulImpl.cpp | 367 +++++++++++++++---------
 1 file changed, 239 insertions(+), 128 deletions(-)

diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
index abb9227a..5df0528b 100644
--- a/unit_tests/operator/Test_MatMulImpl.cpp
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -10,170 +10,281 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
+#include <cstddef>  // std::size_t
+#include <cstdint>  // std::uint16_t
+#include <chrono>
+#include <iostream>
 #include <memory>
+#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MatMul.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 
-using namespace Aidge;
+namespace Aidge {
 
 TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
-    SECTION("2D Tensors") {
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
-            {
-                {0.16672266, 0.39773488},
-                {0.83746278, 0.54205710}
+    const std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
+    std::uniform_int_distribution<std::size_t> distDims(10, 100);
+    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);
+
+    // Create MatMul Operator
+    std::shared_ptr<Node> myMatMul = MatMul();
+    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
+
+    // To measure execution time of 'MatMul_Op::forward()' member function call
+    std::chrono::time_point<std::chrono::system_clock> start;
+    std::chrono::time_point<std::chrono::system_clock> end;
+    std::chrono::duration<double, std::micro> duration;
+
+    SECTION("2-D Tensors") {
+        std::size_t totalComputation = 0;
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+            // generate Tensors dimensions
+            const std::size_t dim0 = distDims(gen);
+            const std::size_t dim1 = distDims(gen);
+            const std::size_t dim2 = distDims(gen);
+            totalComputation += dim0*dim1*dim2;
+
+            // Create and populate the array with random float values
+            float bigArray1[dim0][dim1];
+            for (int i = 0; i < dim0; ++i) {
+                for (int j = 0; j < dim1; ++j) {
+                    bigArray1[i][j] = dis(gen); // Generate random float value
+                }
             }
-        });
-        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,2,2>{
-            {
-                {0.50658345, 0.04777747},
-                {0.22279310, 0.41348755}
+            float bigArray2[dim1][dim2];
+            for (int i = 0; i < dim1; ++i) {
+                for (int j = 0; j < dim2; ++j) {
+                    bigArray2[i][j] = dis(gen); // Generate random float value
+                }
             }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
-            {
-                {0.17307153, 0.17242400},
-                {0.54501140, 0.26414573}
+            float res[dim0][dim2];
+            for (int i = 0; i < dim0; ++i) {
+                for (int j = 0; j < dim2; ++j) {
+                    float sum = 0.0;
+                    for (int k = 0; k < dim1; ++k) {
+                        sum += bigArray1[i][k] * bigArray2[k][j];
+                    }
+                    res[i][j] = sum;
+                }
             }
-        });
 
-        std::shared_ptr<Node> myMatMul = MatMul();
-        auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
-        op->associateInput(0, input_1);
-        op->associateInput(1, input_2);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myMatMul->forward();
-		expectedOutput->print();
-		op->getOutput(0)->print();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
-        }
+            // Convert bigArray1 to Tensor
+            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
+            T1 -> resize({dim0,dim1});
+            T1 -> setBackend("cpu");
+            T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dim0*dim1);
+            // Convert bigArray2 to Tensor
+            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
+            T2 -> resize({dim1,dim2});
+            T2 -> setBackend("cpu");
+            T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dim1*dim2);
+            // convert res to Tensor
+            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
+            Tres -> resize({dim0,dim2});
+            Tres -> setBackend("cpu");
+            Tres -> getImpl() -> setRawPtr(&res[0][0], dim0*dim2);
 
+            op->associateInput(0, T1);
+            op->associateInput(1, T2);
+            op->setDataType(DataType::Float32);
+            op->setBackend("cpu");
+            op->computeOutputDims();
+            start = std::chrono::system_clock::now();
+            myMatMul->forward();
+            end = std::chrono::system_clock::now();
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+
+            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+        }
+        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "total time: " << duration.count() << std::endl;
     }
 
-    SECTION("3D Tensor by 2D Tensor") {
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,1,2,3> {
-            {
-                {
-					{0.53427607, 0.69181818, 0.30088913},
-         		 	{0.20866227, 0.67821276, 0.25695610}
-				}
+    SECTION("3-D Tensors") {
+        std::size_t totalComputation = 0;
+        duration = std::chrono::duration<double, std::micro>::zero();
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+            // generate Tensors dimensions
+            const std::size_t dimNb = distNbMatrix(gen);
+            const std::size_t dim0 = distDims(gen);
+            const std::size_t dim1 = distDims(gen);
+            const std::size_t dim2 = distDims(gen);
+            totalComputation += dim0*dim1*dim2*dimNb;
+
+            // Create and populate the array with random float values
+            float bigArray1[dimNb][dim0][dim1];
+            for (std::size_t n = 0; n < dimNb; ++n) {
+                for (std::size_t i = 0; i < dim0; ++i) {
+                    for (std::size_t j = 0; j < dim1; ++j) {
+                        bigArray1[n][i][j] = dis(gen); // Generate random float value
+                    }
+                }
             }
-        });
-        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,3,4>{
-            {
-				{0.03158629, 0.21031839, 0.95692378, 0.05287921},
-				{0.66182911, 0.91662365, 0.07928377, 0.86983263},
-				{0.12386280, 0.63736272, 0.15963674, 0.465079722}
-			}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,1,2,4> {
-            {
-                {
-					{0.51201022, 0.93828046, 0.61414438, 0.76995558},
-         			{0.48727912, 0.82932562, 0.29446477, 0.72047055}
-				}
+            float bigArray2[dimNb][dim1][dim2];
+            for (std::size_t n = 0; n < dimNb; ++n) {
+                for (int i = 0; i < dim1; ++i) {
+                    for (int j = 0; j < dim2; ++j) {
+                        bigArray2[n][i][j] = dis(gen); // Generate random float value
+                    }
+                }
             }
-        });
+            float res[dimNb][dim0][dim2];
+            for (std::size_t n = 0; n < dimNb; ++n) {
+                for (int i = 0; i < dim0; ++i) {
+                    for (int j = 0; j < dim2; ++j) {
+                        float sum = 0.0;
+                        for (int k = 0; k < dim1; ++k) {
+                            sum += bigArray1[n][i][k] * bigArray2[n][k][j];
+                        }
+                        res[n][i][j] = sum;
+                    }
+                }
+            }
+            // Convert bigArray1 to Tensor
+            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
+            T1 -> resize({dimNb,dim0,dim1});
+            T1 -> setBackend("cpu");
+            T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb*dim0*dim1);
+            // Convert bigArray2 to Tensor
+            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
+            T2 -> resize({dimNb,dim1,dim2});
+            T2 -> setBackend("cpu");
+            T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb*dim1*dim2);
+            // convert res to Tensor
+            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
+            Tres -> resize({dimNb,dim0,dim2});
+            Tres -> setBackend("cpu");
+            Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb*dim0*dim2);
 
-        std::shared_ptr<Node> myMatMul = MatMul();
-        auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
-        op->associateInput(0, input_1);
-        op->associateInput(1, input_2);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myMatMul->forward();
-		expectedOutput->print();
-		op->getOutput(0)->print();
+            op->associateInput(0, T1);
+            op->associateInput(1, T2);
+            op->setDataType(DataType::Float32);
+            op->setBackend("cpu");
+            op->computeOutputDims();
+            start = std::chrono::system_clock::now();
+            myMatMul->forward();
+            end = std::chrono::system_clock::now();
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
-
+        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "total time: " << duration.count() << std::endl;
     }
 
+    SECTION("4-D Tensors") {
+        std::size_t totalComputation = 0;
+        duration = std::chrono::duration<double, std::micro>::zero();
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+            // generate Tensors dimensions
+            const std::size_t dimNb1 = distNbMatrix(gen);
+            const std::size_t dimNb2 = distNbMatrix(gen);
+            const std::size_t dim0 = distDims(gen);
+            const std::size_t dim1 = distDims(gen);
+            const std::size_t dim2 = distDims(gen);
+            totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;
 
-    SECTION("4D Tensors") {
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float,1,2,4,3> {
-            {
-                {
-                    {
-                        {0.78191108, 0.79929698, 0.45473319},
-                        {0.35713595, 0.45651042, 0.40217435},
-                        {0.15343380, 0.30024308, 0.78940034},
-                        {0.53266525, 0.16684306, 0.22095734}
-                    },
-                    {
-                        {0.89860427, 0.75139457, 0.34270161},
-                        {0.53609246, 0.62800729, 0.68399906},
-                        {0.57119054, 0.96259099, 0.71879345},   
-                        {0.73910689, 0.62526798, 0.77325356}
+            // Create and populate the array with random float values
+            float bigArray1[dimNb1][dimNb2][dim0][dim1];
+            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
+                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
+                    for (std::size_t i = 0; i < dim0; ++i) {
+                        for (std::size_t j = 0; j < dim1; ++j) {
+                            bigArray1[n1][n2][i][j] = dis(gen); // Generate random float value
+                        }
                     }
                 }
             }
-        });
-        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array4D<float,1,2,3,4>{
-            {
-                {
-                    {
-                        {0.36525106, 0.47606337, 0.58315367, 0.33944082},
-                        {0.56211257, 0.64100796, 0.28841895, 0.11285251},
-                        {0.04657018, 0.21112120, 0.88220179, 0.23004770}
-                    },
-                    {
-                        {0.33073467, 0.45434207, 0.92689610, 0.02250439},
-                        {0.57044137, 0.88543379, 0.23575044, 0.57311541},
-                        {0.21721125, 0.16826588, 0.45728493, 0.81760287}
+            float bigArray2[dimNb1][dimNb2][dim1][dim2];
+            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
+                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
+                    for (std::size_t i = 0; i < dim1; ++i) {
+                        for (std::size_t j = 0; j < dim2; ++j) {
+                            bigArray2[n1][n2][i][j] = dis(gen); // Generate random float value
+                        }
                     }
                 }
-			}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,1,2,4,4> {
-            {
-                {
-					{
-                        {0.75606567, 0.98059881, 1.08767319, 0.46022552},
-                        {0.40578386, 0.54755372, 0.69473034, 0.26526415},
-                        {0.26157477, 0.43216154, 0.87248170, 0.26756462},
-                        {0.29863116, 0.40717891, 0.55367535, 0.25046772}
-                    },
-                    {
-                        {0.80026478, 1.13124883, 1.16676664, 0.73105216},
-                        {0.68411803, 0.91472197, 0.95773751, 0.93122470},
-                        {0.89414424, 1.23277485, 1.08505893, 1.15221763},
-                        {0.76908636, 1.01955295, 1.18607962, 1.00719821}
+            }
+            float res[dimNb1][dimNb2][dim0][dim2];
+            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
+                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
+                    for (int i = 0; i < dim0; ++i) {
+                        for (int j = 0; j < dim2; ++j) {
+                            float sum = 0.0;
+                            for (int k = 0; k < dim1; ++k) {
+                                sum += bigArray1[n1][n2][i][k] * bigArray2[n1][n2][k][j];
+                            }
+                            res[n1][n2][i][j] = sum;
+                        }
                     }
-				}
+                }
             }
-        });
+            // Convert bigArray1 to Tensor
+            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
+            T1 -> resize({dimNb1,dimNb2,dim0,dim1});
+            T1 -> setBackend("cpu");
+            T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb1*dimNb2*dim0*dim1);
+            // Convert bigArray2 to Tensor
+            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
+            T2 -> resize({dimNb1,dimNb2,dim1,dim2});
+            T2 -> setBackend("cpu");
+            T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb1*dimNb2*dim1*dim2);
+            // convert res to Tensor
+            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
+            Tres -> resize({dimNb1,dimNb2,dim0,dim2});
+            Tres -> setBackend("cpu");
+            Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb1*dimNb2*dim0*dim2);
+
+            op->associateInput(0, T1);
+            op->associateInput(1, T2);
+            op->setDataType(DataType::Float32);
+            op->setBackend("cpu");
+            op->computeOutputDims();
+            start = std::chrono::system_clock::now();
+            myMatMul->forward();
+            end = std::chrono::system_clock::now();
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+        }
+        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "total time: " << duration.count() << std::endl;
+    }
+
+    SECTION("+2-D / 1-D") {
+        // allows to test both computation with a 1-D Tensor and broadcasting
+        // input_0
+        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
+        op->associateInput(0,T0);
+        const std::size_t dim0 = distNbMatrix(gen);
+        const std::size_t dim1 = distNbMatrix(gen) + 1;
+        const std::size_t dim2 = distNbMatrix(gen);
+        const std::size_t dim3 = distNbMatrix(gen);
+        T0->resize({dim0,dim1,dim2,dim3});
+        T0->setDataType(DataType::Float32);
+        T0->setBackend("cpu");
+
+        // input_1
+        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
+        op -> associateInput(1,T1);
+        T1->resize({dim3});
+        T1->setDataType(DataType::Float32);
+        T1->setBackend("cpu");
 
-        std::shared_ptr<Node> myMatMul = MatMul();
-        auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
-        op->associateInput(0, input_1);
-        op->associateInput(1, input_2);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         op->computeOutputDims();
         myMatMul->forward();
-		expectedOutput->print();
-		op->getOutput(0)->print();
-
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
-        }
 
     }
-}
\ No newline at end of file
+}
+} // namespace Aidge
\ No newline at end of file
-- 
GitLab