Skip to content
Snippets Groups Projects
Commit a7693566 authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Added MatMul impl

parent 2b0d1e64
No related branches found
No related tags found
No related merge requests found
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_ARRAYFIRE_OPERATOR_MATMULIMPL_H_
#define AIDGE_ARRAYFIRE_OPERATOR_MATMULIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
 * @brief ArrayFire backend implementation of the MatMul operator.
 *
 * Instances are produced through create() and looked up through the
 * Registrar below under the "arrayfire" backend key.
 */
class MatMulImpl_arrayfire : public OperatorImpl {
public:
    /// Bind this backend implementation to a given MatMul operator node.
    MatMulImpl_arrayfire(const MatMul_Op &op) : OperatorImpl(op, "arrayfire") {}

    /// Factory hook registered with Aidge's backend registry.
    static std::unique_ptr<MatMulImpl_arrayfire> create(const MatMul_Op &op) {
        return std::make_unique<MatMulImpl_arrayfire>(op);
    }

    /// Forward pass; defined in the backend's MatMulImpl source file.
    void forward() override;
};

namespace {
// Registers the implementation for MatMul_Op under "arrayfire".
// NOTE(review): an anonymous namespace in a header yields one registrar
// object per translation unit; kept as-is to match the project's existing
// registration pattern.
static Registrar<MatMul_Op> registrarMatMulImpl_arrayfire("arrayfire", Aidge::MatMulImpl_arrayfire::create);
}
} // namespace Aidge
#endif /* AIDGE_ARRAYFIRE_OPERATOR_MATMULIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/arrayfire/data/TensorImpl.hpp"
#include "aidge/backend/arrayfire/operator/MatMulImpl.hpp"
// Forward pass of MatMul on the arrayfire backend: output = input0 x input1
// (with 1-D inputs promoted to matrices, NumPy-matmul style).
//
// NOTE(review): the operand order passed to af::matmul is deliberately
// reversed (input1 first). Presumably this is because af::array storage is
// column-major while Aidge tensors are row-major: a row-major buffer viewed
// column-major appears transposed, and (A.B)^T = B^T.A^T, so computing
// matmul(B_cm, A_cm) yields A.B in the row-major view. TODO confirm against
// the backend's TensorImpl layout.
void Aidge::MatMulImpl_arrayfire::forward()
{
// Recover the concrete operator to access its typed inputs/outputs.
const auto& op_ = dynamic_cast<const MatMul_Op&>(mOp);
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MatMul Operator.");
AIDGE_ASSERT(op_.getInput(1), "missing input #1 in MatMul Operator.");
// Grab the raw af::array handles held by the arrayfire tensor impls.
auto& output = std::dynamic_pointer_cast<TensorImpl_arrayfire_>(op_.getOutput(0)->getImpl())->data();
const auto& input0 = std::dynamic_pointer_cast<TensorImpl_arrayfire_>(op_.getInput(0)->getImpl())->data();
const auto& input1 = std::dynamic_pointer_cast<TensorImpl_arrayfire_>(op_.getInput(1)->getImpl())->data();
if (input0.numdims() == 1) {
// 1-D left operand: promote it to a column (dims(0) x 1) for the product,
// then flatten the result back to 1-D.
output = af::matmul(input1, af::moddims(input0, af::dim4(input0.dims(0), 1)));
output = af::moddims(output, af::dim4(output.elements()));
}
else if (input1.numdims() == 1) {
// 1-D right operand: promote it to a row (1 x dims(0)), then flatten.
output = af::matmul(af::moddims(input1, af::dim4(1, input1.dims(0))), input0);
output = af::moddims(output, af::dim4(output.elements()));
}
else {
// General case; af::matmul batches over trailing dims for >2-D arrays.
output = af::matmul(input1, input0);
}
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/arrayfire/operator/MatMulImpl.hpp"
namespace Aidge {
// Checks the arrayfire MatMul forward pass against a naive CPU reference
// for 2-D, batched 3-D and 4-D inputs, plus a broadcasting 4-D x 1-D case.
TEST_CASE("[arrayfire/operator] MatMul(forward)", "[MatMul][arrayfire]") {
    const std::uint16_t NBTRIALS = 10;

    // Random generators: float values in [0, 1), matrix dims in [10, 100],
    // batch sizes in [1, 5].
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 1.0);
    std::uniform_int_distribution<std::size_t> distDims(10, 100);
    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);

    // Operator under test.
    std::shared_ptr<Node> myMatMul = MatMul();
    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul->getOperator());

    // To measure execution time of 'MatMul_Op::forward()'. The accumulator is
    // explicitly zero-initialized: std::chrono::duration's defaulted
    // constructor leaves the rep indeterminate, and the original read it
    // uninitialized ('+=') in the first SECTION.
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration =
        std::chrono::duration<double, std::micro>::zero();

    SECTION("2-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // (dim0 x dim1) * (dim1 x dim2) -> (dim0 x dim2)
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0 * dim1 * dim2;

            // RAII buffers (the original leaked its raw new[] allocations).
            std::vector<float> bigArray1(dim0 * dim1);
            for (float& v : bigArray1) { v = dis(gen); }
            std::vector<float> bigArray2(dim1 * dim2);
            for (float& v : bigArray2) { v = dis(gen); }

            // Naive reference matrix product.
            std::vector<float> res(dim0 * dim2);
            for (std::size_t i = 0; i < dim0; ++i) {
                for (std::size_t j = 0; j < dim2; ++j) {
                    float sum = 0.0f;
                    for (std::size_t k = 0; k < dim1; ++k) {
                        sum += bigArray1[i*dim1 + k] * bigArray2[k*dim2 + j];
                    }
                    res[i*dim2 + j] = sum;
                }
            }

            // Upload the inputs and the expected output to the backend.
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1->resize({dim0, dim1});
            T1->setBackend("arrayfire");
            T1->getImpl()->copyFromHost(bigArray1.data(), dim0 * dim1);

            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2->resize({dim1, dim2});
            T2->setBackend("arrayfire");
            T2->getImpl()->copyFromHost(bigArray2.data(), dim1 * dim2);

            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dim0, dim2});
            Tres->setBackend("arrayfire");
            Tres->getImpl()->copyFromHost(res.data(), dim0 * dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("arrayfire");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            // Accumulate directly: casting to whole microseconds first (as the
            // original did) truncates sub-microsecond runs.
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("3-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // Batch of dimNb independent (dim0 x dim1) * (dim1 x dim2) products.
            const std::size_t dimNb = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0 * dim1 * dim2 * dimNb;

            std::vector<float> bigArray1(dimNb * dim0 * dim1);
            for (float& v : bigArray1) { v = dis(gen); }
            std::vector<float> bigArray2(dimNb * dim1 * dim2);
            for (float& v : bigArray2) { v = dis(gen); }

            // Naive batched reference product.
            std::vector<float> res(dimNb * dim0 * dim2);
            for (std::size_t n = 0; n < dimNb; ++n) {
                for (std::size_t i = 0; i < dim0; ++i) {
                    for (std::size_t j = 0; j < dim2; ++j) {
                        float sum = 0.0f;
                        for (std::size_t k = 0; k < dim1; ++k) {
                            sum += bigArray1[n*dim0*dim1 + i*dim1 + k] * bigArray2[n*dim2*dim1 + k*dim2 + j];
                        }
                        res[n*dim0*dim2 + i*dim2 + j] = sum;
                    }
                }
            }

            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1->resize({dimNb, dim0, dim1});
            T1->setBackend("arrayfire");
            T1->getImpl()->copyFromHost(bigArray1.data(), dimNb * dim0 * dim1);

            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2->resize({dimNb, dim1, dim2});
            T2->setBackend("arrayfire");
            T2->getImpl()->copyFromHost(bigArray2.data(), dimNb * dim1 * dim2);

            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dimNb, dim0, dim2});
            Tres->setBackend("arrayfire");
            Tres->getImpl()->copyFromHost(res.data(), dimNb * dim0 * dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("arrayfire");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("4-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // Two batch dimensions: dimNb1 x dimNb2 independent products.
            const std::size_t dimNb1 = distNbMatrix(gen);
            const std::size_t dimNb2 = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0 * dim1 * dim2 * dimNb1 * dimNb2;

            std::vector<float> bigArray1(dimNb1 * dimNb2 * dim0 * dim1);
            for (float& v : bigArray1) { v = dis(gen); }
            std::vector<float> bigArray2(dimNb1 * dimNb2 * dim1 * dim2);
            for (float& v : bigArray2) { v = dis(gen); }

            // Naive doubly-batched reference product.
            std::vector<float> res(dimNb1 * dimNb2 * dim0 * dim2);
            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                    for (std::size_t i = 0; i < dim0; ++i) {
                        for (std::size_t j = 0; j < dim2; ++j) {
                            float sum = 0.0f;
                            for (std::size_t k = 0; k < dim1; ++k) {
                                sum += bigArray1[n1*dimNb2*dim0*dim1 + n2*dim0*dim1 + i*dim1 + k]
                                     * bigArray2[n1*dimNb2*dim1*dim2 + n2*dim1*dim2 + k*dim2 + j];
                            }
                            res[n1*dimNb2*dim0*dim2 + n2*dim0*dim2 + i*dim2 + j] = sum;
                        }
                    }
                }
            }

            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1->resize({dimNb1, dimNb2, dim0, dim1});
            T1->setBackend("arrayfire");
            T1->getImpl()->copyFromHost(bigArray1.data(), dimNb1 * dimNb2 * dim0 * dim1);

            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2->resize({dimNb1, dimNb2, dim1, dim2});
            T2->setBackend("arrayfire");
            T2->getImpl()->copyFromHost(bigArray2.data(), dimNb1 * dimNb2 * dim1 * dim2);

            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dimNb1, dimNb2, dim0, dim2});
            Tres->setBackend("arrayfire");
            Tres->getImpl()->copyFromHost(res.data(), dimNb1 * dimNb2 * dim0 * dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("arrayfire");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("+2-D / 1-D") {
        // Exercises both the 1-D operand path and broadcasting; only checks
        // that forward() runs (inputs are all-zero, no reference values).
        // input_0
        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
        op->associateInput(0, T0);
        const std::size_t dim0 = distNbMatrix(gen);
        const std::size_t dim1 = distNbMatrix(gen) + 1;
        const std::size_t dim2 = distNbMatrix(gen);
        const std::size_t dim3 = distNbMatrix(gen);
        T0->resize({dim0, dim1, dim2, dim3});
        T0->setDataType(DataType::Float32);
        T0->setBackend("arrayfire");
        T0->zeros();
        // input_1: a 1-D tensor matching input_0's last dimension.
        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
        op->associateInput(1, T1);
        T1->resize({dim3});
        T1->setDataType(DataType::Float32);
        T1->setBackend("arrayfire");
        T1->zeros();

        op->setDataType(DataType::Float32);
        op->setBackend("arrayfire");
        op->forwardDims();
        myMatMul->forward();
    }
}
} // namespace Aidge
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment