From fc36e10e5ea35c7c67623bf875b49e6f2752fe89 Mon Sep 17 00:00:00 2001
From: hrouis <houssemeddine.rouis92@gmail.com>
Date: Wed, 6 Dec 2023 16:57:51 +0100
Subject: [PATCH] remove matmul attrs and update kernel

---
 .../aidge/backend/cpu/operator/MatMulImpl.hpp |   8 +-
 .../operator/MatMulImpl_forward_kernels.hpp   |  54 +++--
 src/operator/MatMulImpl.cpp                   |  18 +-
 unit_tests/operator/Test_MatMulImpl.cpp       | 192 +++++++++++-------
 4 files changed, 155 insertions(+), 117 deletions(-)

diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp
index e8654c6e..ef517065 100644
--- a/include/aidge/backend/cpu/operator/MatMulImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp
@@ -27,12 +27,12 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class MatMulImplForward_cpu
-    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                               const void *, const void *, void *)> {};
 class MatMulImplBackward_cpu
-    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                               const void *, const void *, void *)> {};

 class MatMulImpl_cpu : public OperatorImpl {
diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp
index bc52779e..92bc5a61 100644
--- a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp
@@ -19,38 +19,54 @@
 namespace Aidge {
-template <class I, class W, class O>
-void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* weights_, void* output_) {
+template <class I, class O>
+void MatMulImpl_cpu_forward_kernel(const std::vector<DimSize_t>& input1Dims, const std::vector<DimSize_t>& input2Dims,
+                                   const void* input1_, const void* input2_, void* output_) {
     // FIXME: missing MatMul parameters as arguments
-    const I* input = static_cast<const I*>(input_);
-    const W* weights = static_cast<const W*>(weights_);
+    const I* input1 = static_cast<const I*>(input1_);
+    const I* input2 = static_cast<const I*>(input2_);
     O* output = static_cast<O*>(output_);
+    size_t secondToLastIdx1 = input1Dims.size() > 1 ? input1Dims.size() - 2 : 0;
+    size_t secondToLastIdx2 = input2Dims.size() > 1 ? input2Dims.size() - 2 : 0;
+    // Checking if matrix dimensions are compatible for multiplication
+    assert(input1Dims.back() == input2Dims[secondToLastIdx2] && "Matrix dimensions are not compatible for multiplication");
+    // Extracting dimensions
+    size_t rows1 = 1, cols1 = 1, cols2 = 1;
-    std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0));
+    // For input1
+    for (size_t i = 0; i < input1Dims.size() - 1; ++i) {
+        rows1 *= input1Dims[i];
+    }
+    cols1 = input1Dims.back();
+
+    // For input2
+    for (size_t i = 1; i < input2Dims.size(); ++i) {
+        cols2 *= input2Dims[i];
+    }
-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-                                                        input + (batch + 1)*oneInputSize,
-                                                        weights + out*oneInputSize,
-                                                        output[out + batch*std::get<0>(attrs)]);
+    // Multiplication
+    for (size_t i = 0; i < rows1; ++i) {
+        for (size_t j = 0; j < cols2; ++j) {
+            O sum = O(0); // accumulate in the output type so the double and int kernels stay exact
+            for (size_t k = 0; k < cols1; ++k) {
+                sum += input1[i * cols1 + k] * input2[k * cols2 + j];
+            }
+            output[i * cols2 + j] = sum;
         }
     }
 }
-
 namespace {
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>);
+        {DataType::Float32, DataType::Float32},
+        Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>);
+        {DataType::Int32, DataType::Int32},
+        Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>);
+        {DataType::Float64, DataType::Float64},
+        Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
 } // namespace
 } // namespace Aidge
diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp
index f02effb3..c1c3ccb0 100644
--- a/src/operator/MatMulImpl.cpp
+++ b/src/operator/MatMulImpl.cpp
@@ -30,24 +30,12 @@ void Aidge::MatMulImpl_cpu::forward()
     // Find the correct kernel type
     auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
         {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-    // Call kernel
-    // if (mOp.getInput(0)->nbDims() == 4) {
-    //     kernelFunc(
-    //         mOp.getStaticAttributes(),
-    //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->template dims<4>(),
-    //         mOp.getInput(0))->getImpl()->rawPtr(),
-    //         mOp.mInputs[1]->getImpl()->rawPtr(),
-    //         mOp.mInputs[2]->getImpl()->rawPtr(),
-    //         getCPUPtr(mOp.getRawOutput(0));
-    // }
-    // else
+
     kernelFunc(
-        dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size() / std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawInput(1)),
         getCPUPtr(mOp.getRawOutput(0)));
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp 
b/unit_tests/operator/Test_MatMulImpl.cpp index 1edb915f..ae10df27 100644 --- a/unit_tests/operator/Test_MatMulImpl.cpp +++ b/unit_tests/operator/Test_MatMulImpl.cpp @@ -20,92 +20,126 @@ using namespace Aidge; TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") { - // Test MatMul forward with batch size = 2 and feature size = 75 - std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{ - {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}}); - std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{ - {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}}); - - std::shared_ptr<Node> myMatMul = MatMul(75, 5, "mymatmul"); - auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); - op->associateInput(1, myWeights); - - SECTION("2D input") { - std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{ - {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, - 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74}, - {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, - 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, - 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}}); - op->associateInput(0, myInput); - op->setDataType(DataType::Int32); + SECTION("2D Tensors") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.16672266, 0.39773488}, + {0.83746278, 0.54205710} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{ + { + {0.50658345, 0.04777747}, + {0.22279310, 0.41348755} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {0.17307153, 0.17242400}, + {0.54501140, 0.26414573} + } + }); + + std::shared_ptr<Node> myMatMul = MatMul(); + auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); + 
op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Float32); op->setBackend("cpu"); op->computeOutputDims(); myMatMul->forward(); - REQUIRE(*(op->getOutput(0)) == *myOutput); + expectedOutput->print(); + op->getOutput(0)->print(); + + float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + } - SECTION("4D input") { - std::shared_ptr<Tensor> myInput = - std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4}, - {5, 6, 7, 8, 9}, - {10, 11, 12, 13, 14}, - {15, 16, 17, 18, 19}, - {20, 21, 22, 23, 24}}, - {{25, 26, 27, 28, 29}, - {30, 31, 32, 33, 34}, - {35, 36, 37, 38, 39}, - {40, 41, 42, 43, 44}, - {45, 46, 47, 48, 49}}, - {{50, 51, 52, 53, 54}, - {55, 56, 57, 58, 59}, - {60, 61, 62, 63, 64}, - {65, 66, 67, 68, 69}, - {70, 71, 72, 73, 74}}}, - {{{75, 76, 77, 78, 79}, - {80, 81, 82, 83, 84}, - {85, 86, 87, 88, 89}, - {90, 91, 92, 93, 94}, - {95, 96, 97, 98, 99}}, - {{100, 101, 102, 103, 104}, - {105, 106, 107, 108, 109}, - {110, 111, 112, 113, 114}, - {115, 116, 117, 118, 119}, - {120, 121, 122, 123, 124}}, - {{125, 126, 127, 128, 129}, - {130, 131, 132, 133, 134}, - {135, 136, 137, 138, 139}, - {140, 141, 142, 143, 144}, - {145, 146, 147, 148, 149}}}}}); - op->associateInput(0, myInput); - op->setDataType(DataType::Int32); + + + SECTION("3D Tensor by 1D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> { + { + {{0.82786506, 0.19047028, 0.62954658}, + {0.63160968, 0.12468684, 0.49015969}}, + + {{0.49215794, 0.42231840, 0.02699018}, + {0.66403216, 0.94622904, 0.42048711}} + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<float,3>{ + {0.82458717, 0.88598752, 0.78737932} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { + { + {1.34709311, 1.01722980}, + {0.80124742, 1.71698236} + } + }); + + std::shared_ptr<Node> myMatMul = MatMul(); + auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Float32); op->setBackend("cpu"); op->computeOutputDims(); myMatMul->forward(); - REQUIRE(*(op->getOutput(0)) == *myOutput); + expectedOutput->print(); + op->getOutput(0)->print(); + + float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + } - // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl; + SECTION("3D Tensor by 2D Tensor") { + std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,1,2,3> { + { + { + {0.53427607, 0.69181818, 0.30088913}, + {0.20866227, 0.67821276, 0.25695610} + } + } + }); + std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array2D<float,3,4>{ + { + {0.03158629, 0.21031839, 0.95692378, 0.05287921}, + {0.66182911, 0.91662365, 0.07928377, 0.86983263}, + {0.12386280, 0.63736272, 0.15963674, 0.465079722} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,1,2,4> { + { + { + {0.51201022, 0.93828046, 0.61414438, 0.76995558}, + {0.48727912, 0.82932562, 
0.29446477, 0.72047055} + } + } + }); + + std::shared_ptr<Node> myMatMul = MatMul(); + auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator()); + op->associateInput(0, input_1); + op->associateInput(1, input_2); + op->setDataType(DataType::Float32); + op->setBackend("cpu"); + op->computeOutputDims(); + myMatMul->forward(); + expectedOutput->print(); + op->getOutput(0)->print(); + + float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr()); + float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr()); + for (std::size_t i = 0; i< expectedOutput->size(); ++i) { + REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); + } + + } } \ No newline at end of file -- GitLab
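
For reference, here is a minimal standalone sketch of the shape-driven kernel this patch introduces. It is not part of the patch: the file and function names (matmul_sketch.cpp, naive_matmul) are invented for illustration only. It mirrors the flattening strategy of MatMulImpl_cpu_forward_kernel (all leading dimensions of the first input collapse into rows, all trailing dimensions of the second input after the first collapse into columns) and replays the "2D Tensors" unit-test case so the expected values can be checked outside Aidge.

// matmul_sketch.cpp -- illustrative only, not part of the Aidge API.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

template <class I, class O>
void naive_matmul(const std::vector<std::size_t>& dims1,
                  const std::vector<std::size_t>& dims2,
                  const I* in1, const I* in2, O* out) {
    // Inner dimension of input1 must match the second-to-last (or only) dimension of input2.
    const std::size_t secondToLast2 = dims2.size() > 1 ? dims2.size() - 2 : 0;
    assert(dims1.back() == dims2[secondToLast2] && "incompatible shapes");

    // Collapse leading dims of input1 into "rows" and trailing dims of input2 into "cols".
    std::size_t rows = 1;
    for (std::size_t i = 0; i + 1 < dims1.size(); ++i) rows *= dims1[i];
    const std::size_t inner = dims1.back();
    std::size_t cols = 1;
    for (std::size_t i = 1; i < dims2.size(); ++i) cols *= dims2[i];

    // Naive triple loop, accumulating in the output type.
    for (std::size_t i = 0; i < rows; ++i) {
        for (std::size_t j = 0; j < cols; ++j) {
            O sum = O(0);
            for (std::size_t k = 0; k < inner; ++k) {
                sum += in1[i * inner + k] * in2[k * cols + j];
            }
            out[i * cols + j] = sum;
        }
    }
}

int main() {
    // The "2D Tensors" case from Test_MatMulImpl.cpp, stored row-major.
    std::vector<float> a = {0.16672266f, 0.39773488f, 0.83746278f, 0.54205710f};
    std::vector<float> b = {0.50658345f, 0.04777747f, 0.22279310f, 0.41348755f};
    std::vector<float> c(4, 0.f);
    naive_matmul<float, float>({2, 2}, {2, 2}, a.data(), b.data(), c.data());
    for (float v : c) std::cout << v << ' ';
    // ~0.173072 0.172424 0.545011 0.264146, matching the expected tensor above.
    std::cout << '\n';
}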
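
The registrar key also shrinks from three DataTypes to two, because both inputs now share a single element type I. As a simplified, self-contained illustration of that lookup-by-type-pair dispatch (a plain std::map stand-in, not the actual Aidge Registrable/Registrar machinery; dummy_kernel is a placeholder):

// registry_sketch.cpp -- simplified illustration only, NOT the Aidge Registrable API.
#include <cstddef>
#include <iostream>
#include <map>
#include <stdexcept>
#include <utility>
#include <vector>

enum class DataType { Float32, Float64, Int32 };

// Same signature as the new forward kernel: two shape vectors, two inputs, one output.
using MatMulKernel = void (*)(const std::vector<std::size_t>&,
                              const std::vector<std::size_t>&,
                              const void*, const void*, void*);

// The key shrank from {in0, in1, out} to {in, out}: both inputs share one type.
std::map<std::pair<DataType, DataType>, MatMulKernel>& registry() {
    static std::map<std::pair<DataType, DataType>, MatMulKernel> r;
    return r;
}

template <class I, class O>
void dummy_kernel(const std::vector<std::size_t>&, const std::vector<std::size_t>&,
                  const void*, const void*, void*) {
    std::cout << "dispatched a kernel with " << sizeof(I) << "-byte inputs\n";
}

int main() {
    registry()[{DataType::Float32, DataType::Float32}] = dummy_kernel<float, float>;
    registry()[{DataType::Float64, DataType::Float64}] = dummy_kernel<double, double>;

    // Lookup by (input type, output type), as MatMulImpl_cpu::forward() now does.
    auto it = registry().find({DataType::Float64, DataType::Float64});
    if (it == registry().end()) throw std::runtime_error("no MatMul kernel registered");
    it->second({2, 3}, {3, 4}, nullptr, nullptr, nullptr);  // prints the 8-byte message
}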