Skip to content
Snippets Groups Projects
Commit a7693566 authored by Olivier BICHLER's avatar Olivier BICHLER
Browse files

Added MatMul impl

parent 2b0d1e64
No related branches found
No related tags found
No related merge requests found
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_ARRAYFIRE_OPERATOR_MATMULIMPL_H_
#define AIDGE_ARRAYFIRE_OPERATOR_MATMULIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
 * @brief ArrayFire backend implementation of the MatMul operator.
 *
 * Instances are produced through create() and looked up through the
 * Registrar below under the "arrayfire" backend key.
 */
class MatMulImpl_arrayfire : public OperatorImpl {
public:
    /// Bind this backend implementation to a given MatMul operator node.
    MatMulImpl_arrayfire(const MatMul_Op &op) : OperatorImpl(op, "arrayfire") {}

    /// Factory hook registered with Aidge's backend registry.
    static std::unique_ptr<MatMulImpl_arrayfire> create(const MatMul_Op &op) {
        return std::make_unique<MatMulImpl_arrayfire>(op);
    }

    /// Forward pass; defined in the backend's MatMulImpl source file.
    void forward() override;
};

namespace {
// Registers the implementation for MatMul_Op under "arrayfire".
// NOTE(review): an anonymous namespace in a header yields one registrar
// object per translation unit; kept as-is to match the project's existing
// registration pattern.
static Registrar<MatMul_Op> registrarMatMulImpl_arrayfire("arrayfire", Aidge::MatMulImpl_arrayfire::create);
}
} // namespace Aidge
#endif /* AIDGE_ARRAYFIRE_OPERATOR_MATMULIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/arrayfire/data/TensorImpl.hpp"
#include "aidge/backend/arrayfire/operator/MatMulImpl.hpp"
// Forward pass of MatMul on the arrayfire backend: output = input0 x input1
// (with 1-D inputs promoted to matrices, NumPy-matmul style).
//
// NOTE(review): the operand order passed to af::matmul is deliberately
// reversed (input1 first). Presumably this is because af::array storage is
// column-major while Aidge tensors are row-major: a row-major buffer viewed
// column-major appears transposed, and (A.B)^T = B^T.A^T, so computing
// matmul(B_cm, A_cm) yields A.B in the row-major view. TODO confirm against
// the backend's TensorImpl layout.
void Aidge::MatMulImpl_arrayfire::forward()
{
// Recover the concrete operator to access its typed inputs/outputs.
const auto& op_ = dynamic_cast<const MatMul_Op&>(mOp);
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MatMul Operator.");
AIDGE_ASSERT(op_.getInput(1), "missing input #1 in MatMul Operator.");
// Grab the raw af::array handles held by the arrayfire tensor impls.
auto& output = std::dynamic_pointer_cast<TensorImpl_arrayfire_>(op_.getOutput(0)->getImpl())->data();
const auto& input0 = std::dynamic_pointer_cast<TensorImpl_arrayfire_>(op_.getInput(0)->getImpl())->data();
const auto& input1 = std::dynamic_pointer_cast<TensorImpl_arrayfire_>(op_.getInput(1)->getImpl())->data();
if (input0.numdims() == 1) {
// 1-D left operand: promote it to a column (dims(0) x 1) for the product,
// then flatten the result back to 1-D.
output = af::matmul(input1, af::moddims(input0, af::dim4(input0.dims(0), 1)));
output = af::moddims(output, af::dim4(output.elements()));
}
else if (input1.numdims() == 1) {
// 1-D right operand: promote it to a row (1 x dims(0)), then flatten.
output = af::matmul(af::moddims(input1, af::dim4(1, input1.dims(0))), input0);
output = af::moddims(output, af::dim4(output.elements()));
}
else {
// General case; af::matmul batches over trailing dims for >2-D arrays.
output = af::matmul(input1, input0);
}
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/arrayfire/operator/MatMulImpl.hpp"
namespace Aidge {
// Checks the arrayfire MatMul forward pass against a naive CPU reference
// for 2-D, batched 3-D and 4-D inputs, plus a broadcasting 4-D x 1-D case.
TEST_CASE("[arrayfire/operator] MatMul(forward)", "[MatMul][arrayfire]") {
    const std::uint16_t NBTRIALS = 10;

    // Random generators: float values in [0, 1), matrix dims in [10, 100],
    // batch sizes in [1, 5].
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 1.0);
    std::uniform_int_distribution<std::size_t> distDims(10, 100);
    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);

    // Operator under test.
    std::shared_ptr<Node> myMatMul = MatMul();
    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul->getOperator());

    // To measure execution time of 'MatMul_Op::forward()'. The accumulator is
    // explicitly zero-initialized: std::chrono::duration's defaulted
    // constructor leaves the rep indeterminate, and the original read it
    // uninitialized ('+=') in the first SECTION.
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration =
        std::chrono::duration<double, std::micro>::zero();

    SECTION("2-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // (dim0 x dim1) * (dim1 x dim2) -> (dim0 x dim2)
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0 * dim1 * dim2;

            // RAII buffers (the original leaked its raw new[] allocations).
            std::vector<float> bigArray1(dim0 * dim1);
            for (float& v : bigArray1) { v = dis(gen); }
            std::vector<float> bigArray2(dim1 * dim2);
            for (float& v : bigArray2) { v = dis(gen); }

            // Naive reference matrix product.
            std::vector<float> res(dim0 * dim2);
            for (std::size_t i = 0; i < dim0; ++i) {
                for (std::size_t j = 0; j < dim2; ++j) {
                    float sum = 0.0f;
                    for (std::size_t k = 0; k < dim1; ++k) {
                        sum += bigArray1[i*dim1 + k] * bigArray2[k*dim2 + j];
                    }
                    res[i*dim2 + j] = sum;
                }
            }

            // Upload the inputs and the expected output to the backend.
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1->resize({dim0, dim1});
            T1->setBackend("arrayfire");
            T1->getImpl()->copyFromHost(bigArray1.data(), dim0 * dim1);

            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2->resize({dim1, dim2});
            T2->setBackend("arrayfire");
            T2->getImpl()->copyFromHost(bigArray2.data(), dim1 * dim2);

            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dim0, dim2});
            Tres->setBackend("arrayfire");
            Tres->getImpl()->copyFromHost(res.data(), dim0 * dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("arrayfire");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            // Accumulate directly: casting to whole microseconds first (as the
            // original did) truncates sub-microsecond runs.
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("3-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // Batch of dimNb independent (dim0 x dim1) * (dim1 x dim2) products.
            const std::size_t dimNb = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0 * dim1 * dim2 * dimNb;

            std::vector<float> bigArray1(dimNb * dim0 * dim1);
            for (float& v : bigArray1) { v = dis(gen); }
            std::vector<float> bigArray2(dimNb * dim1 * dim2);
            for (float& v : bigArray2) { v = dis(gen); }

            // Naive batched reference product.
            std::vector<float> res(dimNb * dim0 * dim2);
            for (std::size_t n = 0; n < dimNb; ++n) {
                for (std::size_t i = 0; i < dim0; ++i) {
                    for (std::size_t j = 0; j < dim2; ++j) {
                        float sum = 0.0f;
                        for (std::size_t k = 0; k < dim1; ++k) {
                            sum += bigArray1[n*dim0*dim1 + i*dim1 + k] * bigArray2[n*dim2*dim1 + k*dim2 + j];
                        }
                        res[n*dim0*dim2 + i*dim2 + j] = sum;
                    }
                }
            }

            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1->resize({dimNb, dim0, dim1});
            T1->setBackend("arrayfire");
            T1->getImpl()->copyFromHost(bigArray1.data(), dimNb * dim0 * dim1);

            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2->resize({dimNb, dim1, dim2});
            T2->setBackend("arrayfire");
            T2->getImpl()->copyFromHost(bigArray2.data(), dimNb * dim1 * dim2);

            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dimNb, dim0, dim2});
            Tres->setBackend("arrayfire");
            Tres->getImpl()->copyFromHost(res.data(), dimNb * dim0 * dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("arrayfire");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("4-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // Two batch dimensions: dimNb1 x dimNb2 independent products.
            const std::size_t dimNb1 = distNbMatrix(gen);
            const std::size_t dimNb2 = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0 * dim1 * dim2 * dimNb1 * dimNb2;

            std::vector<float> bigArray1(dimNb1 * dimNb2 * dim0 * dim1);
            for (float& v : bigArray1) { v = dis(gen); }
            std::vector<float> bigArray2(dimNb1 * dimNb2 * dim1 * dim2);
            for (float& v : bigArray2) { v = dis(gen); }

            // Naive doubly-batched reference product.
            std::vector<float> res(dimNb1 * dimNb2 * dim0 * dim2);
            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                    for (std::size_t i = 0; i < dim0; ++i) {
                        for (std::size_t j = 0; j < dim2; ++j) {
                            float sum = 0.0f;
                            for (std::size_t k = 0; k < dim1; ++k) {
                                sum += bigArray1[n1*dimNb2*dim0*dim1 + n2*dim0*dim1 + i*dim1 + k]
                                     * bigArray2[n1*dimNb2*dim1*dim2 + n2*dim1*dim2 + k*dim2 + j];
                            }
                            res[n1*dimNb2*dim0*dim2 + n2*dim0*dim2 + i*dim2 + j] = sum;
                        }
                    }
                }
            }

            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1->resize({dimNb1, dimNb2, dim0, dim1});
            T1->setBackend("arrayfire");
            T1->getImpl()->copyFromHost(bigArray1.data(), dimNb1 * dimNb2 * dim0 * dim1);

            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2->resize({dimNb1, dimNb2, dim1, dim2});
            T2->setBackend("arrayfire");
            T2->getImpl()->copyFromHost(bigArray2.data(), dimNb1 * dimNb2 * dim1 * dim2);

            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres->resize({dimNb1, dimNb2, dim0, dim2});
            Tres->setBackend("arrayfire");
            Tres->getImpl()->copyFromHost(res.data(), dimNb1 * dimNb2 * dim0 * dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("arrayfire");
            op->forwardDims();

            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += end - start;

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation / duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("+2-D / 1-D") {
        // Exercises both the 1-D operand path and broadcasting; only checks
        // that forward() runs (inputs are all-zero, no reference values).
        // input_0
        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
        op->associateInput(0, T0);
        const std::size_t dim0 = distNbMatrix(gen);
        const std::size_t dim1 = distNbMatrix(gen) + 1;
        const std::size_t dim2 = distNbMatrix(gen);
        const std::size_t dim3 = distNbMatrix(gen);
        T0->resize({dim0, dim1, dim2, dim3});
        T0->setDataType(DataType::Float32);
        T0->setBackend("arrayfire");
        T0->zeros();
        // input_1: a 1-D tensor matching input_0's last dimension.
        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
        op->associateInput(1, T1);
        T1->resize({dim3});
        T1->setDataType(DataType::Float32);
        T1->setBackend("arrayfire");
        T1->zeros();

        op->setDataType(DataType::Float32);
        op->setBackend("arrayfire");
        op->forwardDims();
        myMatMul->forward();
    }
}
} // namespace Aidge
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment