Commit e4bfebc1 authored by Maxence Naud

Merge branch 'dev' into QualityOfLife

parents 91acf7bb 2be78815
2 merge requests: !50 version 0.2.0, !38 move tensorImpl cpu to core
Pipeline #39352 passed
@@ -23,16 +23,14 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h"

namespace Aidge {
// compute kernel registry for forward and backward
class MatMulImplForward_cpu
    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
                         void(const std::size_t, const std::size_t, const std::size_t,
                              const void *, const void *, void *)> {};
class MatMulImplBackward_cpu
    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                              const void *, const void *, void *)> {};

class MatMulImpl_cpu : public OperatorImpl {
...
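For context, here is a minimal sketch (not part of the commit; it assumes the Aidge CPU backend headers of this file) of how a forward kernel registered under the new two-entry {input data type, output data type} key is retrieved and invoked. It mirrors the Registrar<MatMulImplForward_cpu>::create() lookup performed in MatMulImpl_cpu::forward() later in this diff; the helper name and its parameters are illustrative only.

// Illustrative helper (hypothetical name): fetch the float32 kernel from the
// registry and run one n x k times k x m product on raw row-major buffers.
void runRegisteredMatMulKernel(std::size_t n, std::size_t k, std::size_t m,
                               const float* input0, const float* input1, float* output) {
    auto kernelFunc = Aidge::Registrar<Aidge::MatMulImplForward_cpu>::create(
            {Aidge::DataType::Float32, Aidge::DataType::Float32});
    kernelFunc(n, k, m, input0, input1, output);
}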
@@ -12,45 +12,39 @@
#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_

#include "aidge/utils/Registrar.hpp"

#include "aidge/backend/cpu/operator/MatMulImpl.hpp"

namespace Aidge {

template <class I, class O>
void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
                                   const void* input1_, const void* input2_, void* output_) {
    // FIXME: missing MatMul parameters as arguments
    const I* input1 = static_cast<const I*>(input1_);
    const I* input2 = static_cast<const I*>(input2_);
    O* output = static_cast<O*>(output_);

    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = 0; j < m; ++j) {
            O sum = O(0);
            for (std::size_t l = 0; l < k; ++l) {
                sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
            }
            output[i*m + j] = sum;
        }
    }
}

namespace {
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
        {DataType::Float32, DataType::Float32},
        Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
        {DataType::Int32, DataType::Int32},
        Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
        {DataType::Float64, DataType::Float64},
        Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
}  // namespace
}  // namespace Aidge
...
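As a quick, self-contained illustration (not part of the commit) of what the reworked kernel computes, the snippet below applies the same n/k/m triple loop to a 2x3 by 3x2 product on row-major buffers and prints the expected 2x2 result:

#include <cstddef>
#include <iostream>

// Same contract as MatMulImpl_cpu_forward_kernel<float, float>:
// input1 is n x k, input2 is k x m, output is n x m, all row-major.
static void naiveMatMul(std::size_t n, std::size_t k, std::size_t m,
                        const float* input1, const float* input2, float* output) {
    for (std::size_t i = 0; i < n; ++i) {
        for (std::size_t j = 0; j < m; ++j) {
            float sum = 0.0f;
            for (std::size_t l = 0; l < k; ++l) {
                sum += input1[i*k + l] * input2[l*m + j];
            }
            output[i*m + j] = sum;
        }
    }
}

int main() {
    const float a[2*3] = {1, 2, 3,
                          4, 5, 6};
    const float b[3*2] = { 7,  8,
                           9, 10,
                          11, 12};
    float c[2*2] = {};
    naiveMatMul(2, 3, 2, a, b, c);
    // Prints: 58 64 139 154
    std::cout << c[0] << ' ' << c[1] << ' ' << c[2] << ' ' << c[3] << std::endl;
    return 0;
}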
@@ -9,15 +9,14 @@
 *
 ********************************************************************************/

#include <cstddef>  // std::size_t
#include <cstdint>  // std::int32_t
#include <numeric>  // std::accumulate
#include <vector>

#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"
@@ -30,27 +29,110 @@ void Aidge::MatMulImpl_cpu::forward()
    // Find the correct kernel type
    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
        {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});

    // Compute compatible input dimensions
    std::vector<std::size_t> dims0 = static_cast<const MatMul_Op&>(mOp).getInput(0)->dims();
    std::vector<std::size_t> dims1 = static_cast<const MatMul_Op&>(mOp).getInput(1)->dims();

    // keep second-to-last dimension of dims0
    const std::size_t keepDim0 = (dims0.size() > 1) ? 1 : 0;
    // keep last dimension of dims1
    const std::size_t keepDim1 = (dims1.size() > 1) ? 1 : 0;

    if (dims0.size() == 1) {
        dims0.insert(dims0.cbegin(), 1);
    }
    if (dims1.size() == 1) {
        dims1.push_back(1);
    }

    if (dims0.size() > dims1.size()) {
        dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
    }
    else if (dims1.size() > dims0.size()) {
        dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
    }

    // const std::size_t dims_size = std::max(dims0.size(), dims1.size());
    // at this point, dims0.size() == dims1.size()
    const std::size_t nbDims = dims0.size();

    // initialize strides to iterate through data because of broadcasting
    std::size_t *stride_post0;
    std::size_t *stride_post1;
    std::int32_t *stride_step0;
    std::int32_t *stride_step1;
    if (nbDims > 2) {
        stride_post0 = new std::size_t[nbDims-2];
        stride_post0[nbDims - 3] = 1;
        stride_post1 = new std::size_t[nbDims-2];
        stride_post1[nbDims - 3] = 1;
        for (std::size_t i = nbDims-4; i != static_cast<std::size_t>(-1); --i) {
            stride_post0[i] = stride_post0[i+1]*dims0[i+1];
            stride_post1[i] = stride_post1[i+1]*dims1[i+1];
        }
        stride_step0 = new std::int32_t[nbDims-2];
        stride_step1 = new std::int32_t[nbDims-2];
        for (std::size_t i = 0; i != nbDims-2; ++i) {
            stride_step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post0[i]) : 1;
            stride_step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post1[i]) : 1;
        }
    }

    const std::vector<std::size_t>& outDims = static_cast<const MatMul_Op&>(mOp).getOutput(0)->dims();
    const std::size_t nbMatrices = std::accumulate(outDims.cbegin(), outDims.cend() - keepDim0 - keepDim1, 1, std::multiplies<std::size_t>());
    std::size_t dim = outDims.size() - 1 - keepDim0 - keepDim1;

    // variables for arrays offsets
    std::size_t offsetIn0 = 0;
    std::size_t offsetIn1 = 0;
    std::size_t offsetOut = 0;
    const std::size_t n = dims0[nbDims - 2];
    const std::size_t k = dims0[nbDims - 1];
    const std::size_t m = dims1[nbDims - 1];
    const std::size_t matrix0Size = n*k;
    const std::size_t matrix1Size = k*m;
    const std::size_t matrixOutSize = n*m;

    for (std::size_t stack = 0; stack < nbMatrices;) {
        kernelFunc(n, k, m,
                   getCPUPtr(mOp.getRawInput(0), offsetIn0*matrix0Size),
                   getCPUPtr(mOp.getRawInput(1), offsetIn1*matrix1Size),
                   getCPUPtr(mOp.getRawOutput(0), offsetOut*matrixOutSize));
        if (++stack < nbMatrices) {
            std::size_t tmp_stack = stack;
            while(tmp_stack % outDims[dim] == 0) {
                tmp_stack /= outDims[dim];
                dim--;
            }
            offsetIn0 += stride_step0[dim];
            offsetIn1 += stride_step1[dim];
            ++offsetOut;
            dim = outDims.size() - 1 - keepDim0 - keepDim1;
        }
    }
    if (nbDims > 2) {
        delete[] stride_post0;
        delete[] stride_post1;
        delete[] stride_step0;
        delete[] stride_step1;
    }
}

// void Aidge::MatMulImpl_cpu::forward()
// {
//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1");

//     // Find the correct kernel type
//     auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
//         {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
//          std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});

//     kernelFunc(
//         std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
//         std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims(),
//         getCPUPtr(mOp.getRawInput(0)),
//         getCPUPtr(mOp.getRawInput(1)),
//         getCPUPtr(mOp.getRawOutput(0)));
// }
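The broadcasting bookkeeping in the new forward() above can be hard to follow from the stride arithmetic alone. The self-contained sketch below (illustrative only, not the commit's stride_step implementation) enumerates the same input/output pairing for one concrete case: stacked inputs with batch dimensions {2, 1} and {1, 3} broadcast to an output batch of {2, 3}, so nbMatrices = 6 matrix products are launched, and for this shape each output matrix reads input 0 at its first batch coordinate and input 1 at its second one.

#include <cstddef>
#include <iostream>

int main() {
    // Batch (stack) dimensions after alignment, illustrative values only:
    // input0 {2, 1}, input1 {1, 3} -> output {2, 3}.
    const std::size_t out[2] = {2, 3};
    const std::size_t in0[2] = {2, 1};
    const std::size_t in1[2] = {1, 3};

    const std::size_t nbMatrices = out[0] * out[1];  // 6 independent matrix products
    for (std::size_t stack = 0; stack < nbMatrices; ++stack) {
        // Decompose the flat output index into per-dimension coordinates,
        // then collapse broadcast dimensions (size 1) to coordinate 0.
        const std::size_t b0 = stack / out[1];
        const std::size_t b1 = stack % out[1];
        const std::size_t off0 = (in0[0] == 1 ? 0 : b0) * in0[1] + (in0[1] == 1 ? 0 : b1);
        const std::size_t off1 = (in1[0] == 1 ? 0 : b0) * in1[1] + (in1[1] == 1 ? 0 : b1);
        std::cout << "output matrix " << stack
                  << " <- input0 matrix " << off0
                  << ", input1 matrix " << off1 << '\n';
    }
    return 0;
}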
@@ -10,102 +10,281 @@
 ********************************************************************************/

#include <catch2/catch_test_macros.hpp>
#include <cstddef>  // std::size_t
#include <cstdint>  // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution

#include "aidge/data/Tensor.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/OperatorTensor.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"

namespace Aidge {

TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
    const std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
    std::uniform_int_distribution<std::size_t> distDims(10, 100);
    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);

    // Create MatMul Operator
    std::shared_ptr<Node> myMatMul = MatMul();
    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());

    // To measure execution time of 'MatMul_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration;

    SECTION("2-D Tensors") {
        std::size_t totalComputation = 0;
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2;

            // Create and populate the array with random float values
            float bigArray1[dim0][dim1];
            for (int i = 0; i < dim0; ++i) {
                for (int j = 0; j < dim1; ++j) {
                    bigArray1[i][j] = dis(gen); // Generate random float value
                }
            }
            float bigArray2[dim1][dim2];
            for (int i = 0; i < dim1; ++i) {
                for (int j = 0; j < dim2; ++j) {
                    bigArray2[i][j] = dis(gen); // Generate random float value
                }
            }
            float res[dim0][dim2];
            for (int i = 0; i < dim0; ++i) {
                for (int j = 0; j < dim2; ++j) {
                    float sum = 0.0;
                    for (int k = 0; k < dim1; ++k) {
                        sum += bigArray1[i][k] * bigArray2[k][j];
                    }
                    res[i][j] = sum;
                }
            }

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(&res[0][0], dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->computeOutputDims();
            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("3-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dimNb = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2*dimNb;

            // Create and populate the array with random float values
            float bigArray1[dimNb][dim0][dim1];
            for (std::size_t n = 0; n < dimNb; ++n) {
                for (std::size_t i = 0; i < dim0; ++i) {
                    for (std::size_t j = 0; j < dim1; ++j) {
                        bigArray1[n][i][j] = dis(gen); // Generate random float value
                    }
                }
            }
            float bigArray2[dimNb][dim1][dim2];
            for (std::size_t n = 0; n < dimNb; ++n) {
                for (int i = 0; i < dim1; ++i) {
                    for (int j = 0; j < dim2; ++j) {
                        bigArray2[n][i][j] = dis(gen); // Generate random float value
                    }
                }
            }
            float res[dimNb][dim0][dim2];
            for (std::size_t n = 0; n < dimNb; ++n) {
                for (int i = 0; i < dim0; ++i) {
                    for (int j = 0; j < dim2; ++j) {
                        float sum = 0.0;
                        for (int k = 0; k < dim1; ++k) {
                            sum += bigArray1[n][i][k] * bigArray2[n][k][j];
                        }
                        res[n][i][j] = sum;
                    }
                }
            }

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dimNb,dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb*dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dimNb,dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb*dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dimNb,dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb*dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->computeOutputDims();
            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("4-D Tensors") {
        std::size_t totalComputation = 0;
        duration = std::chrono::duration<double, std::micro>::zero();
        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
            // generate Tensors dimensions
            const std::size_t dimNb1 = distNbMatrix(gen);
            const std::size_t dimNb2 = distNbMatrix(gen);
            const std::size_t dim0 = distDims(gen);
            const std::size_t dim1 = distDims(gen);
            const std::size_t dim2 = distDims(gen);
            totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;

            // Create and populate the array with random float values
            float bigArray1[dimNb1][dimNb2][dim0][dim1];
            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                    for (std::size_t i = 0; i < dim0; ++i) {
                        for (std::size_t j = 0; j < dim1; ++j) {
                            bigArray1[n1][n2][i][j] = dis(gen); // Generate random float value
                        }
                    }
                }
            }
            float bigArray2[dimNb1][dimNb2][dim1][dim2];
            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                    for (std::size_t i = 0; i < dim1; ++i) {
                        for (std::size_t j = 0; j < dim2; ++j) {
                            bigArray2[n1][n2][i][j] = dis(gen); // Generate random float value
                        }
                    }
                }
            }
            float res[dimNb1][dimNb2][dim0][dim2];
            for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                    for (int i = 0; i < dim0; ++i) {
                        for (int j = 0; j < dim2; ++j) {
                            float sum = 0.0;
                            for (int k = 0; k < dim1; ++k) {
                                sum += bigArray1[n1][n2][i][k] * bigArray2[n1][n2][k][j];
                            }
                            res[n1][n2][i][j] = sum;
                        }
                    }
                }
            }

            // Convert bigArray1 to Tensor
            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
            T1 -> resize({dimNb1,dimNb2,dim0,dim1});
            T1 -> setBackend("cpu");
            T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb1*dimNb2*dim0*dim1);
            // Convert bigArray2 to Tensor
            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
            T2 -> resize({dimNb1,dimNb2,dim1,dim2});
            T2 -> setBackend("cpu");
            T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb1*dimNb2*dim1*dim2);
            // convert res to Tensor
            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
            Tres -> resize({dimNb1,dimNb2,dim0,dim2});
            Tres -> setBackend("cpu");
            Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb1*dimNb2*dim0*dim2);

            op->associateInput(0, T1);
            op->associateInput(1, T2);
            op->setDataType(DataType::Float32);
            op->setBackend("cpu");
            op->computeOutputDims();
            start = std::chrono::system_clock::now();
            myMatMul->forward();
            end = std::chrono::system_clock::now();
            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);

            REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
        }
        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
        std::cout << "total time: " << duration.count() << std::endl;
    }

    SECTION("+2-D / 1-D") {
        // allows to test both computation with a 1-D Tensor and broadcasting
        // input_0
        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
        op->associateInput(0,T0);
        const std::size_t dim0 = distNbMatrix(gen);
        const std::size_t dim1 = distNbMatrix(gen) + 1;
        const std::size_t dim2 = distNbMatrix(gen);
        const std::size_t dim3 = distNbMatrix(gen);
        T0->resize({dim0,dim1,dim2,dim3});
        T0->setDataType(DataType::Float32);
        T0->setBackend("cpu");

        // input_1
        std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
        op -> associateInput(1,T1);
        T1->resize({dim3});
        T1->setDataType(DataType::Float32);
        T1->setBackend("cpu");

        op->setDataType(DataType::Float32);
        op->setBackend("cpu");
        op->computeOutputDims();
        myMatMul->forward();
    }
}
} // namespace Aidge