
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (17)
@@ -15,9 +15,9 @@
 #include "aidge/data/Tensor.hpp"
 namespace Aidge {
-inline void *getCPUPtr(std::shared_ptr<Aidge::Data> const &data) {
+inline void *getCPUPtr(std::shared_ptr<Aidge::Data> const &data, const std::size_t offset = 0) {
     const auto tensor = std::static_pointer_cast<Tensor>(data);
-    return tensor->getImpl()->hostPtr(tensor->getImplOffset());
+    return tensor->getImpl()->hostPtr(tensor->getImplOffset() + offset);
 }
 } // namespace Aidge
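The only functional change in this first hunk (GetCPUPtr.h) is the optional `offset` parameter, counted in elements and added on top of the tensor's internal implementation offset. It lets a caller address one matrix inside a batched tensor without building a view, which is exactly how the MatMul forward pass below consumes it. A minimal sketch of that pattern; the helper `visitMatrices` and the (2, 3, 4) shape are illustrative, not from the source:

```cpp
#include <cstddef>
#include <memory>

#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Tensor.hpp"

// Hypothetical helper: walk the two 3x4 matrices of a (2, 3, 4) float tensor
// without creating views, assuming `t` was allocated on the "cpu" backend.
void visitMatrices(const std::shared_ptr<Aidge::Tensor>& t) {
    const std::size_t matrixSize = 3 * 4;  // elements per matrix
    for (std::size_t b = 0; b < 2; ++b) {
        // The offset is counted in elements; it is added to the tensor's own
        // implementation offset before the host pointer is formed.
        float* matrix = static_cast<float*>(Aidge::getCPUPtr(t, b * matrixSize));
        (void)matrix;  // hand `matrix` to a kernel expecting one n x m block
    }
}
```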
@@ -23,16 +23,14 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 namespace Aidge {
 // class MatMul_Op;
 // compute kernel registry for forward and backward
 class MatMulImplForward_cpu
-    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::size_t, const std::size_t, const std::size_t,
                          const void *, const void *, void *)> {};
 class MatMulImplBackward_cpu
-    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>,
-                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType>,
+                         void(const std::vector<DimSize_t>&, const std::vector<DimSize_t>&,
                          const void *, const void *, void *)> {};
 class MatMulImpl_cpu : public OperatorImpl {
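In MatMulImpl.hpp, the registry key shrinks from three data types to two because the two matrix operands must now share a type, and the attribute tuple drops out of the kernel signature: the reworked MatMul carries no weights or attributes, only two inputs. A hedged sketch of a lookup against the new key, mirroring the `Registrar::create` call that `MatMulImpl_cpu::forward()` makes further down; the wrapper `runFloat32MatMul` is hypothetical:

```cpp
#include <cstddef>

#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"

// Fetch the registered Float32 forward kernel through the new two-type key,
// then invoke it with the (n, k, m, in1, in2, out) shape shown below.
void runFloat32MatMul(const float* a, const float* b, float* c,
                      std::size_t n, std::size_t k, std::size_t m) {
    auto kernelFunc = Aidge::Registrar<Aidge::MatMulImplForward_cpu>::create(
            {Aidge::DataType::Float32, Aidge::DataType::Float32});
    kernelFunc(n, k, m, a, b, c);
}
```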
@@ -12,45 +12,39 @@
 #ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
 #include "aidge/utils/Registrar.hpp"
+#include <algorithm>
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 namespace Aidge {
-template <class I, class W, class O>
-void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* weights_, void* output_) {
+template <class I, class O>
+void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
+                                   const void* input1_, const void* input2_, void* output_) {
-    // FIXME: missing MatMul parameters as arguments
-    const I* input = static_cast<const I*>(input_);
-    const W* weights = static_cast<const W*>(weights_);
+    const I* input1 = static_cast<const I*>(input1_);
+    const I* input2 = static_cast<const I*>(input2_);
     O* output = static_cast<O*>(output_);
-    std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0));
-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-                                                        input + (batch + 1)*oneInputSize,
-                                                        weights + out*oneInputSize,
-                                                        output[out + batch*std::get<0>(attrs)]);
+    for (std::size_t i = 0; i < n; ++i) {
+        for (std::size_t j = 0; j < m; ++j) {
+            O sum = O(0);
+            for (std::size_t l = 0; l < k; ++l) {
+                sum += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
+            }
+            output[i*m + j] = sum;
         }
     }
 }
 namespace {
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>);
+        {DataType::Float32, DataType::Float32},
+        Aidge::MatMulImpl_cpu_forward_kernel<float, float>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>);
+        {DataType::Int32, DataType::Int32},
+        Aidge::MatMulImpl_cpu_forward_kernel<int, int>);
 static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>);
+        {DataType::Float64, DataType::Float64},
+        Aidge::MatMulImpl_cpu_forward_kernel<double, double>);
 } // namespace
 } // namespace Aidge
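The kernel itself is now a plain row-major multiplication of one n x k matrix by one k x m matrix; batching and broadcasting are handled by the caller. A quick standalone sanity check of the new signature, with values chosen by hand rather than taken from the test suite:

```cpp
#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"

// Minimal exercise of the new kernel: C(2x2) = A(2x3) * B(3x2).
int main() {
    const float A[2][3] = {{1.f, 2.f, 3.f},
                           {4.f, 5.f, 6.f}};
    const float B[3][2] = {{ 7.f,  8.f},
                           { 9.f, 10.f},
                           {11.f, 12.f}};
    float C[2][2] = {};
    // n = rows of A, k = shared dimension, m = columns of B.
    Aidge::MatMulImpl_cpu_forward_kernel<float, float>(2, 3, 2,
                                                       &A[0][0], &B[0][0], &C[0][0]);
    // C is now {{58, 64}, {139, 154}}.
    return 0;
}
```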
@@ -9,15 +9,14 @@
  *
  ********************************************************************************/
 #include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include <cstddef>  // std::size_t
+#include <cstdint>  // std::int32_t
+#include <numeric>  // std::accumulate
+#include <vector>
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/MatMul.hpp"
 #include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"
@@ -30,27 +29,110 @@ void Aidge::MatMulImpl_cpu::forward()
     // Find the correct kernel type
     auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
         {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-         std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
          std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-    // Call kernel
-    // if (mOp.getInput(0)->nbDims() == 4) {
-    //     kernelFunc(
-    //         mOp.getStaticAttributes(),
-    //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->template dims<4>(),
-    //         mOp.getInput(0))->getImpl()->rawPtr(),
-    //         mOp.mInputs[1]->getImpl()->rawPtr(),
-    //         mOp.mInputs[2]->getImpl()->rawPtr(),
-    //         getCPUPtr(mOp.getRawOutput(0));
-    // }
-    // else
-    kernelFunc(
-        dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size() / std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0],
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawInput(1)),
-        getCPUPtr(mOp.getRawOutput(0)));
+    // Compute compatible input dimensions
+    std::vector<std::size_t> dims0 = static_cast<const MatMul_Op&>(mOp).getInput(0)->dims();
+    std::vector<std::size_t> dims1 = static_cast<const MatMul_Op&>(mOp).getInput(1)->dims();
+    // keep second-to-last dimension of dims0
+    const std::size_t keepDim0 = (dims0.size() > 1) ? 1 : 0;
+    // keep last dimension of dims1
+    const std::size_t keepDim1 = (dims1.size() > 1) ? 1 : 0;
+    if (dims0.size() == 1) {
+        dims0.insert(dims0.cbegin(), 1);
+    }
+    if (dims1.size() == 1) {
+        dims1.push_back(1);
+    }
+    if (dims0.size() > dims1.size()) {
+        dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
+    }
+    else if (dims1.size() > dims0.size()) {
+        dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
+    }
+    // const std::size_t dims_size = std::max(dims0.size(), dims1.size());
+    // at this point, dims0.size() == dims1.size()
+    const std::size_t nbDims = dims0.size();
+    // initialize strides to iterate through data because of broadcasting
+    std::size_t *stride_post0;
+    std::size_t *stride_post1;
+    std::int32_t *stride_step0;
+    std::int32_t *stride_step1;
+    if (nbDims > 2) {
+        stride_post0 = new std::size_t[nbDims-2];
+        stride_post0[nbDims - 3] = 1;
+        stride_post1 = new std::size_t[nbDims-2];
+        stride_post1[nbDims - 3] = 1;
+        for (std::size_t i = nbDims-4; i != static_cast<std::size_t>(-1); --i) {
+            stride_post0[i] = stride_post0[i+1]*dims0[i+1];
+            stride_post1[i] = stride_post1[i+1]*dims1[i+1];
+        }
+        stride_step0 = new std::int32_t[nbDims-2];
+        stride_step1 = new std::int32_t[nbDims-2];
+        for (std::size_t i = 0; i != nbDims-2; ++i) {
+            stride_step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post0[i]) : 1;
+            stride_step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post1[i]) : 1;
+        }
+    }
+    const std::vector<std::size_t>& outDims = static_cast<const MatMul_Op&>(mOp).getOutput(0)->dims();
+    const std::size_t nbMatrices = std::accumulate(outDims.cbegin(), outDims.cend() - keepDim0 - keepDim1, 1, std::multiplies<std::size_t>());
+    std::size_t dim = outDims.size() - 1 - keepDim0 - keepDim1;
+    // variables for arrays offsets
+    std::size_t offsetIn0 = 0;
+    std::size_t offsetIn1 = 0;
+    std::size_t offsetOut = 0;
+    const std::size_t n = dims0[nbDims - 2];
+    const std::size_t k = dims0[nbDims - 1];
+    const std::size_t m = dims1[nbDims - 1];
+    const std::size_t matrix0Size = n*k;
+    const std::size_t matrix1Size = k*m;
+    const std::size_t matrixOutSize = n*m;
+    for (std::size_t stack = 0; stack < nbMatrices;) {
+        kernelFunc(n, k, m,
+                   getCPUPtr(mOp.getRawInput(0), offsetIn0*matrix0Size),
+                   getCPUPtr(mOp.getRawInput(1), offsetIn1*matrix1Size),
+                   getCPUPtr(mOp.getRawOutput(0), offsetOut*matrixOutSize));
+        if (++stack < nbMatrices) {
+            std::size_t tmp_stack = stack;
+            while(tmp_stack % outDims[dim] == 0) {
+                tmp_stack /= outDims[dim];
+                dim--;
+            }
+            offsetIn0 += stride_step0[dim];
+            offsetIn1 += stride_step1[dim];
+            ++offsetOut;
+            dim = outDims.size() - 1 - keepDim0 - keepDim1;
+        }
+    }
+    if (nbDims > 2) {
+        delete[] stride_post0;
+        delete[] stride_post1;
+        delete[] stride_step0;
+        delete[] stride_step1;
+    }
 }
+// void Aidge::MatMulImpl_cpu::forward()
+// {
+//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1");
+//     // Find the correct kernel type
+//     auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
+//         {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
+//          std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+//     kernelFunc(
+//         std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+//         std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims(),
+//         getCPUPtr(mOp.getRawInput(0)),
+//         getCPUPtr(mOp.getRawInput(1)),
+//         getCPUPtr(mOp.getRawOutput(0)));
+// }
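The stride bookkeeping above is the subtle part of the new forward(): `stride_step` is 1 on an ordinary batch axis, and `1 - stride_post` on a broadcast axis of size 1, so stepping "forward" on that axis actually rewinds the input offset to replay the same matrices. Below is a self-contained replica of that traversal for batch dims {2, 1} against {1, 3} (hypothetical shapes, chosen for illustration); it prints which input matrix feeds each of the six output matrices:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // Batch dimensions only (the trailing n x k / k x m matrix dims are elided).
    const std::vector<std::size_t> dims0   = {2, 1};  // input 0 broadcasts axis 1
    const std::vector<std::size_t> dims1   = {1, 3};  // input 1 broadcasts axis 0
    const std::vector<std::size_t> outDims = {2, 3};
    const std::size_t nb = outDims.size();

    // stride_post: number of matrices covered by one step on each axis.
    std::vector<std::size_t> post0(nb), post1(nb);
    post0[nb - 1] = post1[nb - 1] = 1;
    for (std::size_t i = nb - 1; i-- > 0;) {
        post0[i] = post0[i + 1] * dims0[i + 1];
        post1[i] = post1[i + 1] * dims1[i + 1];
    }
    // stride_step: +1 on a normal axis; 1 - stride_post on a broadcast axis,
    // which rewinds the offset so the same matrices are replayed.
    std::vector<std::int32_t> step0(nb), step1(nb);
    for (std::size_t i = 0; i < nb; ++i) {
        step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(post0[i]) : 1;
        step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(post1[i]) : 1;
    }

    const std::size_t nbMatrices = 2 * 3;
    std::size_t in0 = 0, in1 = 0, dim = nb - 1;
    for (std::size_t stack = 0; stack < nbMatrices;) {
        std::cout << "out " << stack << " <- in0 " << in0 << ", in1 " << in1 << '\n';
        if (++stack < nbMatrices) {
            // Find the highest axis whose counter just wrapped, as in forward().
            std::size_t tmp = stack;
            while (tmp % outDims[dim] == 0) { tmp /= outDims[dim]; --dim; }
            in0 += step0[dim];  // unsigned wrap-around implements the rewind
            in1 += step1[dim];
            dim = nb - 1;
        }
    }
    // Prints in0: 0 0 0 1 1 1 and in1: 0 1 2 0 1 2.
    return 0;
}
```

This wrap-around trick is what lets forward() keep one flat loop over `nbMatrices` instead of a nested loop per batch axis.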
@@ -19,7 +19,7 @@
 using namespace Aidge;
-TEST_CASE("Tensor creation") {
+TEST_CASE("[backend_cpu/data] Tensor(constructor)","[Tensor]") {
   SECTION("from const array") {
     Tensor x = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
@@ -80,13 +80,12 @@ TEST_CASE("Tensor fill") {
       {11,12,13,14,15}}
     });
-    // expectedTensor->print();
     REQUIRE(*concatenatedTensor == *expectedTensor);
   }
 }
-TEST_CASE("Tensor methods") {
+TEST_CASE("[backend_cpu/data] Tensor(methods)", "[Tensor]") {
   Tensor x = Array3D<int, 2, 2, 2>{{
     {{1, 2},
      {3, 4}},
@@ -116,7 +115,7 @@ TEST_CASE("Tensor methods") {
   REQUIRE(y.getImpl() == x.getImpl());
   REQUIRE(approxEq<int>(y, Array1D<int, 2>{{3, 4}}));
   REQUIRE(y.isContiguous());
   Tensor y2 = x.extract({0, 1, 1}, {2, 1, 1});
   REQUIRE(y2.getImpl() == x.getImpl());
   REQUIRE(!y2.isContiguous());
@@ -10,102 +10,281 @@
  ********************************************************************************/
 #include <catch2/catch_test_macros.hpp>
+#include <cstddef>  // std::size_t
+#include <cstdint>  // std::uint16_t
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MatMul.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
-using namespace Aidge;
+namespace Aidge {
 TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
-    // Test MatMul forward with batch size = 2 and feature size = 75
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{
-        {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
-         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
-          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
-          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
-          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{
-        {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}});
-    std::shared_ptr<Node> myMatMul = MatMul(75, 5, "mymatmul");
+    const std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
+    std::uniform_int_distribution<std::size_t> distDims(10, 100);
+    std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);
+    // Create MatMul Operator
+    std::shared_ptr<Node> myMatMul = MatMul();
     auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
-    op->associateInput(1, myWeights);
SECTION("2D input") {
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
{75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
op->associateInput(0, myInput);
op->setDataType(DataType::Int32);
op->setBackend("cpu");
op->computeOutputDims();
myMatMul->forward();
REQUIRE(*(op->getOutput(0)) == *myOutput);
// To measure execution time of 'MatMul_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration;
SECTION("2-D Tensors") {
std::size_t totalComputation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate Tensors dimensions
const std::size_t dim0 = distDims(gen);
const std::size_t dim1 = distDims(gen);
const std::size_t dim2 = distDims(gen);
totalComputation += dim0*dim1*dim2;
// Create and populate the array with random float values
float bigArray1[dim0][dim1];
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim1; ++j) {
bigArray1[i][j] = dis(gen); // Generate random float value
}
}
float bigArray2[dim1][dim2];
for (int i = 0; i < dim1; ++i) {
for (int j = 0; j < dim2; ++j) {
bigArray2[i][j] = dis(gen); // Generate random float value
}
}
float res[dim0][dim2];
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim2; ++j) {
float sum = 0.0;
for (int k = 0; k < dim1; ++k) {
sum += bigArray1[i][k] * bigArray2[k][j];
}
res[i][j] = sum;
}
}
// Convert bigArray1 to Tensor
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
T1 -> resize({dim0,dim1});
T1 -> setBackend("cpu");
T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dim0*dim1);
// Convert bigArray2 to Tensor
std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
T2 -> resize({dim1,dim2});
T2 -> setBackend("cpu");
T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dim1*dim2);
// convert res to Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
Tres -> resize({dim0,dim2});
Tres -> setBackend("cpu");
Tres -> getImpl() -> setRawPtr(&res[0][0], dim0*dim2);
op->associateInput(0, T1);
op->associateInput(1, T2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMatMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
}
std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
std::cout << "total time: " << duration.count() << std::endl;
}
SECTION("4D input") {
std::shared_ptr<Tensor> myInput =
std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
{5, 6, 7, 8, 9},
{10, 11, 12, 13, 14},
{15, 16, 17, 18, 19},
{20, 21, 22, 23, 24}},
{{25, 26, 27, 28, 29},
{30, 31, 32, 33, 34},
{35, 36, 37, 38, 39},
{40, 41, 42, 43, 44},
{45, 46, 47, 48, 49}},
{{50, 51, 52, 53, 54},
{55, 56, 57, 58, 59},
{60, 61, 62, 63, 64},
{65, 66, 67, 68, 69},
{70, 71, 72, 73, 74}}},
{{{75, 76, 77, 78, 79},
{80, 81, 82, 83, 84},
{85, 86, 87, 88, 89},
{90, 91, 92, 93, 94},
{95, 96, 97, 98, 99}},
{{100, 101, 102, 103, 104},
{105, 106, 107, 108, 109},
{110, 111, 112, 113, 114},
{115, 116, 117, 118, 119},
{120, 121, 122, 123, 124}},
{{125, 126, 127, 128, 129},
{130, 131, 132, 133, 134},
{135, 136, 137, 138, 139},
{140, 141, 142, 143, 144},
{145, 146, 147, 148, 149}}}}});
op->associateInput(0, myInput);
op->setDataType(DataType::Int32);
SECTION("3-D Tensors") {
std::size_t totalComputation = 0;
duration = std::chrono::duration<double, std::micro>::zero();
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate Tensors dimensions
const std::size_t dimNb = distNbMatrix(gen);
const std::size_t dim0 = distDims(gen);
const std::size_t dim1 = distDims(gen);
const std::size_t dim2 = distDims(gen);
totalComputation += dim0*dim1*dim2*dimNb;
// Create and populate the array with random float values
float bigArray1[dimNb][dim0][dim1];
for (std::size_t n = 0; n < dimNb; ++n) {
for (std::size_t i = 0; i < dim0; ++i) {
for (std::size_t j = 0; j < dim1; ++j) {
bigArray1[n][i][j] = dis(gen); // Generate random float value
}
}
}
float bigArray2[dimNb][dim1][dim2];
for (std::size_t n = 0; n < dimNb; ++n) {
for (int i = 0; i < dim1; ++i) {
for (int j = 0; j < dim2; ++j) {
bigArray2[n][i][j] = dis(gen); // Generate random float value
}
}
}
float res[dimNb][dim0][dim2];
for (std::size_t n = 0; n < dimNb; ++n) {
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim2; ++j) {
float sum = 0.0;
for (int k = 0; k < dim1; ++k) {
sum += bigArray1[n][i][k] * bigArray2[n][k][j];
}
res[n][i][j] = sum;
}
}
}
// Convert bigArray1 to Tensor
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
T1 -> resize({dimNb,dim0,dim1});
T1 -> setBackend("cpu");
T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb*dim0*dim1);
// Convert bigArray2 to Tensor
std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
T2 -> resize({dimNb,dim1,dim2});
T2 -> setBackend("cpu");
T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb*dim1*dim2);
// convert res to Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
Tres -> resize({dimNb,dim0,dim2});
Tres -> setBackend("cpu");
Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb*dim0*dim2);
op->associateInput(0, T1);
op->associateInput(1, T2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMatMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
}
std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
std::cout << "total time: " << duration.count() << std::endl;
}
SECTION("4-D Tensors") {
std::size_t totalComputation = 0;
duration = std::chrono::duration<double, std::micro>::zero();
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate Tensors dimensions
const std::size_t dimNb1 = distNbMatrix(gen);
const std::size_t dimNb2 = distNbMatrix(gen);
const std::size_t dim0 = distDims(gen);
const std::size_t dim1 = distDims(gen);
const std::size_t dim2 = distDims(gen);
totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;
// Create and populate the array with random float values
float bigArray1[dimNb1][dimNb2][dim0][dim1];
for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
for (std::size_t i = 0; i < dim0; ++i) {
for (std::size_t j = 0; j < dim1; ++j) {
bigArray1[n1][n2][i][j] = dis(gen); // Generate random float value
}
}
}
}
float bigArray2[dimNb1][dimNb2][dim1][dim2];
for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
for (std::size_t i = 0; i < dim1; ++i) {
for (std::size_t j = 0; j < dim2; ++j) {
bigArray2[n1][n2][i][j] = dis(gen); // Generate random float value
}
}
}
}
float res[dimNb1][dimNb2][dim0][dim2];
for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
for (int i = 0; i < dim0; ++i) {
for (int j = 0; j < dim2; ++j) {
float sum = 0.0;
for (int k = 0; k < dim1; ++k) {
sum += bigArray1[n1][n2][i][k] * bigArray2[n1][n2][k][j];
}
res[n1][n2][i][j] = sum;
}
}
}
}
// Convert bigArray1 to Tensor
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
T1 -> resize({dimNb1,dimNb2,dim0,dim1});
T1 -> setBackend("cpu");
T1 -> getImpl() -> setRawPtr(&bigArray1[0][0], dimNb1*dimNb2*dim0*dim1);
// Convert bigArray2 to Tensor
std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
T2 -> resize({dimNb1,dimNb2,dim1,dim2});
T2 -> setBackend("cpu");
T2 -> getImpl() -> setRawPtr(&bigArray2[0][0], dimNb1*dimNb2*dim1*dim2);
// convert res to Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
Tres -> resize({dimNb1,dimNb2,dim0,dim2});
Tres -> setBackend("cpu");
Tres -> getImpl() -> setRawPtr(&res[0][0], dimNb1*dimNb2*dim0*dim2);
op->associateInput(0, T1);
op->associateInput(1, T2);
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
start = std::chrono::system_clock::now();
myMatMul->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
}
std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
std::cout << "total time: " << duration.count() << std::endl;
}
SECTION("+2-D / 1-D") {
// allows to test both computation with a 1-D Tensor and broadcasting
// input_0
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
const std::size_t dim0 = distNbMatrix(gen);
const std::size_t dim1 = distNbMatrix(gen) + 1;
const std::size_t dim2 = distNbMatrix(gen);
const std::size_t dim3 = distNbMatrix(gen);
T0->resize({dim0,dim1,dim2,dim3});
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
// input_1
std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
op -> associateInput(1,T1);
T1->resize({dim3});
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
op->setDataType(DataType::Float32);
op->setBackend("cpu");
op->computeOutputDims();
myMatMul->forward();
REQUIRE(*(op->getOutput(0)) == *myOutput);
}
-    // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl;
-}
\ No newline at end of file
+    }
+}
+} // namespace Aidge
\ No newline at end of file
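A portability note on the new tests: buffers such as `float bigArray1[dim0][dim1]` with runtime bounds are variable-length arrays, a GCC/Clang extension rather than standard C++, and at the top of the `distDims` range the 4-D case keeps three roughly megabyte-sized blocks on the stack per trial. A standard-conforming sketch that preserves the tests' `setRawPtr` pattern with caller-owned heap storage; the helper `wrapBuffer` is hypothetical:

```cpp
#include <cstddef>
#include <memory>
#include <vector>

#include "aidge/data/Tensor.hpp"

// Wrap caller-owned storage in a CPU tensor, mirroring the tests' use of
// setRawPtr. No copy is made, so `data` must outlive the returned tensor.
std::shared_ptr<Aidge::Tensor> wrapBuffer(std::vector<float>& data,
                                          const std::vector<std::size_t>& dims) {
    auto t = std::make_shared<Aidge::Tensor>(Aidge::DataType::Float32);
    t->resize(dims);
    t->setBackend("cpu");
    t->getImpl()->setRawPtr(data.data(), data.size());
    return t;
}

// Usage inside a trial body:
//   std::vector<float> buf1(dim0 * dim1);
//   for (auto& v : buf1) { v = dis(gen); }
//   std::shared_ptr<Aidge::Tensor> T1 = wrapBuffer(buf1, {dim0, dim1});
```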