Compare revisions
Showing 280 additions and 77 deletions
@@ -32,7 +32,7 @@ class ReshapeImplBackward_cpu
 class ReshapeImpl_cpu : public OperatorImpl {
 public:
-    ReshapeImpl_cpu(const Reshape_Op& op) : OperatorImpl(op) {}
+    ReshapeImpl_cpu(const Reshape_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<ReshapeImpl_cpu> create(const Reshape_Op& op) {
         return std::make_unique<ReshapeImpl_cpu>(op);
...
@@ -34,7 +34,7 @@ class ScalingImplBackward_cpu
 class ScalingImpl_cpu : public OperatorImpl {
 public:
-    ScalingImpl_cpu(const Scaling_Op& op) : OperatorImpl(op) {}
+    ScalingImpl_cpu(const Scaling_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<ScalingImpl_cpu> create(const Scaling_Op& op) {
         return std::make_unique<ScalingImpl_cpu>(op);
...
@@ -33,7 +33,7 @@ class SigmoidImplBackward_cpu
 class SigmoidImpl_cpu : public OperatorImpl {
 public:
-    SigmoidImpl_cpu(const Sigmoid_Op& op) : OperatorImpl(op) {}
+    SigmoidImpl_cpu(const Sigmoid_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<SigmoidImpl_cpu> create(const Sigmoid_Op& op) {
         return std::make_unique<SigmoidImpl_cpu>(op);
...
@@ -40,7 +40,7 @@ class SliceImplBackward_cpu
 class SliceImpl_cpu : public OperatorImpl {
 public:
-    SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op) {}
+    SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) {
         return std::make_unique<SliceImpl_cpu>(op);
...
@@ -33,7 +33,7 @@ class SoftmaxImplBackward_cpu
 class SoftmaxImpl_cpu : public OperatorImpl {
 public:
-    SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op) {}
+    SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) {
         return std::make_unique<SoftmaxImpl_cpu>(op);
...
@@ -33,7 +33,7 @@ class SqrtImplBackward_cpu
 class SqrtImpl_cpu : public OperatorImpl {
 public:
-    SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op) {}
+    SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<SqrtImpl_cpu> create(const Sqrt_Op& op) {
         return std::make_unique<SqrtImpl_cpu>(op);
...
@@ -33,7 +33,7 @@ class SubImplBackward_cpu
 class SubImpl_cpu : public OperatorImpl {
 public:
-    SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op) {}
+    SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<SubImpl_cpu> create(const Sub_Op& op) {
         return std::make_unique<SubImpl_cpu>(op);
...
@@ -33,7 +33,7 @@ class TanhImplBackward_cpu
 class TanhImpl_cpu : public OperatorImpl {
 public:
-    TanhImpl_cpu(const Tanh_Op& op) : OperatorImpl(op) {}
+    TanhImpl_cpu(const Tanh_Op& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<TanhImpl_cpu> create(const Tanh_Op& op) {
         return std::make_unique<TanhImpl_cpu>(op);
...
@@ -57,7 +57,7 @@ class TransposeImpl6DBackward_cpu
 class TransposeImpl2D_cpu : public OperatorImpl {
 public:
-    TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op) {}
+    TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<TransposeImpl2D_cpu> create(const Transpose_Op<2>& op) {
         return std::make_unique<TransposeImpl2D_cpu>(op);
@@ -68,7 +68,7 @@ public:
 };
 class TransposeImpl3D_cpu : public OperatorImpl {
 public:
-    TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op) {}
+    TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<TransposeImpl3D_cpu> create(const Transpose_Op<3>& op) {
         return std::make_unique<TransposeImpl3D_cpu>(op);
@@ -79,7 +79,7 @@ public:
 };
 class TransposeImpl4D_cpu : public OperatorImpl {
 public:
-    TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op) {}
+    TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<TransposeImpl4D_cpu> create(const Transpose_Op<4>& op) {
         return std::make_unique<TransposeImpl4D_cpu>(op);
@@ -90,7 +90,7 @@ public:
 };
 class TransposeImpl5D_cpu : public OperatorImpl {
 public:
-    TransposeImpl5D_cpu(const Transpose_Op<5>& op) : OperatorImpl(op) {}
+    TransposeImpl5D_cpu(const Transpose_Op<5>& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<TransposeImpl5D_cpu> create(const Transpose_Op<5>& op) {
         return std::make_unique<TransposeImpl5D_cpu>(op);
@@ -101,7 +101,7 @@ public:
 };
 class TransposeImpl6D_cpu : public OperatorImpl {
 public:
-    TransposeImpl6D_cpu(const Transpose_Op<6>& op) : OperatorImpl(op) {}
+    TransposeImpl6D_cpu(const Transpose_Op<6>& op) : OperatorImpl(op, "cpu") {}
     static std::unique_ptr<TransposeImpl6D_cpu> create(const Transpose_Op<6>& op) {
         return std::make_unique<TransposeImpl6D_cpu>(op);
...
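Every header hunk above makes the same one-line change: the OperatorImpl base constructor now also receives the name of the backend providing the kernel ("cpu" throughout this diff). A minimal sketch of what that implies for the base class; this is an illustrative assumption, not the aidge_core definition, and the member names (mOp, mBackend) are hypothetical:

    #include <string>

    class Operator;  // stand-in for the aidge_core class

    // Hypothetical sketch only: the diff shows the call sites, not this definition.
    class OperatorImpl {
    public:
        OperatorImpl(const Operator& op, const std::string& backend)
            : mOp(op), mBackend(backend) {}   // every implementation in this diff passes "cpu"
        const std::string& backend() const noexcept { return mBackend; }
    protected:
        const Operator& mOp;
        std::string mBackend;  // assumed member: records which backend runs the op
    };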
@@ -9,17 +9,18 @@
  *
  ********************************************************************************/

+#include "aidge/backend/cpu/operator/AddImpl.hpp"
+
 #include <cassert>
 #include <numeric> // std::accumulate
 #include <vector>

-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/backend/cpu/operator/AddImpl.hpp"
-#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/utils/ErrorHandling.hpp"

 Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -27,15 +28,18 @@ Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
 }
 void Aidge::AddImpl_cpu::forward() {
-    assert(mOp.getRawInput(0) && "missing input in Add operator");
-    DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType();
-    for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) {
-        assert(mOp.getRawInput(i) && "missing input in Add operator");
-        assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput);
+    const auto& opTensor = static_cast<const OperatorTensor&>(mOp);
+    AIDGE_ASSERT(opTensor.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation.");
+    assert(opTensor.getInput(0) && "missing input in Add operator");
+    DataType datatypeFirstInput = opTensor.getInput(0)->dataType();
+    for (IOIndex_t i = 1; i < opTensor.nbInputs(); ++i) {
+        AIDGE_ASSERT(opTensor.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
+        assert(opTensor.getInput(i) && "missing input in Add operator");
+        assert(opTensor.getInput(i)->dataType() == datatypeFirstInput);
     }

     // Find the correct kernel type
-    const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
+    const auto outputDataType = opTensor.getOutput(0)->dataType();
     const Registrar<AddImplForward_cpu>::registrar_key registrarKey = {
         datatypeFirstInput,
         outputDataType};
@@ -55,26 +59,26 @@ void Aidge::AddImpl_cpu::forward() {
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
-    std::size_t nbDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims();
+    const std::size_t nbDims = opTensor.getOutput(0)->nbDims();
     std::vector<std::vector<std::size_t>> inputsDims;
     std::vector<const void*> opInputs;
-    std::vector<std::shared_ptr<Tensor>> inputsFallback(mOp.nbInputs());
-    for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) {
+    std::vector<std::shared_ptr<Tensor>> inputsFallback(opTensor.nbInputs());
+    for (IOIndex_t i = 0; i < opTensor.nbInputs(); ++i) {
         std::vector<std::size_t> inputDims(nbDims, 1);
-        auto dims = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dims();
+        auto dims = opTensor.getInput(i)->dims();
         for(std::size_t j=dims.size()-1; j+1>0; --j)
         {
             std::size_t idx = nbDims - (dims.size()-j);
             inputDims[idx] = dims[j];
         }
         inputsDims.push_back(inputDims);
-        const auto& input = std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->refCastFrom(inputsFallback[i], *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
+        const auto& input = opTensor.getInput(i)->refCastFrom(inputsFallback[i], *opTensor.getOutput(0));
         opInputs.push_back(input.getImpl()->rawPtr());
     }

     kernelFunc(opInputs,
                inputsDims,
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-               getCPUPtr(mOp.getRawOutput(0)));
+               opTensor.getOutput(0)->size(),
+               opTensor.getOutput(0)->dims(),
+               getCPUPtr(opTensor.getRawOutput(0)));
 }
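For reference, the inner loop of the new forward() right-aligns each input shape against the output rank, padding missing leading axes with 1 so the kernel can broadcast. A standalone sketch of that alignment logic (illustrative, not aidge code):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Right-align `dims` against a target rank, padding leading axes with 1.
    std::vector<std::size_t> alignDims(const std::vector<std::size_t>& dims, std::size_t nbDims) {
        std::vector<std::size_t> aligned(nbDims, 1);
        for (std::size_t j = dims.size(); j-- > 0; ) {
            aligned[nbDims - (dims.size() - j)] = dims[j];
        }
        return aligned;
    }

    int main() {
        for (std::size_t d : alignDims({3, 4}, 4)) { std::cout << d << ' '; }  // prints: 1 1 3 4
    }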
@@ -28,17 +28,19 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
 }

 void Aidge::ConvImpl2D_cpu::forward() {
+    const auto& opTensor = static_cast<const OperatorTensor&>(mOp);
+
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getRawInput(0) && "missing input #0");
     assert(mOp.getRawInput(1) && "missing input #1");
     assert(mOp.getRawInput(2) && "missing input #2");

     // Find the correct kernel type
-    const auto outputDataType = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType();
+    const auto outputDataType = opTensor.getOutput(0)->dataType();
     const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = {
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
+        opTensor.getInput(0)->dataType(),
+        opTensor.getInput(1)->dataType(),
+        opTensor.getInput(2)->dataType(),
         outputDataType};

     Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc;
@@ -57,12 +59,12 @@ void Aidge::ConvImpl2D_cpu::forward() {
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->refCastFrom(input0Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
-    const auto& input1 = std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->refCastFrom(input1Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
-    const auto& input2 = std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->refCastFrom(input2Fallback, *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)));
+    const auto& input0 = opTensor.getInput(0)->refCastFrom(input0Fallback, *opTensor.getOutput(0));
+    const auto& input1 = opTensor.getInput(1)->refCastFrom(input1Fallback, *opTensor.getOutput(0));
+    const auto& input2 = opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0));

     // Call kernel
-    kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
+    kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), opTensor.getInput(0)->template dims<4>(),
                input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
                getCPUPtr(mOp.getRawOutput(0)));
 }
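The refCastFrom(fallback, target) calls in Add and Conv share one idiom: return the input untouched when it already matches the output's data type, otherwise materialize a converted copy whose lifetime is held by the fallback object. A generic, self-contained rendering of that idiom; this is not the aidge API, just the pattern it follows:

    #include <cstdint>
    #include <memory>
    #include <vector>

    // A tagged buffer that may hold either float or int32 data.
    struct Buffer {
        bool isFloat;
        std::vector<float> f;
        std::vector<std::int32_t> i;
    };

    // Return float data for `src`: zero-copy when the type already matches,
    // otherwise a converted copy whose lifetime is owned by `fallback`.
    const std::vector<float>& asFloat(const Buffer& src,
                                      std::unique_ptr<std::vector<float>>& fallback) {
        if (src.isFloat) {
            return src.f;  // no conversion, no allocation
        }
        fallback = std::make_unique<std::vector<float>>(src.i.begin(), src.i.end());
        return *fallback;  // converted copy kept alive by the caller's fallback
    }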
@@ -57,17 +57,18 @@ void Aidge::DivImpl_cpu::forward() {
     // 3. Compute the highest number of contiguous data -> 7
     // 4. Compute stride and offset step for the broadcast mechanism
     // 5. Call a simple kernel
+    const auto& opTensor = static_cast<const Div_Op&>(mOp);

     // Find the correct kernel type
     auto kernelFunc = Registrar<DivImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        opTensor.getInput(0)->dataType(),
+        opTensor.getInput(1)->dataType(),
+        opTensor.getOutput(0)->dataType()});

     // Compute compatible input dimensions
-    std::vector<std::size_t> dims0 = static_cast<const Div_Op&>(mOp).getInput(0)->dims();
-    std::vector<std::size_t> dims1 = static_cast<const Div_Op&>(mOp).getInput(1)->dims();
-    const std::vector<std::size_t>& outDims = static_cast<const Div_Op&>(mOp).getOutput(0)->dims();
+    std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims();
+    std::vector<std::size_t> dims1 = opTensor.getInput(1)->dims();
+    const std::vector<std::size_t>& outDims = opTensor.getOutput(0)->dims();

     // if (dims0 == dims1) {
     //     const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
@@ -108,24 +109,24 @@ void Aidge::DivImpl_cpu::forward() {
     const std::size_t output_contiguous_size = std::accumulate(outDims.cbegin()+contiguousIdx, outDims.cend(), std::size_t(1), std::multiplies<std::size_t>());

     // initialize strides to iterate through data because of broadcasting
-    std::size_t *stride_post0;
-    std::size_t *stride_post1;
+    std::int32_t *stride_post0;
+    std::int32_t *stride_post1;
     std::int32_t *stride_step0;
     std::int32_t *stride_step1;
     if (contiguousIdx > 0) {
-        stride_post0 = new std::size_t[contiguousIdx];
+        stride_post0 = new std::int32_t[contiguousIdx];
         stride_post0[contiguousIdx - 1] = 1;
-        stride_post1 = new std::size_t[contiguousIdx];
+        stride_post1 = new std::int32_t[contiguousIdx];
         stride_post1[contiguousIdx - 1] = 1;
         for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
-            stride_post0[i] = stride_post0[i+1]*dims0[i+1];
-            stride_post1[i] = stride_post1[i+1]*dims1[i+1];
+            stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
+            stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
         }
         stride_step0 = new std::int32_t[contiguousIdx];
         stride_step1 = new std::int32_t[contiguousIdx];
         for (std::size_t i = 0; i != contiguousIdx; ++i) {
-            stride_step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post0[i]) : 1;
-            stride_step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post1[i]) : 1;
+            stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
+            stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
         }
     }
...
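To make the stride bookkeeping concrete: stride_post holds the usual row-major strides, and stride_step rewinds the read index on broadcast axes (size 1), so stepping by stride_step re-reads the same input data. A self-contained recomputation of both tables for a small shape, assuming for simplicity that the non-contiguous region covers the whole shape:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        const std::vector<std::size_t> dims = {2, 1, 4};  // middle axis is broadcast
        const std::size_t n = dims.size();
        std::vector<std::int32_t> stride_post(n), stride_step(n);

        stride_post[n - 1] = 1;  // row-major strides, innermost axis has stride 1
        for (std::size_t i = n - 1; i-- > 0; ) {
            stride_post[i] = stride_post[i + 1] * static_cast<std::int32_t>(dims[i + 1]);
        }
        for (std::size_t i = 0; i < n; ++i) {  // broadcast axes rewind instead of advancing
            stride_step[i] = (dims[i] == 1) ? 1 - stride_post[i] : 1;
        }
        for (std::size_t i = 0; i < n; ++i) {
            std::printf("axis %zu: stride_post=%d stride_step=%d\n",
                        i, static_cast<int>(stride_post[i]), static_cast<int>(stride_step[i]));
        }
        // axis 0: stride_post=4 stride_step=1
        // axis 1: stride_post=4 stride_step=-3   (size-1 axis steps back over the 4 re-read elements)
        // axis 2: stride_post=1 stride_step=1
    }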
@@ -9,32 +9,34 @@
  *
  ********************************************************************************/

-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include "aidge/backend/cpu/operator/ErfImpl.hpp"
+
+#include <memory>
+#include <vector>

+#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Erf.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/operator/ErfImpl.hpp"
-#include "aidge/backend/cpu/operator/ErfImpl_forward_kernels.hpp"

 Aidge::NbElts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return 0;
 }

 void Aidge::ErfImpl_cpu::forward() {
+    const Erf_Op& op = static_cast<const Erf_Op&>(mOp);
+
     // Find the correct kernel type
     auto kernelFunc = Registrar<ErfImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        op.getInput(0)->dataType(),
+        op.getOutput(0)->dataType()
+    });

     // Call kernel
-    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+    kernelFunc(
+        op.getInput(0)->size(),
+        op.getInput(0)->getImpl()->rawPtr(),
+        op.getOutput(0)->getImpl()->rawPtr()
+    );
 }
@@ -9,32 +9,34 @@
  *
  ********************************************************************************/

-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include "aidge/backend/cpu/operator/GatherImpl.hpp"
+
+#include <memory>
+#include <vector>

+#include "aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp"
 #include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Gather.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/operator/GatherImpl.hpp"
-#include "aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp"

 Aidge::NbElts_t Aidge::GatherImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return 0;
 }

 void Aidge::GatherImpl_cpu::forward() {
+    const Gather_Op& op = static_cast<const Gather_Op&>(mOp);
+
     auto kernelFunc = Registrar<GatherImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        op.getInput(0)->dataType(),
+        op.getOutput(0)->dataType()
+    });

     // Call kernel
     kernelFunc(dynamic_cast<const Gather_Op&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+               op.getInput(0)->dims(),
+               op.getInput(0)->getImpl()->rawPtr(),
+               op.getOutput(0)->getImpl()->rawPtr()
+    );
 }
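Each forward() above resolves its kernel through Registrar<...>::create, keyed on the input/output data types. A simplified, self-contained sketch of that registry pattern; aidge's Registrar is more general, and the names below are illustrative only:

    #include <cstddef>
    #include <functional>
    #include <map>
    #include <stdexcept>
    #include <utility>

    enum class DataType { Float32, Float64, Int32 };

    using Key    = std::pair<DataType, DataType>;  // (input dtype, output dtype)
    using Kernel = std::function<void(std::size_t, const void*, void*)>;

    std::map<Key, Kernel>& registry() {
        static std::map<Key, Kernel> r;
        return r;
    }

    // Mirrors the Registrar<...>::create({inDType, outDType}) lookups above.
    Kernel create(const Key& key) {
        const auto it = registry().find(key);
        if (it == registry().end()) {
            throw std::runtime_error("no kernel registered for this dtype pair");
        }
        return it->second;
    }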
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "aidge/data/Tensor.hpp"
#include "aidge/backend/cpu/data/TensorImpl.hpp"
#include "aidge/operator/Add.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
namespace Aidge {

TEST_CASE("Test addition of Tensors", "[TensorImpl][Add]") {
    constexpr std::uint16_t NBTRIALS = 10;
    // Create a random number generator
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // random floats in [0.1, 1.1)
    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
    std::uniform_int_distribution<int> boolDist(0, 1);

    // Create Add Operator
    std::shared_ptr<Node> mySub = Add(2);
    auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
    op->setDataType(DataType::Float32);
    op->setBackend("cpu");

    // Create 2 input Tensors
    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
    op->associateInput(0, T0);
    T0->setDataType(DataType::Float32);
    T0->setBackend("cpu");
    std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
    op->associateInput(1, T1);
    T1->setDataType(DataType::Float32);
    T1->setBackend("cpu");

    // Create results Tensor
    Tensor Tres{};
    Tres.setDataType(DataType::Float32);
    Tres.setBackend("cpu");

    // To measure execution time of 'Add_Op::forward()' member function call
    std::chrono::time_point<std::chrono::system_clock> start;
    std::chrono::time_point<std::chrono::system_clock> end;
    std::chrono::duration<double, std::micro> duration{};
    std::size_t number_of_operation = 0;
    for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
        // generate 2 random Tensors
        // handle dimensions, replace some dimensions with '1' to get broadcasting
        constexpr std::size_t nbDims = 4;
        std::vector<std::size_t> dims;
        for (std::size_t i = 0; i < nbDims; ++i) {
            dims.push_back(dimSizeDist(gen));
        }
        std::vector<std::size_t> dims0 = dims;
        std::vector<std::size_t> dims1 = dims;
        std::vector<std::size_t> dimsOut = dims;
        for (std::size_t i = 0; i < nbDims; ++i) {
            if (boolDist(gen)) {
                dims0[i] = 1;
            }
            if (boolDist(gen)) {
                dims1[i] = 1;
            }
            dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
        }

        // create arrays and fill them with random values
        float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
        float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
        float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];

        for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
            array0[i] = valueDist(gen);
        }
        for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
            array1[i] = valueDist(gen);
        }

        // compute true result
        const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
        const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
        for (std::size_t a = 0; a < dimsOut[0]; ++a) {
            for (std::size_t b = 0; b < dimsOut[1]; ++b) {
                const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
                                         + strides0[1] * ((dims0[1] > 1) ? b : 0);
                const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
                                         + strides1[1] * ((dims1[1] > 1) ? b : 0);
                for (std::size_t c = 0; c < dimsOut[2]; ++c) {
                    const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
                    for (std::size_t d = 0; d < dimsOut[3]; ++d) {
                        std::size_t idx0 = idx0_0
                                         + strides0[2] * ((dims0[2] > 1) ? c : 0)
                                         + ((dims0[3] > 1) ? d : 0);
                        std::size_t idx1 = idx1_0
                                         + strides1[2] * ((dims1[2] > 1) ? c : 0)
                                         + ((dims1[3] > 1) ? d : 0);
                        result[idx_out + d] = array0[idx0] + array1[idx1];
                        // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " + " << array1[idx1] << " -> " << idx_out + d << std::endl;
                    }
                }
            }
        }
        // conversion to Aidge::Tensors
        // input0
        T0->resize(dims0);
        T0->getImpl()->setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);

        // input1
        T1->resize(dims1);
        T1->getImpl()->setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);

        // results
        Tres.resize(dimsOut);
        Tres.getImpl()->setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);

        Tensor T2 = *T0 + *T1;
        REQUIRE(T2 == Tres);

        // no implementation
        Tensor T3(T1->dims());
        REQUIRE_THROWS(*T0 + T3);

        // // wrong backend
        // static Registrar<Add_Op> registrarAddImpl_custom("custom", [](const Add_Op& op) { return std::make_unique<AddImpl_cpu>(op); } );
        // static Registrar<Tensor> registrarTensorImpl_custom_Int32({"custom", DataType::Int32},
        //     [] (DeviceIdx_t device, std::vector<DimSize_t> dims) {
        //         return std::make_shared<TensorImpl_cpu<int>>(device, dims);
        //     }
        // );
        // T1.setBackend("custom");
        // REQUIRE_THROWS(T0 + T1);

        // wrong datatype
        Tensor T4(T1->dims());
        T4.setDataType(DataType::Float64);
        REQUIRE_THROWS(*T0 + T4);
    }
}
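The reference computation in this test rests on a single rule: a broadcast axis (size 1) contributes nothing to an input's flat offset. A worked instance of the index arithmetic used above, with hypothetical example values:

    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t dims0[4]    = {2, 1, 3, 1};  // axes 1 and 3 are broadcast
        const std::size_t strides0[4] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
        // Output element (a, b, c, d) = (1, 5, 2, 7) reads input element:
        const std::size_t a = 1, b = 5, c = 2, d = 7;
        const std::size_t idx0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
                               + strides0[1] * ((dims0[1] > 1) ? b : 0)
                               + strides0[2] * ((dims0[2] > 1) ? c : 0)
                               + ((dims0[3] > 1) ? d : 0);
        std::printf("idx0 = %zu\n", idx0);  // 3 + 0 + 2 + 0 = 5
    }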
TEST_CASE("Test substraction of Tensors","[TensorImpl][Sub]") {
Tensor T0 = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 1}, {3, 7}}, {{54, 0}, {7, 12}}}};
Tensor T2 = T0 - T1;
T2.print();
REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{-6,1},{0,-3}},{{-49,6},{0,-4}}}}));
Tensor T3(T1.dims());
REQUIRE_THROWS(T0 - T3);
}
TEST_CASE("Test multiplication of Tensors","[TensorImpl][Mul]") {
Tensor T0 = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 2}, {3, 7}}, {{5, 6}, {7, 8}}}};
Tensor T2 = T0 * T1;
T2.print();
REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{7,4},{9,28}},{{25,36},{49,64}}}}));
Tensor T3(T1.dims());
REQUIRE_THROWS(T0 * T3);
}
TEST_CASE("Test division of Tensors","[TensorImpl][Div]") {
Tensor T0 = Array3D<int, 2, 2, 2>{{{{7,4},{9,28}},{{25,36},{49,64}}}};
Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 2}, {3, 7}}, {{5, 6}, {7, 8}}}};
Tensor T2 = T0 / T1;
T2.print();
REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}}));
Tensor T3(T1.dims());
REQUIRE_THROWS(T0 / T3);
}
} // namespace Aidge