Compare revisions
Commits on Source (19)
Showing with 527 additions and 22 deletions
@@ -31,6 +31,7 @@
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/backend/cpu/operator/FoldImpl.hpp"
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/backend/cpu/operator/LRNImpl.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
#include "aidge/backend/cpu/operator/LnImpl.hpp"
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
@@ -40,6 +41,7 @@
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
......
@@ -48,6 +48,12 @@ REGISTRAR(AddImpl_cpu,
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr});
+REGISTRAR(AddImpl_cpu,
+{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int8}},
+{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, nullptr});
+REGISTRAR(AddImpl_cpu,
+{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::UInt8}},
+{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, nullptr});
REGISTRAR(AddImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
......
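Note: with the two entries registered above, an Add node running on the CPU backend with Int8 or UInt8 tensors should dispatch to the matching kernel. A minimal usage sketch, modeled on the test files later in this diff (the exact Add factory signature and header path are assumptions):

#include <memory>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Add.hpp"  // assumed header for the Add factory

void runInt8Add() {
    using namespace Aidge;
    // Request Int8 on the CPU backend so that kernel dispatch resolves to
    // the newly registered <std::int8_t, std::int8_t> entry.
    std::shared_ptr<Node> myAdd = Add("add");
    auto op = std::static_pointer_cast<OperatorTensor>(myAdd->getOperator());
    op->setDataType(DataType::Int8);
    op->setBackend("cpu");
    // associate Int8 input tensors with op->associateInput(...), then:
    myAdd->forward();
}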
@@ -29,7 +29,7 @@ using BatchNorm2D_Op = BatchNorm_Op<2>;
using BatchNormImpl2D_cpu = OperatorImpl_cpu<BatchNorm_Op<2>,
void(float,
float,
-const std::array<DimSize_t, 4> &,
+const std::vector<DimSize_t> &,
const void *,
const void *,
const void *,
......
@@ -38,7 +38,7 @@ namespace Aidge {
* @param output_ Output Tensor.
*/
template <class I, class P, class O>
-void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims,
+void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::vector<DimSize_t> &dims,
const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
@@ -49,9 +49,8 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std
O *output = static_cast<O *>(output_);
const DimSize_t nbBatch = dims[0];
-const DimSize_t nbChannels = dims[1];
-const DimSize_t featureMapSize = dims[2]*dims[3];
+const DimSize_t nbChannels = (dims.size() > 1) ? dims[1] : 1;
+const DimSize_t featureMapSize = (dims.size() > 2) ? std::accumulate(dims.begin() + 2, dims.end(), 1, std::multiplies<DimSize_t>()) : 1;
if ((freeze == true) || (momentum == 0.0f)) {
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
......
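Note: the change above generalizes the kernel from fixed 4-D inputs to arbitrary rank by folding every dimension after (batch, channel) into a single feature-map size. A self-contained illustration of the folding, standard C++ only:

#include <cstddef>
#include <functional> // std::multiplies
#include <numeric>    // std::accumulate
#include <vector>

// For {N, C, H, W} this returns H*W; for a 2-D {N, C} input it returns 1.
std::size_t featureMapSize(const std::vector<std::size_t>& dims) {
    return (dims.size() > 2)
        ? std::accumulate(dims.begin() + 2, dims.end(), std::size_t(1),
                          std::multiplies<std::size_t>())
        : 1;
}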
@@ -149,7 +149,6 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
-using signedsize = std::make_signed<std::size_t>::type;
const std::size_t outChannels_s = oxSize * oySize;
if (dilated_kernel_x ==3 && dilated_kernel_y == 3) {
@@ -232,13 +231,13 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t oIndexFull = ox*oySize + oy;
-const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
-const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
+const std::size_t ix = ox * strideDims[0];
+const std::size_t iy = oy * strideDims[1];
for (std::size_t sx = 0; sx*dilationDims[0] < dilated_kernel_x; ++sx) {
for (std::size_t sy = 0; sy*dilationDims[1] < dilated_kernel_y; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
-input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))];
+input[iIndex + static_cast<std::size_t>(ix + sx*dilationDims[0])*inputDims[3] + static_cast<std::size_t>(iy + sy*dilationDims[1])];
}
}
}
......
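Note: the signed intermediate type removed above was only useful when an offset could be negative; here ix/iy and the dilation terms are all products of unsigned values, so the index arithmetic can stay entirely in std::size_t. A restatement of the simplified inner-loop index, with names mirroring the hunk above:

#include <cstddef>

// Input element addressed by the depthwise convolution inner loop. Every
// term is unsigned and non-negative, so no signed round-trip is required.
std::size_t inputIndex(std::size_t iIndex, std::size_t ix, std::size_t iy,
                       std::size_t sx, std::size_t sy,
                       std::size_t dilationX, std::size_t dilationY,
                       std::size_t inputWidth) {
    return iIndex + (ix + sx * dilationX) * inputWidth + (iy + sy * dilationY);
}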
@@ -158,7 +158,6 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
const std::size_t outChannels_s = oxSize * oySize;
-using signedsize = std::make_signed<std::size_t>::type;
if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
......
@@ -65,7 +65,7 @@ static float update_normalized_coord_with_padding(float coord, Aidge::GridSample
return coord;
}
-static inline std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
+static std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, 0, size)) {
// out of bound. switch padding mode
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LRNIMPL_H_
#define AIDGE_CPU_OPERATOR_LRNIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/LRN.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using LRNImpl_cpu = OperatorImpl_cpu<LRN_Op,
void(float, float, float, std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(LRN_Op, "cpu", Aidge::LRNImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LRNIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/LRNImpl.hpp"
namespace Aidge {
template <class I, class O>
void LRNImpl_cpu_forward_kernel(float alpha, float beta, float bias, std::size_t size, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const DimSize_t nbBatch = inputDims[0];
const DimSize_t nbChannels = (inputDims.size() > 1) ? inputDims[1] : 1;
const DimSize_t featureMapSize = (inputDims.size() > 2) ? std::accumulate(inputDims.begin() + 2, inputDims.end(), 1, std::multiplies<DimSize_t>()) : 1;
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
// window bounds, clamped to valid channel indices; the casts avoid
// unsigned underflow when ch < size/2
const unsigned int channelMin = static_cast<unsigned int>(
    std::max<int>(0, static_cast<int>(ch) - static_cast<int>(size / 2)));
const unsigned int channelMax = static_cast<unsigned int>(
    std::min<std::size_t>(nbChannels - 1, ch + size / 2));
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
// Accumulate the activation of every channel in the window
O accAcrossChannels(0.0);
for (unsigned int accChannel = channelMin;
     accChannel <= channelMax; ++accChannel)
{
    // index the neighbouring channel, not the current one
    accAcrossChannels += input[(accChannel + batch*nbChannels) * featureMapSize + feature];
}
// Compute the output signal
output[ioIndex + feature] = input[ioIndex + feature]
    / std::pow((bias + (accAcrossChannels * accAcrossChannels) * alpha), beta);
}
}
}
}
REGISTRAR(LRNImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LRNImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(LRNImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LRNImpl_cpu_forward_kernel<double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_ */
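Note: for reference, the ONNX LRN definition accumulates the square of each neighbouring channel and scales alpha by the window size, whereas the kernel above squares the accumulated sum. A scalar sketch of the ONNX formula (standard C++ only; the window clamp is simplified to the symmetric size/2 used above, where ONNX uses floor/ceil of (size-1)/2):

#include <algorithm> // std::min
#include <cmath>     // std::pow
#include <cstddef>
#include <vector>

// ONNX-style LRN across channels at one spatial position:
// y[c] = x[c] / (bias + (alpha / size) * sum_{j in window(c)} x[j]^2)^beta
std::vector<float> lrnAtPosition(const std::vector<float>& x, std::size_t size,
                                 float alpha, float beta, float bias) {
    std::vector<float> y(x.size());
    for (std::size_t c = 0; c < x.size(); ++c) {
        const std::size_t lo = (c >= size / 2) ? c - size / 2 : 0;
        const std::size_t hi = std::min(x.size() - 1, c + size / 2);
        float sqSum = 0.0f;
        for (std::size_t j = lo; j <= hi; ++j) {
            sqSum += x[j] * x[j];
        }
        y[c] = x[c] / std::pow(bias + (alpha / static_cast<float>(size)) * sqSum, beta);
    }
    return y;
}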
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using RoundImpl_cpu = OperatorImpl_cpu<Round_Op,
void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Round_Op, "cpu", Aidge::RoundImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#include <cmath> // std::nearbyint
#include <cstddef> // std::size_t
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
namespace Aidge {
template <class I, class O>
void RoundImpl_cpu_forward_kernel(const std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < inputLength; ++i) {
    // std::round rounds halves away from zero; ONNX Round requires
    // round-half-to-even, which std::nearbyint provides under the default
    // rounding mode. No intermediate float cast, so double inputs keep
    // their full precision.
    output[i] = static_cast<O>(std::nearbyint(input[i]));
}
}
REGISTRAR(RoundImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<float, float>,nullptr});
REGISTRAR(RoundImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<double, double>,nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_ */
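Note: the half-to-even behaviour the kernel relies on is easy to verify: under the default FE_TONEAREST rounding mode, std::nearbyint sends 0.5 to 0 and 2.5 to 2, while std::round sends halves away from zero. A standalone check:

#include <cassert>
#include <cfenv>  // FE_TONEAREST, std::fesetround
#include <cmath>  // std::nearbyint, std::round

int main() {
    std::fesetround(FE_TONEAREST);         // the default rounding mode
    assert(std::nearbyint(0.5f) == 0.0f);  // half-to-even (ONNX Round)
    assert(std::nearbyint(1.5f) == 2.0f);
    assert(std::nearbyint(2.5f) == 2.0f);
    assert(std::round(2.5f) == 3.0f);      // half-away-from-zero
    return 0;
}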
@@ -89,13 +89,13 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
}
REGISTRAR(SliceImpl_cpu,
-{DataType::Float32},
+{{DataType::Float32, DataType::Any}, {DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(SliceImpl_cpu,
-{DataType::Float64},
+{{DataType::Float64, DataType::Any}, {DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(SliceImpl_cpu,
-{DataType::Int32},
+{{DataType::Int32, DataType::Any}, {DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
......
@@ -56,6 +56,12 @@ REGISTRAR(SubImpl_cpu,
REGISTRAR(SubImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr});
+REGISTRAR(SubImpl_cpu,
+{DataType::Int8},
+{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int8_t, std::int8_t, std::int8_t>, nullptr});
+REGISTRAR(SubImpl_cpu,
+{DataType::UInt8},
+{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t, std::uint8_t>, nullptr});
REGISTRAR(SubImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
......
@@ -30,15 +30,13 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
AIDGE_ASSERT(op_.getInput(3), "missing input #3 for BatchNorm Operator");
AIDGE_ASSERT(op_.getInput(4), "missing input #4 for BatchNorm Operator");
-AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, "");
// Find the correct kernel type
const auto impl = Registrar<BatchNormImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(op_.epsilon(),
op_.momentum(),
-op_.getInput(0)->template dims<4>(),
+op_.getInput(0)->dims(),
getCPUPtr(op_.getRawInput(0)),
getCPUPtr(op_.getRawInput(1)),
getCPUPtr(op_.getRawInput(2)),
......
@@ -65,7 +65,6 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
AIDGE_ASSERT(op_.getInput(2), "missing input #2 in ConvDepthWise Operator");
-AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only");
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cassert>
#include <chrono> // std::chrono::milliseconds
#include <numeric> // std::accumulate
#include <thread> // std::this_thread::sleep_for
#include <vector>
#include "aidge/operator/LRN.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/LRNImpl.hpp"
#include "aidge/backend/cpu/operator/LRNImpl_kernels.hpp"
template <>
void Aidge::LRNImpl_cpu::forward() {
const auto& op_ = dynamic_cast<const LRN_Op&>(mOp);
AIDGE_ASSERT(!op_.getInput(0)->empty(), "LRN input empty");
// Find the correct kernel type
const auto impl = Registrar<LRNImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(op_.alpha(),
op_.beta(),
op_.bias(),
op_.size(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
}
template <>
void Aidge::LRNImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for LRN_Op on backend cpu");
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <memory>
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
#include "aidge/backend/cpu/operator/RoundImpl_kernels.hpp"
template <>
void Aidge::RoundImpl_cpu::forward() {
std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
AIDGE_ASSERT(in0, "missing input #0");
// Find the correct kernel type
const auto impl = Registrar<RoundImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(in0->size(),
getCPUPtr(mOp.getRawInput(0)),
getCPUPtr(mOp.getRawOutput(0)));
}
template <>
void Aidge::RoundImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Round_Op on backend cpu");
}
\ No newline at end of file
@@ -124,7 +124,9 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
dims_in[1]; // averaging per channel : 1 addition per element in
// the channel + 1 division this for every batch
// create out nb_elems
-std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+std::vector<std::size_t> dims_out(dims_in.size(), 1);
+dims_out[0] = dims_in[0];
+dims_out[1] = dims_in[1];
const std::size_t out_nb_elems =
std::accumulate(dims_out.cbegin(), dims_out.cend(), std::size_t(1),
std::multiplies<std::size_t>());
@@ -192,7 +194,9 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
// the channel + 1 division this for every batch
// create out nb_elems
-std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+std::vector<std::size_t> dims_out(dims_in.size(), 1);
+dims_out[0] = dims_in[0];
+dims_out[1] = dims_in[1];
const std::size_t out_nb_elems =
std::accumulate(dims_out.cbegin(), dims_out.cend(),
std::size_t(1), std::multiplies<std::size_t>());
@@ -253,7 +257,9 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
SECTION("2D_img") {
const std::vector<DimSize_t> in_dims{batch_size, channels, height,
width};
-const std::vector<DimSize_t> out_dims{batch_size, channels};
+std::vector<std::size_t> out_dims(in_dims.size(), 1);
+out_dims[0] = in_dims[0];
+out_dims[1] = in_dims[1];
DimSize_t in_nb_elems = batch_size * channels * height * width;
DimSize_t out_nb_elems = batch_size * channels;
number_of_operation +=
@@ -368,7 +374,9 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
SECTION("3D_img") {
const std::vector<DimSize_t> in_dims{batch_size, channels, height,
width, depth};
-const std::vector<DimSize_t> out_dims{batch_size, channels};
+std::vector<std::size_t> out_dims(in_dims.size(), 1);
+out_dims[0] = in_dims[0];
+out_dims[1] = in_dims[1];
DimSize_t in_nb_elems =
batch_size * channels * height * width * depth;
number_of_operation +=
......
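Note: the test updates above reflect an output that keeps the input rank, with every reduced axis collapsed to 1 ({N, C, H, W} becomes {N, C, 1, 1} instead of {N, C}). The construction repeated in the four hunks, factored out:

#include <cstddef>
#include <vector>

// Output dims of a rank-preserving global average pooling: batch and
// channel are kept, every remaining (spatial) axis collapses to 1.
std::vector<std::size_t> globalAvgPoolOutDims(const std::vector<std::size_t>& in) {
    std::vector<std::size_t> out(in.size(), 1);
    out[0] = in[0]; // batch
    out[1] = in[1]; // channels
    return out;
}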
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <catch2/catch_test_macros.hpp>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <chrono>
#include <iostream>
#include <memory>
#include <numeric>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <iomanip>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/TensorUtils.hpp"
namespace Aidge {
TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") {
constexpr std::uint16_t NBTRIALS = 15;
// Create a random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(-15, 15);
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
// Create Round Operator
std::shared_ptr<Node> myRound = Round();
auto op = std::static_pointer_cast<OperatorTensor>(myRound-> getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
// Create the input Tensor
std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
op->associateInput(0,T0);
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
// Create results Tensor
std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
Tres->setDataType(DataType::Float32);
Tres->setBackend("cpu");
// To measure execution time of 'Round_Op::forward()' member function call
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("Round [Forward]") {
SECTION("Test Forward Kernel") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate random tensor dimensions
const std::size_t nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
// input values and expected rounded results
float* array0 = new float[nb_elements];
float* result = new float[nb_elements];
for (std::size_t i = 0; i < nb_elements; ++i) {
array0[i] = valueDist(gen);
result[i] = std::nearbyint(array0[i]);
}
// input0
T0->resize(dims);
T0 -> getImpl() -> setRawPtr(array0, nb_elements);
// results
Tres->resize(dims);
Tres -> getImpl() -> setRawPtr(result, nb_elements);
op->forwardDims();
start = std::chrono::system_clock::now();
myRound->forward();
end = std::chrono::system_clock::now();
duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
bool is_eq = approxEq<float>(*(op->getOutput(0)), *Tres);
REQUIRE(is_eq);
delete[] array0;
delete[] result;
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
}
}
} // namespace Aidge
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <cstddef>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include <catch2/catch_test_macros.hpp>
#include "aidge/recipes/Recipes.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/operator/AvgPooling.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/operator/GenericOperator.hpp"
#include "aidge/operator/Producer.hpp"
#include "aidge/graph/OpArgs.hpp"
#include "aidge/scheduler/SequentialScheduler.hpp"
#include "aidge/graph/Matching.hpp"
#include "aidge/utils/TensorUtils.hpp"
using namespace Aidge;
TEST_CASE("[MatMulTiling]") {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(-1.0f, 1.0f);
auto dataProvider = Producer({2, 3, 80, 80}, "dataProvider");
auto w1 = Producer({2, 3, 80, 80}, "w1");
auto matmul1 = MatMul("matmul1");
auto w2 = Producer({2, 3, 80, 80}, "w2");
auto matmul2 = MatMul("matmul2");
auto w3 = Producer({2, 3, 80, 80}, "w3");
auto matmul3 = MatMul("matmul3");
dataProvider->addChild(matmul1, 0, 0);
w1->addChild(matmul1, 0, 1);
matmul1->addChild(matmul2, 0, 0);
w2->addChild(matmul2, 0, 1);
matmul2->addChild(matmul3, 0, 0);
w3->addChild(matmul3, 0, 1);
auto g1 = getConnectedGraphView(matmul1);
g1->setBackend("cpu");
g1->forwardDims();
g1->save("MatMulSplitting_graph");
// Fill random values
fmt::println("Fill random values");
auto tData = std::static_pointer_cast<OperatorTensor>(dataProvider->getOperator())->getOutput(0);
for (size_t i = 0; i < tData->size(); ++i) {
tData->set<float>(i, valueDist(gen));
}
auto tw1 = std::static_pointer_cast<OperatorTensor>(w1->getOperator())->getOutput(0);
for (size_t i = 0; i < tw1->size(); ++i) {
tw1->set<float>(i, valueDist(gen));
}
auto tw2 = std::static_pointer_cast<OperatorTensor>(w2->getOperator())->getOutput(0);
for (size_t i = 0; i < tw2->size(); ++i) {
tw2->set<float>(i, valueDist(gen));
}
auto tw3 = std::static_pointer_cast<OperatorTensor>(w3->getOperator())->getOutput(0);
for (size_t i = 0; i < tw3->size(); ++i) {
tw3->set<float>(i, valueDist(gen));
}
fmt::println("Schedule forward graph");
auto s1 = SequentialScheduler(g1);
s1.forward();
const auto tOut = std::static_pointer_cast<OperatorTensor>(g1->getOrderedOutputs()[0].first->getOperator())->getOutput(0)->clone();
// Tiling
fmt::println("Tiling");
matMulTiling(matmul1, {16, 16});
removeIdentity(g1);
g1->setBackend("cpu");
g1->save("MatMulSplitting_graph_split");
auto gm = SinglePassGraphMatching(g1);
gm.addNodeLambda("16x16", [](const NodePtr& node) {
const auto op =
std::static_pointer_cast<OperatorTensor>(node->getOperator());
const auto dims = op->getOutput(0)->dims();
return (dims.end()[-2] == 16 && dims.end()[-1] == 16);
});
const auto results = gm.match("MatMul[16x16]");
REQUIRE(results.size() == 25);
// Check result
fmt::println("Schedule forward tiled graph");
s1 = SequentialScheduler(g1);
s1.resetScheduling();
s1.forward();
const auto tOutTiled = std::static_pointer_cast<OperatorTensor>(g1->getOrderedOutputs()[0].first->getOperator())->getOutput(0)->clone();
REQUIRE(approxEq<float>(tOut, tOutTiled));
}
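Note: the expected match count follows from the tile arithmetic: only matmul1 is tiled, and its 80x80 output split into 16x16 blocks yields (80/16) * (80/16) = 25 MatMul nodes, matching REQUIRE(results.size() == 25). As a compile-time sanity check:

#include <cstddef>

int main() {
    constexpr std::size_t side = 80, tile = 16;
    // one MatMul node per 16x16 block of the tiled 80x80 output
    constexpr std::size_t nbTiles = (side / tile) * (side / tile);
    static_assert(nbTiles == 25, "5 tiles per axis, 5 x 5 = 25 nodes");
    return 0;
}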