Commit 5835bf04 authored by Olivier BICHLER

Merged with dev

parents a7d4c88f aa30d4b3
Part of 2 merge requests: !93 Release v0.3.0, !79 Refactor OperatorImpl for backend/export
Pipeline #54467 passed
Showing 1347 additions and 60 deletions
@@ -29,27 +29,15 @@ pip install . -v
### Standard C++ Compilation
You will first need to compile the Core library before compiling the CPU one.
The makefile is designed to do it for you.
You will need to compile and install the [Core Library](https://gitlab.eclipse.org/eclipse/aidge/aidge_core) before compiling the CPU one.
To only compile the CPU library, run
```
make cpu_only
```
Once this has been done, you'll need to run CMake with the
`CMAKE_INSTALL_PREFIX:PATH` flag to indicate to CMake where
`aidge_core` has been installed:
```sh
cmake -DCMAKE_INSTALL_PREFIX:PATH=$(path_to_install_folder) $(CMAKE PARAMETERS) $(project_root)
To compile the CPU library and the associated unit tests, run
```
make cpu_tests
make all
```
To compile the CPU library with the Python binding, run
```
make cpu_with_pybind
```
Important: this command can also be run with `make`.
To compile the CPU library with the Python binding and the associated unit tests, run
```
make cpu_with_pybind_tests
```
More detailed information is available in the [Aidge User Guide](https://eclipse.dev/aidge/source/GetStarted/install.html)
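For reference, a complete out-of-source build might look like the sketch below. The directory names and the install prefix are illustrative assumptions, not taken from this repository; adapt them to your own layout.
```sh
# 1. Build and install aidge_core first (illustrative paths and prefix)
cd aidge_core
mkdir -p build && cd build
cmake -DCMAKE_INSTALL_PREFIX:PATH="$HOME/aidge_install" ..
make -j"$(nproc)" install

# 2. Build the CPU backend and its unit tests against that install
#    (point CMake at the same prefix if it is not found automatically)
cd ../../aidge_backend_cpu
make all
```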
@@ -21,6 +21,7 @@
#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
@@ -43,16 +43,16 @@ void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const st
// Kernels registration to implementation entry point
REGISTRAR(AddImpl_cpu,
{{DataType::Any}, {DataType::Float32}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(AddImpl_cpu,
{{DataType::Any}, {DataType::Float64}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(AddImpl_cpu,
{{DataType::Any}, {DataType::Int32}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
REGISTRAR(AddImpl_cpu,
{{DataType::Any}, {DataType::Int64}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#include <cstddef>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op,
void(const std::vector<DimSize_t>, const Tensor&, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional> // std::multiplies
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class O>
void ConstantOfShapeimpl_cpu_forward_kernel(
const std::vector<DimSize_t> output_dims, const Tensor &value,
void *output_) {
O *output = static_cast<O *>(output_);
O val;
std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
static_cast<O *>(value.getImpl()->hostPtr()) +
static_cast<NbElts_t>(1),
&val);
const size_t output_size = std::accumulate(
output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>());
for (size_t i = 0; i < output_size; ++i) {
output[i] = val;
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */
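As a rough usage sketch of the new operator on the CPU backend (pieced together from the unit test added later in this commit; the function name and the concrete values are illustrative, not part of the source):
```cpp
#include <cstdint>
#include <memory>
#include <vector>

#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"  // registers the "cpu" entry point
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/Types.h"

// Sketch: produce a 2x3 Float64 tensor filled with 4.2 on the CPU backend
// (link against the CPU backend library so the kernels above are registered).
void constantOfShapeSketch() {
    using namespace Aidge;

    // Input 0 holds the requested output shape as Int64 values: {2, 3}.
    auto shape = std::make_shared<Tensor>(std::vector<DimSize_t>{2});
    shape->setDataType(DataType::Int64);
    shape->setBackend("cpu");
    shape->set<std::int64_t>(0, 2);
    shape->set<std::int64_t>(1, 3);

    // The fill value travels as a Tensor attribute of the operator.
    auto node = ConstantOfShape(Tensor(4.2));
    auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
    op->setDataType(DataType::Float64);  // selects the <double> kernel registered above
    op->setBackend("cpu");
    op->associateInput(0, shape);
    op->forwardDims(true);
    op->forward();                       // op->getOutput(0) is now a 2x3 tensor of 4.2
}
```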
@@ -173,13 +173,13 @@ void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
// Kernels registration to implementation entry point
REGISTRAR(FCImpl_cpu,
{{DataType::Any}, {DataType::Float32}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>});
REGISTRAR(FCImpl_cpu,
{{DataType::Any}, {DataType::Float64}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>});
REGISTRAR(FCImpl_cpu,
{{DataType::Any}, {DataType::Int32}},
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>});
} // namespace Aidge
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)>;
// Implementation entry point registration to Operator
REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
@@ -23,7 +23,22 @@
namespace Aidge {
// Operator implementation entry point for the backend
using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>;
void(const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const std::vector<std::size_t>&,
const void*,
const void*,
void*),
void(const std::size_t,
const std::size_t,
const std::size_t,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const void*,
const void*,
const void*,
void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Mul_Op, "cpu", Aidge::MulImpl_cpu::create);
@@ -48,19 +48,79 @@ void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
}
}
template <class I1, class I2, class O>
void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t grad0Length,
const std::vector<std::size_t> input0Dims,
const std::vector<std::size_t> input1Dims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0,
void* gradientInput1)
{
const auto* input0 = static_cast<const I1*>(input0_);
const auto* input1 = static_cast<const I2*>(input1_);
const auto* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0);
auto* grad_input_1 = static_cast<I2*>(gradientInput1);
if(input0Dims.size() >= input1Dims.size())
{
AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
}
} else {
AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input1Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(MulImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, nullptr});
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(MulImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, nullptr});
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(MulImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, Aidge::MulImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>});
REGISTRAR(MulImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr});
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, Aidge::MulImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ */
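For an element-wise product $y = a \odot b$ with broadcasting, the backward kernel above implements the usual product rule: the gradient of the larger input is read element-wise, while the gradient of the broadcast (smaller) input is accumulated over the dimensions it was expanded along. Written out as a sketch of the math (not taken verbatim from the source), with $\beta(i)$ the mapping from a flat output index to the matching flat index in the smaller tensor, as computed by `getMultiDimIndices`/`getFlattenedIndex`:
\[
\frac{\partial L}{\partial a_i} = b_{\beta(i)} \, \frac{\partial L}{\partial y_i},
\qquad
\frac{\partial L}{\partial b_j} = \sum_{i \,:\, \beta(i) = j} a_i \, \frac{\partial L}{\partial y_i}.
\]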
@@ -130,7 +130,7 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
for (std::uint32_t oy = 0; oy < oySize; ++oy) {
for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t oIndexFull = oIndex + oy*oxSize + ox;
O outputValue = static_cast<O>(borderValue);
@@ -139,14 +139,14 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
}
else if (borderType == PadBorderType::Edge) {
std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])));
std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])));
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Reflect) {
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
@@ -161,13 +161,13 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
if (iy >= static_cast<std::int32_t>(dims[2]))
iy = static_cast<std::int32_t>(dims[2]) - iy;
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Wrap) {
std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]);
std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
output[oIndexFull] = outputValue;
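The Pad changes above all replace flat offsets of the form `ix*dims[2] + iy` with `iy*dims[3] + ix`, i.e. they correct the index arithmetic to the row-major NCHW layout, where the width (x) index varies fastest. A minimal sketch of the corrected offset computation (names are illustrative, not taken from the source):
```cpp
#include <cstddef>

// Flat offset inside one (n, c) plane of a row-major NCHW tensor:
// width is the innermost dimension, so offset = y * width + x.
inline std::size_t planeOffset(std::size_t y, std::size_t x, std::size_t width) {
    return y * width + x;
}

// Example: in a 2x3 (H x W) plane, element (y = 1, x = 2) sits at 1 * 3 + 2 = 5.
```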
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include <functional>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::ConstantOfShapeImpl_cpu::forward() {
const ConstantOfShape_Op &op_ = static_cast<const ConstantOfShape_Op &>(mOp);
// Check if input is provided
AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__);
// Find the correct kernel type
const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.forward(op_.getOutput(0)->dims(),
op_.value(),
op_.getOutput(0)->getImpl()->rawPtr());
}
template <>
void Aidge::ConstantOfShapeImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConstantOfShape_Op on backend cpu");
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include <functional>
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Types.h"
template <>
void Aidge::GridSampleImpl_cpu::forward() {
const auto& op_ = static_cast<const GridSample_Op&>(mOp);
// Find the correct kernel type
const auto impl = Registrar<GridSampleImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::shared_ptr<Tensor> input0Fallback, input1Fallback;
const auto& input0 = std::make_shared<Tensor>(op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)));
const auto& input1 = std::make_shared<Tensor>(op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)));
// Call kernel
impl.forward(op_,
input0, // input
input1, // grid
op_.getOutput(0) // output
);
}
template <>
void Aidge::GridSampleImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GridSample_Op on backend cpu");
}
@@ -44,5 +44,26 @@ void Aidge::MulImpl_cpu::forward() {
template <>
void Aidge::MulImpl_cpu::backward() {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Mul_Op on backend cpu");
const Mul_Op& op_ = dynamic_cast<const Mul_Op&>(mOp);
auto in0 = op_.getInput(0);
auto in1 = op_.getInput(1);
auto in0grad = op_.getInput(0)->grad();
auto in1grad = op_.getInput(1)->grad();
auto out0grad = op_.getOutput(0)->grad();
// Find the correct kernel type
const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
// Call kernel
impl.backward(/* input0Length */ in0grad->size(),
/* input1Length */ in1grad->size(),
/* grad0Length */ out0grad->size(),
/* input0Dims */ in0->dims(),
/* input1Dims */ in1->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
getCPUPtr(in0grad),
getCPUPtr(in1grad));
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <ostream>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "catch2/internal/catch_compiler_capabilities.hpp"
#include "catch2/internal/catch_enforce.hpp"
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_random.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include <aidge/data/Data.hpp>
#include <aidge/data/half.hpp>
#include <aidge/filler/Filler.hpp>
#include <aidge/operator/OperatorTensor.hpp>
#include <aidge/operator/Reshape.hpp>
#include <aidge/utils/TensorUtils.hpp>
#include <aidge/utils/Types.h>
namespace Aidge {
TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
auto random_seed = Catch::Generators::Detail::getSeed;
std::mt19937 gen(random_seed());
std::uniform_real_distribution<float> valueDist(
0.1f, 1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
std::size_t(1), std::size_t(10));
std::uniform_int_distribution<int64_t> input_tensor_values_dist(
std::size_t(1), std::size_t(7));
std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.);
///////////////////////////////////////////////
// SETUP FUNCTIONS
auto generate_input_tensor =
[&gen, &input_tensor_size_dist,
&input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
std::vector<DimSize_t> input_dims;
input_dims.push_back(input_tensor_size_dist(gen));
auto result = std::make_shared<Tensor>(input_dims);
result->setDataType(DataType::Int64);
result->setBackend("cpu");
for (DimSize_t i = 0; i < result->size(); ++i) {
result->set<int64_t>(i, input_tensor_values_dist(gen));
}
return result;
};
auto generate_random_operator =
[&gen,
&operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
op->setDataType(DataType::Float64);
op->setBackend("cpu");
return op;
};
auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
std::shared_ptr<ConstantOfShape_Op> op) {
std::vector<DimSize_t> output_dims;
output_dims.reserve(input_tensor->size());
for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
output_dims.push_back(input_tensor->get<int64_t>(i));
}
auto result = std::make_shared<Tensor>(output_dims);
result->setDataType(op->value().dataType());
result->setBackend("cpu");
constantFiller(result, op->value().get<double>(0));
return result;
};
/////////////////////////////////////
// BENCHMARKING
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
int number_of_operation{0};
SECTION("ConstantOfShapeImpl_cpu::forward()") {
for (int i = 0; i < NBTRIALS; ++i) {
auto input_T = generate_input_tensor();
std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator();
auto output_T = generate_output_tensor(input_T, op);
op->associateInput(0, input_T);
REQUIRE(op->forwardDims(true));
REQUIRE_NOTHROW(op->forward());
CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i));
}
CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
}
}
}
} // namespace Aidge
@@ -24,6 +24,337 @@
namespace Aidge {
TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
{
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
SECTION("Case 1: 2D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>(
{
{
{1,2,3},{4,5,6}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array1D<float,3>(
{0.1,0.2,0.3}
));
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}})));
op->associateInput(0,T0);
op->associateInput(1,T1);
op->forwardDims();
myMul->forward();
myMul->backward();
auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}}));
auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9}));
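// Sanity check of the expected values above (illustrative arithmetic, with grad == 1 everywhere):
//   dL/dT0[i][j] = T1[j] * grad[i][j]             -> every row of T0Grad is {0.1, 0.2, 0.3}
//   dL/dT1[j]    = sum_i T0[i][j] * grad[i][j]    -> {1+4, 2+5, 3+6} = {5, 7, 9}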
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad));
}
SECTION("Case 2: 3D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0}
},
{
{7.0, 8.0, 9.0},
{10.0, 11.0, 12.0}
}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1}));
const auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>(
{
{
{
{1, 1, 1},
{1, 1, 1}
},
{
{1, 1, 1},
{1, 1, 1}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
{
{
{
{0.3, 0.2, 0.1},
{0.3, 0.2, 0.1}
},
{
{0.3, 0.2, 0.1},
{0.3, 0.2, 0.1}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>(
{22.0, 26.0, 30.0}
));
for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 3: 4D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
{
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0},
{7.0, 8.0, 9.0}
},
{
{10.0, 11.0, 12.0},
{13.0, 14.0, 15.0},
{16.0, 17.0, 18.0}
}
},
{
{
{19.0, 20.0, 21.0},
{22.0, 23.0, 24.0},
{25.0, 26.0, 27.0}
},
{
{28.0, 29.0, 30.0},
{31.0, 32.0, 33.0},
{34.0, 35.0, 36.0}
}
}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>(
{
{
{0.5,0.3,0.1},
{0.4,0.2,0.6},
{0.7,0.8,0.9}
}
}
));
const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
{
{
{
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
},
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
}
},
{
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
},
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>(
{
{
{
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
},
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
}
},
{
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
},
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>(
{
{
{58.0, 62.0, 66.0},
{70.0, 74.0, 78.0},
{82.0, 86.0, 90.0}
}
}
));
for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 4: 3D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>(
{
{
{
{1.0, 2.0, 3.0, 4.0},
{5.0, 6.0, 7.0, 8.0},
{9.0, 10.0, 11.0, 12.0},
},
{
{13.0, 14.0, 15.0, 16.0},
{17.0, 18.0, 19.0, 20.0},
{21.0, 22.0, 23.0, 24.0},
}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>(
{
{
{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}
}
}
));
const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>(
{
{
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
},
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>(
{
{
{
{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}
},
{
{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>(
{
{
{14.0, 16.0, 18.0, 20.0},
{22.0, 24.0, 26.0, 28.0},
{30.0, 32.0, 34.0, 36.0}
}
}
));
for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
}
TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
@@ -31,7 +31,362 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
std::uniform_int_distribution<int> boolDist(0,1);
// Create MatMul Operator
@@ -60,6 +60,391 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
SECTION("MulImpl_cpu::forward()") {
SECTION("Scalar / Scalar") {
@@ -68,16 +68,400 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
}
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
const std::size_t nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims;
const auto nbDims = nbDimsDist(gen);
auto dims = std::vector<std::size_t>{};
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
}
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
const auto nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements;
// without broadcasting
@@ -114,67 +114,450 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
delete[] array0;
delete[] array1;
delete[] result;
// with broadcasting
}
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl;
}
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting
constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen));
std::vector<std::size_t> dimensions;
for (std::size_t i = 0; i < nbDims; ++i)
{
dimensions.push_back(dimSizeDist(gen));
}
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims;
std::vector<std::size_t> dimsOut = dims;
for (std::size_t i = 0; i < nbDims; ++i) {
if (boolDist(gen)) {
auto dims0 = dimensions;
auto dims1 = dimensions;
auto dimsOut = dimensions;
for (std::size_t i = 0; i < nbDims; ++i)
{
if (boolDist(gen))
{
dims0[i] = 1;
}
if (boolDist(gen)) {
if (boolDist(gen))
{
dims1[i] = 1;
}
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
}
for(auto dim : dims0)
{
Log::info("Dimension of input 0 : {}", dim);
}
for(auto dim : dims1)
{
Log::info("Dimension of input 1 : {}", dim);
}
// create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i)
{
array0[i] = valueDist(gen);
}
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i)
{
array1[i] = valueDist(gen);
}
// compute true result
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) {
for (std::size_t a = 0; a < dimsOut[0]; ++a)
{
for (std::size_t b = 0; b < dimsOut[1]; ++b)
{
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
for (std::size_t c = 0; c < dimsOut[2]; ++c)
{
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
for (std::size_t d = 0; d < dimsOut[3]; ++d)
{
std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
}
@@ -134,7 +134,7 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
SECTION("Asymmetric Pad") {
const int pv = 0; // pad value
std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
std::shared_ptr<Node> myPad = Pad<2>({0, 1, 1, 0}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{