Commit a03ca6a8 authored by Noam Zerah

Merge branch 'dev' of https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu into feat_operator_bitshift

Merging aidge/dev -> feat_operator_bitshift
parents 28ffedb7 290e8042
1 merge request: !88 Added Bitshift Operator
Pipeline #55117 passed
Showing 1502 additions and 89 deletions
@@ -24,6 +24,7 @@ add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}")
# Note : project name is ${CMAKE_PROJECT_NAME} and python module name is also ${CMAKE_PROJECT_NAME}
set(module_name _${CMAKE_PROJECT_NAME}) # target name
set(pybind_module_name ${CMAKE_PROJECT_NAME}) # name of submodule for python bindings
##############################################
# Define options
@@ -69,9 +70,12 @@ set_property(TARGET ${module_name} PROPERTY POSITION_INDEPENDENT_CODE ON)
# PYTHON BINDING
if (PYBIND)
-   # Handles Python + pybind11 headers dependencies
    # Python binding lib is by default installed in <prefix>/python_packages/<package>/
    # When installed from python, setup.py should set it to the python package dir
    set(PYBIND_INSTALL_PREFIX python_packages/${pybind_module_name} CACHE PATH "Python package install prefix")

    include(PybindModuleCreation)
-   generate_python_binding(${CMAKE_PROJECT_NAME} ${module_name})
    generate_python_binding(${pybind_module_name} ${module_name})

    target_link_libraries(${module_name}
        PUBLIC
@@ -128,6 +132,12 @@ install(TARGETS ${module_name} EXPORT ${CMAKE_PROJECT_NAME}-targets
)
install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

if (PYBIND)
    install(TARGETS ${pybind_module_name}
        DESTINATION ${PYBIND_INSTALL_PREFIX}
    )
endif()

#Export the targets to a script
install(EXPORT ${CMAKE_PROJECT_NAME}-targets
    FILE "${CMAKE_PROJECT_NAME}-targets.cmake"
...
@@ -23,33 +23,35 @@ Those operators can be used on any machine with an Linux OS.
pip install . -v
```
> **TIPS :** Use environment variables to change compilation options :
- > - `AIDGE_INSTALL` : to set the installation folder. Defaults to /usr/local/lib. :warning: This path must be identical to aidge_core install path.
> - `AIDGE_INSTALL` : to set the installation folder. Defaults to `<python_prefix>/lib/libAidge`. :warning: This path must be identical to the aidge_core install path.
- > - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug** or **Release**
> - `AIDGE_PYTHON_BUILD_TYPE` : to set the compilation mode to **Debug**, **Release**, or "" (for default flags). Defaults to **Release**.
- > - `AIDGE_BUILD_GEN` : to set the build backend with
> - `AIDGE_BUILD_GEN` : to set the build backend (for development mode), or "" for the CMake default. Defaults to "".

- ### Standard C++ Compilation
- You will need to compile first the Core library before compiling the CPU one.
- The makefile is designed to do it for you.
- To only compile the CPU library, run
- ```
- make cpu_only
- ```
- To compile the CPU library + the associated unitary tests, run
- ```
- make cpu_tests
- ```
- To compile the CPU library with the python binding, run
- ```
- make cpu_with_pybind
- ```
- Important: this command can also be run with `make`.
- To compile the CPU library with the python binding + the associated unitary tests, run
- ```
- make cpu_with_pybind_tests
- ```

## Pip installation for development

To set up using pip in development (or editable) mode, pass the `--no-build-isolation -e` options to pip.

For instance, run the following commands in your Python environment for a typical setup:
``` bash
export AIDGE_PYTHON_BUILD_TYPE= # default flags (no debug info but fastest build time)
export AIDGE_PYTHON_BUILD_TYPE=Debug # or if one really needs to debug the C++ code
pip install -U pip setuptools setuptools_scm[toml] cmake # Pre-install build requirements (refer to the pyproject.toml [build-system] section)
pip install -v --no-build-isolation -e .
```

Refer to `aidge_core/README.md` for more details on development build options.

### Standard C++ Compilation

You will need to compile and install the [Core Library](https://gitlab.eclipse.org/eclipse/aidge/aidge_core) before compiling the CPU one.
Once this has been done, you'll need to run CMake with the `CMAKE_INSTALL_PREFIX:PATH` flag to indicate to CMake where `aidge_core` has been installed:
```sh
cmake -DCMAKE_INSTALL_PREFIX:PATH=$(path_to_install_folder) $(CMAKE PARAMETERS) $(project_root)
make all
```

More detailed information is available in the [Aidge User Guide](https://eclipse.dev/aidge/source/GetStarted/install.html)
@@ -22,6 +22,7 @@
#include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
#include "aidge/backend/cpu/operator/FCImpl.hpp"
...
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#include <cstddef>
#include <memory>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// class ConstantOfShape_op;
class ConstantOfShapeImplForward_cpu
: public Registrable<
ConstantOfShapeImplForward_cpu, std::tuple<DataType>,
void(const std::vector<DimSize_t>, const Tensor&, void *)> {};
class ConstantOfShapeImpl_cpu : public OperatorImpl {
public:
ConstantOfShapeImpl_cpu(const ConstantOfShape_Op &op)
: OperatorImpl(op, "cpu") {}
static std::unique_ptr<ConstantOfShapeImpl_cpu>
create(const ConstantOfShapeImpl_cpu &op) {
return std::make_unique<ConstantOfShapeImpl_cpu>(op);
}
void forward() override;
};
namespace {
static Registrar<ConstantOfShape_Op> registrarConstantOfShapeImpl_cpu(
"cpu", Aidge::ConstantOfShapeImpl_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */
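A hedged usage sketch (not part of the commit) showing how the new CPU implementation is typically driven, mirroring the `ConstantOfShapeImpl_cpu::forward()` unit test further down in this diff. The umbrella include `aidge/backend/cpu.hpp` and the exact helper calls are assumptions based on the files shown in this commit:

```cpp
// Hedged usage sketch, not part of the commit: header paths and the cpu.hpp
// umbrella include are assumptions based on the files shown in this diff.
#include <cstdint>
#include <memory>
#include <vector>

#include "aidge/backend/cpu.hpp"              // registers the CPU implementations
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/Types.h"

int main() {
    using namespace Aidge;

    // Shape input: a 1-D Int64 tensor holding the requested output dims {2, 3}.
    auto shape = std::make_shared<Tensor>(std::vector<DimSize_t>{2});
    shape->setDataType(DataType::Int64);
    shape->setBackend("cpu");
    shape->set<std::int64_t>(0, 2);
    shape->set<std::int64_t>(1, 3);

    // Operator whose "value" attribute (here 0.5) fills the output tensor.
    auto node = ConstantOfShape(Tensor(0.5));
    auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
    op->setDataType(DataType::Float64);
    op->setBackend("cpu");

    op->associateInput(0, shape);
    op->forwardDims(true); // output dims become {2, 3}
    op->forward();         // dispatches to ConstantOfShapeImpl_cpu::forward()
    return op->getOutput(0)->size() == 6 ? 0 : 1;
}
```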
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_FORWARD_KERNEL_H_
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional> // std::multiplies
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class O>
void ConstantOfShapeimpl_cpu_forward_kernel(
const std::vector<DimSize_t> output_dims, const Tensor &value,
void *output_) {
O *output = static_cast<O *>(output_);
O val;
std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
static_cast<O *>(value.getImpl()->hostPtr()) +
static_cast<NbElts_t>(1),
&val);
const size_t output_size = std::accumulate(
output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>());
for (size_t i = 0; i < output_size; ++i) {
output[i] = val;
}
}
// Then we add the Registrar declaration for different input/output types
namespace {
static Registrar<ConstantOfShapeImplForward_cpu>
registrarConstantOfShapeImplForward_cpu_Float16(
{DataType::Float16},
Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>);
static Registrar<ConstantOfShapeImplForward_cpu>
registrarConstantOfShapeImplForward_cpu_Float32(
{DataType::Float32},
Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>);
static Registrar<ConstantOfShapeImplForward_cpu>
registrarConstantOfShapeImplForward_cpu_Float64(
{DataType::Float64},
Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>);
static Registrar<ConstantOfShapeImplForward_cpu>
registrarConstantOfShapeImplForward_cpu_Int16(
{DataType::Int16},
Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>);
static Registrar<ConstantOfShapeImplForward_cpu>
registrarConstantOfShapeImplForward_cpu_Int32(
{DataType::Int32},
Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>);
static Registrar<ConstantOfShapeImplForward_cpu>
registrarConstantOfShapeImplForward_cpu_Int64(
{DataType::Int64}, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_FORWARD_KERNEL_H_ */
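For illustration, a self-contained sketch (not part of the commit) of what the templated kernel above boils down to: compute the number of output elements as the product of the requested dimensions, then fill the buffer with the single attribute value:

```cpp
// Self-contained sketch (not part of the commit) of the fill performed by the
// ConstantOfShape forward kernel above.
#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

template <class O>
void constant_of_shape(const std::vector<std::size_t>& output_dims, O value, O* output) {
    // Output element count is the product of the requested dimensions.
    const std::size_t output_size = std::accumulate(
        output_dims.begin(), output_dims.end(), std::size_t(1), std::multiplies<std::size_t>());
    for (std::size_t i = 0; i < output_size; ++i) {
        output[i] = value;
    }
}

int main() {
    std::vector<float> out(2 * 3);
    constant_of_shape<float>({2, 3}, 0.5f, out.data());
    std::cout << out.size() << " elements set to " << out[0] << '\n'; // 6 elements set to 0.5
    return 0;
}
```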
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// compute kernel registry for forward and backward
class GridSampleImpl1DForward_cpu
: public Registrable<GridSampleImpl1DForward_cpu,
std::tuple<DataType, DataType>,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)> {};
class GridSampleImpl2DForward_cpu
: public Registrable<GridSampleImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)> {};
class GridSampleImpl_cpu : public OperatorImpl {
public:
GridSampleImpl_cpu(const GridSample_Op& op) : OperatorImpl(op, "cpu") {}
static std::unique_ptr<GridSampleImpl_cpu> create(const GridSample_Op &op) {
return std::make_unique<GridSampleImpl_cpu>(op);
}
public:
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to GridSample_Op<1> implementation registry
static Registrar<GridSample_Op> registrarGridSampleImpl_cpu("cpu", Aidge::GridSampleImpl_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_FORWARD_KERNEL_H_
#include <algorithm>  // std::max, std::min
#include <cmath>      // std::fabs, std::truncf, std::nearbyint
#include <cstddef> // std::size_t
#include <cstdint> // std::int64_t
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept {
return (coord > lower_bound) && (coord < upper_bound);
}
static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept {
return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound;
}
// unused
// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept {
// return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1;
// }
static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
: unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
}
// unused
// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
// return align_corners ? normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
// : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
// }
static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, -1.0f, 1.0f)) {
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(-1.0f, coord), 1.0f);
}
else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
float abs_coord = std::fabs(coord);
float int_coord = std::truncf(abs_coord);
std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2);
float res = ((nb_refl + 1)*2) - abs_coord;
coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \
: (nb_refl % 2 == 0 ? -res : res);
}
}
return coord;
}
static inline std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, 0, size)) {
// out of bound. switch padding mode
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(std::int64_t(0), coord), size-std::int64_t(1));
} else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
const std::int64_t quotient = coord / (size-1);
const std::int64_t remainer = std::abs(coord - quotient*(size-1));
coord = (quotient % 2 == 0) ? remainer : size - 1 - remainer;
}
}
return coord;
}
namespace Aidge {
/**
* @brief Forward kernel for 1D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param grid_ const grid Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* const input = static_cast<const I * const>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float* const>(in1->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O* const>(out->getImpl()->rawPtr());
O* output_ptr = output;
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t grid_H = in1->dim(1);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
grid_ptr = grid_ptr_N;
for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) {
output_ptr = output_ptr_N + grid_x*out_H_s;
/*
* change grid_x coord to match padding_mode
* Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
* Handle computation of interpolation
* any value outside bounds is considered 0
* if nearest:
* else if linear:
* else if cubic:
* else : nothing
*/
float x = *grid_ptr;
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
if (in_bound(x_rounded, 0, in_H)) {
input_ptr = input_ptr_N + x_rounded*in_H_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf = in_bound(x_inf, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0);
*output_ptr = static_cast<O>(static_cast<I>(x - x_inf)*f_inf \
+ static_cast<I>(x_sup - x)*f_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode());
const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode());
const I x1 = static_cast<I>(x - static_cast<float>(x_inf));
const I x2 = x1 * x1;
const I x3 = x1 * x2;
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0);
const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0);
const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? input_ptr_NC[x_sup_sup*in_H_s] : I(0);
const I m_inf = (f_sup - f_inf_inf) / I(2);
const I m_sup = (f_sup_sup - f_inf) / I(2);
*output_ptr = f_inf \
+ x1 * m_inf \
+ x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \
+ x3 * (2*(f_inf - f_sup) + m_inf + m_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
grid_ptr += grid_H_s;
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
namespace {
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Float32(
{DataType::Float32, DataType::Float32},
Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>);
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Float16(
{DataType::Float16, DataType::Float16},
Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>);
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::GridSampleImpl1D_cpu_forward_kernel<int, int>);
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>);
/**
 * @brief Forward kernel for 2D GridSample on CPU backend.
 * @tparam I Input data type.
 * @tparam O Output data type.
 * @param params tuple of Attributes from the Operator
 * @param inputDims Array of input dimensions.
 * @param input_ const input Tensor.
 * @param grid_ const grid Tensor.
 * @param output_ Output Tensor.
 */
template <class I, class O>
void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float* const>(in1->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O* const>(out->getImpl()->rawPtr());
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t in_W = in0->dim(3);
const std::size_t grid_H = in1->dim(1);
const std::size_t grid_W = in1->dim(2);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t in_W_s = in0->stride(3);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t grid_W_s = in1->stride(2);
const std::size_t grid_Coord_s = in1->stride(3);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
const std::size_t out_W_s = out->stride(3);
float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) {
for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) {
O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_x*out_W_s;
grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s;
/*
* change grid_x coord to match padding_mode
* Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
* Handle computation of interpolation
* any value outside bounds is considered 0
* if nearest:
* else if linear:
* else if cubic:
* else : nothing
*/
float x = *grid_ptr;
float y = grid_ptr[grid_Coord_s];
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_W, op.alignCorners());
y = update_normalized_coord_with_padding(y, op.paddingMode());
y = unnormalize_grid_sample_coord(y, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
const std::int64_t y_rounded = std::nearbyintf(y);
if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) {
input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_r = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // right
const std::int64_t x_l = update_unnormalized_coord_with_padding(x_r + 1, in_W, op.paddingMode()); // left
const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top
const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
+ static_cast<std::size_t>(x_r)*in_W_s]
: I(0);
const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
+ static_cast<std::size_t>(x_l)*in_W_s]
: I(0);
const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
+ static_cast<std::size_t>(x_r)*in_W_s]
: I(0);
const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
+ static_cast<std::size_t>(x_l)*in_W_s]
: I(0);
// compute weighted sum of the 4 corners
const I w_tr = static_cast<I>((y - static_cast<float>(y_t))*(static_cast<float>(x_r) - x));
const I w_tl = static_cast<I>((y - static_cast<float>(y_t))*(x - static_cast<float>(x_l)));
const I w_br = static_cast<I>((static_cast<float>(y_b) - y)*(static_cast<float>(x_r) - x));
const I w_bl = static_cast<I>((static_cast<float>(y_b) - y)*(x - static_cast<float>(x_l)));
*output_ptr = static_cast<O>(w_tr*f_tr + w_tl*f_tl + w_br*f_br + w_bl*f_bl);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
/*
* .. .. .. .. .. ..
* .. 00 01 02 03 ..
* .. 10 11 12 13 ..
* .. 20 21 22 23 ..
* .. 30 31 32 33 ..
* .. .. .. .. .. ..
*/
const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode());
const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode());
const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode());
const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode());
const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode());
const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode());
const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode());
const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0);
const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0);
const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0);
const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0);
const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0);
const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0);
const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0);
const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0);
const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0);
const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0);
const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0);
const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0);
const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0);
const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0);
const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0);
const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0);
const I mx_11 = (f_21 - f_01) / I(2);
const I mx_12 = (f_22 - f_02) / I(2);
const I mx_21 = (f_31 - f_11) / I(2);
const I mx_22 = (f_32 - f_12) / I(2);
const I my_11 = (f_12 - f_10) / I(2);
const I my_12 = (f_13 - f_11) / I(2);
const I my_21 = (f_22 - f_20) / I(2);
const I my_22 = (f_23 - f_21) / I(2);
const I mxy_11 = (f_22 - f_20 - f_02 + f_00) / I(4);
const I mxy_12 = (f_23 - f_21 - f_03 + f_01) / I(4);
const I mxy_21 = (f_32 - f_30 - f_12 + f_10) / I(4);
const I mxy_22 = (f_33 - f_31 - f_13 + f_11) / I(4);
const I a_00 = f_11;
const I a_10 = mx_11;
const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21;
const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21;
const I a_01 = my_11;
const I a_11 = mxy_11;
const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21;
const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21;
const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12;
const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12;
const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 + I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11);
const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21);
const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12;
const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12;
const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12);
const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22));
const I x2 = static_cast<I>(x*x);
const I x3 = static_cast<I>(x*x*x);
const I y2 = static_cast<I>(y*y);
const I y3 = static_cast<I>(y*y*y);
*output_ptr = static_cast<O>( \
a_00 + a_10*x + a_20*x2 + a_30*x3 \
+ a_01*y + a_11*x*y + a_21*x2*y + a_31*x3*y \
+ a_02*y2 + a_12*x*y2 + a_22*x2*y2 + a_32*x3*y2 \
+ a_03*y3 + a_13*x*y3 + a_23*x2*y3 + a_33*x3*y3);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
}
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32},
Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>);
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Float16(
{DataType::Float16, DataType::Float16},
Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>);
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::GridSampleImpl2D_cpu_forward_kernel<int, int>);
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_FORWARD_KERNEL_H_ */
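A minimal standalone sketch (not part of the commit) of the coordinate mapping the helper functions above implement: a normalized grid coordinate in [-1, 1] is unnormalized to input-index space (the range depends on `align_corners`), then, in `Nearest` mode, rounded and bounds-checked before sampling. The input and grid values below are illustrative only:

```cpp
// Standalone sketch of GridSample's 1D nearest-neighbour lookup, assuming the
// same unnormalization convention as unnormalize_grid_sample_coord() above.
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

static float unnormalize(float coord, std::size_t size, bool align_corners) {
    // [-1, 1] -> [0, size-1] (align_corners) or [-0.5, size-0.5] (otherwise)
    return align_corners ? (coord + 1.f) / 2.f * (static_cast<float>(size) - 1.f)
                         : (coord + 1.f) / 2.f * static_cast<float>(size) - 0.5f;
}

int main() {
    const std::vector<float> input = {10.f, 20.f, 30.f, 40.f};   // in_H = 4
    const std::vector<float> grid  = {-1.f, -0.25f, 0.5f, 1.f};  // normalized x coords
    for (float gx : grid) {
        const float x  = unnormalize(gx, input.size(), /*align_corners=*/true);
        const long  xr = std::lround(x);                         // nearest index
        const bool  inside = xr >= 0 && xr < static_cast<long>(input.size());
        // Out-of-bound samples are treated as 0, as in the kernel above.
        std::cout << gx << " -> " << x << " -> " << (inside ? input[xr] : 0.f) << '\n';
    }
    return 0;
}
```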
@@ -25,11 +25,25 @@ namespace Aidge {
// compute kernel registry for forward and backward
class MulImplForward_cpu
-   : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> {
-   };
    : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&,
                                            const std::vector<std::size_t>&,
                                            const std::vector<std::size_t>&,
                                            const void*,
                                            const void*,
                                            void*)> {};
class MulImplBackward_cpu
-   : public Registrable<MulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
-   };
    : public Registrable<MulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t,
                                            const std::size_t,
                                            const std::size_t,
                                            const std::vector<std::size_t>,
                                            const std::vector<std::size_t>,
                                            const void*,
                                            const void*,
                                            const void*,
                                            void*,
                                            void*)> {};

class MulImpl_cpu : public OperatorImpl {
public:
@@ -40,7 +54,9 @@ public:
    }

    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;

    void forward() override;
    void backward() override;
};

namespace {
...
#ifndef AIDGE_CPU_OPERATOR_MULIMPL_BACKWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_MULIMPL_BACKWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cstdint> // std::int32_t, std::int64_t
#include <algorithm>
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/MulImpl.hpp"
namespace Aidge {
template <class I1, class I2, class O>
void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t grad0Length,
const std::vector<std::size_t> input0Dims,
const std::vector<std::size_t> input1Dims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0,
void* gradientInput1)
{
const auto* input0 = static_cast<const I1*>(input0_);
const auto* input1 = static_cast<const I1*>(input1_);
const auto* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0);
auto* grad_input_1 = static_cast<I2*>(gradientInput1);
if(input0Dims.size() >= input1Dims.size())
{
AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input1Dims, i);
const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
grad_input_1[flattenedIndex] += input0[i] * grad_output[i];
}
} else {
AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
for(auto i = 0U; i < input1Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
}
for(std::size_t i = 0 ; i < grad0Length; ++i)
{
const auto indices = getMultiDimIndices(input0Dims, i);
const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
grad_input_0[flattenedIndex] += input1[i] * grad_output[i];
}
}
}
namespace {
static Registrar<MulImplBackward_cpu> registrarMulImplBackward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::MulImpl_cpu_backward_kernel<float, float, float>);
static Registrar<MulImplBackward_cpu> registrarMulImplBackward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::MulImpl_cpu_backward_kernel<double, double, double>);
static Registrar<MulImplBackward_cpu> registrarMulImplBackward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::MulImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>);
static Registrar<MulImplBackward_cpu> registrarMulImplBackward_cpu_Int64(
{DataType::Int64, DataType::Int64, DataType::Int64},
Aidge::MulImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>);
} // namespace
} // namespace Aidge
#endif
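A standalone numeric sketch (not part of the commit) of the product-rule gradients that `MulImpl_cpu_backward_kernel` computes for the broadcast case exercised by "Case 1" of the Mul backward test later in this diff: with A of shape 2x3, B of shape 3 broadcast along the first axis, and an all-ones output gradient, dL/dA is B broadcast over A's shape and dL/dB accumulates A over the broadcast axis:

```cpp
// Standalone check of the Mul gradients for the 2x3 (A) times broadcast 3 (B) case.
#include <array>
#include <cstddef>
#include <iostream>

int main() {
    const std::array<std::array<float, 3>, 2> A  = {{{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}}};
    const std::array<float, 3>                B  = {0.1f, 0.2f, 0.3f};
    std::array<std::array<float, 3>, 2>       dA = {};
    std::array<float, 3>                      dB = {};

    for (std::size_t i = 0; i < 2; ++i) {
        for (std::size_t j = 0; j < 3; ++j) {
            const float dY = 1.f;        // upstream gradient (all ones in the test)
            dA[i][j]  = B[j] * dY;       // dL/dA = B (broadcast) * dL/dY
            dB[j]    += A[i][j] * dY;    // dL/dB accumulates over the broadcast axis
        }
    }
    std::cout << dA[0][0] << ' ' << dA[0][1] << ' ' << dA[0][2] << '\n'; // 0.1 0.2 0.3
    std::cout << dB[0]    << ' ' << dB[1]    << ' ' << dB[2]    << '\n'; // 5 7 9
    return 0;
}
```

These values match the `T0Grad` and `T1Grad` tensors expected by the test below.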
@@ -131,7 +131,7 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
for (std::uint32_t oy = 0; oy < oySize; ++oy) {
    for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
-       const std::size_t oIndexFull = oIndex + ox*oySize + oy;
        const std::size_t oIndexFull = oIndex + oy*oxSize + ox;

        O outputValue = static_cast<O>(borderValue);
@@ -140,14 +140,14 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
            std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);

            if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) {
-               outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
                outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
            }
        }
        else if (borderType == PadBorderType::Edge) {
            std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])));
            std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])));

-           outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
            outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
        }
        else if (borderType == PadBorderType::Reflect) {
            std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
@@ -162,13 +162,13 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
            if (iy >= static_cast<std::int32_t>(dims[2]))
                iy = static_cast<std::int32_t>(dims[2]) - iy;

-           outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
            outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
        }
        else if (borderType == PadBorderType::Wrap) {
            std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]);
            std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]);

-           outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
            outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
        }

        output[oIndexFull] = outputValue;
...
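The Pad hunks above change every flattened offset from `ix*dims[2] + iy` to `iy*dims[3] + ix`. A minimal sketch (not part of the commit) of why the row index must be scaled by the row length in a row-major (N, C, H, W) layout:

```cpp
// Row-major addressing: offset(oy, ox) = oy * width + ox visits memory contiguously.
#include <cstddef>
#include <iostream>

int main() {
    const std::size_t oySize = 2, oxSize = 3;   // H x W of the padded output
    for (std::size_t oy = 0; oy < oySize; ++oy) {
        for (std::size_t ox = 0; ox < oxSize; ++ox) {
            std::cout << '(' << oy << ',' << ox << ") -> " << oy * oxSize + ox << '\n';
        }
    }
    // Prints offsets 0..5 in scan order; the old ox*oySize + oy expression would
    // jump around memory and collide for non-square outputs.
    return 0;
}
```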
@@ -17,8 +17,7 @@ dynamic = ["version"] # defined in tool.setuptools_scm
requires = [
    "setuptools>=64",
    "setuptools_scm[toml]==7.1.0",
-   "cmake>=3.15.3.post1",
-   "toml"
    "cmake>=3.15.3.post1"
]
build-backend = "setuptools.build_meta"
...
@@ -8,17 +8,13 @@ import multiprocessing
from math import ceil

- import toml
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext

- def get_project_name() -> str:
-     with open(pathlib.Path().absolute() / "pyproject.toml", "r") as file:
-         project_toml = toml.load(file)
-         return project_toml["project"]["name"]
PROJECT_NAME = "aidge_backend_cpu"

SETUP_DIR = pathlib.Path(__file__).parent

class AidgeBuildExtension(Extension):
    def __init__(self, name):
@@ -26,6 +22,15 @@ class AidgeBuildExtension(Extension):

class AidgePkgBuild(build_ext):
    def __init__(self, dist, *args, **kwargs):
        super().__init__(dist, *args, **kwargs)
        # Detect editable_mode for old versions of setuptools
        if not hasattr(self, "editable_mode"):
            if hasattr(dist, "commands"):
                self.editable_mode = "develop" in dist.commands
            else:
                self.editable_mode = False

    def run(self):
        ####################################
        # BUILD PACKAGE
@@ -43,36 +48,34 @@ class AidgePkgBuild(build_ext):
        if not build_lib.exists():
            build_lib.mkdir(parents=True, exist_ok=True)

        package_prefix = build_lib if not self.editable_mode else SETUP_DIR
        pybind_install_prefix = (package_prefix / PROJECT_NAME).absolute()

        os.chdir(str(build_temp))

-         compile_type = (
-             "Release"
-             if "AIDGE_PYTHON_BUILD_TYPE" not in os.environ
-             else os.environ["AIDGE_PYTHON_BUILD_TYPE"]
-         )
        compile_type = os.environ.get("AIDGE_PYTHON_BUILD_TYPE", "Release")

        install_path = (
            os.path.join(sys.prefix, "lib", "libAidge")
            if "AIDGE_INSTALL" not in os.environ
            else os.environ["AIDGE_INSTALL"]
        )

-         # using ninja as default build system to build faster and with the same compiler as on windows
-         build_gen = (
-             ["-G", os.environ["AIDGE_BUILD_GEN"]]
-             if "AIDGE_BUILD_GEN" in os.environ
-             else []
-         )
        build_gen = os.environ.get("AIDGE_BUILD_GEN", "")
        build_gen_opts = (
            ["-G", build_gen]
            if build_gen
            else []
        )

        self.spawn(
            [
                "cmake",
                *build_gen_opts,
                str(cwd),
                "-DTEST=OFF",
                f"-DCMAKE_INSTALL_PREFIX:PATH={install_path}",
                f"-DCMAKE_BUILD_TYPE={compile_type}",
                "-DPYBIND=ON",
                f"-DPYBIND_INSTALL_PREFIX:PATH={pybind_install_prefix}",
                "-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
                "-DCOVERAGE=OFF",
            ]
@@ -85,25 +88,11 @@ class AidgePkgBuild(build_ext):
        self.spawn(["cmake", "--install", ".", "--config", compile_type])
        os.chdir(str(cwd))

-         aidge_package = build_lib / (get_project_name())
-         # Get "aidge core" package
-         # ext_lib = build_temp
-         print(build_temp.absolute())
-         # Copy all shared object files from build_temp/lib to aidge_package
-         for root, _, files in os.walk(build_temp.absolute()):
-             for file in files:
-                 if (file.endswith(".so") or file.endswith(".pyd")) and (
-                     root != str(aidge_package.absolute())
-                 ):
-                     currentFile = os.path.join(root, file)
-                     shutil.copy(currentFile, str(aidge_package.absolute()))

if __name__ == "__main__":
    setup(
        include_package_data=True,
        ext_modules=[AidgeBuildExtension(PROJECT_NAME)],
        cmdclass={
            "build_ext": AidgePkgBuild,
        },
...
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include <functional>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl_forward_kernels.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
void Aidge::ConstantOfShapeImpl_cpu::forward() {
const ConstantOfShape_Op &op_ = static_cast<const ConstantOfShape_Op &>(mOp);
// Check if input is provided
AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__);
// Create the forward kernel with the wanted types
auto kernelFunc = Registrar<ConstantOfShapeImplForward_cpu>::create(
{op_.getOutput(0)->dataType()});
// Call kernel
kernelFunc(op_.getOutput(0)->dims(),
op_.value(),
op_.getOutput(0)->getImpl()->rawPtr());
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include <functional>
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl_forward_kernels.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Types.h"
Aidge::Elts_t Aidge::GridSampleImpl_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
return Elts_t::DataElts(0);
}
void Aidge::GridSampleImpl_cpu::forward() {
const auto& op_ = static_cast<const GridSample_Op&>(mOp);
// Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType();
const Registrar<GridSampleImpl1DForward_cpu>::registrar_key registrarKey = {
op_.getInput(0)->dataType(),
outputDataType};
std::function<void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)> kernelFunc;
const std::size_t nbSpatialFeat = op_.getInput(0)->nbDims();
switch (nbSpatialFeat)
{
case 1:
kernelFunc = Registrar<GridSampleImpl1DForward_cpu>::create(registrarKey);
break;
case 2:
kernelFunc = Registrar<GridSampleImpl2DForward_cpu>::create(registrarKey);
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "No CPU {} kernel available for {} dimensions.", op_.type(), nbSpatialFeat);
break;
}
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::shared_ptr<Tensor> input0Fallback, input1Fallback;
const auto& input0 = std::make_shared<Tensor>(op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)));
const auto& input1 = std::make_shared<Tensor>(op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)));
// Call kernel
kernelFunc(op_,
input0, // input
input1, // grid
op_.getOutput(0) // output
);
}
@@ -22,6 +22,7 @@
#include "aidge/backend/cpu/operator/MulImpl.hpp"
#include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp"
#include "aidge/backend/cpu/operator/MulImpl_backward_kernels.hpp"

Aidge::Elts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
    // this implementation can be in-place
@@ -40,6 +41,7 @@ void Aidge::MulImpl_cpu::forward() {
    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());

    // Call kernel
    kernelFunc(inputDims0,
        inputDims1,
@@ -48,3 +50,32 @@ void Aidge::MulImpl_cpu::forward() {
        getCPUPtr(mOp.getRawInput(1)),
        getCPUPtr(mOp.getRawOutput(0)));
}
void Aidge::MulImpl_cpu::backward() {
const Mul_Op& op_ = dynamic_cast<const Mul_Op&>(mOp);
auto in0 = op_.getInput(0);
auto in1 = op_.getInput(1);
auto in0grad = op_.getInput(0)->grad();
auto in1grad = op_.getInput(1)->grad();
auto out0grad = op_.getOutput(0)->grad();
// Find kernel function
auto kernelFunc = Registrar<MulImplBackward_cpu>::create({
out0grad->dataType(),
in0grad->dataType(),
in1grad->dataType()});
kernelFunc(/* input0Length */ in0grad->size(),
/* input1Length */ in1grad->size(),
/* grad0Length */ out0grad->size(),
/* input0Dims */ in0->dims(),
/* input1Dims */ in1->dims(),
getCPUPtr(in0),
getCPUPtr(in1),
getCPUPtr(out0grad),
getCPUPtr(in0grad),
getCPUPtr(in1grad));
}
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef> // std::size_t
#include <cstdint> // std::uint16_t
#include <iostream>
#include <memory>
#include <numeric> // std::accumulate
#include <ostream>
#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
#include "catch2/internal/catch_compiler_capabilities.hpp"
#include "catch2/internal/catch_enforce.hpp"
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_random.hpp>
#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/TensorUtils.hpp"
#include <aidge/data/Data.hpp>
#include <aidge/data/half.hpp>
#include <aidge/filler/Filler.hpp>
#include <aidge/operator/OperatorTensor.hpp>
#include <aidge/operator/Reshape.hpp>
#include <aidge/utils/TensorUtils.hpp>
#include <aidge/utils/Types.h>
namespace Aidge {
TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator
auto random_seed = Catch::Generators::Detail::getSeed;
std::mt19937 gen(random_seed());
std::uniform_real_distribution<float> valueDist(
    0.1f, 1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
std::size_t(1), std::size_t(10));
std::uniform_int_distribution<int64_t> input_tensor_values_dist(
std::size_t(1), std::size_t(7));
std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.);
///////////////////////////////////////////////
// SETUP FUNCTIONS
auto generate_input_tensor =
[&gen, &input_tensor_size_dist,
&input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
std::vector<DimSize_t> input_dims;
input_dims.push_back(input_tensor_size_dist(gen));
auto result = std::make_shared<Tensor>(input_dims);
result->setDataType(DataType::Int64);
result->setBackend("cpu");
for (DimSize_t i = 0; i < result->size(); ++i) {
result->set<int64_t>(i, input_tensor_values_dist(gen));
}
return result;
};
auto generate_random_operator =
[&gen,
&operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
op->setDataType(DataType::Float64);
op->setBackend("cpu");
return op;
};
auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
std::shared_ptr<ConstantOfShape_Op> op) {
std::vector<DimSize_t> output_dims;
output_dims.reserve(input_tensor->size());
for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
output_dims.push_back(input_tensor->get<int64_t>(i));
}
auto result = std::make_shared<Tensor>(output_dims);
result->setDataType(op->value().dataType());
result->setBackend("cpu");
constantFiller(result, op->value().get<double>(0));
return result;
};
/////////////////////////////////////
// BENCHMARKING
std::chrono::time_point<std::chrono::system_clock> start;
std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{};
int number_of_operation{0};
SECTION("ConstantOfShapeImpl_cpu::forward()") {
for (int i = 0; i < NBTRIALS; ++i) {
auto input_T = generate_input_tensor();
std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator();
auto output_T = generate_output_tensor(input_T, op);
op->associateInput(0, input_T);
REQUIRE(op->forwardDims(true));
REQUIRE_NOTHROW(op->forward());
CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i));
}
CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
}
}
}
} // namespace Aidge
@@ -24,6 +24,337 @@
namespace Aidge {
TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
{
std::shared_ptr<Node> myMul = Mul();
auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
op->setDataType(DataType::Float32);
op->setBackend("cpu");
SECTION("Case 1: 2D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>(
{
{
{1,2,3},{4,5,6}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array1D<float,3>(
{0.1,0.2,0.3}
));
T0->setDataType(DataType::Float32);
T0->setBackend("cpu");
T1->setDataType(DataType::Float32);
T1->setBackend("cpu");
op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}})));
op->associateInput(0,T0);
op->associateInput(1,T1);
op->forwardDims();
myMul->forward();
myMul->backward();
auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}}));
auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9}));
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad));
}
SECTION("Case 2: 3D and 1D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0}
},
{
{7.0, 8.0, 9.0},
{10.0, 11.0, 12.0}
}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1}));
const auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>(
{
{
{
{1, 1, 1},
{1, 1, 1}
},
{
{1, 1, 1},
{1, 1, 1}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
{
{
{
{0.3, 0.2, 0.1},
{0.3, 0.2, 0.1}
},
{
{0.3, 0.2, 0.1},
{0.3, 0.2, 0.1}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>(
{22.0, 26.0, 30.0}
));
for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 3: 4D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
{
{
{
{
{1.0, 2.0, 3.0},
{4.0, 5.0, 6.0},
{7.0, 8.0, 9.0}
},
{
{10.0, 11.0, 12.0},
{13.0, 14.0, 15.0},
{16.0, 17.0, 18.0}
}
},
{
{
{19.0, 20.0, 21.0},
{22.0, 23.0, 24.0},
{25.0, 26.0, 27.0}
},
{
{28.0, 29.0, 30.0},
{31.0, 32.0, 33.0},
{34.0, 35.0, 36.0}
}
}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>(
{
{
{0.5,0.3,0.1},
{0.4,0.2,0.6},
{0.7,0.8,0.9}
}
}
));
const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
{
{
{
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
},
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
}
},
{
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
},
{
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0},
{1.0, 1.0, 1.0}
}
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>(
{
{
{
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
},
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
}
},
{
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
},
{
{0.5, 0.3, 0.1},
{0.4, 0.2, 0.6},
{0.7, 0.8, 0.9}
}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>(
{
{
{58.0, 62.0, 66.0},
{70.0, 74.0, 78.0},
{82.0, 86.0, 90.0}
}
}
));
for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
SECTION("Case 4: 3D and 2D tensors") {
const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>(
{
{
{
{1.0, 2.0, 3.0, 4.0},
{5.0, 6.0, 7.0, 8.0},
{9.0, 10.0, 11.0, 12.0},
},
{
{13.0, 14.0, 15.0, 16.0},
{17.0, 18.0, 19.0, 20.0},
{21.0, 22.0, 23.0, 24.0},
}
}
}
));
const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>(
{
{
{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}
}
}
));
const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>(
{
{
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
},
{
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
{1.0, 1.0, 1.0, 1.0},
}
}
}
));
const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>(
{
{
{
{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}
},
{
{0.1, 0.2, 0.3, 0.4},
{0.5, 0.6, 0.7, 0.8},
{0.9, 1.0, 1.1, 1.2}
}
}
}
));
const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>(
{
{
{14.0, 16.0, 18.0, 20.0},
{22.0, 24.0, 26.0, 28.0},
{30.0, 32.0, 34.0, 36.0}
}
}
));
for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
{
T->setBackend("cpu") ;
T->setDataType(DataType::Float32);
}
op->associateInput(0, T0);
op->associateInput(1, T1);
op->getOutput(0)->setGrad(newGrad);
op->forwardDims();
myMul->backward();
REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
}
}
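The expected gradients in the backward test above follow the standard broadcast rule for element-wise multiplication: each input's gradient is the output gradient multiplied element-wise by the other input, then summed over the axes along which that input was broadcast. For Case 2, with T1 broadcast over the first two axes, this reads

grad_T1[k] = Σ_{a,b} (gradOut ⊙ T0)[a, b, k]

With an all-ones output gradient this reduces to a plain sum of T0 over the broadcast axes; for example the first entry of expectedGrad1 in Case 2 is 1 + 4 + 7 + 10 = 22, and the remaining expected tensors in all four cases can be checked the same way.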
TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
constexpr std::uint16_t NBTRIALS = 10; constexpr std::uint16_t NBTRIALS = 10;
// Create a random number generator // Create a random number generator
...@@ -31,7 +362,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { ...@@ -31,7 +362,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0.1 and 1.1 std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0.1 and 1.1
std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10)); std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5)); std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
std::uniform_int_distribution<int> boolDist(0,1); std::uniform_int_distribution<int> boolDist(0,1);
// Create Mul Operator // Create Mul Operator
...@@ -60,6 +391,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { ...@@ -60,6 +391,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
std::chrono::time_point<std::chrono::system_clock> end; std::chrono::time_point<std::chrono::system_clock> end;
std::chrono::duration<double, std::micro> duration{}; std::chrono::duration<double, std::micro> duration{};
SECTION("MulImpl_cpu::forward()") { SECTION("MulImpl_cpu::forward()") {
SECTION("Scalar / Scalar") { SECTION("Scalar / Scalar") {
...@@ -68,16 +400,20 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { ...@@ -68,16 +400,20 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
} }
SECTION("+1-D Tensor / +1-D Tensor - same dimensions") { SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
std::size_t number_of_operation = 0; std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors // generate 2 random Tensors
const std::size_t nbDims = nbDimsDist(gen); const auto nbDims = nbDimsDist(gen);
std::vector<std::size_t> dims; auto dims = std::vector<std::size_t>{};
for (std::size_t i = 0; i < nbDims; ++i) { for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen)); dims.push_back(dimSizeDist(gen));
} }
const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
const auto nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
number_of_operation += nb_elements; number_of_operation += nb_elements;
// without broadcasting // without broadcasting
...@@ -114,67 +450,101 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") { ...@@ -114,67 +450,101 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
delete[] array0; delete[] array0;
delete[] array1; delete[] array1;
delete[] result; delete[] result;
// with broadcasting
} }
std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl; std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
std::cout << "total time: " << duration.count() << "μs" << std::endl; std::cout << "total time: " << duration.count() << "μs" << std::endl;
} }
SECTION("+1-D Tensor / +1-D Tensor - broadcasting") { SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
std::size_t number_of_operation = 0; std::size_t number_of_operation = 0;
for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) { for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
// generate 2 random Tensors // generate 2 random Tensors
// handle dimensions, replace some dimensions with '1' to get broadcasting // handle dimensions, replace some dimensions with '1' to get broadcasting
constexpr std::size_t nbDims = 4; constexpr std::size_t nbDims = 4;
std::vector<std::size_t> dims; std::vector<std::size_t> dimensions;
for (std::size_t i = 0; i < nbDims; ++i) {
dims.push_back(dimSizeDist(gen)); for (std::size_t i = 0; i < nbDims; ++i)
{
dimensions.push_back(dimSizeDist(gen));
} }
std::vector<std::size_t> dims0 = dims;
std::vector<std::size_t> dims1 = dims; auto dims0 = dimensions;
std::vector<std::size_t> dimsOut = dims; auto dims1 = dimensions;
for (std::size_t i = 0; i < nbDims; ++i) { auto dimsOut = dimensions;
if (boolDist(gen)) {
for (std::size_t i = 0; i < nbDims; ++i)
{
if (boolDist(gen))
{
dims0[i] = 1; dims0[i] = 1;
} }
if (boolDist(gen)) {
if (boolDist(gen))
{
dims1[i] = 1; dims1[i] = 1;
} }
dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i]; dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
} }
for(auto dim : dims0)
{
Log::info("Dimension of input 0 : {}", dim);
}
for(auto dim : dims1)
{
Log::info("Dimension of input 1 : {}", dim);
}
// create arrays and fill them with random values // create arrays and fill them with random values
float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]]; float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]]; float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]]; float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i)
{
array0[i] = valueDist(gen); array0[i] = valueDist(gen);
} }
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i)
{
array1[i] = valueDist(gen); array1[i] = valueDist(gen);
} }
// compute true result // compute true result
const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1}; const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1}; const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
for (std::size_t a = 0; a < dimsOut[0]; ++a) {
for (std::size_t b = 0; b < dimsOut[1]; ++b) { for (std::size_t a = 0; a < dimsOut[0]; ++a)
{
for (std::size_t b = 0; b < dimsOut[1]; ++b)
{
const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0) const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
+ strides0[1] * ((dims0[1] > 1) ? b : 0); + strides0[1] * ((dims0[1] > 1) ? b : 0);
const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0) const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
+ strides1[1] * ((dims1[1] > 1) ? b : 0); + strides1[1] * ((dims1[1] > 1) ? b : 0);
for (std::size_t c = 0; c < dimsOut[2]; ++c) {
for (std::size_t c = 0; c < dimsOut[2]; ++c)
{
const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a)); const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
for (std::size_t d = 0; d < dimsOut[3]; ++d) {
for (std::size_t d = 0; d < dimsOut[3]; ++d)
{
std::size_t idx0 = idx0_0 std::size_t idx0 = idx0_0
+ strides0[2] * ((dims0[2] > 1) ? c : 0) + strides0[2] * ((dims0[2] > 1) ? c : 0)
+ ((dims0[3] > 1) ? d : 0); + ((dims0[3] > 1) ? d : 0);
std::size_t idx1 = idx1_0 std::size_t idx1 = idx1_0
+ strides1[2] * ((dims1[2] > 1) ? c : 0) + strides1[2] * ((dims1[2] > 1) ? c : 0)
+ ((dims1[3] > 1) ? d : 0); + ((dims1[3] > 1) ? d : 0);
result[idx_out + d] = array0[idx0] * array1[idx1]; result[idx_out + d] = array0[idx0] * array1[idx1];
// std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl; // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
} }
......
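The reference result in the broadcasting section above is computed by mapping every output coordinate back into each input, clamping any size-1 (broadcast) dimension to index 0 before applying row-major strides. A self-contained sketch of that index mapping for the 4-D case used in the test (the helper name and signature are illustrative, not part of the Aidge API):
```
// Hypothetical helper mirroring the reference computation in the test above:
// maps output coordinate (a, b, c, d) to the flat index of an input whose
// broadcast dimensions have size 1 (assumes a 4-D, row-major layout).
#include <array>
#include <cstddef>

std::size_t broadcastIndex(const std::array<std::size_t, 4> &dims,
                           std::size_t a, std::size_t b,
                           std::size_t c, std::size_t d) {
    const std::size_t strides[4] = {dims[1] * dims[2] * dims[3],
                                    dims[2] * dims[3],
                                    dims[3],
                                    1};
    return strides[0] * ((dims[0] > 1) ? a : 0)
         + strides[1] * ((dims[1] > 1) ? b : 0)
         + strides[2] * ((dims[2] > 1) ? c : 0)
         + strides[3] * ((dims[3] > 1) ? d : 0);
}
```
With such a helper, the expected output at coordinate (a, b, c, d) is array0[broadcastIndex(dims0, a, b, c, d)] * array1[broadcastIndex(dims1, a, b, c, d)], which is exactly what the nested loops in the test compute inline.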
...@@ -134,7 +134,7 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") { ...@@ -134,7 +134,7 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
SECTION("Asymmetric Pad") { SECTION("Asymmetric Pad") {
const int pv = 0; // pad value const int pv = 0; // pad value
std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv)); std::shared_ptr<Node> myPad = Pad<2>({0, 1, 1, 0}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator()); auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
{ {
......