Compare revisions

Showing with 1732 additions and 41 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GlobalAveragePooling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<GlobalAveragePooling_Op,
void(const std::vector<DimSize_t> &, const void *, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(GlobalAveragePooling_Op, "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
#include <cmath>       // std::fma, std::nearbyint
#include <cstddef>
#include <functional>  // std::multiplies
#include <numeric>     // std::accumulate
#include <type_traits> // std::enable_if, std::is_floating_point
#include <vector>
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
stableMean(const T* vec, size_t size) {
T mean = 0;
for (size_t i = 0; i < size; ++i) {
mean = std::fma<T>(vec[i] - mean, 1.0f / (i + 1), mean);
}
return mean;
}
// Specialization for integer types: perform the mean computation in double precision
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, T>::type
stableMean(const T* vec, size_t size) {
double mean = 0;
for (size_t i = 0; i < size; ++i) {
mean = std::fma<double>(vec[i] - mean, 1.0f / (i + 1), mean);
}
return mean;
}
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
castFromFloat(T value) {
return value;
}
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, T>::type
castFromFloat(double value) {
return static_cast<T>(std::nearbyint(value));
}
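The two stableMean overloads above accumulate a running mean with std::fma so the intermediate value never grows with the number of elements. A standalone sketch of the same recurrence (not part of the diff; plain C++ for illustration):

#include <cstdio>
// Running-mean recurrence used by stableMean: mean_{i+1} = mean_i + (x_i - mean_i) / (i + 1)
int main() {
    const float values[4] = {1.f, 2.f, 3.f, 4.f};
    float mean = 0.f;
    for (int i = 0; i < 4; ++i) {
        mean += (values[i] - mean) / static_cast<float>(i + 1);
    }
    std::printf("mean = %f\n", mean); // 2.5
    return 0;
}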
template <class I, class O>
void GlobalAveragePoolingImpl_cpu_forward_kernel(
const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
// error checking
AIDGE_ASSERT(dims.size() >= 3, "GlobalAveragePooling needs an input with at least 3 dimensions, got {}",
dims.size());
// computation
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
DimSize_t nb_elems = std::accumulate(dims.begin(), dims.end(), std::size_t(1),
std::multiplies<std::size_t>());
const DimSize_t in_batch_nb_elems{nb_elems / dims[0]};
const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]};
const DimSize_t out_batch_nb_elems{dims[1]};
// parse channel by channel and fill each output with the average of the
// values in the channel
for (DimSize_t batch = 0; batch < dims[0]; ++batch) {
for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
const I *filter_start = std::next(
input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems));
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ */
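A minimal usage sketch for the forward kernel above (not part of the diff; assumes this header is included, values and the helper name are illustrative): averaging a 1x2x4 input yields one value per (batch, channel) pair.

// Illustrative only: global average over the spatial dimension of a 1x2x4 input.
void globalAveragePoolingExample() {
    std::vector<Aidge::DimSize_t> dims{1, 2, 4};
    const float input[8] = {1.f, 2.f, 3.f, 4.f, 10.f, 20.f, 30.f, 40.f};
    float output[2] = {0.f, 0.f};
    Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>(dims, input, output);
    // output[0] == 2.5f (channel 0), output[1] == 25.f (channel 1)
}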
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)>;
// Implementation entry point registration to Operator
REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_KERNELS_H_
#include <algorithm> // std::max, std::min
#include <cmath>     // std::fabs, std::truncf, std::nearbyint, std::floor
#include <cstddef>   // std::size_t
#include <cstdint>   // std::int32_t, std::int64_t
#include <cstdlib>   // std::abs
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept {
return (coord > lower_bound) && (coord < upper_bound);
}
static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept {
return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound;
}
// unused
// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept {
// return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1;
// }
static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
: unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
}
// unused
// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
// return align_corners ? normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
// : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
// }
static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, -1.0f, 1.0f)) {
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(-1.0f, coord), 1.0f);
}
else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
float abs_coord = std::fabs(coord);
float int_coord = std::truncf(abs_coord);
std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2);
float res = ((nb_refl + 1)*2) - abs_coord;
coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \
: (nb_refl % 2 == 0 ? -res : res);
}
}
return coord;
}
static std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, 0, size)) {
// out of bound. switch padding mode
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(std::int64_t(0), coord), size-std::int64_t(1));
} else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
const std::int64_t quotient = coord / (size-1);
const std::int64_t remainer = std::abs(coord - quotient*(size-1));
coord = (quotient % 2 == 0) ? remainer : size - 1 - remainer;
}
}
return coord;
}
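The helpers above map a normalized grid coordinate from [-1, 1] into pixel space, either onto [0, size-1] (align_corners) or onto [-0.5, size-0.5]. A standalone worked sketch of that mapping (not part of the diff):

#include <cstdio>
int main() {
    const float coord = 0.5f; // normalized grid coordinate in [-1, 1]
    const float size = 4.0f;  // number of pixels along the axis
    const float aligned     = (coord + 1.f) / 2.f * (size - 1.f); // align_corners == true  -> 2.25
    const float not_aligned = (coord + 1.f) / 2.f * size - 0.5f;  // align_corners == false -> 2.5
    std::printf("aligned=%f not_aligned=%f\n", aligned, not_aligned);
    return 0;
}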
namespace Aidge {
/**
* @brief Forward kernel for 1D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param op GridSample Operator, providing the mode, padding_mode and align_corners attributes.
* @param in0 const input Tensor.
* @param in1 const grid Tensor.
* @param out Output Tensor.
*/
template <class I, class O>
void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* const input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float*>(in1->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
O* output_ptr = output;
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t grid_H = in1->dim(1);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
grid_ptr = grid_ptr_N;
for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) {
output_ptr = output_ptr_N + grid_x*out_H_s;
/*
* change grid_x coord to match padding_mode
* Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
* Handle computation of interpolation
* any value outside bounds is considered 0
* if nearest:
* else if linear:
* else if cubic:
* else : nothing
*/
float x = *grid_ptr;
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
if (in_bound(x_rounded, 0, in_H)) {
input_ptr = input_ptr_N + x_rounded*in_H_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf = in_bound(x_inf, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0);
*output_ptr = static_cast<O>(static_cast<I>(x_sup - x)*f_inf \
+ static_cast<I>(x - x_inf)*f_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode());
const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode());
const I x1 = static_cast<I>(x - static_cast<float>(x_inf));
const I x2 = x1 * x1;
const I x3 = x1 * x2;
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0);
const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0);
const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? input_ptr_NC[x_sup_sup*in_H_s] : I(0);
const I m_inf = (f_sup - f_inf_inf) / I(2);
const I m_sup = (f_sup_sup - f_inf) / I(2);
*output_ptr = f_inf \
+ x1 * m_inf \
+ x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \
+ x3 * (2*(f_inf - f_sup) + m_inf + m_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
grid_ptr += grid_H_s;
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
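The Cubic branch above evaluates a cubic Hermite polynomial between the two inner samples, with tangents taken as central differences (a Catmull-Rom spline). A standalone sketch of that interpolation step (not part of the diff):

#include <cstdio>
// Cubic Hermite step between f1 (t = 0) and f2 (t = 1),
// with Catmull-Rom tangents computed from the outer samples f0 and f3.
static float catmull_rom(float f0, float f1, float f2, float f3, float t) {
    const float m1 = (f2 - f0) / 2.f;
    const float m2 = (f3 - f1) / 2.f;
    const float t2 = t * t;
    const float t3 = t2 * t;
    return f1 + m1 * t
         + t2 * (3.f * (f2 - f1) - 2.f * m1 - m2)
         + t3 * (2.f * (f1 - f2) + m1 + m2);
}
int main() {
    std::printf("%f\n", catmull_rom(1.f, 2.f, 4.f, 8.f, 0.5f)); // value halfway between the samples 2 and 4
    return 0;
}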
// Kernels registration to implementation entry point
// only accept 1st input with only 1 spatial feat. (nb dims = 1)
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
/**
* @brief Forward kernel for 2D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param op GridSample Operator, providing the mode, padding_mode and align_corners attributes.
* @param in0 const input Tensor.
* @param in1 const grid Tensor.
* @param out Output Tensor.
*/
template <class I, class O>
void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* input = static_cast<const I *>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
float* const grid = static_cast<float*>(in1->getImpl()->rawPtr());
float* grid_ptr = grid;
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t in_W = in0->dim(3);
const std::size_t grid_H = in1->dim(1);
const std::size_t grid_W = in1->dim(2);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t in_W_s = in0->stride(3);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t grid_W_s = in1->stride(2);
const std::size_t grid_Coord_s = in1->stride(3);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
const std::size_t out_W_s = out->stride(3);
float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) {
for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) {
O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_x*out_W_s;
grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s;
/*
* change grid_x coord to match padding_mode
* Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
* Handle computation of interpolation
* any value outside bounds is considered 0
* if nearest:
* else if linear:
* else if cubic:
* else : nothing
*/
float x = *grid_ptr;
float y = grid_ptr[grid_Coord_s];
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_W, op.alignCorners());
y = update_normalized_coord_with_padding(y, op.paddingMode());
y = unnormalize_grid_sample_coord(y, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
const std::int64_t y_rounded = std::nearbyintf(y);
if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) {
input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_l = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // left
const std::int64_t x_r = update_unnormalized_coord_with_padding(x_l + 1, in_W, op.paddingMode()); // right
const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top
const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
+ static_cast<std::size_t>(x_l)*in_W_s]
: I(0);
const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
+ static_cast<std::size_t>(x_r)*in_W_s]
: I(0);
const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
+ static_cast<std::size_t>(x_l)*in_W_s]
: I(0);
const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
+ static_cast<std::size_t>(x_r)*in_W_s]
: I(0);
// bilinear weights: each corner is weighted by the area of the rectangle
// spanned by the sample point and the opposite corner
const I w_tl = static_cast<I>((static_cast<float>(x_r) - x)*(static_cast<float>(y_b) - y));
const I w_tr = static_cast<I>((x - static_cast<float>(x_l))*(static_cast<float>(y_b) - y));
const I w_bl = static_cast<I>((static_cast<float>(x_r) - x)*(y - static_cast<float>(y_t)));
const I w_br = static_cast<I>((x - static_cast<float>(x_l))*(y - static_cast<float>(y_t)));
*output_ptr = static_cast<O>(w_tl*f_tl + w_tr*f_tr + w_bl*f_bl + w_br*f_br);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
/*
* .. .. .. .. .. ..
* .. 00 01 02 03 ..
* .. 10 11 12 13 ..
* .. 20 21 22 23 ..
* .. 30 31 32 33 ..
* .. .. .. .. .. ..
*/
const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode());
const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode());
const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode());
const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode());
const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode());
const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode());
const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode());
const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0);
const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0);
const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0);
const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0);
const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0);
const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0);
const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0);
const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0);
const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0);
const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0);
const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0);
const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0);
const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0);
const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0);
const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0);
const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0);
const I mx_11 = (f_21 - f_01) / I(2);
const I mx_12 = (f_22 - f_02) / I(2);
const I mx_21 = (f_31 - f_11) / I(2);
const I mx_22 = (f_32 - f_12) / I(2);
const I my_11 = (f_12 - f_10) / I(2);
const I my_12 = (f_13 - f_11) / I(2);
const I my_21 = (f_22 - f_20) / I(2);
const I my_22 = (f_23 - f_21) / I(2);
const I mxy_11 = (f_22 - f_20 - f_02 + f_00) / I(4);
const I mxy_12 = (f_23 - f_21 - f_03 + f_01) / I(4);
const I mxy_21 = (f_32 - f_30 - f_12 + f_10) / I(4);
const I mxy_22 = (f_33 - f_31 - f_13 + f_11) / I(4);
const I a_00 = f_11;
const I a_10 = mx_11;
const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21;
const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21;
const I a_01 = my_11;
const I a_11 = mxy_11;
const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21;
const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21;
const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12;
const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12;
const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 + I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11);
const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21);
const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12;
const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12;
const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12);
const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22));
// evaluate the patch at the fractional offset inside the (x_1, y_1) cell
const I tx = static_cast<I>(x - static_cast<float>(x_1));
const I tx2 = tx * tx;
const I tx3 = tx2 * tx;
const I ty = static_cast<I>(y - static_cast<float>(y_1));
const I ty2 = ty * ty;
const I ty3 = ty2 * ty;
*output_ptr = static_cast<O>( \
a_00 + a_10*tx + a_20*tx2 + a_30*tx3 \
+ a_01*ty + a_11*tx*ty + a_21*tx2*ty + a_31*tx3*ty \
+ a_02*ty2 + a_12*tx*ty2 + a_22*tx2*ty2 + a_32*tx3*ty2 \
+ a_03*ty3 + a_13*tx*ty3 + a_23*tx2*ty3 + a_33*tx3*ty3);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
}
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
// Kernels registration to implementation entry point
// only accept 1st input with only 2 spatial feat. (nb dims = 2)
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(GridSampleImpl_cpu,
{{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}},
{ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_KERNELS_H_ */
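The Linear branch of the 2D kernel blends the four neighbouring pixels with bilinear weights, each corner weighted by the area of the rectangle opposite to it. A standalone sketch on a single 2x2 patch (not part of the diff):

#include <cstdio>
int main() {
    // 2x2 patch f[y][x], sampled at (x, y) = (0.25, 0.75) inside the cell.
    const float f[2][2] = {{0.f, 10.f}, {20.f, 30.f}};
    const float x = 0.25f, y = 0.75f;
    const float out = f[0][0] * (1.f - x) * (1.f - y)
                    + f[0][1] * x         * (1.f - y)
                    + f[1][0] * (1.f - x) * y
                    + f[1][1] * x         * y;
    std::printf("%f\n", out); // 17.5
    return 0;
}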
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Heaviside.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/future_std/span.hpp"
namespace Aidge {
using HeavisideImplCpu =
OperatorImpl_cpu<Heaviside_Op,
void(std::size_t, const void *, void *, const float),
void(const float, std::size_t, const void *, void *)>;
// Implementation entry point registration for operator Heaviside
REGISTRAR(Heaviside_Op, "cpu", HeavisideImplCpu::create);
} // namespace Aidge
#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_H_
/********************************************************************************
* Copyright (c) 2025 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/operator/HeavisideImpl.hpp"
#include "aidge/utils/ErrorHandling.hpp"
namespace Aidge {
template <class I, class O>
void HeavisideImplCpuForwardKernel(std::size_t inputLenght,
const void *input_,
void *output_,
const float value) {
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = (input[i] > 0) ? 1 : (input[i] == 0 ? value : 0);
}
}
// Kernels registration to implementation entry point
REGISTRAR(HeavisideImplCpu,
{DataType::Float32},
{ProdConso::inPlaceModel,
Aidge::HeavisideImplCpuForwardKernel<float, float>,
nullptr});
} // namespace Aidge
#endif // AIDGE_CPU_OPERATOR_HEAVISIDEIMPL_KERNELS_H_
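A minimal usage sketch for the Heaviside forward kernel above (not part of the diff; assumes this header is included, values and the helper name are illustrative):

// Illustrative only: Heaviside step returning `value` at the origin.
void heavisideExample() {
    const float input[4] = {-2.f, 0.f, 0.f, 3.f};
    float output[4];
    Aidge::HeavisideImplCpuForwardKernel<float, float>(4, input, output, 0.5f);
    // output == {0.f, 0.5f, 0.5f, 1.f}
}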
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LRNIMPL_H_
#define AIDGE_CPU_OPERATOR_LRNIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/LRN.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using LRNImpl_cpu = OperatorImpl_cpu<LRN_Op,
void(float, float, float, std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(LRN_Op, "cpu", Aidge::LRNImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LRNIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include <functional> // std::multiplies
#include <numeric>    // std::accumulate
#include <vector>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/LRNImpl.hpp"
namespace Aidge {
template <class I, class O>
void LRNImpl_cpu_forward_kernel(float alpha, float beta, float bias, std::size_t size, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const DimSize_t nbBatch = inputDims[0];
const DimSize_t nbChannels = (inputDims.size() > 1) ? inputDims[1] : 1;
const DimSize_t featureMapSize = (inputDims.size() > 2) ? std::accumulate(inputDims.begin() + 2, inputDims.end(), 1, std::multiplies<DimSize_t>()) : 1;
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
const unsigned int channelMin
= std::max<int>(0, ch - size / 2);
const unsigned int channelMax
= std::min<size_t>(nbChannels - 1, ch + size / 2);
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
// Accumulate the squared values of the neighbouring channels
O accAccrossChannels(0.0);
for (unsigned int accChannel = channelMin;
accChannel <= channelMax; ++accChannel)
{
const I value = input[(accChannel + batch*nbChannels) * featureMapSize + feature];
accAccrossChannels += value * value;
}
// Compute the output signal
output[ioIndex + feature] = input[ioIndex + feature]
/ std::pow((bias + accAccrossChannels * alpha), beta);
}
}
}
}
REGISTRAR(LRNImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LRNImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(LRNImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LRNImpl_cpu_forward_kernel<double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LRNIMPL_KERNELS_H_ */
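The kernel above divides each value by a power of the accumulated squared activations of its channel neighbourhood. A standalone sketch of the denominator for a single position (not part of the diff; values illustrative):

#include <cmath>
#include <cstdio>
int main() {
    // One spatial position, 3 channels in the neighbourhood of channel 1.
    const float x[3] = {1.f, 2.f, 3.f};
    const float alpha = 1.0e-4f, beta = 0.75f, bias = 2.f;
    float sq_sum = 0.f;
    for (int c = 0; c < 3; ++c) sq_sum += x[c] * x[c]; // 14
    const float y = x[1] / std::pow(bias + alpha * sq_sum, beta);
    std::printf("%f\n", y);
    return 0;
}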
@@ -12,52 +12,30 @@
#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_
#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/LeakyReLU.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class LeakyReLU_Op;
// compute kernel registry for forward and backward
class LeakyReLUImplForward_cpu
: public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
};
class LeakyReLUImplBackward_cpu
: public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
};
class LeakyReLUImpl_cpu : public OperatorImpl {
private:
const LeakyReLU_Op& mOp;
std::array<NbElts_t, 1> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
LeakyReLUImpl_cpu(const LeakyReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
static std::unique_ptr<LeakyReLUImpl_cpu> create(const LeakyReLU_Op& op) {
return std::make_unique<LeakyReLUImpl_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void backward();
};
namespace {
static Registrar<LeakyReLU_Op> registrarLeakyReLUImpl_cpu("cpu", Aidge::LeakyReLUImpl_cpu::create);
}
// Operator implementation entry point for the backend
using LeakyReLUImpl_cpu = OperatorImpl_cpu<LeakyReLU_Op,
void(const float,
std::size_t,
const void*,
void*),
void(const float,
std::size_t,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(LeakyReLU_Op, "cpu", Aidge::LeakyReLUImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
namespace Aidge {
template <class I, class O>
void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
std::size_t inputLenght,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const I negativeSlope = static_cast<const I>(negativeSlope_);
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope;
}
}
template <class I, class O>
void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
std::size_t inputLenght,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const I negativeSlope = static_cast<const I>(negativeSlope_);
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i];
}
}
// Kernels registration to implementation entry point
REGISTRAR(LeakyReLUImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>, Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>});
REGISTRAR(LeakyReLUImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>, Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>});
REGISTRAR(LeakyReLUImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ */
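A minimal usage sketch for the LeakyReLU forward kernel above (not part of the diff; assumes this header is included, values and the helper name are illustrative):

// Illustrative only: LeakyReLU with a negative slope of 0.1.
void leakyReLUExample() {
    const float input[4] = {-2.f, -0.5f, 0.f, 3.f};
    float output[4];
    Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>(0.1f, 4, input, output);
    // output == {-0.2f, -0.05f, 0.f, 3.f}
}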
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_
#define AIDGE_CPU_OPERATOR_LNIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Ln.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using LnImpl_cpu = OperatorImpl_cpu<Ln_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Ln_Op, "cpu", Aidge::LnImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/LnImpl.hpp"
namespace Aidge {
template <class I, class O>
void LnImpl_cpu_forward_kernel(std::size_t inputLenght,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const float eps = 1.0e-20f;
//#pragma omp parallel for if (inputLenght > 1024)
for (std::size_t i = 0; i < inputLenght; ++i) {
if (input[i] > I(eps)) {
output[i] = std::log(input[i]);
} else {
output[i] = std::log(I(eps));
}
}
}
template <class I, class GI, class GO>
void LnImpl_cpu_backward_kernel(const std::size_t inputLenght,
const void* input_, const void* grad_output_,
void* grad_input_) {
const I* input = static_cast<const I*>(input_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
const float eps = 1.0e-20f;
for (std::size_t i = 0; i < inputLenght; ++i) {
if (input[i] > I(eps)) {
grad_input[i] = grad_output[i] / input[i];
} else {
grad_input[i] = GI(0);
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(LnImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<float, float>, Aidge::LnImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(LnImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<double, double>, Aidge::LnImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ */
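A minimal usage sketch for the Ln forward kernel above (not part of the diff; assumes this header is included, the helper name is illustrative). Inputs at or below the eps threshold are clamped, as in the kernel:

// Illustrative only: natural logarithm with eps clamping.
void lnExample() {
    const float input[3] = {1.f, 2.718281828f, 0.f};
    float output[3];
    Aidge::LnImpl_cpu_forward_kernel<float, float>(3, input, output);
    // output[0] == 0.f, output[1] ~= 1.f, output[2] == std::log(1.0e-20f)
}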
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_H_
#define AIDGE_CPU_OPERATOR_MATMULIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/MatMul.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using MatMulImpl_cpu = OperatorImpl_cpu<MatMul_Op,
void(const std::size_t, const std::size_t, const std::size_t,
const void *, const void *, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(MatMul_Op, "cpu", Aidge::MatMulImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
namespace Aidge {
template <class I, class O>
void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
const void* input1_, const void* input2_, void* __restrict output_) {
// FIXME: missing MatMul parameters as arguments
const I* input1 = static_cast<const I*>(input1_);
const I* input2 = static_cast<const I*>(input2_);
O* __restrict output = static_cast<O* __restrict>(output_);
std::memset(output, O(0), n * m * sizeof(O));
for (std::size_t i = 0; i < n; ++i) {
for (std::size_t l = 0; l < k; ++l) {
for (std::size_t j = 0; j < m; ++j) {
output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(MatMulImpl_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(MatMulImpl_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(MatMulImpl_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ */
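A minimal usage sketch for the naive MatMul kernel above: a (2x3) x (3x2) product with n = 2, k = 3, m = 2 (not part of the diff; assumes this header is included, values and the helper name are illustrative):

// Illustrative only: C = A(2x3) * B(3x2), all matrices row-major.
void matMulExample() {
    const float A[6] = {1.f, 2.f, 3.f,
                        4.f, 5.f, 6.f};
    const float B[6] = { 7.f,  8.f,
                         9.f, 10.f,
                        11.f, 12.f};
    float C[4];
    Aidge::MatMulImpl_cpu_forward_kernel<float, float>(2, 3, 2, A, B, C);
    // C == {58.f, 64.f, 139.f, 154.f}
}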
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_
#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/MaxPooling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using MaxPooling2D_Op = MaxPooling_Op<2>;
using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const bool,
const std::array<DimSize_t, 4> &,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(MaxPooling2D_Op, "cpu", Aidge::MaxPoolingImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_
#include <algorithm>   // std::max, std::min
#include <array>
#include <cmath>
#include <tuple>
#include <type_traits> // std::make_signed
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Data.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
* @brief Forward kernel for 2D MaxPooling on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param strideDims Stride along each spatial dimension.
* @param kernelDims Kernel size along each spatial dimension.
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& kernelDims,
const bool /*ceilMode*/,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
// FIXME: missing pooling parameters as arguments
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation parameter into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * strideDims[1];
I poolValue(0.0);
bool valid = false;
// the channel is fixed by the outer loop; scan the pooling window only
for (unsigned int sy = syMin; sy < syMax; ++sy) {
for (unsigned int sx = sxMin; sx < sxMax; ++sx)
{
const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
if (!valid || value > poolValue) {
poolValue = value;
valid = true;
}
}
}
output[oIndexFull] = poolValue;
}
}
}
}
}
//N2D2 version
/*
template <class T>
void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
const Tensor<T>&
inputs,
const Descriptor& desc,
const T* beta,
Tensor<T>& outputs,
Tensor<ArgMax>& argMax,
bool useArgMax,
const Tensor<bool>& maps)
{
const unsigned int size = inputs.dimB() * outputs.dimZ();
#if defined(_OPENMP) && _OPENMP >= 200805
#pragma omp parallel for collapse(2) if (size > 16)
#else
#pragma omp parallel for if (inputs.dimB() > 4 && size > 16)
#endif
for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) {
for (unsigned int output = 0; output < outputs.dimZ(); ++output) {
for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) {
for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) {
const unsigned int sxMin = (unsigned int)std::max(
desc.padding[0] - (int)(ox * desc.stride[0]), 0);
const unsigned int syMin = (unsigned int)std::max(
desc.padding[1] - (int)(oy * desc.stride[1]), 0);
const unsigned int sxMax = Utils::clamp
<int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
0,
desc.pool[0]);
const unsigned int syMax = Utils::clamp
<int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
0,
desc.pool[1]);
const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
T poolValue(0.0);
// For each output, compute the pool value
if (useArgMax) {
const ArgMax inputMax
= argMax(ox, oy, output, batchPos);
if (inputMax.valid) {
poolValue = inputs(inputMax.ix,
inputMax.iy,
inputMax.channel,
batchPos);
}
}
else {
unsigned int ixMax = 0;
unsigned int iyMax = 0;
unsigned int channelMax = 0;
bool valid = false;
for (unsigned int channel = 0; channel < inputs.dimZ();
++channel)
{
if (!maps.empty() && !maps(output, channel))
continue;
for (unsigned int sy = syMin; sy < syMax; ++sy) {
for (unsigned int sx = sxMin; sx < sxMax; ++sx)
{
const T value = inputs(ix + sx,
iy + sy,
channel,
batchPos);
if (!valid || value > poolValue) {
poolValue = value;
valid = true;
ixMax = ix + sx;
iyMax = iy + sy;
channelMax = channel;
}
}
}
}
argMax(ox, oy, output, batchPos)
= ArgMax(ixMax, iyMax, channelMax, valid);
}
outputs(ox, oy, output, batchPos)
= (*alpha) * poolValue
+ (*beta) * outputs(ox, oy, output, batchPos);
}
}
}
}
}
*/
// Kernels registration to implementation entry point
REGISTRAR(MaxPoolingImpl2D_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(MaxPoolingImpl2D_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(MaxPoolingImpl2D_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ */
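The output spatial size used by the kernel above is floor((in - kernel + stride) / stride) for each dimension. A short worked sketch (not part of the diff):

#include <cmath>
#include <cstddef>
#include <cstdio>
int main() {
    const std::size_t in = 7, kernel = 3, stride = 2;
    const std::size_t out = static_cast<std::size_t>(
        std::floor(static_cast<float>(in - kernel + stride) / static_cast<float>(stride)));
    std::printf("%zu\n", out); // 3 windows, starting at columns 0, 2 and 4
    return 0;
}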
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MULIMPL_H_
#define AIDGE_CPU_OPERATOR_MULIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Mul.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
void(std::vector<std::size_t>,
std::vector<std::size_t>,
const std::vector<std::size_t>&,
const void*,
const void*,
void*),
void(const std::size_t,
const std::size_t,
const std::size_t,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const std::vector<std::size_t>,
const void*,
const void*,
const void*,
void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Mul_Op, "cpu", Aidge::MulImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MULIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>  // std::fill_n
#include <cstddef>    // std::size_t
#include <cstdint>    // std::int32_t, std::int64_t
#include <functional> // std::multiplies
#include <memory>     // std::unique_ptr
#include <numeric>    // std::accumulate
#include <vector>
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/MulImpl.hpp"
namespace {
// suppose values are contiguous in memory
template <class I1, class I2, class O>
void mul_contiguous_arrays(const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I1* input1,
const I2* input2,
O* output)
{
for (std::size_t i = 0; i < output1size; ++i)
{
const std::size_t in1_id = (input1size != 1) ? i : 0;
const std::size_t in2_id = (input2size != 1) ? i : 0;
output[i] = static_cast<O>(input1[in1_id] * input2[in2_id]);
}
}
}
namespace Aidge {
template <class I1, class I2, class O>
void MulImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_) {
const I1* input_0 = static_cast<const I1*>(input0_);
const I2* input_1 = static_cast<const I2*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// ## Compute compatible input dimensions
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0_contiguous_size; ++i)
{
output[i] = static_cast<O>(input_0[i] * input_1[i]);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with ones.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
mul_contiguous_arrays<I1,I2,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
template <class I1, class I2, class O>
void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
const std::size_t input1Length,
const std::size_t gradOutputLength,
const std::vector<std::size_t>& dims0,
const std::vector<std::size_t>& dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* grad_output_,
void* gradientInput0_,
void* gradientInput1_)
{
const I1* input0 = static_cast<const I1*>(input0_);
const I2* input1 = static_cast<const I2*>(input1_);
const O* grad_output = static_cast<const O*>(grad_output_);
auto* grad_input_0 = static_cast<I1*>(gradientInput0_);
auto* grad_input_1 = static_cast<I2*>(gradientInput1_);
std::fill_n(grad_input_0, input0Length, static_cast<I1>(0));
std::fill_n(grad_input_1, input1Length, static_cast<I2>(0));
// Broadcast dims0 and dims1 to match the shape of outputDims
auto broadcastedDims0 = getBroadcastedDims(outputDims, dims0);
auto broadcastedDims1 = getBroadcastedDims(outputDims, dims1);
for (std::size_t i = 0; i < gradOutputLength; ++i) {
auto idxOutputGrad = getMultiDimIndices(outputDims, i);
std::vector<std::size_t> idxInput0(broadcastedDims0.size());
std::vector<std::size_t> idxInput1(broadcastedDims1.size());
// Map output indices to input0 indices, considering broadcasting
for (std::size_t dimension = 0; dimension < broadcastedDims0.size(); ++dimension) {
// If input0 is broadcast along this dimension (size 1), the index is 0.
// idxInput0 is the multi-dimensional index of the input0 element that
// contributes to the output element at flat index i
// (a worked standalone example follows at the end of this header).
idxInput0[dimension] = (broadcastedDims0[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
for (std::size_t dimension = 0; dimension < broadcastedDims1.size(); ++dimension) {
idxInput1[dimension] = (broadcastedDims1[dimension] == 1) ? 0 : idxOutputGrad[dimension];
}
// We have to access tensors with a flat index, hence the conversion
auto idx0 = getFlattenedIndex(broadcastedDims0, idxInput0);
auto idx1 = getFlattenedIndex(broadcastedDims1, idxInput1);
grad_input_0[idx0] += static_cast<I1>(grad_output[i] * input1[idx1]);
grad_input_1[idx1] += static_cast<I2>(grad_output[i] * input0[idx0]);
}
}
// Kernels registration to implementation entry point
REGISTRAR(MulImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(MulImpl_cpu,
{{{DataType::Float32}, {DataType::Float64}}, {DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, double, float>, Aidge::MulImpl_cpu_backward_kernel<float, double, float>});
REGISTRAR(MulImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(MulImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, Aidge::MulImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>});
REGISTRAR(MulImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, Aidge::MulImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ */
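As a quick sanity check of the broadcast handling above, here is a minimal standalone sketch that calls the backward kernel on a {2,1} x {1,3} broadcast. It assumes the kernels header is reachable as "aidge/backend/cpu/operator/MulImpl_kernels.hpp"; the shapes and expected values are illustrative only.

#include <cstddef>
#include <vector>

#include "aidge/backend/cpu/operator/MulImpl_kernels.hpp"

int main() {
    // input0 is broadcast along the columns, input1 along the rows.
    const std::vector<std::size_t> dims0{2, 1}, dims1{1, 3}, outDims{2, 3};
    const std::vector<float> in0{2.f, 3.f};
    const std::vector<float> in1{10.f, 20.f, 30.f};
    const std::vector<float> gradOut(6, 1.f);   // all-ones upstream gradient
    std::vector<float> gradIn0(in0.size()), gradIn1(in1.size());

    Aidge::MulImpl_cpu_backward_kernel<float, float, float>(
        in0.size(), in1.size(), gradOut.size(),
        dims0, dims1, outDims,
        in0.data(), in1.data(), gradOut.data(),
        gradIn0.data(), gradIn1.data());

    // gradIn0 accumulates in1 over the broadcast axis -> {60, 60}
    // gradIn1 accumulates in0 over the broadcast axis -> {5, 5, 5}
    return 0;
}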
@@ -9,44 +9,44 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_   // previous revision guard (removed)
#define AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_
#ifndef AIDGE_CPU_OPERATOR_IMPL_H_           // current revision guard (added)
#define AIDGE_CPU_OPERATOR_IMPL_H_
#include <cstddef>  // std::size_t
#include <memory>
#include <tuple>    // std::tuple
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Producer.hpp"   // previous revision only
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Removed in this revision: the dedicated Producer implementation.
class ProducerImpl_cpu : public OperatorImpl {
private:
    const Producer_Op &mOp;
public:
    ProducerImpl_cpu(const Producer_Op &op) : mOp(op) {}
    static std::unique_ptr<ProducerImpl_cpu> create(const Producer_Op &op) {
        return std::make_unique<ProducerImpl_cpu>(op);
    }
public:
    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
    void updateConsummerProducer() override final;
    void forward();
    void backward();
};
// Added in this revision: a generic, registrable implementation entry point
// shared by all CPU operator backends.
template <class Op, class FwdFunc, class BwdFunc = void()>
class OperatorImpl_cpu : public OperatorImpl,
    public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>>
{
public:
    OperatorImpl_cpu(const Op& op) : OperatorImpl(op, "cpu") {}
    static std::unique_ptr<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>> create(const Op& op) {
        return std::make_unique<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>>(op);
    }
    virtual std::shared_ptr<ProdConso> getProdConso() const override {
        const auto impl = Registrar<OperatorImpl_cpu>::create(getBestMatch(getRequiredSpec()));
        return impl.prodConso(mOp);
    }
    virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
        // return Registrar<OperatorImpl_cpu>::getKeys(); // Note: cannot return set due to python binding
        std::set<ImplSpec> implSpecsSet = Registrar<OperatorImpl_cpu>::getKeys();
        return std::vector<ImplSpec>(implSpecsSet.begin(), implSpecsSet.end());
    }
    void forward() override;
    void backward() override;
};
// Removed in this revision: the Producer registrar.
namespace {
static Registrar<Producer_Op> registrarProducer1DImpl_cpu("cpu", Aidge::ProducerImpl_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_ */   // previous revision only
#endif /* AIDGE_CPU_OPERATOR_IMPL_H_ */
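To illustrate how this template is meant to be used, here is a hypothetical sketch; MyOp_Op, MyOpImpl_cpu and MyOpImpl_cpu_forward_kernel are placeholder names, not part of the diff, and it only mirrors the registration pattern already used by the other backends in this repository.

// Hypothetical example: entry point tying an operator type to its kernel signature.
using MyOpImpl_cpu = OperatorImpl_cpu<MyOp_Op,
    void(std::size_t /*nbElements*/, const void* /*input*/, void* /*output*/)>;

// Make the "cpu" implementation discoverable from the operator type...
REGISTRAR(MyOp_Op, "cpu", Aidge::MyOpImpl_cpu::create);

// ...and attach a concrete float kernel to the implementation entry point
// (no backward kernel in this sketch, hence nullptr).
REGISTRAR(MyOpImpl_cpu,
    {DataType::Float32},
    {ProdConso::inPlaceModel, Aidge::MyOpImpl_cpu_forward_kernel<float, float>, nullptr});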
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PADIMPL_H_
#define AIDGE_CPU_OPERATOR_PADIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Pad.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
class Pad_ProdConso_cpu : public ProdConso {
public:
Pad_ProdConso_cpu(const Operator& op): ProdConso(op) {}
static std::unique_ptr<ProdConso> defaultModel(const Operator& op) {
return std::make_unique<Pad_ProdConso_cpu>(op);
}
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
};
// Operator implementation entry point for the backend
using Pad1D_Op = Pad_Op<1>;
using PadImpl1D_cpu = OperatorImpl_cpu<Pad_Op<1>,
void(const std::array<DimSize_t, 2>&,
const PadBorderType,
const double,
const std::array<DimSize_t, 3> &,
const void *,
void *)>;
using Pad2D_Op = Pad_Op<2>;
using PadImpl2D_cpu = OperatorImpl_cpu<Pad_Op<2>,
void(const std::array<DimSize_t, 4>&,
const PadBorderType,
const double,
const std::array<DimSize_t, 4> &,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Pad1D_Op, "cpu", Aidge::PadImpl1D_cpu::create);
REGISTRAR(Pad2D_Op, "cpu", Aidge::PadImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */