Commit 2eff4d7f authored by Maxence Naud, committed by Olivier BICHLER

Add GridSample impl for 1D and 2D

parent 10ef1960
2 merge requests: !93 Release v0.3.0, !77 Add GridSample impl for 1D and 2D
Pipeline #54150 passed
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// compute kernel registries for the forward implementations
class GridSampleImpl1DForward_cpu
: public Registrable<GridSampleImpl1DForward_cpu,
std::tuple<DataType, DataType>,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)> {};
class GridSampleImpl2DForward_cpu
: public Registrable<GridSampleImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)> {};
class GridSampleImpl_cpu : public OperatorImpl {
public:
GridSampleImpl_cpu(const GridSample_Op& op) : OperatorImpl(op, "cpu") {}
static std::unique_ptr<GridSampleImpl_cpu> create(const GridSample_Op &op) {
return std::make_unique<GridSampleImpl_cpu>(op);
}
public:
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to GridSample_Op implementation registry
static Registrar<GridSample_Op> registrarGridSampleImpl_cpu("cpu", Aidge::GridSampleImpl_cpu::create);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_FORWARD_KERNEL_H_
#include <algorithm> // std::max, std::min
#include <cmath> // std::fabs, std::floor, std::truncf, std::nearbyintf
#include <cstddef> // std::size_t
#include <cstdint> // std::int64_t
#include <cstdlib> // std::abs (integral overloads)
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
// half-open bound check: lower_bound <= coord < upper_bound, so index 0 counts as in bound
static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept {
return (coord >= lower_bound) && (coord < upper_bound);
}
static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept {
return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound;
}
// unused
// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept {
// return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1;
// }
static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
: unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
}
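// Worked example of the mapping above (editor's illustration): for size = 4
// with align_corners, [-1, 1] maps onto the pixel centers [0, 3]:
// coord = -1 -> (-1+1)/2 * 3 = 0, coord = 1 -> (1+1)/2 * 3 = 3.
// Without align_corners it maps onto the pixel edges [-0.5, 3.5]:
// coord = 0 -> (0+1)/2 * 4 - 0.5 = 1.5.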
// unused
// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
// return align_corners ? normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
// : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
// }
static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(coord, -1.0f, 1.0f)) {
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(-1.0f, coord), 1.0f);
}
else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
float abs_coord = std::fabs(coord);
float int_coord = std::truncf(abs_coord);
std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2);
float res = ((nb_refl + 1)*2) - abs_coord;
coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \
: (nb_refl % 2 == 0 ? -res : res);
}
}
return coord;
}
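// Worked example of the reflection branch (editor's illustration):
// coord = 1.7 gives abs_coord = 1.7, int_coord = 1, nb_refl = 0,
// res = 2 - 1.7 = 0.3, so 1.7 reflects to 0.3;
// coord = 3.5 gives nb_refl = 1, res = 4 - 3.5 = 0.5, and the odd
// reflection count flips the sign: reflected once past 1 and once
// past -1, the result is -0.5.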
static inline std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
if (!in_bound(static_cast<float>(coord), 0.0f, static_cast<float>(size))) {
// out of bounds: apply the padding mode
if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
coord = std::min(std::max(std::int64_t(0), coord), size - std::int64_t(1));
} else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
const std::int64_t quotient = coord / (size - 1);
const std::int64_t remainder = std::abs(coord - quotient*(size - 1));
coord = (quotient % 2 == 0) ? remainder : size - 1 - remainder;
}
}
return coord;
}
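// Worked example (editor's illustration): size = 5 (valid indices 0..4),
// coord = 6. Border mode clamps to 4. Reflection mode computes
// quotient = 6/4 = 1, remainder = |6 - 4| = 2; the odd quotient gives
// coord = 4 - 2 = 2, i.e. 6 reflected back across index 4.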
namespace Aidge {
/**
* @brief Forward kernel for 1D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param op GridSample Operator providing the attributes (mode, padding mode, align_corners).
* @param in0 const input Tensor.
* @param in1 const grid Tensor.
* @param out Output Tensor.
*/
template <class I, class O>
void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* const input = static_cast<const I*>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
const float* const grid = static_cast<const float*>(in1->getImpl()->rawPtr());
const float* grid_ptr = grid;
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
O* output_ptr = output;
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t grid_H = in1->dim(1);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
const float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
grid_ptr = grid_ptr_N;
for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) {
output_ptr = output_ptr_N + grid_x*out_H_s;
/*
* For each grid position:
* 1. remap the normalized coord according to padding_mode,
* 2. change its range from [-1, 1] to [0, H-1] or [-0.5, H-0.5]
* according to align_corners,
* 3. interpolate with the selected mode (nearest, linear or cubic);
* any sample falling outside the input bounds contributes 0.
*/
float x = *grid_ptr;
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
if (in_bound(x_rounded, 0, in_H)) {
input_ptr = input_ptr_N + x_rounded*in_H_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf = in_bound(x_inf, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ?
input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0);
// weight each endpoint by its distance to the opposite one
*output_ptr = static_cast<O>(static_cast<I>(x_sup - x)*f_inf
+ static_cast<I>(x - x_inf)*f_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode());
const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode());
const I x1 = static_cast<I>(x - static_cast<float>(x_inf));
const I x2 = x1 * x1;
const I x3 = x1 * x2;
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0);
const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0);
const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0);
const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? input_ptr_NC[x_sup_sup*in_H_s] : I(0);
const I m_inf = (f_sup - f_inf_inf) / I(2);
const I m_sup = (f_sup_sup - f_inf) / I(2);
*output_ptr = f_inf \
+ x1 * m_inf \
+ x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \
+ x3 * (2*(f_inf - f_sup) + m_inf + m_sup);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
grid_ptr += grid_H_s;
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
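// Sanity check of the linear branch above (editor's illustration): with
// in_H = 4 and align_corners, a grid value of 0.5 unnormalizes to
// x = (0.5+1)/2 * 3 = 2.25, so x_inf = 2, x_sup = 3 and the output is
// 0.75*f[2] + 0.25*f[3], i.e. the sample sits a quarter of the way from
// index 2 to index 3.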
namespace {
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Float32(
{DataType::Float32, DataType::Float32},
Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>);
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Float16(
{DataType::Float16, DataType::Float16},
Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>);
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::GridSampleImpl1D_cpu_forward_kernel<int, int>);
static Registrar<GridSampleImpl1DForward_cpu> registrarGridSampleImpl1DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>);
/**
* @brief Forward kernel for 2D GridSample on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param op GridSample Operator providing the attributes (mode, padding mode, align_corners).
* @param in0 const input Tensor.
* @param in1 const grid Tensor.
* @param out Output Tensor.
*/
template <class I, class O>
void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
const std::shared_ptr<Tensor>& in0,
const std::shared_ptr<Tensor>& in1,
const std::shared_ptr<Tensor>& out)
{
const I* const input = static_cast<const I*>(in0->getImpl()->rawPtr());
const I* input_ptr = input;
// the grid comes from the second input (in1), not from in0
const float* const grid = static_cast<const float*>(in1->getImpl()->rawPtr());
const float* grid_ptr = grid;
O* const output = static_cast<O*>(out->getImpl()->rawPtr());
const std::size_t N = in0->dim(0);
const std::size_t C = in0->dim(1);
const std::size_t in_H = in0->dim(2);
const std::size_t in_W = in0->dim(3);
const std::size_t grid_H = in1->dim(1);
const std::size_t grid_W = in1->dim(2);
const std::size_t in_N_s = in0->stride(0);
const std::size_t in_C_s = in0->stride(1);
const std::size_t in_H_s = in0->stride(2);
const std::size_t in_W_s = in0->stride(3);
const std::size_t grid_N_s = in1->stride(0);
const std::size_t grid_H_s = in1->stride(1);
const std::size_t grid_W_s = in1->stride(2);
const std::size_t grid_Coord_s = in1->stride(3);
const std::size_t out_N_s = out->stride(0);
const std::size_t out_C_s = out->stride(1);
const std::size_t out_H_s = out->stride(2);
const std::size_t out_W_s = out->stride(3);
const float* grid_ptr_N = grid;
const I* input_ptr_N = input;
O* output_ptr_N = output;
for (std::size_t n = 0; n < N; ++n) {
for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) {
for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) {
O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_x*out_W_s;
grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s;
/*
* For each grid position:
* 1. remap the normalized x and y coords according to padding_mode,
* 2. change their range from [-1, 1] to [0, W-1]/[0, H-1] or
* [-0.5, W-0.5]/[-0.5, H-0.5] according to align_corners,
* 3. interpolate with the selected mode (nearest, linear or cubic);
* any sample falling outside the input bounds contributes 0.
*/
float x = *grid_ptr;
float y = grid_ptr[grid_Coord_s];
x = update_normalized_coord_with_padding(x, op.paddingMode());
x = unnormalize_grid_sample_coord(x, in_W, op.alignCorners());
y = update_normalized_coord_with_padding(y, op.paddingMode());
y = unnormalize_grid_sample_coord(y, in_H, op.alignCorners());
if (op.mode() == GridSample_Op::Mode::Nearest) {
const std::int64_t x_rounded = std::nearbyintf(x);
const std::int64_t y_rounded = std::nearbyintf(y);
if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) {
input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s;
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = *input_ptr;
input_ptr += in_C_s;
output_ptr += out_C_s;
}
} else {
for (std::size_t c = 0; c < C; ++c) {
*output_ptr = O(0);
output_ptr += out_C_s;
}
}
} else if (op.mode() == GridSample_Op::Mode::Linear) {
const std::int64_t x_l = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // left
const std::int64_t x_r = update_unnormalized_coord_with_padding(x_l + 1, in_W, op.paddingMode()); // right
const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top
const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s + static_cast<std::size_t>(x_l)*in_W_s] : I(0);
const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s + static_cast<std::size_t>(x_r)*in_W_s] : I(0);
const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s + static_cast<std::size_t>(x_l)*in_W_s] : I(0);
const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ?
input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s + static_cast<std::size_t>(x_r)*in_W_s] : I(0);
// bilinear: weight each corner by the area of the opposite sub-rectangle
const I w_tl = static_cast<I>((static_cast<float>(y_b) - y)*(static_cast<float>(x_r) - x));
const I w_tr = static_cast<I>((static_cast<float>(y_b) - y)*(x - static_cast<float>(x_l)));
const I w_bl = static_cast<I>((y - static_cast<float>(y_t))*(static_cast<float>(x_r) - x));
const I w_br = static_cast<I>((y - static_cast<float>(y_t))*(x - static_cast<float>(x_l)));
*output_ptr = static_cast<O>(w_tl*f_tl + w_tr*f_tr + w_bl*f_bl + w_br*f_br);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
} else if (op.mode() == GridSample_Op::Mode::Cubic) {
/*
* .. .. .. .. .. ..
* .. 00 01 02 03 ..
* .. 10 11 12 13 ..
* .. 20 21 22 23 ..
* .. 30 31 32 33 ..
* .. .. .. .. .. ..
*/
const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode());
const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode());
const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode());
const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode());
const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode());
const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode());
const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode());
const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode());
const I* input_ptr_NC = input_ptr_N;
for (std::size_t c = 0; c < C; ++c) {
const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0);
const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0);
const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0);
const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0);
const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0);
const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0);
const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0);
const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0);
const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0);
const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0);
const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0);
const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0);
const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0);
const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0);
const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0);
const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ?
input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0);
const I mx_11 = (f_21 - f_01) / I(2);
const I mx_12 = (f_22 - f_02) / I(2);
const I mx_21 = (f_31 - f_11) / I(2);
const I mx_22 = (f_32 - f_12) / I(2);
const I my_11 = (f_12 - f_10) / I(2);
const I my_12 = (f_13 - f_11) / I(2);
const I my_21 = (f_22 - f_20) / I(2);
const I my_22 = (f_23 - f_21) / I(2);
const I mxy_11 = (f_22 - f_20 - f_02 + f_00) / I(4);
const I mxy_12 = (f_23 - f_21 - f_03 + f_01) / I(4);
const I mxy_21 = (f_32 - f_30 - f_12 + f_10) / I(4);
const I mxy_22 = (f_33 - f_31 - f_13 + f_11) / I(4);
const I a_00 = f_11;
const I a_10 = mx_11;
const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21;
const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21;
const I a_01 = my_11;
const I a_11 = mxy_11;
const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21;
const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21;
const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12;
const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12;
const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 + I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11);
const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21);
const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12;
const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12;
const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12);
const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22));
// evaluate the bicubic surface at the local offsets within the cell,
// not at the global coordinates (matches x1 = x - x_inf in the 1D kernel)
const I tx = static_cast<I>(x - static_cast<float>(x_1));
const I tx2 = tx*tx;
const I tx3 = tx2*tx;
const I ty = static_cast<I>(y - static_cast<float>(y_1));
const I ty2 = ty*ty;
const I ty3 = ty2*ty;
*output_ptr = static_cast<O>(
a_00 + a_10*tx + a_20*tx2 + a_30*tx3
+ a_01*ty + a_11*tx*ty + a_21*tx2*ty + a_31*tx3*ty
+ a_02*ty2 + a_12*tx*ty2 + a_22*tx2*ty2 + a_32*tx3*ty2
+ a_03*ty3 + a_13*tx*ty3 + a_23*tx2*ty3 + a_33*tx3*ty3);
input_ptr_NC += in_C_s;
output_ptr += out_C_s;
}
}
}
}
input_ptr_N += in_N_s;
grid_ptr_N += grid_N_s;
output_ptr_N += out_N_s;
}
}
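// Sanity check of the bilinear branch above (editor's illustration): with
// corners f_tl = 1, f_tr = 2, f_bl = 3, f_br = 4 and a sample at the cell
// center (x - x_l = 0.5, y - y_t = 0.5), all four weights equal 0.25 and
// the output is (1 + 2 + 3 + 4) / 4 = 2.5.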
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32},
Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>);
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Float16(
{DataType::Float16, DataType::Float16},
Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>);
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32},
Aidge::GridSampleImpl2D_cpu_forward_kernel<int, int>);
static Registrar<GridSampleImpl2DForward_cpu> registrarGridSampleImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64},
Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
#include <functional>
#include <vector>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/GridSampleImpl_forward_kernels.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Types.h"
Aidge::Elts_t Aidge::GridSampleImpl_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
// this implementation can be in-place
return Elts_t::DataElts(0);
}
void Aidge::GridSampleImpl_cpu::forward() {
const auto& op_ = static_cast<const GridSample_Op&>(mOp);
// Find the correct kernel type
const auto outputDataType = op_.getOutput(0)->dataType();
const Registrar<GridSampleImpl1DForward_cpu>::registrar_key registrarKey = {
op_.getInput(0)->dataType(),
outputDataType};
std::function<void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)> kernelFunc;
const std::size_t nbSpatialFeat = op_.getInput(0)->nbDims() - 2; // exclude batch and channel dims
switch (nbSpatialFeat)
{
case 1:
kernelFunc = Registrar<GridSampleImpl1DForward_cpu>::create(registrarKey);
break;
case 2:
kernelFunc = Registrar<GridSampleImpl2DForward_cpu>::create(registrarKey);
break;
default:
AIDGE_THROW_OR_ABORT(std::runtime_error, "No CPU {} kernel available for {} dimensions.", op_.type(), nbSpatialFeat);
break;
}
// Convert input data (no overhead if not needed!)
// TODO: right now, if needed, memory will be allocated/deallocated at each
// call to forward(). We might put the following shared_ptr as members of
// this class to avoid that.
std::shared_ptr<Tensor> input0Fallback, input1Fallback;
const auto& input0 = std::make_shared<Tensor>(op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)));
const auto& input1 = std::make_shared<Tensor>(op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)));
// Call kernel
kernelFunc(op_,
input0, // input
input1, // grid
op_.getOutput(0) // output
);
}
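// Usage sketch (editor's illustration; assumes the operator's input, grid
// and output tensors are already set and use the cpu backend):
// Aidge::GridSample_Op op = ...; // hypothetical setup
// auto impl = Aidge::GridSampleImpl_cpu::create(op); // registered above as "cpu"
// impl->forward(); // picks the 1D or 2D kernel from the registrar key
// // {inputDataType, outputDataType}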