Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • mszczep/aidge_backend_cpu
  • eclipse/aidge/aidge_backend_cpu
  • hrouis/aidge_backend_cpu
  • oantoni/aidge_backend_cpu
  • raphaelmillet/aidge_backend_cpu
  • cguillon/aidge_backend_cpu
  • jeromeh/aidge_backend_cpu
  • axelfarr/aidge_backend_cpu
  • noamzerah/aidge_backend_cpu
  • silvanosky/aidge_backend_cpu
  • maab05/aidge_backend_cpu
  • lucaslopez/aidge_backend_cpu_ll
  • farnez/aidge_backend_cpu
13 results
Show changes
Showing
with 1609 additions and 412 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple> // std::tuple
#include <vector>
#include <algorithm>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Clip.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op,
void(float, //Forward Types
float,
const void*,
const std::size_t,
void*),
void(float,//Backward Types
float,
const std::size_t,
const void*,
const void*,
void*)>;
REGISTRAR(Clip_Op,"cpu",Aidge::ClipImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ClipImpl.hpp"
namespace Aidge {
template <class I, class O>
void ClipImpl_cpu_forward_kernel(
float min_,
float max_,
const void* input_,
const std::size_t length,
void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
for (std::size_t i = 0; i < length; ++i) {
output[i] = std::min(std::max(static_cast<float>(input[i]), min_), max_);
}
}
template <class I, class GI, class GO>
void ClipImpl_cpu_backward_kernel(
float min_,
float max_,
const std::size_t length,
const void* input_,
const void* grad_output_,
void* grad_input_)
{
const I* input = static_cast<const I*>(input_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
for (std::size_t i = 0; i < length; ++i) {
grad_input[i] = ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
}
}
REGISTRAR(ClipImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<float,float>,
Aidge::ClipImpl_cpu_backward_kernel<float,float,float>});
REGISTRAR(ClipImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<double,double>,
Aidge::ClipImpl_cpu_backward_kernel<double,double,double>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int32_t,std::int32_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,std::int32_t,std::int32_t>});
REGISTRAR(ClipImpl_cpu,
{DataType::Int64},
{ProdConso::inPlaceModel,
Aidge::ClipImpl_cpu_forward_kernel<std::int64_t,std::int64_t>,
Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,std::int64_t,std::int64_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_
#include <cstddef>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConstantOfShape.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op,
void(const std::vector<DimSize_t>, const Tensor&, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create);
} // namespace Aidge
#endif /* _AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional> // std::multiplies
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class O>
void ConstantOfShapeimpl_cpu_forward_kernel(
const std::vector<DimSize_t> output_dims, const Tensor &value,
void *output_) {
O *output = static_cast<O *>(output_);
O val;
std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
static_cast<O *>(value.getImpl()->hostPtr()) +
static_cast<NbElts_t>(1),
&val);
const size_t output_size = std::accumulate(
output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>());
for (size_t i = 0; i < output_size; ++i) {
output[i] = val;
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */
......@@ -17,56 +17,39 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class ConvDepthWise_Op;
// compute kernel registry for forward and backward
class ConvDepthWiseImpl2DForward_cpu
: public Registrable<ConvDepthWiseImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvDepthWiseImpl2DBackward_cpu
: public Registrable<ConvDepthWiseImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
private:
const ConvDepthWise_Op<2> &mOp;
std::array<NbElts_t, 3> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
static std::unique_ptr<ConvDepthWiseImpl2D_cpu> create(const ConvDepthWise_Op<2> &op) {
return std::make_unique<ConvDepthWiseImpl2D_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void backward();
};
namespace {
// add cpu backend to ConvDepthWise_Op<2> implementation registry
static Registrar<ConvDepthWise_Op<2>> registrarConvDepthWiseImpl2D_cpu("cpu", Aidge::ConvDepthWiseImpl2D_cpu::create);
} // namespace
// Operator implementation entry point for the backend
using ConvDepthWise1D_Op = ConvDepthWise_Op<1>;
using ConvDepthWiseImpl1D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<1>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3>&,
const void *,
const void *,
const void *,
void *)>;
using ConvDepthWise2D_Op = ConvDepthWise_Op<2>;
using ConvDepthWiseImpl2D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ConvDepthWise1D_Op, "cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
REGISTRAR(ConvDepthWise2D_Op, "cpu", Aidge::ConvDepthWiseImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Types.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Biais Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &params, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) /
static_cast<float>(std::get<0>(params)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) /
static_cast<float>(std::get<0>(params)[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) {
const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0];
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1];
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
}
}
}
}
}
namespace {
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
* @brief Forward kernel for 1D ConvDepthWiseolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Biais Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = ch * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
/**
* @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Biais Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
const std::size_t outChannels_s = oxSize * oySize;
if (dilated_kernel_x ==3 && dilated_kernel_y == 3) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * 9;
if (strideDims[0] == 1 && strideDims[1]==1) {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=(strideDims[0]-2)*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy*strideDims[1]]+weights[wIndex+1]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+2]*input[iIndex+oy*strideDims[1]+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy*strideDims[1]]+weights[wIndex+4]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+5]*input[iIndex+oy*strideDims[1]+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy*strideDims[1]]+weights[wIndex+7]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+8]*input[iIndex+oy*strideDims[1]+2];
}
}
}
output += outChannels_s;
}
}
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch;
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (std::size_t i = iIndex; i < iIndex + oxSize*oySize; ++i) {
output[i] = biasVal + weights[wIndex] * input[i];
}
} else {
std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=strideDims[0]*inputDims[3]) {
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] = biasVal + weights[wIndex]*input[iIndex+iy];
}
}
}
}
}
} else {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output, output+outChannels_s, biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t oIndexFull = ox*oySize + oy;
const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * strideDims[1];
for (std::size_t kx = 0; kx*dilationDims[0] < dilated_kernel_x; ++kx) {
for (std::size_t ky = 0; ky*dilationDims[1] < dilated_kernel_y; ++ky) {
output[oIndexFull] += weights[wIndex + kx*kernelDims[1] + ky] *
input[iIndex + (ix + kx*dilationDims[0])*inputDims[3] + (iy + ky*dilationDims[1])];
}
}
}
}
output += outChannels_s;
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ */
......@@ -17,56 +17,41 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class Conv_Op;
// compute kernel registry for forward and backward
class ConvImpl2DForward_cpu
: public Registrable<ConvImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvImpl2DBackward_cpu
: public Registrable<ConvImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvImpl2D_cpu : public OperatorImpl {
private:
const Conv_Op<2> &mOp;
std::array<NbElts_t, 3> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
ConvImpl2D_cpu(const Conv_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) {
return std::make_unique<ConvImpl2D_cpu>(op);
}
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void backward();
};
namespace {
// add cpu backend to Conv_Op<2> implementation registry
static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cpu::create);
} // namespace
// Operator implementation entry point for the backend
using Conv1D_Op = Conv_Op<1>;
using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)>;
using Conv2D_Op = Conv_Op<2>;
using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/utils/Types.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 2D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Biais Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &params, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
/*
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
static_cast<float>(std::get<0>(params)[0]));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
static_cast<float>(std::get<0>(params)[1]));
// TODO: kernel computation
// output (Xout, Yout, outCh, batch)
// input (Xin, Yin, inCh, batch)
// weight (kernelX, kernelY, inCh, outCh)
// does not take Dilation attribute into account
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t ix = ox * std::get<0>(params)[0];
const std::size_t iy = oy * std::get<0>(params)[1];
for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox));
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
output[oIndex + batch] = biasVal;
}
for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
const std::size_t wIndex =
outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx));
std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
}
}
}
}
}
}
}
*/
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
static_cast<float>(std::get<0>(params)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
static_cast<float>(std::get<0>(params)[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0];
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1];
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
}
}
}
}
}
}
namespace {
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
/**
* @brief Forward kernel for 1D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Biais Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
/**
* @brief Forward kernel for 2D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param params tuple of Attributes from the Operator
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const Biais Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
const std::size_t outChannels_s = oxSize * oySize;
if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * 9;
if (strideDims[0] == 1 && strideDims[1]==1) {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=(strideDims[0]-2)*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy*strideDims[1]]+weights[wIndex+1]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+2]*input[iIndex+oy*strideDims[1]+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy*strideDims[1]]+weights[wIndex+4]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+5]*input[iIndex+oy*strideDims[1]+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy*strideDims[1]]+weights[wIndex+7]*input[iIndex+oy*strideDims[1]+1]+weights[wIndex+8]*input[iIndex+oy*strideDims[1]+2];
}
}
}
}
output += outChannels_s;
}
}
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]);
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (std::size_t oIndex = 0; oIndex < oxSize*oySize; ++oIndex, ++iIndex) {
output[oIndex] += weights[wIndex] * input[iIndex];
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=inputDims[3]*strideDims[0]) {
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+iy];
}
}
}
}
output += outChannels_s;
}
}
} else {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex_channel = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
// loop over each ouput line
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex_channel+=inputDims[3]*strideDims[0]) {
// loop over associated input line
for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; ++ky, ix += inputDims[3]*dilationDims[0]) {
// loop over the entire line
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
const std::size_t iIndex = iIndex_channel + ix + iy;
// loop over elements assosicated with one output
for (std::size_t kx = 0; kx < kernelDims[0]; ++kx) {
output[oIndex + oy] += weights[wIndex+kernelDims[0]*ky+kx]*input[iIndex+kx*dilationDims[1]];
}
}
}
}
}
output += outChannels_s;
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_H_
#define AIDGE_CPU_OPERATOR_DIVIMPL_H_
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Div.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using DivImpl_cpu = OperatorImpl_cpu<Div_Op,
void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
#include <numeric> // std::accumulate
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::int64_t
#include <functional> // std::multiplies
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/DivImpl.hpp"
namespace Aidge {
// template <class I1, class I2, class O>
// void DivImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
// const std::vector<std::size_t>& input2Dims,
// const std::vector<std::size_t>& outputDims,
// const void* input1_,
// const void* input2_,
// void* output_) {
// const I1* input_1 = static_cast<const I1*>(input1_);
// const I2* input_2 = static_cast<const I2*>(input2_);
// O* output = static_cast<O*>(output_);
// const std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
// {
// std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
// std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
// std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
// // TODO assert if input_2 is bad?
// output[oIndex] = input_1[idx1] / input_2[idx2];
// }
// }
template <class I1, class I2, class O>
constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
const std::size_t input2size_,
const std::size_t output1size_,
const void* input1_,
const void* input2_,
void* output_) {
const I1* input_1 = static_cast<const I1*>(input1_);
const I2* input_2 = static_cast<const I2*>(input2_);
O* output = static_cast<O*>(output_);
// suppose values are contiguous in memory
for (std::size_t i = 0; i < output1size_; ++i) {
const std::size_t in1_id = (input1size_ != 1) ? i : 0;
const std::size_t in2_id = (input2size_ != 1) ? i : 0;
output[i] = static_cast<O>(input_1[in1_id] / input_2[in2_id]);
}
}
// Kernels registration to implementation entry point
REGISTRAR(DivImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr});
REGISTRAR(DivImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr});
REGISTRAR(DivImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using ErfImpl_cpu = OperatorImpl_cpu<Erf_Op,
void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Erf_Op, "cpu", Aidge::ErfImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */
......@@ -9,37 +9,39 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_
#include <cmath>
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
#include "aidge/backend/cpu/operator/ErfImpl.hpp"
namespace Aidge {
template <class I, class O>
void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& params,
std::size_t inputLenght,
void ErfImpl_cpu_forward_kernel(std::size_t inputLenght,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
I negativeSlope = static_cast<I>(std::get<0>(params));
for (std::size_t i = 0; i < inputLenght; ++i) {
output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
output[i] = std::erf(input[i]);
}
}
namespace {
static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>);
static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>);
static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>);
} // namespace
// Kernels registration to implementation entry point
REGISTRAR(ErfImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ErfImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ErfImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
#define AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Expand.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ExpandImpl_cpu = OperatorImpl_cpu<Expand_Op,
void(const std::shared_ptr<Tensor> &,
const std::shared_ptr<Tensor> &,
void *,
const std::vector<DimSize_t> &)>;
// Implementation entry point registration to Operator
REGISTRAR(Expand_Op, "cpu", Aidge::ExpandImpl_cpu::create);
} // namespace Aidge
#endif /* _AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include <aidge/data/Data.hpp>
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <aidge/scheduler/ProdConso.hpp>
#include <aidge/utils/Types.h>
#include <cmath>
#include <cstdint> // std::int32_t, std::int64_t
#include <memory>
#include <numeric>
namespace {
// suppose values are contiguous in memory
template <class IO>
void expandContiguousArray(const std::size_t inputStackSize,
const std::size_t outputStackSize,
const IO *input,
IO *output) {
for (std::size_t i = 0; i < outputStackSize; ++i) {
output[i] = (inputStackSize == 1) ? input[0] : input[i];
}
return;
}
} // namespace
namespace Aidge {
template <class IO>
void ExpandImpl_cpu_forward_kernel(
const std::shared_ptr<Tensor> &inData,
const std::shared_ptr<Tensor> &_inExpandShape,
void *_output,
const std::vector<DimSize_t> &outputDims) {
// retrieving data of inputShape & dimensions of inputDims
// as the process will require to modify the values
IO *output = static_cast<IO *>(_output);
std::vector<DimSize_t> inExpandShape(_inExpandShape->size());
for (DimSize_t i = 0; i < _inExpandShape->size(); ++i) {
inExpandShape[i] = _inExpandShape->get<std::int64_t>(i);
}
std::vector<DimSize_t> inDataDims = inData->dims();
// Example with 2 tensors
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions but adding 1s to le left of "smallest"
// tensor -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then ->
// 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
// ## Compute compatible input dimensions
// special case for equal dimensions, the kernel is called with the entire
// arrays at once
if (inDataDims == inExpandShape) {
const std::size_t input0ContiguousSize =
std::accumulate(inDataDims.cbegin(),
inDataDims.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0ContiguousSize; ++i) {
output[i] = inData->get<IO>(i);
}
return;
}
// set dimensions to be of equal size by filling the smallest one with
// ones.
if (inDataDims.size() > inExpandShape.size()) {
inExpandShape.insert(inExpandShape.cbegin(),
inDataDims.size() - inExpandShape.size(),
static_cast<DimSize_t>(1));
} else if (_inExpandShape->size() > inDataDims.size()) {
inDataDims.insert(inDataDims.cbegin(),
inExpandShape.size() - inDataDims.size(),
static_cast<DimSize_t>(1));
}
const std::size_t nbDims = inDataDims.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (inDataDims[contiguousIdx] != inExpandShape[contiguousIdx]) {
break;
}
}
if (contiguousIdx == (nbDims - 1)) {
// last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t> &dims =
(inDataDims[contiguousIdx] == 1) ? inDataDims : inExpandShape;
while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t inputDataContiguousSize =
std::accumulate(inDataDims.cbegin() + contiguousIdx,
inDataDims.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
const std::size_t outputContiguousSize =
std::accumulate(outputDims.cbegin() + contiguousIdx,
outputDims.cend(),
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stridePostIn =
std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> strideStepIn =
std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stridePostIn[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2;
i != static_cast<std::size_t>(-1);
--i) {
stridePostIn[i] = stridePostIn[i + 1] *
static_cast<std::int32_t>(inDataDims[i + 1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
strideStepIn[i] = (inDataDims[i] == 1) ? 1 - stridePostIn[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetInData = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks =
std::accumulate(outputDims.cbegin(),
outputDims.cbegin() + contiguousIdx,
static_cast<std::size_t>(1),
std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
expandContiguousArray<IO>(
inputDataContiguousSize,
outputContiguousSize,
&static_cast<const IO *>(
inData->getImpl()
->rawPtr())[offsetInData * inputDataContiguousSize],
&output[offsetOut * outputContiguousSize]);
if (++stack < nbStacks) {
std::size_t tmpStack = stack;
while (tmpStack % outputDims[dim] == 0) {
tmpStack /= outputDims[dim];
dim--;
}
offsetInData += strideStepIn[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
REGISTRAR(ExpandImpl_cpu,
{{DataType::Int16, DataType::Int64}, {DataType::Int16}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<std::int16_t>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Int32, DataType::Int64}, {DataType::Int32}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<std::int32_t>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Int64, DataType::Int64}, {DataType::Int64}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<std::int64_t>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Float16, DataType::Int64}, {DataType::Float16}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<half_float::half>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Float32, DataType::Int64}, {DataType::Float32}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<float>,
nullptr});
REGISTRAR(ExpandImpl_cpu,
{{DataType::Float64, DataType::Int64}, {DataType::Float64}},
{ProdConso::inPlaceModel,
Aidge::ExpandImpl_cpu_forward_kernel<double>,
nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ */
......@@ -12,53 +12,37 @@
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge {
// class FC_Op;
// compute kernel registry for forward and backward
class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t,
const void *, const void *, const void *, void *)> {};
class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t,
const void *, const void *, const void *, void *)> {};
class FCImpl_cpu : public OperatorImpl {
private:
const FC_Op &mOp;
std::array<NbElts_t, 3> mNbConsumedData;
std::array<NbElts_t, 1> mNbProducedData;
public:
FCImpl_cpu(const FC_Op &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) { return std::make_unique<FCImpl_cpu>(op); }
public:
NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
void updateConsummerProducer() override final;
void forward();
void backward();
};
namespace {
static Registrar<FC_Op> registrarFCImpl_cpu("cpu", Aidge::FCImpl_cpu::create);
}
// Operator implementation entry point for the backend
using FCImpl_cpu = OperatorImpl_cpu<FC_Op,
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *),
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *,
void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(FC_Op, "cpu", Aidge::FCImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */
......@@ -9,17 +9,17 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// template <class I, class W, class B, class O>
// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& params, const std::array<DimSize_t, 4>& dims,
// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC attributes as arguments
// const I* input = static_cast<const I*>(input_);
......@@ -27,9 +27,9 @@ namespace Aidge {
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
// const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
......@@ -39,10 +39,10 @@ namespace Aidge {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3];
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
......@@ -53,7 +53,7 @@ namespace Aidge {
// }
// template <class I, class W, class B, class O>
// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& params, const std::array<DimSize_t, 2>& dims,
// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims,
// const void* input_, const void* weights_, const void* biases_, void* output_) {
// // FIXME: missing FC attributes as arguments
// const I* input = static_cast<const I*>(input_);
......@@ -63,9 +63,9 @@ namespace Aidge {
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
// const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
......@@ -74,8 +74,8 @@ namespace Aidge {
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3];
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
......@@ -83,46 +83,104 @@ namespace Aidge {
// }
template <class I, class W, class B, class O>
void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
const void* input_, const void* weights_, const void* biases_, void* output_) {
void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* weights_,
const void* biases_,
void* output_) {
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
if (std::get<1>(params)) {
std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
if (biases == nullptr) {
std::fill(output, output+(batchSize*outputFeatureSize), B(0));
}
else {
for (std::size_t batch = 0; batch < batchSize; ++batch) {
std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
for (std::size_t out = 0; out < std::get<0>(params); ++out) {
output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
input + (batch + 1)*oneInputSize,
weights + out*oneInputSize,
output[out + batch*std::get<0>(params)]);
for (std::size_t out = 0; out < outputFeatureSize; ++out) {
output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
input + (batch + 1)*inputFeatureSize,
weights + out*inputFeatureSize,
output[out + batch*outputFeatureSize]);
}
}
}
template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* originalInput_,
const void* weight_,
void* output_,
void* weightGrad_,
void* biasesGrad_)
{
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const I* originalInput = static_cast<const I*>(originalInput_);
const W* weight = static_cast<const W*>(weight_);
O* output = static_cast<O*>(output_);
W* weightGrad = static_cast<W*>(weightGrad_);
B* biasesGrad = static_cast<B*>(biasesGrad_);
// bias grad
if (biasesGrad == nullptr) { // no bias
std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0));
} else {
for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
B sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += input[b*outputFeatureSize + o];
}
biasesGrad[o] = sum;
}
}
namespace {
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>);
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>);
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>);
} // namespace
// weight grad
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
W sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
}
weightGrad[o*inputFeatureSize + c] = sum;
}
}
// input grad
for (std::size_t b = 0; b < batchSize; ++b) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
O sum{0};
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
}
output[b*inputFeatureSize + c] = sum;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>});
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>});
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_H_
#define AIDGE_CPU_OPERATOR_FOLDIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Fold.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using Fold2D_Op = Fold_Op<2>;
using FoldImpl2D_cpu = OperatorImpl_cpu<Fold_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::vector<DimSize_t> &,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Fold2D_Op, "cpu", Aidge::FoldImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/FoldImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
template <class I, class O>
void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::vector<DimSize_t> &dims,
const void *input_, void *output_)
{
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const DimSize_t inHeight = outputDims[0];
const DimSize_t inWidth = outputDims[1];
const DimSize_t kernelExtentHeight = dilationDims[0] *
(kernelDims[0] - 1) + 1;
const DimSize_t outHeight = 1 + static_cast<DimSize_t>(
floor(static_cast<float>(inHeight - kernelExtentHeight) /
static_cast<float>(strideDims[0])));
const DimSize_t kernelExtentWidth = dilationDims[1] *
(kernelDims[1] - 1) + 1;
const DimSize_t outWidth = 1 + static_cast<DimSize_t>(
floor(static_cast<float>(inWidth - kernelExtentWidth) /
static_cast<float>(strideDims[1])));
const DimSize_t outChannels = dims[dims.size() - 2];
const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1];
std::fill_n(output, dims[0] * outHeight * outWidth * outChannels, O(0));
for (DimSize_t n = 0; n < dims[0]; ++n) {
for (DimSize_t outC = 0; outC < outChannels; ++outC) {
const auto inOffsetW = outC % kernelDims[1];
const auto inOffsetH = (outC / kernelDims[1]) % kernelDims[0];
const auto inC = outC / kernelDims[0] / kernelDims[1];
for (DimSize_t outH = 0; outH < outHeight; ++outH) {
const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0];
for (DimSize_t outW = 0; outW < outWidth; ++outW) {
const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1];
output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] +=
input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW];
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(FoldImpl2D_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(FoldImpl2D_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(FoldImpl2D_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ */