Compare revisions
Changes are shown as if the source revision was being merged into the target revision.
Showing 1706 additions and 40 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_
#include <algorithm> // std::max, std::min
#include <array>
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
* @brief Forward kernel for 1D Padding on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param beginEndBorders Padding sizes at the beginning and end of the padded axis.
* @param borderType Padding mode (Constant, Edge, Reflect or Wrap).
* @param borderValue Value used when borderType is Constant.
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders,
const PadBorderType borderType,
const double borderValue,
const std::array<DimSize_t, 3>& dims,
const void *input_,
void *output_)
{
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const std::size_t oxSize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2];
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize;
for (unsigned int ox = 0; ox < oxSize; ++ox) {
const std::size_t oIndexFull = oIndex + ox;
O outputValue = static_cast<O>(borderValue);
if (borderType == PadBorderType::Constant) {
int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]);
if (ix >= 0 && ix < static_cast<int>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
}
else if (borderType == PadBorderType::Edge) {
int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])));
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Reflect) {
int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]);
if (ix < 0)
ix = 0 - ix;
if (ix >= static_cast<int>(dims[2]))
ix = static_cast<int>(dims[2]) - ix;
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Wrap) {
int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])) % static_cast<int>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(ix)];
}
output[oIndexFull] = outputValue;
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(PadImpl1D_cpu,
{{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr});
REGISTRAR(PadImpl1D_cpu,
{{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr});
REGISTRAR(PadImpl1D_cpu,
{{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr});
/**
* @brief Forward kernel for 2D Padding on CPU backend.
* @tparam I Input data type.
* @tparam O Output data type.
* @param beginEndBorders Padding sizes at the beginning and end of each padded axis.
* @param borderType Padding mode (Constant, Edge, Reflect or Wrap).
* @param borderValue Value used when borderType is Constant.
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param output_ Output Tensor.
*/
template <class I, class O>
void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorders,
const PadBorderType borderType,
const double borderValue,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_)
{
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[2];
const std::size_t oxSize = dims[3] + beginEndBorders[1] + beginEndBorders[3];
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
for (std::uint32_t oy = 0; oy < oySize; ++oy) {
for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
const std::size_t oIndexFull = oIndex + oy*oxSize + ox;
O outputValue = static_cast<O>(borderValue);
if (borderType == PadBorderType::Constant) {
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]);
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]);
if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
}
else if (borderType == PadBorderType::Edge) {
std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])));
std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])));
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Reflect) {
std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]);
std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]);
if (ix < 0)
ix = 0 - ix;
if (iy < 0)
iy = 0 - iy;
if (ix >= static_cast<std::int32_t>(dims[3]))
ix = static_cast<std::int32_t>(dims[3]) - ix;
if (iy >= static_cast<std::int32_t>(dims[2]))
iy = static_cast<std::int32_t>(dims[2]) - iy;
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
else if (borderType == PadBorderType::Wrap) {
std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[3]);
std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])) % static_cast<std::int32_t>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
}
output[oIndexFull] = outputValue;
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(PadImpl2D_cpu,
{{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr});
REGISTRAR(PadImpl2D_cpu,
{{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr});
REGISTRAR(PadImpl2D_cpu,
{{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ */
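
For illustration, here is a minimal usage sketch of the 1D padding kernel above. The include path is an assumption (the header's actual install path may differ); the expected outputs follow directly from the Constant and Edge branches of the kernel.

#include <array>
#include <iostream>

#include "aidge/backend/cpu/operator/PadImpl_kernels.hpp" // assumed install path of this header

int main() {
    // One batch, one channel, four values, padded by one element on each side (oxSize = 6).
    const float in[4] = {1.f, 2.f, 3.f, 4.f};
    float out[6] = {};
    Aidge::PadImpl1D_cpu_forward_kernel<float, float>(
        {1, 1},                         // beginEndBorders
        Aidge::PadBorderType::Constant, // border mode
        0.0,                            // borderValue
        {1, 1, 4},                      // dims (batch, channel, length)
        in, out);
    for (float v : out) { std::cout << v << ' '; } // prints: 0 1 2 3 4 0
    std::cout << '\n';                             // Edge mode would give: 1 1 2 3 4 4
    return 0;
}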
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_H_
#define AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/MetaOperatorDefs.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using PaddedConv1D_Op = MetaOperator_Op;
using PaddedConvImpl1D_cpu = OperatorImpl_cpu<MetaOperator_Op,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)>;
using PaddedConv2D_Op = MetaOperator_Op;
using PaddedConvImpl2D_cpu = OperatorImpl_cpu<MetaOperator_Op,
void(const std::array<DimSize_t, 4>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
// Uncomment to activate the implementation for PaddedConv. It is currently less efficient, which is why it is commented out.
// REGISTRAR(PaddedConv1D_Op, std::array<std::string, 2>({"cpu", "PaddedConv1D"}), Aidge::PaddedConvImpl1D_cpu::create);
// REGISTRAR(PaddedConv2D_Op, std::array<std::string, 2>({"cpu", "PaddedConv2D"}), Aidge::PaddedConvImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_KERNELS_H_
#include <array>
#include <cstddef>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/PaddedConvImpl.hpp"
#include "aidge/operator/Pad.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Only works for constant padding zero
/**
* @brief Forward kernel for padded 1D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param beginEndBorders Padding sizes at the beginning and end of the spatial axis.
* @param strideDims Stride of the convolution.
* @param dilationDims Dilation of the convolution.
* @param kernelDims Kernel dimensions.
* @param inputDims Array of input dimensions.
* @param outChannels Number of output channels.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void PaddedConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders,
const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// TODO: kernel computation
// output (batch, outCh, Xout)
// input  (batch, inCh, Xin)
// weight (outCh, inCh, kernelX)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(static_cast<signedsize>(beginEndBorders[0]) - difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + static_cast<signedsize>(beginEndBorders[1]) - difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]) - static_cast<signedsize>(beginEndBorders[0]);
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(PaddedConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(PaddedConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
REGISTRAR(PaddedConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
REGISTRAR(PaddedConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv1D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
/**
* @brief Forward kernel for padded 2D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param beginEndBorders Padding sizes at the beginning and end of each spatial axis.
* @param strideDims Stride of the convolution.
* @param dilationDims Dilation of the convolution.
* @param kernelDims Kernel dimensions.
* @param inputDims Array of input dimensions.
* @param outChannels Number of output channels.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void PaddedConvImpl2D_cpu_forward_kernel(
const std::array<DimSize_t, 4>& beginEndBorders,
const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + beginEndBorders[0] + beginEndBorders[2] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + beginEndBorders[1] + beginEndBorders[3] + strideDims[1]) /
static_cast<float>(strideDims[1])));
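// Note: floor((in + padBegin + padEnd - dilatedKernel + stride) / stride) equals the usual
// output-size formula floor((in + padBegin + padEnd - dilatedKernel) / stride) + 1.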
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const std::size_t difx = ox * strideDims[0];
const std::size_t sxMin = beginEndBorders[0] < difx ? std::size_t(0) : beginEndBorders[0] - difx;
const std::size_t sxMax = (inputDims[2] + beginEndBorders[2]) < difx ?
0 :
((inputDims[2] + beginEndBorders[2]) > dilated_kernel_x + difx ?
dilated_kernel_x :
(inputDims[2] + beginEndBorders[2] - difx));
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t dify = oy * strideDims[1];
const std::size_t syMin = beginEndBorders[1] < dify ? std::size_t(0) : beginEndBorders[1] - dify;
const std::size_t syMax = (inputDims[3] + beginEndBorders[3]) < dify ?
0 :
((inputDims[3] + beginEndBorders[3]) > dilated_kernel_y + dify ?
dilated_kernel_y :
(inputDims[3] + beginEndBorders[3] - dify));
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * strideDims[0] - beginEndBorders[0];
const std::size_t iy = oy * strideDims[1] - beginEndBorders[1];
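// Unrolled fast path: used when the full 3-wide (dilated) window fits inside the padded input
// in both dimensions with no border clipping, i.e. a 3x3 kernel with unit dilation.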
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
for (std::size_t sy = syMin; sy*dilationDims[1] < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + (sx*dilationDims[0] + ix)*inputDims[3] + sy*dilationDims[1] + iy];
}
}
}
}
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(PaddedConvImpl2D_cpu,
// ImplSpec{std::vector<ImplSpec::IOSpec>({ImplSpec::IOSpec{DataType::Any, DataFormat::NCHW}, ImplSpec::IOSpec{DataType::Any, DataFormat::NCHW}}) , std::vector<ImplSpec::IOSpec>({ImplSpec::IOSpec{DataType::Int32, DataFormat::NCHW}})},
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(PaddedConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
REGISTRAR(PaddedConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(PaddedConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}, DynamicAttributes(std::map<std::string, future_std::any>({std::make_pair("type", future_std::any(std::string("PaddedConv2D")))}))},
{ProdConso::inPlaceModel, Aidge::PaddedConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_PADDEDCONVIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_
#define AIDGE_CPU_OPERATOR_POWIMPL_H_
#include <cstddef> // std::size_t
#include <memory> // std::unique_ptr, std::make_unique
#include <string>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Pow.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using PowImpl_cpu = OperatorImpl_cpu<Pow_Op,
void(std::vector<std::size_t>, std::vector<std::size_t>, const std::vector<std::size_t>&, const void*, const void*, void*),
void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef> // std::size_t
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/PowImpl.hpp"
namespace Aidge {
namespace {
// assumes values are contiguous in memory
template <class I, class O>
void pow_contiguous_arrays(const std::size_t input1size,
const std::size_t input2size,
const std::size_t output1size,
const I* input1,
const I* input2,
O* output)
{
for (std::size_t i = 0; i < output1size; ++i)
{
const std::size_t in1_id = (input1size != 1) ? i : 0;
const std::size_t in2_id = (input2size != 1) ? i : 0;
output[i] = static_cast<O>(std::pow(input1[in1_id], input2[in2_id]));
}
}
}
template <class I, class O>
void PowImpl_cpu_forward_kernel(std::vector<std::size_t> dims0,
std::vector<std::size_t> dims1,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
void* output_) {
const I* input_0 = static_cast<const I*>(input0_);
const I* input_1 = static_cast<const I*>(input1_);
O* output = static_cast<O*>(output_);
// [5,2,1,7] & [2,6,7]
// 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
// 2. Find the highest equal dimension -> 3
// Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
// 3. Compute the highest number of contiguous data -> 7
// 4. Compute stride and offset step for the broadcast mechanism
// 5. Call a simple kernel
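// For the example above, contiguousIdx = 3 and 5*2*6 = 60 stacks of 7 contiguous values are processed.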
// special case for equal dimensions, the kernel is called with the entire arrays at once
if (dims0 == dims1) {
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t i = 0; i < input0_contiguous_size; ++i)
{
output[i] = static_cast<O>(std::pow(input_0[i], input_1[i]));
}
return;
}
// Make both dimension vectors the same length by prepending ones to the shorter one.
if (dims0.size() > dims1.size()) {
dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
}
else if (dims1.size() > dims0.size()) {
dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
}
const std::size_t nbDims = dims0.size();
// Find the highest equal dimension
// std::size_t contiguousIdx = nbDims - 1;
std::size_t contiguousIdx = nbDims;
while (contiguousIdx-- > 0) {
// for (; contiguousIdx+1 > 0; --contiguousIdx) {
if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
--contiguousIdx;
}
}
break;
}
}
++contiguousIdx;
// Compute the highest number of contiguous data for each Tensor
const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
const std::size_t output_contiguous_size = std::accumulate(outputDims.cbegin()+contiguousIdx, outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
// initialize strides to iterate through data because of broadcasting
std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
if (contiguousIdx > 0) {
stride_post0[contiguousIdx - 1] = 1;
stride_post1[contiguousIdx - 1] = 1;
for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
}
for (std::size_t i = 0; i != contiguousIdx; ++i) {
stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
stride_step1[i] = (dims1[i] == 1) ? 1 - stride_post1[i] : 1;
}
}
// variables for arrays offsets
std::size_t offsetIn0 = 0;
std::size_t offsetIn1 = 0;
std::size_t offsetOut = 0;
std::size_t dim = contiguousIdx - 1;
const std::size_t nbStacks = std::accumulate(outputDims.cbegin(), outputDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
for (std::size_t stack = 0; stack < nbStacks;) {
pow_contiguous_arrays<I,O>(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
input_0 + offsetIn0*input0_contiguous_size,
input_1 + offsetIn1*input1_contiguous_size,
output + offsetOut*output_contiguous_size);
if (++stack < nbStacks) {
std::size_t tmp_stack = stack;
while(tmp_stack % outputDims[dim] == 0) {
tmp_stack /= outputDims[dim];
dim--;
}
offsetIn0 += stride_step0[dim];
offsetIn1 += stride_step1[dim];
++offsetOut;
dim = contiguousIdx - 1;
}
}
}
template <class I1, class I2, class O>
void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims,
const std::vector<std::size_t>& input1Dims,
const std::vector<std::size_t>& outputDims,
const void* input0_,
const void* input1_,
const void* gradOutput_,
void* gradientInput0_,
void* gradientInput1_) {
const I1* input0 = static_cast<const I1*>(input0_);
I1* grad0 = static_cast<I1*>(gradientInput0_);
const I2* input1 = static_cast<const I2*>(input1_);
I2* grad1 = static_cast<I2*>(gradientInput1_);
const O* gradOut = static_cast<const O*>(gradOutput_);
// Fill input grads with zeros
std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
std::fill(grad0, grad0 + input0Elements, I1(0));
std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
std::fill(grad1, grad1 + input1Elements, I2(0));
std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
for (size_t oIndex = 0; oIndex < totalElements; ++oIndex)
{
// Compute indexes in inputs 0 and 1 to support broadcasting
std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
std::size_t idx0 = getFlattenedIndex(input0Dims, indexes);
std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
// grad0 = grad_output * (input1 * pow(input0, (input1 -1)))
grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1);
// grad1 = grad_output * (output * ln(input0))
grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]);
}
}
// Kernels registration to implementation entry point
REGISTRAR(PowImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(PowImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(PowImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
REGISTRAR(PowImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, Aidge::PowImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>});
REGISTRAR(PowImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int8}},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<std::int8_t, std::int8_t>, Aidge::PowImpl_cpu_backward_kernel<std::int8_t, std::int8_t, std::int8_t>});
REGISTRAR(PowImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::UInt8}},
{ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<std::uint8_t, std::uint8_t>, Aidge::PowImpl_cpu_backward_kernel<std::uint8_t, std::uint8_t, std::uint8_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */
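
A minimal sketch of the broadcast path of the Pow forward kernel above, with a {1,3} exponent row broadcast over a {2,3} base tensor. The include path is an assumption.

#include <iostream>
#include <vector>

#include "aidge/backend/cpu/operator/PowImpl_kernels.hpp" // assumed install path of this header

int main() {
    // Base tensor of dims {2, 3}; exponent of dims {1, 3}, broadcast over the first axis.
    const std::vector<float> base{2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
    const std::vector<float> expo{2.f, 1.f, 0.f};
    std::vector<float> out(6, 0.f);
    Aidge::PowImpl_cpu_forward_kernel<float, float>(
        {2, 3}, {1, 3}, {2, 3}, base.data(), expo.data(), out.data());
    for (float v : out) { std::cout << v << ' '; } // prints: 4 3 1 25 6 1
    std::cout << '\n';
    return 0;
}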
@@ -12,52 +12,24 @@
 #ifndef AIDGE_CPU_OPERATOR_RELUIMPL_H_
 #define AIDGE_CPU_OPERATOR_RELUIMPL_H_
-#include "aidge/backend/OperatorImpl.hpp"
+#include <cstddef> // std::size_t
+#include <memory>
+#include <tuple> // std::tuple
+#include <vector>
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/ReLU.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include <memory>
-#include <vector>
 namespace Aidge {
-// class ReLU_Op;
-// compute kernel registry for forward and backward
-class ReLUImplForward_cpu
-: public Registrable<ReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
-};
-class ReLUImplBackward_cpu
-: public Registrable<ReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
-};
-class ReLUImpl_cpu : public OperatorImpl {
-protected:
-const ReLU_Op& mOp;
-std::array<NbElts_t, 1> mNbConsumedData;
-std::array<NbElts_t, 1> mNbProducedData;
-public:
-ReLUImpl_cpu(const ReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
-static std::unique_ptr<ReLUImpl_cpu> create(const ReLU_Op& op) {
-return std::make_unique<ReLUImpl_cpu>(op);
-}
-public:
-NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
-NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-void updateConsummerProducer() override final;
-void forward();
-void backward();
-};
+// Operator implementation entry point for the backend
+using ReLUImpl_cpu = OperatorImpl_cpu<ReLU_Op,
+void(const std::size_t, const void*, void*),
+void(const std::size_t, const void*, const void*, void*)>;
-namespace {
-static Registrar<ReLU_Op> registrarReLUImpl_cpu("cpu", Aidge::ReLUImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(ReLU_Op, "cpu", Aidge::ReLUImpl_cpu::create);
 } // namespace Aidge
 #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple> // std::tuple
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
#include "aidge/operator/ReLU.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Kernels
template <class I, class O>
void ReLUImpl_cpu_forward_kernel(std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
//#pragma omp parallel for if (inputLength > 1024)
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = (input[i] > 0) ? input[i] : 0;
}
}
template <class I, class GI, class GO>
void ReLUImpl_cpu_backward_kernel(const std::size_t inputLength,
const void* input_, const void* grad_output_,
void* grad_input_) {
const I* input = static_cast<const I*>(input_);
const GO* grad_output = static_cast<const GO*>(grad_output_);
GI* grad_input = static_cast<GI*>(grad_input_);
for (std::size_t i = 0; i < inputLength; ++i) {
grad_input[i] = (input[i] > 0) ? grad_output[i] : 0;
}
}
// Kernels registration to implementation entry point
REGISTRAR(ReLUImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(ReLUImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>});
REGISTRAR(ReLUImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::ReLUImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ */
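
A minimal usage sketch of the ReLU forward and backward kernels above (the include path is an assumption):

#include <cstddef>
#include <iostream>
#include <vector>

#include "aidge/backend/cpu/operator/ReLUImpl_kernels.hpp" // assumed install path of this header

int main() {
    const std::vector<float> in{-2.f, -0.5f, 0.f, 1.5f};
    std::vector<float> out(in.size(), 0.f);
    Aidge::ReLUImpl_cpu_forward_kernel<float, float>(in.size(), in.data(), out.data());
    // out is now {0, 0, 0, 1.5}

    const std::vector<float> gradOut{1.f, 1.f, 1.f, 1.f};
    std::vector<float> gradIn(in.size(), 0.f);
    Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>(in.size(), in.data(), gradOut.data(), gradIn.data());
    // gradIn is now {0, 0, 0, 1}: the gradient only flows where the input was strictly positive
    for (std::size_t i = 0; i < in.size(); ++i) { std::cout << out[i] << '/' << gradIn[i] << ' '; }
    std::cout << '\n';
    return 0;
}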
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ReduceMeanImpl_cpu = OperatorImpl_cpu<ReduceMean_Op,
void(const std::vector<std::int32_t>&,
DimSize_t,
const std::vector<DimSize_t>&,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ReduceMean_Op, "cpu", Aidge::ReduceMeanImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_
#include <algorithm> // std::for_each
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <functional> //std::multiplies
#include <numeric> //std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/ReduceMean.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
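// stableMean computes a running (incremental) mean, mean_i = mean_{i-1} + (x_i - mean_{i-1}) / (i + 1),
// with std::fma fusing the multiply-add into a single rounding. This avoids building a large
// intermediate sum that could overflow or lose precision before the final division.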
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
stableMean(const T* vec, size_t len, size_t stride) {
T mean = 0;
for (size_t i = 0; i < len; ++i) {
mean = std::fma<T>(vec[i * stride] - mean, 1.0f / (i + 1), mean);
}
return mean;
}
// Specialization for integers: perform the mean computation in double
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, T>::type
stableMean(const T* vec, size_t len, size_t stride) {
double mean = 0;
for (size_t i = 0; i < len; ++i) {
mean = std::fma<double>(vec[i * stride] - mean, 1.0f / (i + 1), mean);
}
return mean;
}
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, T>::type
castFromFloat(T value) {
return value;
}
template <typename T>
typename std::enable_if<!std::is_floating_point<T>::value, T>::type
castFromFloat(double value) {
return static_cast<T>(std::nearbyint(value));
}
template <class I, class O>
void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
DimSize_t /*keepDims*/,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t nb_dims = inputDims.size();
const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
if (axes.empty()){
std::copy_n(input,totalElements, output);
}
else if (axes.size() == 1) {
const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>());
const std::size_t dim_i = inputDims[axes[0]];
for (std::size_t pre = 0; pre < stride_pre; ++pre) {
for (std::size_t post = 0; post < stride_post; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * stride_post + post;
output[idx_o] = castFromFloat<O>(stableMean(input + idx_i, dim_i, stride_post));
}
}
} else {
std::size_t outputElements = totalElements;
auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
stride_post[nb_dims - 1] = 1;
for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
stride_post[i] = stride_post[i+1]*inputDims[i+1];
}
auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
stride_pre[0] = 1;
for (std::size_t i = 1; i < nb_dims; ++i) {
stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
}
// The accumulation type is the return type of stableMean<I>(), i.e. I itself (for integer types the mean is computed internally in double and converted back)
const decltype(stableMean<I>(input, 0, 0))* inputAccumulation = nullptr;
decltype(stableMean<I>(input, 0, 0))* outputAccumulation = nullptr;
for (const auto& axisInt : axes) {
const std::size_t a = static_cast<std::size_t>(axisInt);
outputElements /= inputDims[a];
outputAccumulation = new I[outputElements];
const std::size_t dim_i = inputDims[a];
for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
for (std::size_t post = 0; post < stride_post[a]; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
const std::size_t idx_o = pre * stride_post[a] + post;
if (inputAccumulation == nullptr) {
outputAccumulation[idx_o] = stableMean<I>(input + idx_i, dim_i, stride_post[a]);
}
else {
outputAccumulation[idx_o] = stableMean<I>(inputAccumulation + idx_i, dim_i, stride_post[a]);
}
}
}
std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
if (inputAccumulation != nullptr) {
delete[] inputAccumulation;
}
inputAccumulation = outputAccumulation;
}
std::transform(inputAccumulation, inputAccumulation + outputElements, output,
[](auto value) { return castFromFloat<O>(value); });
if (outputAccumulation) {
delete[] outputAccumulation;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ReduceMeanImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ReduceMeanImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ReduceMeanImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ */
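
A minimal sketch of the single-axis path of the ReduceMean forward kernel above (the include path is an assumption):

#include <iostream>
#include <vector>

#include "aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp" // assumed install path of this header

int main() {
    // 2x3 input, reduced over axis 1: one mean per row.
    const std::vector<float> in{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
    std::vector<float> out(2, 0.f);
    Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>(
        {1},    // axes
        0,      // keepDims (unused by the kernel)
        {2, 3}, // inputDims
        in.data(), out.data());
    std::cout << out[0] << ' ' << out[1] << '\n'; // prints: 2 5
    return 0;
}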
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_
#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using ReduceSumImpl_cpu = OperatorImpl_cpu<ReduceSum_Op,
void(const std::vector<std::int32_t>&,
DimSize_t,
const std::vector<DimSize_t>&,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ReduceSum_Op, "cpu", Aidge::ReduceSumImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */
/********************************************************************************
* Copyright (c) 2024 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_
#include <algorithm> // std::for_each
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include <functional> //std::multiplies
#include <numeric> //std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/operator/ReduceSum.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
template <class I, class O>
void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
DimSize_t /*keepDims*/,
const std::vector<DimSize_t>& inputDims,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t nb_dims = inputDims.size();
const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
if (axes.empty()){
std::copy_n(input,totalElements, output);
}
else if (axes.size() == 1) {
const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>());
const std::size_t dim_i = inputDims[axes[0]];
for (std::size_t pre = 0; pre < stride_pre; ++pre) {
for (std::size_t post = 0; post < stride_post; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post + post;
const std::size_t idx_o = pre * stride_post + post;
O sum = 0;
for (std::size_t i = 0; i < dim_i; ++i) {
sum +=input[idx_i + i*stride_post];
}
output[idx_o] = sum;
}
}
} else {
std::size_t outputElements = totalElements;
auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
stride_post[nb_dims - 1] = 1;
for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
stride_post[i] = stride_post[i+1]*inputDims[i+1];
}
auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
stride_pre[0] = 1;
for (std::size_t i = 1; i < nb_dims; ++i) {
stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
}
const I* inputAccumulation = input;
I* outputAccumulation = nullptr;
for (const auto& axisInt : axes) {
const std::size_t a = static_cast<std::size_t>(axisInt);
outputElements /= inputDims[a];
outputAccumulation = new I[outputElements];
const std::size_t dim_i = inputDims[a];
for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
for (std::size_t post = 0; post < stride_post[a]; ++post) {
const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
const std::size_t idx_o = pre * stride_post[a] + post;
I sum = 0;
for (std::size_t i = 0; i < dim_i; ++i) {
sum += inputAccumulation[idx_i + i*stride_post[a]];
}
outputAccumulation[idx_o] = sum;
}
}
std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
if (inputAccumulation != input) {
delete[] inputAccumulation;
}
inputAccumulation = outputAccumulation;
}
// Copy the accumulated elements to the output
std::copy(inputAccumulation, inputAccumulation + outputElements, output);
if (outputAccumulation) {
delete[] outputAccumulation;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ReduceSumImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ReduceSumImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ReduceSumImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESIZEIMPL_H_
#define AIDGE_CPU_OPERATOR_RESIZEIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Resize.hpp"
#include "aidge/utils/Registrar.hpp"
#include <aidge/data/Interpolation.hpp>
#include <aidge/operator/Pad.hpp>
#include <cstdint>
namespace Aidge {
// Operator implementation entry point for the backend
using ResizeImpl_cpu = OperatorImpl_cpu<
Resize_Op,
void(const void *, // input
const std::vector<DimSize_t> &, // INput dims
const std::vector<DimSize_t> &, // OUTput dims
const Interpolation::CoordinateTransformation, // coord transfo
const Interpolation::Mode, // interpolation mode
const PadBorderType, // padding mode
void *)>; // output
// Implementation entry point registration to Operator
REGISTRAR(Resize_Op, "cpu", Aidge::ResizeImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESIZEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESIZEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_RESIZEIMPL_FORWARD_KERNEL_H_
#include "aidge/backend/cpu/operator/ResizeImpl.hpp"
#include <aidge/data/Data.hpp>
#include <aidge/data/half.hpp>
#include <aidge/operator/Pad.hpp>
#include <cmath>
#include <cstdint>
#include <numeric>
#include "aidge/backend/cpu/data/Interpolation.hpp"
#include "aidge/data/Interpolation.hpp"
#include "aidge/data/Tensor.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <typename IO>
void ResizeImpl_cpu_forward_kernel(
const void *input_,
const std::vector<DimSize_t> &inputDims,
const std::vector<DimSize_t> &outputDims,
const Interpolation::CoordinateTransformation coordTransfoMode,
const Interpolation::Mode interpMode,
const PadBorderType paddingMode,
// const double * /*roi*/,
// const float * /*scales*/,
// const int64_t * /*sizes*/,
void *output_) {
// Set data pointers
const IO *input = static_cast<const IO *>(input_);
IO *output = static_cast<IO *>(output_);
const DimSize_t outputLen = std::accumulate(outputDims.cbegin(),
outputDims.cend(),
1,
std::multiplies<DimSize_t>());
std::vector<float> coordInApprox(inputDims.size());
std::vector<std::size_t> coordIn(inputDims.size());
std::vector<DimSize_t> coordOut;
for (DimSize_t idxFlatOut = 0; idxFlatOut < outputLen; ++idxFlatOut) {
coordOut = Tensor::toCoord(outputDims, idxFlatOut);
coordInApprox =
Interpolation::untransformCoordinates(coordOut,
inputDims,
outputDims,
coordTransfoMode);
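// Nearest-neighbour modes: Ceil/Floor round the source coordinate up/down;
// RoundPreferCeil uses floor(x + 0.5) so that halves round up, while
// RoundPreferFloor uses ceil(x - 0.5) so that halves round down.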
if ((interpMode == Interpolation::Mode::Ceil) || (interpMode == Interpolation::Mode::Floor) || (interpMode == Interpolation::Mode::RoundPreferCeil) || (interpMode == Interpolation::Mode::RoundPreferFloor)) {
for (std::size_t i = 0; i < coordInApprox.size(); ++i) {
if (interpMode == Interpolation::Mode::Ceil) {
coordInApprox[i] = std::ceil(coordInApprox[i]);
} else if (interpMode == Interpolation::Mode::Floor) {
coordInApprox[i] = std::floor(coordInApprox[i]);
} else if (interpMode == Interpolation::Mode::RoundPreferCeil) {
coordInApprox[i] = std::floor(coordInApprox[i] + 0.5f);
} else { // (interpMode == Interpolation::Mode::RoundPreferFloor)
coordInApprox[i] = std::ceil(coordInApprox[i] - 0.5f);
}
}
if (Tensor::isInBounds<float>(inputDims, coordInApprox)) {
for (std::size_t i = 0; i < coordInApprox.size(); ++i) {
coordIn[i] = static_cast<std::size_t>(coordInApprox[i]);
}
} else {
if (paddingMode == PadBorderType::Edge) {
for (std::size_t i = 0; i < coordInApprox.size(); ++i) {
coordIn[i] = coordInApprox[i] < 0 ? 0 : (coordInApprox[i] >=inputDims[i] ? inputDims[i] - 1 : static_cast<std::size_t>(coordInApprox[i]));
}
} else {
AIDGE_THROW_OR_ABORT(std::runtime_error, "Padding mode not supported");
}
}
output[idxFlatOut] = input[Tensor::toIndex(inputDims, coordIn)];
} else {
std::set<Interpolation::Point<IO>> neighbours =
InterpolationCPU::retrieveNeighbours(input,
inputDims,
coordInApprox,
paddingMode);
output[idxFlatOut] = InterpolationCPU::interpolate(coordInApprox,
neighbours,
interpMode);
}
}
return;
}
// Kernels registration to implementation entry point
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Int16},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Int16}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<int16_t>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Int32},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Int32}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<int32_t>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Int64},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::UInt64}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<int64_t>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Float16},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Float16}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<half_float::half>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Float32},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Float32}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<float>,
nullptr});
REGISTRAR(ResizeImpl_cpu,
{{{DataType::Float64},
{DataType::Any},
{DataType::Any},
{DataType::Any}},
{DataType::Float64}},
{ProdConso::inPlaceModel,
ResizeImpl_cpu_forward_kernel<double>,
nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESIZEIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_H_
#include <cstddef> // std::size_t
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Round.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using RoundImpl_cpu = OperatorImpl_cpu<Round_Op,
void(const std::size_t, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Round_Op, "cpu", Aidge::RoundImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_
#include <cmath> //std::round
#include <cstddef> // std::size_t
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/RoundImpl.hpp"
namespace Aidge {
template <class I, class O>
void RoundImpl_cpu_forward_kernel(const std::size_t inputLength,
                                  const void* input_,
                                  void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    for (std::size_t i = 0; i < inputLength; ++i) {
        // std::round is not used here: it rounds halves away from zero, whereas
        // ONNX Round requires round-half-to-even, which std::nearbyint provides
        // in the default rounding mode.
        output[i] = static_cast<O>(std::nearbyint(input[i]));
    }
}
REGISTRAR(RoundImpl_cpu,
    {DataType::Float32},
    {ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(RoundImpl_cpu,
    {DataType::Float64},
    {ProdConso::inPlaceModel, Aidge::RoundImpl_cpu_forward_kernel<double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ROUNDIMPL_KERNELS_H_ */
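// --- Illustrative example (not part of the Aidge sources) ---
// Why the Round kernel above uses std::nearbyint: in the default
// FE_TONEAREST rounding mode it rounds halves to the nearest even value,
// matching ONNX Round, whereas std::round always rounds halves away from
// zero. A minimal check:
#include <cmath>
#include <iostream>

int main() {
    for (float x : {0.5f, 1.5f, 2.5f, -0.5f, -1.5f}) {
        std::cout << x << ": nearbyint=" << std::nearbyint(x)
                  << " round=" << std::round(x) << '\n';
    }
    // nearbyint: 0 2 2 -0 -2   (half to even)
    // round:     1 2 3 -1 -2   (half away from zero)
    return 0;
}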
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_H_
#define AIDGE_CPU_OPERATOR_SCALINGIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Scaling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge {
// Operator implementation entry point for the backend
using ScalingImpl_cpu = OperatorImpl_cpu<Scaling_Op,
void(const float,
const std::size_t,
const bool,
std::size_t,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Scaling_Op, "cpu", Aidge::ScalingImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_H_ */
\ No newline at end of file
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
#include <cassert>
#include <cmath>   // std::round
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
// TODO: improve this propagate kernel; the N2D2 reference implementation is kept below for comparison:
/*
template<typename T>
void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& output,
std::size_t batchSize, std::size_t nbChannels,
std::size_t height, std::size_t width,
bool isClipped,
const std::vector<Float_T>& clippingFactorPerChannel,
const std::vector<Float_T>& scalingFactorPerChannel,
std::size_t quantizedNbBits, bool isOutputUnsigned)
{
std::size_t index = 0;
for (std::size_t batch = 0; batch < batchSize; batch++) {
for(std::size_t ch = 0; ch < nbChannels; ch++) {
for(std::size_t y = 0; y < height; y++) {
for(std::size_t x = 0; x < width; x++) {
T res = isClipped ? Clip(input(index), clippingFactorPerChannel[ch])
: input(index);
res = Scale(res, scalingFactorPerChannel[ch]);
if(quantizedNbBits > 0) {
res = saturate(std::round(res), quantizedNbBits, isOutputUnsigned);
}
output(index) = (T) res;
index++;
}
}
}
}
}
*/
namespace Aidge {
template <class O>
const O& clamp(const O& x, const O& min, const O& max)
{
return (x < min) ? min : (x > max) ? max : x;
}
template<class O>
O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) {
// TODO: no assertions in kernel
assert(quantizedNbBits > 0);
const O min = isOutputUnsigned ? 0 :
-(1ll << (quantizedNbBits - 1ll));
const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll :
(1ll << (quantizedNbBits - 1ll)) - 1ll;
return clamp(value, min, max);
}
template <class I, class O>
void ScalingImpl_cpu_forward_kernel(const float scalingFactor,
                                    const std::size_t quantizedNbBits,
                                    const bool isOutputUnsigned,
                                    const std::size_t inputLength,
                                    const void* input_,
                                    void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    for (std::size_t i = 0; i < inputLength; ++i) {
        output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor));
        if (quantizedNbBits > 0) {
            output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned);
        }
    }
}
// Kernels registration to implementation entry point
REGISTRAR(ScalingImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ScalingImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ScalingImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ */
\ No newline at end of file
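// --- Illustrative example (not part of the Aidge sources) ---
// Numeric ranges produced by the saturate() helper of the Scaling kernel
// above, for 8-bit quantization: signed outputs clamp to [-128, 127] and
// unsigned outputs to [0, 255]. saturateDemo is a standalone copy of that
// logic, reproduced here only for demonstration.
#include <cstddef>
#include <iostream>

static long long saturateDemo(long long value, std::size_t nbBits, bool isUnsigned) {
    const long long min = isUnsigned ? 0 : -(1ll << (nbBits - 1));
    const long long max = isUnsigned ? (1ll << nbBits) - 1 : (1ll << (nbBits - 1)) - 1;
    return value < min ? min : (value > max ? max : value);
}

int main() {
    std::cout << saturateDemo(300, 8, false) << '\n';  // 127
    std::cout << saturateDemo(300, 8, true) << '\n';   // 255
    std::cout << saturateDemo(-200, 8, false) << '\n'; // -128
    std::cout << saturateDemo(-200, 8, true) << '\n';  // 0
    return 0;
}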
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Sigmoid.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// Operator implementation entry point for the backend
using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op,
void(const std::size_t, const void*, void*),
void(const std::size_t, const void*, const void*, void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
namespace Aidge {
template <class I, class O>
void SigmoidImpl_cpu_forward_kernel(std::size_t inputLength,
                                    const void* input_,
                                    void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    //#pragma omp parallel for if (inputLength > 1024)
    for (std::size_t i = 0; i < inputLength; ++i) {
        if (input[i] > I(0)) {
            output[i] = O(1) / (O(1) + std::exp(-input[i]));
        } else {
            output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
        }
    }
}
template <class O, class GI, class GO>
void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLength,
                                     const void* output_, const void* grad_output_,
                                     void* grad_input_) {
    const O* output = static_cast<const O*>(output_);
    const GO* grad_output = static_cast<const GO*>(grad_output_);
    GI* grad_input = static_cast<GI*>(grad_input_);
    for (std::size_t i = 0; i < inputLength; ++i) {
        grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
    }
}
// Kernels registration to implementation entry point
REGISTRAR(SigmoidImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>});
REGISTRAR(SigmoidImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */
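// --- Illustrative example (not part of the Aidge sources) ---
// The two-branch form used by the Sigmoid forward kernel above keeps the
// argument of std::exp non-positive, so it never overflows (a naive
// exp(x) / (1 + exp(x)) would give inf/inf = NaN for large positive x).
// The backward kernel reuses the forward output since d(sigmoid)/dx = y*(1-y).
#include <cmath>
#include <iostream>

static double stableSigmoid(double x) {
    return x > 0.0 ? 1.0 / (1.0 + std::exp(-x))
                   : std::exp(x) / (1.0 + std::exp(x));
}

int main() {
    const double y  = stableSigmoid(1000.0);     // ~1.0, no overflow
    const double dy = y * (1.0 - y);             // gradient factor, ~0.0
    std::cout << y << ' ' << dy << '\n';
    std::cout << stableSigmoid(-1000.0) << '\n'; // ~0.0, no overflow either
    return 0;
}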
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H_
#define AIDGE_CPU_OPERATOR_SLICEIMPL_H_
#include <memory>
#include <vector>
#include <array>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Slice.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op,
void(const std::vector<std::int64_t>&,
const std::vector<std::int64_t>&,
const std::vector<std::int8_t>&,
const std::vector<std::int64_t>&,
const std::vector<DimSize_t>&,
const void*,
void*)>;
// Implementation entry point registration to Operator
REGISTRAR(Slice_Op, "cpu", Aidge::SliceImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
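// --- Illustrative example (not part of the Aidge sources) ---
// The Slice kernel signature above takes four integer vectors plus the input
// dimensions; assuming they follow ONNX Slice semantics (starts, ends, axes,
// steps), selecting starts=1, ends=7, steps=2 on an axis of size 8 yields
// indices 1, 3, 5. This parameter interpretation is an assumption made only
// for this demonstration.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const std::int64_t start = 1, end = 7, step = 2;
    std::vector<std::int64_t> picked;
    for (std::int64_t i = start; i < end; i += step) { picked.push_back(i); }
    for (std::int64_t i : picked) { std::cout << i << ' '; } // 1 3 5
    std::cout << '\n';
    return 0;
}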