Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing with 1669 additions and 525 deletions
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_
#include <aidge/data/Tensor.hpp>
#include <aidge/data/half.hpp>
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional> // std::multiplies
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class O>
void ConstantOfShapeimpl_cpu_forward_kernel(
const std::vector<DimSize_t> output_dims, const Tensor &value,
void *output_) {
O *output = static_cast<O *>(output_);
O val;
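// Read the single fill value from the "value" input tensor (expected to hold exactly one element of type O).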
std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
static_cast<O *>(value.getImpl()->hostPtr()) +
static_cast<NbElts_t>(1),
&val);
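// Total number of output elements: product of the requested output dimensions.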
const size_t output_size = std::accumulate(
output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>());
for (size_t i = 0; i < output_size; ++i) {
output[i] = val;
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr});
REGISTRAR(ConstantOfShapeImpl_cpu,
{ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}},
{ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */
@@ -17,43 +17,39 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class ConvDepthWise_Op;
// compute kernel registry for forward and backward
class ConvDepthWiseImpl2DForward_cpu
: public Registrable<ConvDepthWiseImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvDepthWiseImpl2DBackward_cpu
: public Registrable<ConvDepthWiseImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
public:
ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : OperatorImpl(op) {}
static std::unique_ptr<ConvDepthWiseImpl2D_cpu> create(const ConvDepthWise_Op<2> &op) {
return std::make_unique<ConvDepthWiseImpl2D_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to ConvDepthWise_Op<2> implementation registry
static Registrar<ConvDepthWise_Op<2>> registrarConvDepthWiseImpl2D_cpu("cpu", Aidge::ConvDepthWiseImpl2D_cpu::create);
} // namespace
// Operator implementation entry point for the backend
using ConvDepthWise1D_Op = ConvDepthWise_Op<1>;
using ConvDepthWiseImpl1D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<1>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3>&,
const void *,
const void *,
const void *,
void *)>;
using ConvDepthWise2D_Op = ConvDepthWise_Op<2>;
using ConvDepthWiseImpl2D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(ConvDepthWise1D_Op, "cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
REGISTRAR(ConvDepthWise2D_Op, "cpu", Aidge::ConvDepthWiseImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <cstddef>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 2D depthwise convolution (ConvDepthWise) on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param attrs Tuple of Attributes from the Operator.
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
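// Both sizes follow floor((in - kernel + stride) / stride), i.e. floor((in - kernel) / stride) + 1: a "valid" convolution without padding.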
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) {
const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
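// Fast path: the kernel window spans exactly 3x3 input elements (a 3x3 kernel fully inside the input), so the accumulation is fully unrolled.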
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
}
}
}
}
}
namespace {
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
* @brief Forward kernel for 1D depthwise convolution (ConvDepthWise) on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions.
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
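// Effective kernel extent with dilation: a kernel of size k with dilation d spans d*(k-1)+1 input samples.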
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// Data layout:
// output (batch, ch, Xout)
// input (batch, ch, Xin)
// weight (ch, kernelX)
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = ch * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
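// No boundary clipping is needed: oxSize guarantees that the dilated window always fits inside the input.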
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(ConvDepthWiseImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
/**
* @brief Forward kernel for 2D depthwise convolution (ConvDepthWise) on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions.
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// Data layout:
// output (batch, ch, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (ch, kernelX, kernelY)
using signedsize = std::make_signed<std::size_t>::type;
const std::size_t outChannels_s = oxSize * oySize;
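// Size of one output feature map (oxSize * oySize), used to advance the output pointer between channels.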
if (dilated_kernel_x ==3 && dilated_kernel_y == 3) {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * 9;
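// 9 weights per channel: this branch assumes a 3x3 filter.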
if (strideDims[0] == 1 && strideDims[1]==1) {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2];
}
}
}
output += outChannels_s;
}
}
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
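// 1x1 kernel: each output element is the corresponding (strided) input sample scaled by the channel's single weight, plus the bias.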
std::size_t index = 0;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch;
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (; index < iIndex + oxSize*oySize; ++index) {
output[index] = biasVal + weights[wIndex] * input[index];
}
} else {
std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize) {
index = iIndex + strideDims[0]*inputDims[3];
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] += weights[wIndex]*input[index+iy];
}
}
}
}
}
} else {
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output, output+outChannels_s, biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t oIndexFull = ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
for (std::size_t sx = 0; sx*dilationDims[0] < dilated_kernel_x; ++sx) {
for (std::size_t sy = 0; sy*dilationDims[1] < dilated_kernel_y; ++sy) {
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))];
}
}
}
}
}
output += outChannels_s;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
REGISTRAR(ConvDepthWiseImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ */
@@ -17,44 +17,41 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// class Conv_Op;
// compute kernel registry for forward and backward
class ConvImpl2DForward_cpu
: public Registrable<ConvImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvImpl2DBackward_cpu
: public Registrable<ConvImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
class ConvImpl2D_cpu : public OperatorImpl {
public:
ConvImpl2D_cpu(const Conv_Op<2>& op) : OperatorImpl(op) {}
static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) {
return std::make_unique<ConvImpl2D_cpu>(op);
}
public:
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to Conv_Op<2> implementation registry
static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cpu::create);
} // namespace
// Operator implementation entry point for the backend
using Conv1D_Op = Conv_Op<1>;
using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)>;
using Conv2D_Op = Conv_Op<2>;
using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/data/half.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 2D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param attrs Tuple of Attributes from the Operator.
* @param dims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
/*
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0]));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1]));
// TODO: kernel computation
// output (Xout, Yout, outCh, batch)
// input (Xin, Yin, inCh, batch)
// weight (kernelX, kernelY, inCh, outCh)
// does not take Dilation attribute into account
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t ix = ox * std::get<0>(attrs)[0];
const std::size_t iy = oy * std::get<0>(attrs)[1];
for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox));
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
output[oIndex + batch] = biasVal;
}
for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) {
for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) {
const std::size_t wIndex =
outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx));
std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
}
}
}
}
}
}
}
*/
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
}
}
}
}
}
}
namespace {
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float16(
{DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/operator/Conv.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
/**
* @brief Forward kernel for 1D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions.
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param outChannels Number of output channels.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& dilationDims,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
static_cast<float>(strideDims[0])));
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
// Data layout:
// output (batch, outCh, Xout)
// input (batch, inCh, Xin)
// weight (outCh, inCh, kernelX)
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
// const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
// const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
// const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t sxMin = 0;
const std::size_t sxMax = dilated_kernel_x;
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
}
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
REGISTRAR(ConvImpl1D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
/**
* @brief Forward kernel for 2D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions.
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param outChannels Number of output channels.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
static_cast<float>(strideDims[1])));
// Data layout:
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
const std::size_t outChannels_s = oxSize * oySize;
using signedsize = std::make_signed<std::size_t>::type;
if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
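// Fast path when the dilated kernel extent is 3x3: the window accumulation is fully unrolled, with a further specialization for unit strides.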
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * 9;
if (strideDims[0] == 1 && strideDims[1]==1) {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
}
iIndex+=inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
}
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2];
}
iIndex+=strideDims[0]*inputDims[3];
for (std::size_t oy = 0; oy < oySize; ++oy) {
output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2];
}
}
}
}
output += outChannels_s;
}
}
} else if (dilated_kernel_x == 1 && dilated_kernel_y == 1) {
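// 1x1 kernel: the convolution reduces to a per-pixel weighted sum over input channels.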
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]);
if (strideDims[0] == 1 && strideDims[1] == 1) {
for (std::size_t oIndex = 0; oIndex < oxSize*oySize; ++oIndex, ++iIndex) {
output[oIndex] += weights[wIndex] * input[iIndex];
}
} else {
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=inputDims[3]*strideDims[0]) {
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
output[oIndex + oy] += weights[wIndex+0]*input[iIndex+iy];
}
}
}
}
output += outChannels_s;
}
}
} else {
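// General path: arbitrary kernel size, stride and dilation.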
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output, output+outChannels_s, biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
std::size_t iIndex_channel = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
// loop over each output line
for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex_channel+=inputDims[3]*strideDims[0]) {
// loop over associated input line
for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; ++ky, ix += inputDims[3]*dilationDims[0]) {
// loop over the entire line
for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
const std::size_t iIndex = iIndex_channel + ix + iy;
// loop over elements associated with one output
for (std::size_t kx = 0; kx < kernelDims[1]; ++kx) {
output[oIndex + oy] += weights[wIndex+kernelDims[1]*ky+kx]*input[iIndex+kx*dilationDims[1]];
}
}
}
}
}
output += outChannels_s;
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
REGISTRAR(ConvImpl2D_cpu,
{{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
{ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
@@ -16,38 +16,18 @@
#include <tuple>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Div.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using DivImpl_cpu = OperatorImpl_cpu<Div_Op,
void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>;
// compute kernel registry for forward and backward
class DivImplForward_cpu
// : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)> {
: public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)> {
};
class DivImplBackward_cpu
: public Registrable<DivImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)> {
};
class DivImpl_cpu : public OperatorImpl {
public:
DivImpl_cpu(const Div_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<DivImpl_cpu> create(const Div_Op& op) {
return std::make_unique<DivImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override final;
};
namespace {
static Registrar<Div_Op> registrarDivImpl_cpu("cpu", Aidge::DivImpl_cpu::create);
}
// Implementation entry point registration to Operator
REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */
@@ -9,11 +9,12 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
#include <numeric> // std::accumulate
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::int64_t
#include <functional> // std::multiplies
#include "aidge/utils/Registrar.hpp"
@@ -68,19 +69,16 @@ constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
}
}
namespace {
static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::DivImpl_cpu_forward_kernel<float, float, float>);
static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::DivImpl_cpu_forward_kernel<int, int, int>);
static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::DivImpl_cpu_forward_kernel<double, double, double>);
} // namespace
// Kernels registration to implementation entry point
REGISTRAR(DivImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr});
REGISTRAR(DivImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr});
REGISTRAR(DivImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */
@@ -12,7 +12,7 @@
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Erf.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
@@ -20,31 +20,12 @@
#include <vector>
namespace Aidge {
// class Erf_Op;
// Operator implementation entry point for the backend
using ErfImpl_cpu = OperatorImpl_cpu<Erf_Op,
void(const std::size_t, const void*, void*)>;
// compute kernel registry for forward and backward
class ErfImplForward_cpu
: public Registrable<ErfImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class ErfImplBackward_cpu
: public Registrable<ErfImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class ErfImpl_cpu : public OperatorImpl {
public:
ErfImpl_cpu(const Erf_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<ErfImpl_cpu> create(const Erf_Op& op) {
return std::make_unique<ErfImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
static Registrar<Erf_Op> registrarErfImpl_cpu("cpu", Aidge::ErfImpl_cpu::create);
}
// Implementation entry point registration to Operator
REGISTRAR(Erf_Op, "cpu", Aidge::ErfImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */
@@ -9,8 +9,8 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_
#include <cmath>
@@ -32,14 +32,16 @@ void ErfImpl_cpu_forward_kernel(std::size_t inputLenght,
}
}
namespace {
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::ErfImpl_cpu_forward_kernel<float, float>);
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::ErfImpl_cpu_forward_kernel<int, int>);
static Registrar<ErfImplForward_cpu> registrarErfImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::ErfImpl_cpu_forward_kernel<double, double>);
} // namespace
// Kernels registration to implementation entry point
REGISTRAR(ErfImpl_cpu,
{DataType::Float32},
{ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(ErfImpl_cpu,
{DataType::Float64},
{ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(ErfImpl_cpu,
{DataType::Int32},
{ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ */
@@ -12,42 +12,37 @@
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge {
// class FC_Op;
// compute kernel registry for forward and backward
class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t,
const void *, const void *, const void *, void *)> {};
class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t,
const void *, const void *, const void *, void *)> {};
class FCImpl_cpu : public OperatorImpl {
public:
FCImpl_cpu(const FC_Op &op) : OperatorImpl(op) {}
static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) {
return std::make_unique<FCImpl_cpu>(op);
}
void forward() override;
};
namespace {
static Registrar<FC_Op> registrarFCImpl_cpu("cpu", Aidge::FCImpl_cpu::create);
}
// Operator implementation entry point for the backend
using FCImpl_cpu = OperatorImpl_cpu<FC_Op,
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *),
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *,
void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(FC_Op, "cpu", Aidge::FCImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */
@@ -9,13 +9,13 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include "aidge/backend/cpu/operator/FCImpl.hpp"
#include "aidge/utils/Registrar.hpp"
namespace Aidge {
// template <class I, class W, class B, class O>
@@ -27,9 +27,9 @@ namespace Aidge {
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
@@ -39,10 +39,10 @@ namespace Aidge {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) +
// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
@@ -63,9 +63,9 @@ namespace Aidge {
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
@@ -74,8 +74,8 @@ namespace Aidge {
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
@@ -83,46 +83,104 @@ namespace Aidge {
// }
template <class I, class W, class B, class O>
void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
const void* input_, const void* weights_, const void* biases_, void* output_) {
void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* weights_,
const void* biases_,
void* output_) {
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
if (std::get<1>(attrs)) {
std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0));
if (biases == nullptr) {
std::fill(output, output+(batchSize*outputFeatureSize), B(0));
}
else {
for (std::size_t batch = 0; batch < batchSize; ++batch) {
std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs)));
std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
input + (batch + 1)*oneInputSize,
weights + out*oneInputSize,
output[out + batch*std::get<0>(attrs)]);
for (std::size_t out = 0; out < outputFeatureSize; ++out) {
output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
input + (batch + 1)*inputFeatureSize,
weights + out*inputFeatureSize,
output[out + batch*outputFeatureSize]);
}
}
}
template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* originalInput_,
const void* weight_,
void* output_,
void* weightGrad_,
void* biasesGrad_)
{
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const I* originalInput = static_cast<const I*>(originalInput_);
const W* weight = static_cast<const W*>(weight_);
O* output = static_cast<O*>(output_);
W* weightGrad = static_cast<W*>(weightGrad_);
B* biasesGrad = static_cast<B*>(biasesGrad_);
// bias grad
if (biasesGrad != nullptr) { // bias gradient requested
for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
B sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += input[b*outputFeatureSize + o];
}
biasesGrad[o] = sum;
}
}
namespace {
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>);
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>);
static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>);
} // namespace
// weight grad
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
W sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
}
weightGrad[o*inputFeatureSize + c] = sum;
}
}
// input grad
for (std::size_t b = 0; b < batchSize; ++b) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
O sum{0};
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
}
output[b*inputFeatureSize + c] = sum;
}
}
}
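// Worked example (sketch, not library code): with batchSize = 1,
// inputFeatureSize = 2, outputFeatureSize = 1, weight = {w0, w1},
// originalInput = {x0, x1} and incoming gradient input = {g}, the
// backward kernel above yields:
//   biasesGrad = {g}
//   weightGrad = {g*x0, g*x1}
//   output     = {g*w0, g*w1}   (gradient w.r.t. the FC input)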
// Kernels registration to implementation entry point
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>});
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>});
REGISTRAR(FCImpl_cpu,
{ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
{ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
#endif /* AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ */
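For reference, a minimal usage sketch of the forward kernel above, calling it directly on raw buffers with weights laid out as [outputFeatureSize, inputFeatureSize]. The include path is an assumption (the renamed kernels header is presumed to be exposed as FCImpl_kernels.hpp); everything else follows the signature shown above.

#include <array>
#include <cstddef>
#include <iostream>
#include "aidge/backend/cpu/operator/FCImpl_kernels.hpp"  // assumed header name

int main() {
    constexpr std::size_t batch = 2, inF = 3, outF = 2;
    std::array<float, batch * inF> input{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
    std::array<float, outF * inF> weights{1.f, 0.f, 0.f,   // output feature 0
                                          0.f, 1.f, 0.f};  // output feature 1
    std::array<float, outF> biases{0.5f, -0.5f};
    std::array<float, batch * outF> output{};

    // output = input . weights^T + bias, computed per batch
    Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>(
        batch, inF, outF, input.data(), weights.data(), biases.data(), output.data());

    for (float v : output) std::cout << v << ' ';  // expected: 1.5 1.5 4.5 4.5
    return 0;
}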
@@ -9,42 +9,34 @@
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_H_
#define AIDGE_CPU_OPERATOR_FOLDIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Reshape.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include <array>
#include <memory>
#include <tuple>
#include <vector>
namespace Aidge {
// class Reshape_Op;
// compute kernel registry for forward and backward
class ReshapeImplForward_cpu
: public Registrable<ReshapeImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
};
class ReshapeImplBackward_cpu
: public Registrable<ReshapeImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
};
class ReshapeImpl_cpu : public OperatorImpl {
public:
ReshapeImpl_cpu(const Reshape_Op& op) : OperatorImpl(op) {}
static std::unique_ptr<ReshapeImpl_cpu> create(const Reshape_Op& op) {
return std::make_unique<ReshapeImpl_cpu>(op);
}
NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/Fold.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace {
static Registrar<Reshape_Op> registrarReshapeImpl_cpu("cpu", Aidge::ReshapeImpl_cpu::create);
}
namespace Aidge {
// Operator implementation entry point for the backend
using Fold2D_Op = Fold_Op<2>;
using FoldImpl2D_cpu = OperatorImpl_cpu<Fold_Op<2>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::vector<DimSize_t> &,
const void *,
void *)>;
// Implementation entry point registration to Operator
REGISTRAR(Fold2D_Op, "cpu", Aidge::FoldImpl2D_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_ */
#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/FoldImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
template <class I, class O>
void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& dilationDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::vector<DimSize_t> &dims,
const void *input_, void *output_)
{
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
const DimSize_t inHeight = outputDims[0];
const DimSize_t inWidth = outputDims[1];
const DimSize_t kernelExtentHeight = dilationDims[0] *
(kernelDims[0] - 1) + 1;
const DimSize_t outHeight = 1 + static_cast<DimSize_t>(
floor(static_cast<float>(inHeight - kernelExtentHeight) /
static_cast<float>(strideDims[0])));
const DimSize_t kernelExtentWidth = dilationDims[1] *
(kernelDims[1] - 1) + 1;
const DimSize_t outWidth = 1 + static_cast<DimSize_t>(
floor(static_cast<float>(inWidth - kernelExtentWidth) /
static_cast<float>(strideDims[1])));
const DimSize_t outChannels = dims[dims.size() - 2];
const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1];
// zero the full folded output (N * C * H * W) before accumulating patches
std::fill_n(output, dims[0] * inChannels * inHeight * inWidth, O(0));
for (DimSize_t n = 0; n < dims[0]; ++n) {
for (DimSize_t outC = 0; outC < outChannels; ++outC) {
const auto inOffsetW = outC % kernelDims[1];
const auto inOffsetH = (outC / kernelDims[1]) % kernelDims[0];
const auto inC = outC / kernelDims[0] / kernelDims[1];
for (DimSize_t outH = 0; outH < outHeight; ++outH) {
const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0];
for (DimSize_t outW = 0; outW < outWidth; ++outW) {
const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1];
output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] +=
input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW];
}
}
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(FoldImpl2D_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(FoldImpl2D_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(FoldImpl2D_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ */
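A minimal, hypothetical usage sketch of the Fold kernel above: folding a single 2x2 patch back into a 2x2 image (col2im with a 2x2 kernel, stride 1, dilation 1). The include path is an assumption; overlapping positions, when they exist, are summed by the kernel.

#include <array>
#include <vector>
#include "aidge/backend/cpu/operator/FoldImpl_kernels.hpp"  // assumed header name

int main() {
    // Unfolded input: [N=1, C*kH*kW=4, L=1], i.e. one 2x2 patch {1, 2, 3, 4}
    std::array<float, 4> input{1.f, 2.f, 3.f, 4.f};
    std::array<float, 4> output{};  // folded result: [N=1, C=1, H=2, W=2]

    Aidge::FoldImpl2D_cpu_forward_kernel<float, float>(
        {2, 2},                                   // folded output spatial dims (H, W)
        {1, 1},                                   // strides
        {1, 1},                                   // dilations
        {2, 2},                                   // kernel dims
        std::vector<Aidge::DimSize_t>{1, 4, 1},   // unfolded input dims
        input.data(), output.data());
    // Expected output: {1, 2, 3, 4}, i.e. the image [[1, 2], [3, 4]]
    return 0;
}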
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <cstddef>
#include <cmath>
#include "aidge/data/Data.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/operator/GatherImpl.hpp"
namespace Aidge {
template <class I, class O>
void GatherImpl_cpu_forward_kernel(const typename Gather_Op::Attrs& attrs, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
{
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const std::size_t axisIdx = std::get<2>(attrs)>=0 ?
std::get<2>(attrs) :
static_cast<std::size_t>(std::get<2>(attrs)) + inputDims.size();
std::size_t postAxisElems = 1;
for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
postAxisElems *= inputDims[i];
}
std::size_t preAxisElems = 1;
for (std::size_t i = 0; i < axisIdx; ++i) {
preAxisElems *= inputDims[i];
}
const std::vector<std::int64_t> indices = std::get<0>(attrs);
for (std::size_t i=0; i<preAxisElems; ++i)
{
for(std::size_t j=0; j<indices.size(); ++j)
{
const std::size_t idx = indices[j] >= 0 ? indices[j] : static_cast<std::size_t>(indices[j]) + inputDims[axisIdx];
const I* startPtr = std::next(input, i * postAxisElems * inputDims[axisIdx] + idx * postAxisElems);
std::copy_n(startPtr, postAxisElems, output);
output += postAxisElems;
}
}
}
namespace {
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::GatherImpl_cpu_forward_kernel<float, float>);
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::GatherImpl_cpu_forward_kernel<int, int>);
static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::GatherImpl_cpu_forward_kernel<double, double>);
} // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_ */
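As a standalone illustration of the indexing scheme used by GatherImpl_cpu_forward_kernel above (the pre-/post-axis decomposition), the following sketch gathers columns {2, 0} along axis 1 of a 2x3 row-major tensor; it does not construct Gather_Op::Attrs, it only mirrors the loop logic.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    const std::vector<std::size_t> inputDims{2, 3};
    const std::vector<float> input{0.f, 1.f, 2.f, 3.f, 4.f, 5.f};  // [[0,1,2],[3,4,5]]
    const std::vector<std::int64_t> indices{2, 0};
    const std::size_t axis = 1;

    std::size_t postAxisElems = 1, preAxisElems = 1;
    for (std::size_t i = axis + 1; i < inputDims.size(); ++i) postAxisElems *= inputDims[i];
    for (std::size_t i = 0; i < axis; ++i) preAxisElems *= inputDims[i];

    std::vector<float> output;
    for (std::size_t i = 0; i < preAxisElems; ++i) {
        for (std::int64_t idx : indices) {
            // negative indices wrap around the gathered axis
            const std::size_t j = static_cast<std::size_t>(
                idx >= 0 ? idx : idx + static_cast<std::int64_t>(inputDims[axis]));
            const float* start = input.data() + i * postAxisElems * inputDims[axis] + j * postAxisElems;
            output.insert(output.end(), start, start + postAxisElems);
        }
    }
    for (float v : output) std::cout << v << ' ';  // expected: 2 0 5 3
    return 0;
}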
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_
#include <memory>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GlobalAveragePooling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
// Operator implementation entry point for the backend
using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<GlobalAveragePooling_Op,
void(const std::vector<DimSize_t> &, const void *, void *)>;
// Implementation entry point registration to Operator
REGISTRAR(GlobalAveragePooling_Op, "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
#define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
#include <cstddef>
#include <functional> // std::multiplies
#include <numeric> // std::accumulate
#include <vector>
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/ErrorHandling.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
template <class I, class O>
void GlobalAveragePoolingImpl_cpu_forward_kernel(
const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
// error checking
AIDGE_ASSERT(dims.size() >= 3, "GlobalAveragePooling needs an input with at least "
"3 dimensions, got {} dimension(s)",
dims.size());
// computation
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
DimSize_t nb_elems = std::accumulate(dims.begin(), dims.end(), std::size_t(1),
std::multiplies<std::size_t>());
const DimSize_t in_batch_nb_elems{nb_elems / dims[0]};
const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]};
const DimSize_t out_batch_nb_elems{dims[1]};
// parse channel by channel and fill each output with the average of the
// values in the channel
for (DimSize_t batch = 0; batch < dims[0]; ++batch) {
for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
const I *filter_start = std::next(
input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
I mean = 0;
for (size_t i = 0; i < in_channel_nb_elems; ++i) {
// Single-pass, numerically stable running mean using fmaf
// (note: fmaf accumulates in float precision, even for double instantiations)
mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean);
}
output[batch * out_batch_nb_elems + channel] = mean;
}
}
}
// Kernels registration to implementation entry point
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Float32},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>, nullptr});
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Float64},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>, nullptr});
REGISTRAR(GlobalAveragePoolingImpl_cpu,
{DataType::Int32},
{ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ */
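A minimal, hypothetical usage sketch of the kernel above: global average pooling over a 1x2x2x2 (NCHW) tensor, called directly on raw buffers. The include path is an assumption.

#include <array>
#include <iostream>
#include <vector>
#include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp"  // assumed header name

int main() {
    const std::vector<Aidge::DimSize_t> dims{1, 2, 2, 2};  // N, C, H, W
    std::array<float, 8> input{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f};
    std::array<float, 2> output{};  // one value per (batch, channel)

    Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>(
        dims, input.data(), output.data());

    std::cout << output[0] << ' ' << output[1] << '\n';  // expected: 2.5 6.5
    return 0;
}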
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#define AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_
#include <array>
#include <memory>
#include <tuple>
#include <vector>
#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
#include "aidge/operator/GridSample.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
namespace Aidge {
// Operator implementation entry point for the backend
using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op,
void(const GridSample_Op&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&,
const std::shared_ptr<Tensor>&)>;
// Implementation entry point registration to Operator
REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create);
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */