Refactor OperatorImpl for backend/export

Merged Olivier BICHLER requested to merge backend_export into dev
2 unresolved threads
3 files  +224  −351
@@ -17,21 +17,17 @@
 #include <tuple>
 #include <vector>

-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"

 namespace Aidge {
-// class Conv_Op;
-
-// compute kernel registry for forward and backward
-// Conv 1D
-class ConvImpl1DForward_cpu
-    : public Registrable<ConvImpl1DForward_cpu,
-                         std::tuple<DataType, DataType, DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 1>&,
-                                            const std::array<DimSize_t, 1>&,
-                                            const std::array<DimSize_t, 1>&,
-                                            const std::array<DimSize_t, 3> &,
+// Operator implementation entry point for the backend
+using Conv1D_Op = Conv_Op<1>;
+using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
+    void(const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 3> &,
@@ -39,31 +35,20 @@ class ConvImpl1DForward_cpu
-                                            const void *,
-                                            const void *,
-                                            const void *,
-                                            void *)>> {};
-
-class ConvImpl1D_cpu : public OperatorImpl {
-public:
-    ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) {
-        return std::make_unique<ConvImpl1D_cpu>(op);
-    }
-
-public:
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-// add cpu backend to Conv_Op<1> implementation registry
-static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create);
-} // namespace
-
-// Conv 2D
-class ConvImpl2DForward_cpu
-    : public Registrable<ConvImpl2DForward_cpu,
-                         std::tuple<DataType, DataType, DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 2>&,
-                                            const std::array<DimSize_t, 2>&,
-                                            const std::array<DimSize_t, 2>&,
-                                            const std::array<DimSize_t, 4> &,
+        const void *,
+        const void *,
+        const void *,
+        void *),
+    void(const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 1>&,
+        bool,
+        const std::array<DimSize_t, 3> &,
+        const void *,
+        const void *,
+        const void *,
+        void *)>;
+
+using Conv2D_Op = Conv_Op<2>;
+using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
+    void(const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 4> &,
@@ -71,11 +56,8 @@ class ConvImpl2DForward_cpu
-                                            const void *,
-                                            const void *,
-                                            const void *,
-                                            void *)>> {};
-class ConvImpl2DBackward_cpu
-    : public Registrable<ConvImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType, DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 2>&,
-                                            const std::array<DimSize_t, 2>&,
-                                            const std::array<DimSize_t, 2>&,
-                                            bool,
+        const void *,
+        const void *,
+        const void *,
+        void *),
+    void(const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
+        const std::array<DimSize_t, 2>&,
+        bool,
@@ -83,25 +65,198 @@ class ConvImpl2DBackward_cpu
-                                            const void *,
-                                            const void *,
-                                            const void *,
-                                            void *)>> {};
-
-class ConvImpl2D_cpu : public OperatorImpl {
-public:
-    ConvImpl2D_cpu(const Conv_Op<2>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) {
-        return std::make_unique<ConvImpl2D_cpu>(op);
-    }
-
-public:
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-// add cpu backend to Conv_Op<2> implementation registry
-static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cpu::create);
-} // namespace
+        const void *,
+        const void *,
+        const void *,
+        void *)>;
+
+// Implementation entry point registration to Operator
+REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
+REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
+
+////////////////////////////////////////////////////////////////////////////////
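The refactored header registers things at two levels: REGISTRAR(Conv1D_Op, "cpu", ...) attaches the CPU implementation entry point to the operator, and the REGISTRAR(ConvImpl1D_cpu, ...) entries further down attach one typed compute kernel per data type to that implementation. As a rough mental model only (a simplified standalone sketch, not the actual Aidge Registrable/REGISTRAR machinery), the mechanism amounts to a static map populated by file-scope registration objects:

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Hypothetical, minimal stand-in for the registrable-kernel pattern.
template <typename Key, typename Fn>
struct KernelRegistry {
    static std::map<Key, Fn>& map() { static std::map<Key, Fn> m; return m; }
    // A file-scope Registrar object inserts its kernel into the map at static-initialization time.
    struct Registrar { Registrar(Key k, Fn f) { map()[k] = f; } };
};

using Conv1DKernel   = std::function<void(const float*, float*)>;
using Conv1DRegistry = KernelRegistry<std::string, Conv1DKernel>;

void conv1d_float32(const float* in, float* out) { out[0] = in[0]; /* ... */ }

// Analogous in spirit to REGISTRAR(ConvImpl1D_cpu, {...Float32...}, {...kernel...});
static Conv1DRegistry::Registrar regConv1DFloat32{"Float32", conv1d_float32};

int main() {
    // At run time the implementation looks the kernel up by its spec and calls it.
    float in[1] = {42.f}, out[1] = {};
    Conv1DRegistry::map().at("Float32")(in, out);
    std::cout << out[0] << '\n';  // prints 42
}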
+
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Stride attribute of the Operator.
+ * @param dilationDims Dilation attribute of the Operator (unused by this kernel).
+ * @param kernelDims Kernel dimensions attribute of the Operator.
+ * @param inputDims Array of input dimensions.
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                                   const std::array<DimSize_t, 1>& /*dilationDims*/,
+                                   const std::array<DimSize_t, 1>& kernelDims,
+                                   const std::array<DimSize_t, 3>& inputDims,
+                                   DimSize_t outChannels,
+                                   const void *input_,
+                                   const void *weights_,
+                                   const void *biases_,
+                                   void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                              input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
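For orientation, a minimal usage sketch (not part of the MR) that calls the 1D forward kernel template above directly on a tiny tensor; it assumes this header is on the include path and that DimSize_t is visible as used above. With an input length of 5, a kernel of 3 and a stride of 1, the output length is floor((5 - 3 + 1) / 1) = 3:

#include <array>
// assumes the refactored ConvImpl.hpp from this MR is included

int main() {
    const std::array<Aidge::DimSize_t, 1> stride{1}, dilation{1}, kernel{3};
    const std::array<Aidge::DimSize_t, 3> inputDims{1, 1, 5};  // batch, channels, length
    const float input[5]   = {1.f, 2.f, 3.f, 4.f, 5.f};
    const float weights[3] = {1.f, 1.f, 1.f};                  // one output channel, one input channel
    const float bias[1]    = {0.f};
    float output[3] = {};                                      // oxSize = 3

    Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>(
        stride, dilation, kernel, inputDims, /*outChannels=*/1,
        input, weights, bias, output);
    // output == {6, 9, 12}: each entry is the sum of one 3-wide window of the input
    return 0;
}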
+
+/**
+ * @brief Forward kernel for 2D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Stride attribute of the Operator.
+ * @param dilationDims Dilation attribute of the Operator (unused by this kernel).
+ * @param kernelDims Kernel dimensions attribute of the Operator.
+ * @param inputDims Array of input dimensions.
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                                   const std::array<DimSize_t, 2>& /*dilationDims*/,
+                                   const std::array<DimSize_t, 2>& kernelDims,
+                                   const std::array<DimSize_t, 4>& inputDims,
+                                   DimSize_t outChannels,
+                                   const void *input_,
+                                   const void *weights_,
+                                   const void *biases_,
+                                   void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    for (std::size_t oy = 0; oy < oySize; ++oy) {
+                        const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
+                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
+                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
+
+                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
+                            output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
+                        } else {
+                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                                    output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
+                                                          input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
 } // namespace Aidge

 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
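One detail worth noting about the kernels above: both compute output sizes as floor((inDim - kernel + stride) / stride), which for this unpadded, undilated case is the usual (inDim - kernel) / stride + 1. A quick standalone check (illustrative only, not part of the MR):

#include <cassert>
#include <cmath>
#include <cstddef>

// Output length of a valid (unpadded, undilated) convolution, as computed in the kernels above.
std::size_t outSize(std::size_t in, std::size_t kernel, std::size_t stride) {
    return static_cast<std::size_t>(std::floor(static_cast<float>(in - kernel + stride) /
                                               static_cast<float>(stride)));
}

int main() {
    // Equivalent closed form: (in - kernel) / stride + 1, using integer division.
    assert(outSize(5, 3, 1) == (5 - 3) / 1 + 1);      // 3
    assert(outSize(224, 3, 2) == (224 - 3) / 2 + 1);  // 111
    return 0;
}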