Commit 15465b1c authored by Olivier BICHLER

Adapted ConvImpl to new OperatorImpl mechanism

parent 670fb293
Branch: backend_export
aidge/backend/cpu/operator/ConvImpl.hpp:

@@ -17,21 +17,17 @@
 #include <tuple>
 #include <vector>
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
-// class Conv_Op;
-
-// compute kernel registry for forward and backward
-// Conv 1D
-class ConvImpl1DForward_cpu
-    : public Registrable<ConvImpl1DForward_cpu,
-                         std::tuple<DataType, DataType, DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 1>&,
+// Operator implementation entry point for the backend
+using Conv1D_Op = Conv_Op<1>;
+using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
+    void(const std::array<DimSize_t, 1>&,
         const std::array<DimSize_t, 1>&,
         const std::array<DimSize_t, 1>&,
         const std::array<DimSize_t, 3> &,
@@ -39,31 +35,20 @@ class ConvImpl1DForward_cpu
         const void *,
         const void *,
         const void *,
-        void *)>> {};
-
-class ConvImpl1D_cpu : public OperatorImpl {
-public:
-    ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) {
-        return std::make_unique<ConvImpl1D_cpu>(op);
-    }
-
-public:
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-// add cpu backend to Conv_Op<1> implementation registry
-static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create);
-} // namespace
-
-// Conv 2D
-class ConvImpl2DForward_cpu
-    : public Registrable<ConvImpl2DForward_cpu,
-                         std::tuple<DataType, DataType, DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 2>&,
+        void *),
+    void(const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 1>&,
+        const std::array<DimSize_t, 1>&,
+        bool,
+        const std::array<DimSize_t, 3> &,
+        const void *,
+        const void *,
+        const void *,
+        void *)>;
+
+using Conv2D_Op = Conv_Op<2>;
+using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
+    void(const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 4> &,
@@ -71,11 +56,8 @@ class ConvImpl2DForward_cpu
         const void *,
         const void *,
         const void *,
-        void *)>> {};
-
-class ConvImpl2DBackward_cpu
-    : public Registrable<ConvImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType, DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 2>&,
+        void *),
+    void(const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
         const std::array<DimSize_t, 2>&,
         bool,
@@ -83,25 +65,198 @@ class ConvImpl2DBackward_cpu
         const void *,
         const void *,
         const void *,
-        void *)>> {};
-
-class ConvImpl2D_cpu : public OperatorImpl {
-public:
-    ConvImpl2D_cpu(const Conv_Op<2>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) {
-        return std::make_unique<ConvImpl2D_cpu>(op);
-    }
-
-public:
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-// add cpu backend to Conv_Op<2> implementation registry
-static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cpu::create);
-} // namespace
+        void *)>;
+
+// Implementation entry point registration to Operator
+REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
+REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
+
+////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Attributes from the Operator
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                            const std::array<DimSize_t, 1>& /*dilationDims*/,
+                            const std::array<DimSize_t, 1>& kernelDims,
+                            const std::array<DimSize_t, 3>& inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                              input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
+
+/**
+ * @brief Forward kernel for 2D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Attributes from the Operator
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                            const std::array<DimSize_t, 2>& /*dilationDims*/,
+                            const std::array<DimSize_t, 2>& kernelDims,
+                            const std::array<DimSize_t, 4> &inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    for (std::size_t oy = 0; oy < oySize; ++oy) {
+                        const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
+                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
+                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
+
+                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
+                            output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
+                        } else {
+                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                                    output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
+                                                          input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
 } // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
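The net effect of this header change: instead of one `Registrable` registry class per kernel direction (`ConvImpl1DForward_cpu`, `ConvImpl2DBackward_cpu`, ...), each `ConvImplXD_cpu` is now a single `OperatorImpl_cpu` alias whose registry maps an implementation-spec key ({input type/format}, {output type/format}) to a bundle of {producer-consumer model, forward kernel, backward kernel (here `nullptr`)}. A minimal sketch of that pattern, with hypothetical names (`SpecKey`, `KernelSet`, `ConvRegistry` are illustrative stand-ins, not Aidge types):

#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <tuple>

// Hypothetical stand-in for the spec key used in the REGISTRAR entries above.
struct SpecKey {
    int inType;   // e.g. a DataType enum value
    int outType;
    bool operator<(const SpecKey& o) const {
        return std::tie(inType, outType) < std::tie(o.inType, o.outType);
    }
};

// One entry per registered implementation: a set of kernel function pointers.
struct KernelSet {
    std::function<void()> forward;
    std::function<void()> backward;  // may be empty, like the nullptr slots above
};

struct ConvRegistry {
    static std::map<SpecKey, KernelSet>& table() {
        static std::map<SpecKey, KernelSet> t;  // one table per impl type
        return t;
    }
    static bool add(SpecKey k, KernelSet s) { table()[k] = s; return true; }
    static KernelSet create(const SpecKey& k) {  // looked up at forward() time
        auto it = table().find(k);
        if (it == table().end()) throw std::runtime_error("no kernel registered");
        return it->second;
    }
};

// Morally what a REGISTRAR(...) macro expands to: a static registration.
static bool reg_f32 = ConvRegistry::add(
    {/*inType=*/0, /*outType=*/0},
    {[] { std::cout << "float32 conv kernel\n"; }, nullptr});

int main() {
    ConvRegistry::create({0, 0}).forward();  // dispatches to the f32 kernel
    return 0;
}

The `DataType::Any` entries above suggest the real lookup scores partial matches rather than requiring exact keys; the sketch's exact-match map is just the simplest version of that idea.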
aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp (deleted; its kernels now live in ConvImpl.hpp):

/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#include <algorithm>
#include <array>
#include <cmath>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
 * @brief Forward kernel for 1D Convolution on CPU backend.
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
 * @param params tuple of Attributes from the Operator
 * @param inputDims Array of input dimensions.
 * @param input_ const input Tensor.
 * @param weights_ const weight Tensor.
 * @param biases_ const bias Tensor.
 * @param output_ Output Tensor.
 */
template <class I, class W, class B, class O>
void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
                            const std::array<DimSize_t, 1>& /*dilationDims*/,
                            const std::array<DimSize_t, 1>& kernelDims,
                            const std::array<DimSize_t, 3>& inputDims,
                            DimSize_t outChannels,
                            const void *input_,
                            const void *weights_,
                            const void *biases_,
                            void *output_)
{
    // FIXME: missing convolution attributes as arguments
    const I *input = static_cast<const I *>(input_);
    const W *weights = static_cast<const W *>(weights_);
    const B *biases = static_cast<const B *>(biases_);
    O *output = static_cast<O *>(output_);

    // output H size
    const std::size_t oxSize =
            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
                                static_cast<float>(strideDims[0])));

    // TODO: kernel computation
    // output (batch, outCh, Xout, Yout)
    // input (batch, inCh, Xin, Yin)
    // weight (outCh, inCh, kernelX, kernelY)
    // does not take Dilation attribute into account
    using signedsize = std::make_signed<std::size_t>::type;
    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
            // If bias = nullptr, set B(0)
            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
                for (std::size_t ox = 0; ox < oxSize; ++ox) {
                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
                    const std::size_t oIndexFull = oIndex + ox;
                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);

                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                        output[oIndexFull] += weights[wIndex + sx] *
                                              input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
                    }
                }
            }
        }
    }
}

namespace {
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32(
        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
        Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16(
        {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
        Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32(
        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
        Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64(
        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
        Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>);
}  // namespace
/**
 * @brief Forward kernel for 2D Convolution on CPU backend.
 * @tparam I Input data type.
 * @tparam W Weight data type.
 * @tparam B Bias data type.
 * @tparam O Output data type.
 * @param params tuple of Attributes from the Operator
 * @param inputDims Array of input dimensions.
 * @param input_ const input Tensor.
 * @param weights_ const weight Tensor.
 * @param biases_ const bias Tensor.
 * @param output_ Output Tensor.
 */
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                            const std::array<DimSize_t, 2>& /*dilationDims*/,
                            const std::array<DimSize_t, 2>& kernelDims,
                            const std::array<DimSize_t, 4> &inputDims,
                            DimSize_t outChannels,
                            const void *input_,
                            const void *weights_,
                            const void *biases_,
                            void *output_)
{
    // FIXME: missing convolution attributes as arguments
    const I *input = static_cast<const I *>(input_);
    const W *weights = static_cast<const W *>(weights_);
    const B *biases = static_cast<const B *>(biases_);
    O *output = static_cast<O *>(output_);

    /*
    // output H size
    const std::size_t oxSize =
            static_cast<std::size_t>(static_cast<float>(inputDims[0] - kernelDims[0] + strideDims[0]) /
                                static_cast<float>(strideDims[0]));
    // output W size
    const std::size_t oySize =
            static_cast<std::size_t>(static_cast<float>(inputDims[1] - kernelDims[1] + strideDims[1]) /
                                static_cast<float>(strideDims[1]));

    // TODO: kernel computation
    // output (Xout, Yout, outCh, batch)
    // input (Xin, Yin, inCh, batch)
    // weight (kernelX, kernelY, inCh, outCh)
    // does not take Dilation attribute into account
    for (std::size_t ox = 0; ox < oxSize; ++ox) {
        for (std::size_t oy = 0; oy < oySize; ++oy) {
            const std::size_t ix = ox * strideDims[0];
            const std::size_t iy = oy * strideDims[1];

            for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
                const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox));
                B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
                for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
                    output[oIndex + batch] = biasVal;
                }
                for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) {
                    for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) {
                        for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) {
                            const std::size_t wIndex =
                                    outCh + outChannels * (inCh + inputDims[2] * (sy + kernelDims[1] * sx));
                            std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx)));
                            for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
                                output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
                            }
                        }
                    }
                }
            }
        }
    }
    */

    // output H size
    const std::size_t oxSize =
            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
                                static_cast<float>(strideDims[0])));
    // output W size
    const std::size_t oySize =
            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
                                static_cast<float>(strideDims[1])));

    // TODO: kernel computation
    // output (batch, outCh, Xout, Yout)
    // input (batch, inCh, Xin, Yin)
    // weight (outCh, inCh, kernelX, kernelY)
    // does not take Dilation attribute into account
    using signedsize = std::make_signed<std::size_t>::type;
    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
            // If bias = nullptr, set B(0)
            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
                for (std::size_t ox = 0; ox < oxSize; ++ox) {
                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
                    for (std::size_t oy = 0; oy < oySize; ++oy) {
                        const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);

                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
                            output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
                                                   weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
                                                   weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
                                                   weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
                                                   weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
                                                   weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
                                                   weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
                                                   weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
                                                   weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
                        } else {
                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
                                    output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
                                                          input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

namespace {
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32(
        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
        Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float16(
        {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
        Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32(
        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
        Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64(
        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
        Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>);
}  // namespace
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */
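A note on the output-size computation both kernels share: `oxSize = floor((inputDims[2] - kernelDims[0] + strideDims[0]) / strideDims[0])` is the standard valid-convolution output length (no padding, dilation ignored, as the comments state). Since all operands are unsigned integers, plain integer division already floors, so the float/`std::floor` round-trip in the kernels is not strictly needed. A small sketch of the arithmetic, using a hypothetical helper `convOutLength`:

#include <cstddef>
#include <iostream>

// Output length of a 1D valid convolution (no padding, dilation = 1), as
// computed by the kernels above. Note that
// (inLen - kernel + stride) / stride == (inLen - kernel) / stride + 1.
std::size_t convOutLength(std::size_t inLen, std::size_t kernel, std::size_t stride) {
    return (inLen - kernel + stride) / stride;  // integer division floors
}

int main() {
    std::cout << convOutLength(7, 3, 2) << '\n';  // floor((7-3+2)/2) = 3
    std::cout << convOutLength(5, 3, 1) << '\n';  // floor((5-3+1)/1) = 3
    return 0;
}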
ConvImpl.cpp:

@@ -18,35 +18,18 @@
 #include <vector>
 
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Types.h"
 
+template <>
 void Aidge::ConvImpl1D_cpu::forward() {
     const auto& op_ = static_cast<const Conv_Op<1>&>(mOp);
 
     // FIXME: uncomment the following code once memory handling will work
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
-    // Find the correct kernel type
-    const auto outputDataType = op_.getOutput(0)->dataType();
-    const Registrar<ConvImpl1DForward_cpu>::registrar_key registrarKey = {
-        op_.getInput(0)->dataType(),
-        op_.getInput(1)->dataType(),
-        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
-        outputDataType};
-
-    Registrar<ConvImpl1DForward_cpu>::registrar_type kernelFunc;
-    if (Registrar<ConvImpl1DForward_cpu>::exists(registrarKey)) {
-        // One exists with the right inputs/output types
-        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create(registrarKey);
-    }
-    else {
-        // Otherwise, fallback to the kernel with all types matching output type
-        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create({
-            outputDataType, outputDataType, outputDataType, outputDataType});
-    }
+    const auto impl = Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -58,7 +41,7 @@ AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
 
     // Call kernel
-    kernelFunc(op_.strideDims(),
+    impl.forward(op_.strideDims(),
                op_.dilationDims(),
                op_.kernelDims(),
                op_.getInput(0)->template dims<3>(), // input dimensions
@@ -70,6 +53,12 @@ AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
               );
 }
 
+template <>
+void Aidge::ConvImpl1D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<1> on backend cpu");
+}
+
+template <>
 void Aidge::ConvImpl2D_cpu::forward() {
     const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp);
 
@@ -77,24 +66,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
-    // Find the correct kernel type
-    const auto outputDataType = op_.getOutput(0)->dataType();
-    const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = {
-        op_.getInput(0)->dataType(),
-        op_.getInput(1)->dataType(),
-        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
-        outputDataType};
-
-    Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc;
-    if (Registrar<ConvImpl2DForward_cpu>::exists(registrarKey)) {
-        // One exists with the right inputs/output types
-        kernelFunc = Registrar<ConvImpl2DForward_cpu>::create(registrarKey);
-    }
-    else {
-        // Otherwise, fallback to the kernel with all types matching output type
-        kernelFunc = Registrar<ConvImpl2DForward_cpu>::create({
-            outputDataType, outputDataType, outputDataType, outputDataType});
-    }
+    const auto impl = Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -106,7 +78,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
     const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
 
     // Call kernel
-    kernelFunc(op_.strideDims(),
+    impl.forward(op_.strideDims(),
                op_.dilationDims(),
                op_.kernelDims(),
                op_.getInput(0)->template dims<4>(), // input dimensions
@@ -117,3 +89,8 @@ void Aidge::ConvImpl2D_cpu::forward() {
                getCPUPtr(mOp.getRawOutput(0)) // output
               );
 }
+
+template <>
+void Aidge::ConvImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<2> on backend cpu");
+}
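One detail worth calling out in the 2D kernel (both the new header copy and the deleted file): the `if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3)` branch is a hand-unrolled 3x3 inner loop, taken whenever the clipped kernel window is exactly 3x3 (the common interior case for 3x3 kernels). A standalone check (not part of the commit) that the unrolled sum accumulates exactly what the generic `sx`/`sy` loop would:

#include <cassert>
#include <cstddef>

int main() {
    const std::size_t kH = 3, kW = 3, inW = 5;
    float weights[kH * kW];
    float input[4 * inW];                  // a few input rows
    for (std::size_t i = 0; i < kH * kW; ++i) weights[i] = 0.5f * i;
    for (std::size_t i = 0; i < 4 * inW; ++i) input[i]   = 0.25f * i;

    const std::size_t ix = 1, iy = 1;      // top-left corner of the window

    // Generic path: plain double loop over the kernel window.
    float generic = 0.f;
    for (std::size_t sx = 0; sx < kH; ++sx)
        for (std::size_t sy = 0; sy < kW; ++sy)
            generic += weights[sx * kW + sy] * input[(ix + sx) * inW + (iy + sy)];

    // Unrolled path, written out as in the kernel above.
    float unrolled =
        weights[0*kW + 0] * input[(ix+0)*inW + (iy+0)] +
        weights[0*kW + 1] * input[(ix+0)*inW + (iy+1)] +
        weights[0*kW + 2] * input[(ix+0)*inW + (iy+2)] +
        weights[1*kW + 0] * input[(ix+1)*inW + (iy+0)] +
        weights[1*kW + 1] * input[(ix+1)*inW + (iy+1)] +
        weights[1*kW + 2] * input[(ix+1)*inW + (iy+2)] +
        weights[2*kW + 0] * input[(ix+2)*inW + (iy+0)] +
        weights[2*kW + 1] * input[(ix+2)*inW + (iy+1)] +
        weights[2*kW + 2] * input[(ix+2)*inW + (iy+2)];

    // Same addends in the same order, so the floats match bit for bit.
    assert(generic == unrolled);
    return 0;
}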