Commit d1114932 authored by Maxence Naud

Merge branch 'dev' into 'main'

version 0.2.3

See merge request eclipse/aidge/aidge_backend_cpu!73
parents a7666da2 c9eacd2c
Showing changed files with 570 additions and 215 deletions
......@@ -36,7 +36,7 @@ class test_recipes(unittest.TestCase):
graph_view = aidge_core.sequential([input_node, conv, bn])
# Add random values to conv and BatchNorm parameters
graph_view.set_datatype(aidge_core.DataType.Float32)
graph_view.set_datatype(aidge_core.dtype.float32)
graph_view.set_backend("cpu")
np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32)
......
......@@ -24,7 +24,7 @@ class test_scheduler(unittest.TestCase):
input_node.add_child(relu)
gv.set_datatype(aidge_core.DataType.Int32)
gv.set_datatype(aidge_core.dtype.int32)
gv.set_backend("cpu")
scheduler = aidge_core.SequentialScheduler(gv)
......@@ -48,7 +48,7 @@ class test_scheduler(unittest.TestCase):
])
EXPECTED_SCHEDULE = ['0', '1', '2']
graph_view.set_datatype(aidge_core.DataType.Float32)
graph_view.set_datatype(aidge_core.dtype.float32)
graph_view.set_backend("cpu")
graph_view.forward_dims()
......@@ -74,7 +74,7 @@ class test_scheduler(unittest.TestCase):
EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both schedules are valid!
graph_view.set_datatype(aidge_core.DataType.Float32)
graph_view.set_datatype(aidge_core.dtype.float32)
graph_view.set_backend("cpu")
graph_view.forward_dims()
......
......@@ -32,6 +32,7 @@
#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
#include "aidge/backend/cpu/operator/SliceImpl.hpp"
#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"
#include "aidge/backend/cpu/operator/TanhImpl.hpp"
......
......@@ -14,6 +14,8 @@
#include "aidge/utils/Registrar.hpp"
#include <cstdint> // std::int32_t, std::int64_t
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/AddImpl.hpp"
......@@ -42,10 +44,12 @@ void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const st
namespace {
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float32(
{DataType::Float32, DataType::Float32}, Aidge::AddImpl_cpu_forward_kernel<float, float>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<int, int>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64}, Aidge::AddImpl_cpu_forward_kernel<double, double>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>);
static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int64(
{DataType::Int64, DataType::Int64}, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>);
} // namespace
} // namespace Aidge
......
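The Add registrars above gain an Int64 entry and move from plain int to the fixed-width std::int32_t (hence the new <cstdint> include). For readers new to the idiom, here is a minimal, self-contained sketch of the registry pattern — a hypothetical simplification, not Aidge's actual Registrable/Registrar templates: each kernel instantiation is stored under a tuple of DataType keys and looked up at run time.

#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <tuple>
#include <vector>

enum class DataType { Float32, Float64, Int32, Int64 };

// Type-erased kernel signature, in the same style as AddImpl_cpu_forward_kernel.
using AddKernel = std::function<void(std::size_t, const void*, const void*, void*)>;

std::map<std::tuple<DataType, DataType>, AddKernel>& registry() {
    static std::map<std::tuple<DataType, DataType>, AddKernel> r;
    return r;
}

template <class I, class O>
void add_forward(std::size_t n, const void* in0_, const void* in1_, void* out_) {
    const I* in0 = static_cast<const I*>(in0_);
    const I* in1 = static_cast<const I*>(in1_);
    O* out = static_cast<O*>(out_);
    for (std::size_t i = 0; i < n; ++i) { out[i] = static_cast<O>(in0[i] + in1[i]); }
}

int main() {
    // Fixed-width registrations, mirroring the updated registrars above.
    registry()[{DataType::Int32, DataType::Int32}] = add_forward<std::int32_t, std::int32_t>;
    registry()[{DataType::Int64, DataType::Int64}] = add_forward<std::int64_t, std::int64_t>;

    std::vector<std::int64_t> a{1, 2, 3}, b{10, 20, 30}, c(3);
    registry().at({DataType::Int64, DataType::Int64})(a.size(), a.data(), b.data(), c.data());
    std::cout << c[0] << ' ' << c[1] << ' ' << c[2] << '\n';  // prints: 11 22 33
}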
......@@ -29,12 +29,20 @@ namespace Aidge {
// compute kernel registry for forward and backward
class AvgPoolingImpl2DForward_cpu
: public Registrable<AvgPoolingImpl2DForward_cpu,
std::tuple<DataType, DataType>,
void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
std::tuple<DataType, DataType>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4>&,
const void *,
void *)> {};
class AvgPoolingImpl2DBackward_cpu
: public Registrable<AvgPoolingImpl2DBackward_cpu,
std::tuple<DataType, DataType>,
void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
std::tuple<DataType, DataType>,
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4>&,
const void *,
void *)> {};
class AvgPoolingImpl2D_cpu : public OperatorImpl {
public:
......
......@@ -12,16 +12,16 @@
#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Data.hpp"
#include <array>
#include <tuple>
#include <cmath>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge {
/**
* @brief Forward kernel for 2D AvgPooling on CPU backend.
......@@ -33,10 +33,11 @@ namespace Aidge {
* @param output_ Output Tensor.
*/
template <class I, class O>
void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_);
......@@ -44,12 +45,12 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
......@@ -63,16 +64,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx);
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify);
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * std::get<0>(attrs)[0];
const std::size_t iy = oy * std::get<0>(attrs)[1];
const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * strideDims[1];
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += static_cast<O>(
......
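In the rewritten kernel body above, std::get<0>(attrs) and std::get<1>(attrs) simply become the named strideDims and kernelDims arguments; the arithmetic is unchanged. As a worked check of the output-extent formula (a hypothetical helper, valid for the no-padding, no-dilation case this kernel implements):

#include <cstddef>
#include <iostream>

// floor((inSize - kernel + stride) / stride): for inSize >= kernel, unsigned
// integer division already floors, matching the kernel's float/std::floor version.
std::size_t outSize(std::size_t inSize, std::size_t kernel, std::size_t stride) {
    return (inSize - kernel + stride) / stride;
}

int main() {
    std::cout << outSize(5, 3, 2) << '\n';  // 2: windows start at x = 0 and x = 2
    std::cout << outSize(7, 3, 1) << '\n';  // 5: windows start at x = 0 .. 4
}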
......@@ -30,26 +30,28 @@ namespace Aidge {
class BatchNormImpl2DForward_cpu
: public Registrable<BatchNormImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType>,
void(const BatchNorm_Op<2>::Attrs &,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *,
const bool)> {};
void(float,
float,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *,
const bool)> {};
class BatchNormImpl2DBackward_cpu
: public Registrable<BatchNormImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType>,
void(const BatchNorm_Op<2>::Attrs &,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *)> {};
void(float,
float,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *,
void *,
void *)> {};
class BatchNormImpl2D_cpu : public OperatorImpl {
public:
......
......@@ -38,7 +38,7 @@ namespace Aidge {
* @param output_ Output Tensor.
*/
template <class I, class P, class O>
void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
......@@ -53,12 +53,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
const DimSize_t featureMapSize = dims[2]*dims[3];
if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) {
if ((freeze == true) || (momentum == 0.0f)) {
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs)));
const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
......@@ -82,10 +82,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
const I inputMean = sum / static_cast<I>(nbDataPerChannel);
const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean;
batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs);
batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs);
batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum;
batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum;
const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs)));
const P var = std::sqrt(inputVar + static_cast<P>(epsilon));
for (std::size_t batch = 0; batch < nbBatch; ++batch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
......
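With epsilon and momentum passed explicitly instead of through std::get<0>/std::get<1> on the attribute tuple, the running-statistics update reads directly. Restated as a standalone helper (hypothetical, not code from this commit); note the N/(N-1) factor that converts the biased batch variance into an unbiased estimate:

#include <cstddef>
#include <iostream>

// running <- running*(1 - momentum) + batch*momentum, as in the kernel above.
void updateRunningStats(float& runMean, float& runVar,
                        float batchMean, float batchVar,
                        float momentum, std::size_t n) {
    runMean = runMean * (1.0f - momentum) + batchMean * momentum;
    runVar  = runVar  * (1.0f - momentum)
            + batchVar * (static_cast<float>(n) / static_cast<float>(n - 1)) * momentum;
}

int main() {
    float m = 0.0f, v = 1.0f;
    updateRunningStats(m, v, /*batchMean=*/2.0f, /*batchVar=*/0.5f, /*momentum=*/0.1f, /*n=*/100);
    std::cout << m << ' ' << v << '\n';  // prints: 0.2 0.950505
}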
......@@ -25,18 +25,60 @@
namespace Aidge {
// class ConvDepthWise_Op;
// compute kernel registry for forward and backward
class ConvDepthWiseImpl1DForward_cpu
: public Registrable<ConvDepthWiseImpl1DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3>&,
const void *,
const void *,
const void *,
void *)> {};
class ConvDepthWiseImpl1D_cpu : public OperatorImpl {
public:
ConvDepthWiseImpl1D_cpu(const ConvDepthWise_Op<1> &op) : OperatorImpl(op, "cpu") {}
static std::unique_ptr<ConvDepthWiseImpl1D_cpu> create(const ConvDepthWise_Op<1> &op) {
return std::make_unique<ConvDepthWiseImpl1D_cpu>(op);
}
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to ConvDepthWise_Op<1> implementation registry
static Registrar<ConvDepthWise_Op<1>> registrarConvDepthWiseImpl1D_cpu("cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
} // namespace
// compute kernel registry for forward and backward
class ConvDepthWiseImpl2DForward_cpu
: public Registrable<ConvDepthWiseImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)> {};
class ConvDepthWiseImpl2DBackward_cpu
: public Registrable<ConvDepthWiseImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
bool,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)> {};
class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
public:
......
......@@ -12,17 +12,93 @@
#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <cstddef>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 1D ConvDepthWise (depthwise convolution) on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions (currently unused).
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& /*dilationDims*/,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_) {
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, ch, Xin, Yin)
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = ch * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
}
}
}
}
}
namespace {
static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>);
} // namespace
/**
* @brief Forward kernel for 2D ConvDepthWise (depthwise convolution) on CPU backend.
* @tparam I Input data type.
......@@ -30,15 +106,22 @@ namespace Aidge {
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions (currently unused).
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& /*dilationDims*/,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4>& inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
......@@ -48,12 +131,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
......@@ -61,40 +144,40 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
// weight (outCh, ch, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) {
const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize;
B biasVal = ((!std::get<4>(attrs)) && biases != nullptr) ? biases[ch] : B(0);
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
B biasVal = (biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
......@@ -110,7 +193,7 @@ static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DFor
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>);
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
......
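A practical consequence of dropping the Attrs tuples is that these kernels can now be exercised directly with plain arrays. A sketch of a direct call to the new 1D depthwise kernel (normally ConvDepthWiseImpl1D_cpu::forward() performs the registry lookup and call; the include path here is an assumption):

#include <iostream>
#include <vector>
#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"  // assumed path

int main() {
    std::vector<float> in(1 * 2 * 5, 1.0f);  // inputDims = {batch=1, ch=2, X=5}
    std::vector<float> w(2 * 3, 1.0f);       // one 3-tap filter per channel
    std::vector<float> bias(2, 0.0f);
    std::vector<float> out(1 * 2 * 3);       // oxSize = (5 - 3 + 1) / 1 = 3

    Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>(
        /*strideDims=*/{1}, /*dilationDims=*/{1}, /*kernelDims=*/{3},
        /*inputDims=*/{1, 2, 5}, in.data(), w.data(), bias.data(), out.data());

    std::cout << out[0] << '\n';  // prints: 3 (three unit taps over unit inputs)
}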
......@@ -27,16 +27,63 @@ namespace Aidge {
// class Conv_Op;
// compute kernel registry for forward and backward
// Conv 1D
class ConvImpl1DForward_cpu
: public Registrable<ConvImpl1DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 1>&,
const std::array<DimSize_t, 3> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)> {};
class ConvImpl1D_cpu : public OperatorImpl {
public:
ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {}
static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) {
return std::make_unique<ConvImpl1D_cpu>(op);
}
public:
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override;
};
namespace {
// add cpu backend to Conv_Op<1> implementation registry
static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create);
} // namespace
// Conv 2D
class ConvImpl2DForward_cpu
: public Registrable<ConvImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)> {};
class ConvImpl2DBackward_cpu
: public Registrable<ConvImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
const void *, const void *, void *)> {};
void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
bool,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)> {};
class ConvImpl2D_cpu : public OperatorImpl {
public:
......
......@@ -12,17 +12,100 @@
#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include <algorithm>
#include <array>
#include <cmath>
#include "aidge/data/half.hpp"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/ConvImpl.hpp"
#include "aidge/data/half.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge {
/**
* @brief Forward kernel for 1D Convolution on CPU backend.
* @tparam I Input data type.
* @tparam W Weight data type.
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions (currently unused).
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param outChannels Number of output channels.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
const std::array<DimSize_t, 1>& /*dilationDims*/,
const std::array<DimSize_t, 1>& kernelDims,
const std::array<DimSize_t, 3>& inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
const B *biases = static_cast<const B *>(biases_);
O *output = static_cast<O *>(output_);
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
// input (batch, inCh, Xin, Yin)
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
const std::size_t oIndexFull = oIndex + ox;
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
output[oIndexFull] += weights[wIndex + sx] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
}
}
}
}
}
}
namespace {
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32(
{DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16(
{DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>);
static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>);
} // namespace
/**
* @brief Forward kernel for 2D Convolution on CPU backend.
* @tparam I Input data type.
......@@ -30,15 +113,23 @@ namespace Aidge {
* @tparam B Bias data type.
* @tparam O Output data type.
* @param strideDims Array of stride dimensions.
* @param dilationDims Array of dilation dimensions (currently unused).
* @param kernelDims Array of kernel dimensions.
* @param inputDims Array of input dimensions.
* @param outChannels Number of output channels.
* @param input_ const input Tensor.
* @param weights_ const weight Tensor.
* @param biases_ const bias Tensor.
* @param output_ Output Tensor.
*/
template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *weights_, const void *biases_, void *output_) {
void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 2>& /*dilationDims*/,
const std::array<DimSize_t, 2>& kernelDims,
const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_);
......@@ -47,12 +138,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
/*
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0]));
static_cast<std::size_t>(static_cast<float>(inputDims[0] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0]));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1]));
static_cast<std::size_t>(static_cast<float>(inputDims[1] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1]));
// TODO: kernel computation
// output (Xout, Yout, outCh, batch)
......@@ -61,22 +152,22 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// does not take Dilation attribute into account
for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t ix = ox * std::get<0>(attrs)[0];
const std::size_t iy = oy * std::get<0>(attrs)[1];
const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * strideDims[1];
for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox));
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox));
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
output[oIndex + batch] = biasVal;
}
for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) {
for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) {
for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) {
for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) {
for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) {
const std::size_t wIndex =
outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx));
std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
for (std::size_t batch = 0; batch < dims[3]; ++batch) {
outCh + outChannels * (inCh + inputDims[2] * (sy + kernelDims[1] * sx));
std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx)));
for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
}
}
......@@ -90,12 +181,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// output H size
const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
static_cast<float>(std::get<0>(attrs)[0])));
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(strideDims[0])));
// output W size
const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
static_cast<float>(std::get<0>(attrs)[1])));
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(strideDims[1])));
// TODO: kernel computation
// output (batch, outCh, Xout, Yout)
......@@ -103,42 +194,42 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// weight (outCh, inCh, kernelX, kernelY)
// does not take Dilation attribute into account
using signedsize = std::make_signed<std::size_t>::type;
for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize;
// If NoBias or bias = nullptr, set B(0)
B biasVal = ((!std::get<5>(attrs)) && biases != nullptr) ? biases[outCh] : B(0);
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
// If bias = nullptr, set B(0)
B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
}
}
}
......
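Replacing std::get<3>(attrs) and std::get<4>(attrs) with the named outChannels and kernelDims makes the flattened NCHW offsets in this kernel legible. Spelled out as a standalone helper (hypothetical, written only to make the index arithmetic explicit):

#include <cstddef>
#include <iostream>

// Layouts used by the kernel above:
//   input (batch, inCh, X, Y), weight (outCh, inCh, kX, kY), output (batch, outCh, oX, oY)
// The kernel's wIndex base is (inCh + outCh*inChannels)*kX*kY, then + sx*kY + sy,
// which is the row-major offset computed below.
std::size_t weightIndex(std::size_t outCh, std::size_t inCh, std::size_t inChannels,
                        std::size_t sx, std::size_t sy, std::size_t kX, std::size_t kY) {
    return ((outCh * inChannels + inCh) * kX + sx) * kY + sy;
}

int main() {
    // Last tap of the last 3x3 filter for outChannels=2, inChannels=3:
    std::cout << weightIndex(1, 2, 3, 2, 2, 3, 3) << '\n';  // prints: 53 == 2*3*3*3 - 1
}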
......@@ -14,6 +14,7 @@
#include <numeric> // std::accumulate
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t, std::int64_t
#include <functional> // std::multiplies
#include "aidge/utils/Registrar.hpp"
......@@ -76,7 +77,7 @@ static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32(
Aidge::DivImpl_cpu_forward_kernel<float, float, float>);
static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::DivImpl_cpu_forward_kernel<int, int, int>);
Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::DivImpl_cpu_forward_kernel<double, double, double>);
......
......@@ -12,14 +12,14 @@
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_H_
#include <array>
#include <memory>
#include <vector>
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/FC.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge {
// class FC_Op;
......@@ -30,27 +30,27 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
DataType,
DataType,
DataType>,
void(const FC_Op::Attrs&,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *)> {};
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *)> {};
class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
std::tuple<DataType,
DataType,
DataType,
DataType>,
void(const FC_Op::Attrs&,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *,
void *,
void *)> {};
void(const DimSize_t,
const DimSize_t,
const DimSize_t,
const void *,
const void *,
const void *,
void *,
void *,
void *)> {};
class FCImpl_cpu : public OperatorImpl {
public:
......
......@@ -19,8 +19,16 @@
namespace Aidge {
template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
const void* input_, const void* originalInput_, const void* weight_, void* output_, void* weightGrad_, void* biasesGrad_) {
void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* originalInput_,
const void* weight_,
void* output_,
void* weightGrad_,
void* biasesGrad_)
{
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const I* originalInput = static_cast<const I*>(originalInput_);
......@@ -31,37 +39,37 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batch
// bias grad
if (std::get<1>(attrs)) { // no bias
std::fill(biasesGrad, biasesGrad + std::get<0>(attrs), B(0));
if (biasesGrad == nullptr) {
// no bias: there is no bias gradient to write (filling through the null pointer would be undefined behaviour)
} else {
for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { // nb outputs
for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
B sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += input[b*std::get<0>(attrs) + o];
sum += input[b*outputFeatureSize + o];
}
biasesGrad[o] = sum;
}
}
// weight grad
for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
for (std::size_t c = 0; c < oneInputSize; ++c) {
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
W sum{0};
for (std::size_t b = 0; b < batchSize; ++b) {
sum += originalInput[b*oneInputSize + c]*input[b*std::get<0>(attrs) + o];
sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
}
weightGrad[o*oneInputSize + c] = sum;
weightGrad[o*inputFeatureSize + c] = sum;
}
}
// input grad
for (std::size_t b = 0; b < batchSize; ++b) {
for (std::size_t c = 0; c < oneInputSize; ++c) {
for (std::size_t c = 0; c < inputFeatureSize; ++c) {
O sum{0};
for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
sum += weight[o*oneInputSize + c] * input[b*std::get<0>(attrs) + o];
for (std::size_t o = 0; o < outputFeatureSize; ++o) {
sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
}
output[b*oneInputSize + c] = sum;
output[b*inputFeatureSize + c] = sum;
}
}
}
......
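In the new names, the three loops above are the standard fully-connected gradients. With X the (batchSize × inputFeatureSize) input, W the (outputFeatureSize × inputFeatureSize) weights, and G the incoming (batchSize × outputFeatureSize) gradient (named input in the kernel), the kernel accumulates

    \nabla b = \sum_b G_{b,:}, \qquad \nabla W = G^\top X, \qquad \nabla X = G\,W

which is exactly what biasesGrad, weightGrad and output receive.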
......@@ -27,9 +27,9 @@ namespace Aidge {
// const B* biases = static_cast<const B*>(biases_);
// O* output = static_cast<O*>(output_);
// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[3];
// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] = bias;
// }
......@@ -39,10 +39,10 @@ namespace Aidge {
// for (std::size_t iy = 0; iy < dims[1]; ++iy) {
// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// const std::size_t oIndex = dims[3] * outCh;
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) +
// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// for (std::size_t batch = 0; batch < dims[3]; ++batch) {
// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
// }
......@@ -63,9 +63,9 @@ namespace Aidge {
// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
// std::size_t oIndex = outIdx * dims[0];
// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// output[oIndex + batch] = bias;
// }
......@@ -74,8 +74,8 @@ namespace Aidge {
// for (std::size_t batch = 0; batch < dims[0]; ++batch) {
// const std::size_t oIndex = dims[1] * batch;
// for (std::size_t i = 0; i < dims[1]; ++i) {
// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3];
// output[oIndex + outCh] += weights[wIndex] * input[i + batch];
// }
// }
......@@ -83,29 +83,34 @@ namespace Aidge {
// }
template <class I, class W, class B, class O>
void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
const void* input_, const void* weights_, const void* biases_, void* output_) {
void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* weights_,
const void* biases_,
void* output_) {
// FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_);
if (std::get<1>(attrs)) {
std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0));
if (biases == nullptr) {
std::fill(output, output+(batchSize*outputFeatureSize), B(0));
}
else {
for (std::size_t batch = 0; batch < batchSize; ++batch) {
std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs)));
std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
}
}
for (std::size_t batch = 0; batch < batchSize; ++batch) {
for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
input + (batch + 1)*oneInputSize,
weights + out*oneInputSize,
output[out + batch*std::get<0>(attrs)]);
for (std::size_t out = 0; out < outputFeatureSize; ++out) {
output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
input + (batch + 1)*inputFeatureSize,
weights + out*inputFeatureSize,
output[out + batch*outputFeatureSize]);
}
}
}
......
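The forward kernel now infers the no-bias case from biases == nullptr rather than a NoBias attribute, and takes the batch, input and output sizes as plain arguments. The same computation as a compact, self-contained sketch (hypothetical, float-only; passing the bias as the std::inner_product init value is equivalent to the kernel's fill-then-accumulate structure):

#include <iostream>
#include <numeric>
#include <vector>

void fc_forward(std::size_t batchSize, std::size_t inF, std::size_t outF,
                const std::vector<float>& x, const std::vector<float>& w,
                const std::vector<float>& b, std::vector<float>& y) {
    for (std::size_t batch = 0; batch < batchSize; ++batch)
        for (std::size_t o = 0; o < outF; ++o)
            y[batch * outF + o] = std::inner_product(
                x.begin() + batch * inF, x.begin() + (batch + 1) * inF,
                w.begin() + o * inF, b.empty() ? 0.0f : b[o]);  // empty b == no bias
}

int main() {
    std::vector<float> x{1, 2, 3};           // batchSize=1, inF=3
    std::vector<float> w{1, 1, 1, 0, 1, 0};  // outF=2, row-major (out, in)
    std::vector<float> b{10, 0};
    std::vector<float> y(2);
    fc_forward(1, 3, 2, x, w, b, y);
    std::cout << y[0] << ' ' << y[1] << '\n';  // prints: 16 2
}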
......@@ -25,11 +25,19 @@
namespace Aidge {
// compute kernel registry for forward and backward
class LeakyReLUImplForward_cpu
: public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
};
: public Registrable<LeakyReLUImplForward_cpu,
std::tuple<DataType, DataType>,
void(const float,
std::size_t,
const void*,
void*)> {};
class LeakyReLUImplBackward_cpu
: public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
};
: public Registrable<LeakyReLUImplBackward_cpu,
std::tuple<DataType, DataType>,
void(const float,
std::size_t,
const void*,
void*)> {};
class LeakyReLUImpl_cpu : public OperatorImpl {
public:
......
......@@ -18,17 +18,17 @@
namespace Aidge {
template <class I, class O>
void LeakyReLUImpl_cpu_backward_kernel(const LeakyReLU_Op::Attrs& attrs,
void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
I negativeSlope = static_cast<I>(std::get<0>(attrs));
const I negativeSlope = static_cast<const I>(negativeSlope_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = input[i] > 0 ? input[i] : negativeSlope*input[i];
output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i];
}
}
......
......@@ -18,17 +18,17 @@
namespace Aidge {
template <class I, class O>
void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs,
void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
std::size_t inputLength,
const void* input_,
void* output_) {
const I* input = static_cast<const I*>(input_);
O* output = static_cast<O*>(output_);
const I negativeSlope = static_cast<const I>(std::get<0>(attrs));
const I negativeSlope = static_cast<const I>(negativeSlope_);
for (std::size_t i = 0; i < inputLength; ++i) {
output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope;
}
}
......
/********************************************************************************
* Copyright (c) 2023 CEA-List
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* SPDX-License-Identifier: EPL-2.0
*
********************************************************************************/
#ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_
#define AIDGE_CPU_OPERATOR_LNIMPL_H_
#include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/Ln.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
namespace Aidge {
// class Ln_Op;
// compute kernel registry for forward and backward
class LnImplForward_cpu
: public Registrable<LnImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class LnImplBackward_cpu
: public Registrable<LnImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> {
};
class LnImpl_cpu : public OperatorImpl {
public:
LnImpl_cpu(const Ln_Op& op) : OperatorImpl(op, "cpu") {}
static std::unique_ptr<LnImpl_cpu> create(const Ln_Op& op) {
return std::make_unique<LnImpl_cpu>(op);
}
Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
void forward() override final;
void backward() override final;
};
namespace {
static Registrar<Ln_Op> registrarLnImpl_cpu("cpu", Aidge::LnImpl_cpu::create);
}
} // namespace Aidge
#endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */
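The matching kernel implementations are not part of this excerpt. Given the LnImplForward_cpu signature declared above — void(const std::size_t, const void*, void*) — a minimal forward kernel could look like the following sketch (an assumption for illustration, not the file's actual contents):

#include <cmath>
#include <cstddef>

namespace Aidge {
// Elementwise natural logarithm over a flat buffer, matching the registry signature.
template <class I, class O>
void LnImpl_cpu_forward_kernel(const std::size_t inputLength,
                               const void* input_,
                               void* output_) {
    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
    for (std::size_t i = 0; i < inputLength; ++i) {
        output[i] = static_cast<O>(std::log(input[i]));
    }
}
} // namespace Aidge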