Skip to content
Snippets Groups Projects
Commit 33db8fa2 authored by Maxence Naud's avatar Maxence Naud
Browse files

Adapt to aidge_core changes and fix globalAvgPooling test

parent 583195c7
No related branches found
No related tags found
2 merge requests!73version 0.2.3,!71Improve UI
Pipeline #48933 failed
Showing
with 297 additions and 204 deletions
...@@ -36,7 +36,7 @@ class test_recipes(unittest.TestCase): ...@@ -36,7 +36,7 @@ class test_recipes(unittest.TestCase):
graph_view = aidge_core.sequential([input_node, conv, bn]) graph_view = aidge_core.sequential([input_node, conv, bn])
# Add random values to conv and BatchNorm parameters # Add random values to conv and BatchNorm parameters
graph_view.set_datatype(aidge_core.DataType.Float32) graph_view.set_datatype(aidge_core.DataType.float32)
graph_view.set_backend("cpu") graph_view.set_backend("cpu")
np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32) np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32)
......
...@@ -24,7 +24,7 @@ class test_scheduler(unittest.TestCase): ...@@ -24,7 +24,7 @@ class test_scheduler(unittest.TestCase):
input_node.add_child(relu) input_node.add_child(relu)
gv.set_datatype(aidge_core.DataType.Int32) gv.set_datatype(aidge_core.DataType.int32)
gv.set_backend("cpu") gv.set_backend("cpu")
scheduler = aidge_core.SequentialScheduler(gv) scheduler = aidge_core.SequentialScheduler(gv)
...@@ -48,7 +48,7 @@ class test_scheduler(unittest.TestCase): ...@@ -48,7 +48,7 @@ class test_scheduler(unittest.TestCase):
]) ])
EXPECTED_SCHEDULE = ['0', '1', '2'] EXPECTED_SCHEDULE = ['0', '1', '2']
graph_view.set_datatype(aidge_core.DataType.Float32) graph_view.set_datatype(aidge_core.DataType.float32)
graph_view.set_backend("cpu") graph_view.set_backend("cpu")
graph_view.forward_dims() graph_view.forward_dims()
...@@ -74,7 +74,7 @@ class test_scheduler(unittest.TestCase): ...@@ -74,7 +74,7 @@ class test_scheduler(unittest.TestCase):
EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid ! EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid !
graph_view.set_datatype(aidge_core.DataType.Float32) graph_view.set_datatype(aidge_core.DataType.float32)
graph_view.set_backend("cpu") graph_view.set_backend("cpu")
graph_view.forward_dims() graph_view.forward_dims()
......
...@@ -29,12 +29,20 @@ namespace Aidge { ...@@ -29,12 +29,20 @@ namespace Aidge {
// compute kernel registry for forward and backward // compute kernel registry for forward and backward
class AvgPoolingImpl2DForward_cpu class AvgPoolingImpl2DForward_cpu
: public Registrable<AvgPoolingImpl2DForward_cpu, : public Registrable<AvgPoolingImpl2DForward_cpu,
std::tuple<DataType, DataType>, std::tuple<DataType, DataType>,
void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4>&,
const void *,
void *)> {};
class AvgPoolingImpl2DBackward_cpu class AvgPoolingImpl2DBackward_cpu
: public Registrable<AvgPoolingImpl2DBackward_cpu, : public Registrable<AvgPoolingImpl2DBackward_cpu,
std::tuple<DataType, DataType>, std::tuple<DataType, DataType>,
void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; void(const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 4>&,
const void *,
void *)> {};
class AvgPoolingImpl2D_cpu : public OperatorImpl { class AvgPoolingImpl2D_cpu : public OperatorImpl {
public: public:
......
...@@ -12,16 +12,16 @@ ...@@ -12,16 +12,16 @@
#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ #ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Data.hpp"
#include <array> #include <array>
#include <tuple> #include <tuple>
#include <cmath> #include <cmath>
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
#include "aidge/data/Data.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
namespace Aidge { namespace Aidge {
/** /**
* @brief Forward kernel for 2D AvgPoolingolution on CPU backend. * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
...@@ -33,10 +33,11 @@ namespace Aidge { ...@@ -33,10 +33,11 @@ namespace Aidge {
* @param output_ Output Tensor. * @param output_ Output Tensor.
*/ */
template <class I, class O> template <class I, class O>
void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 4> &dims, const std::array<DimSize_t, 2>& kernelDims,
const void *input_, const std::array<DimSize_t, 4> &dims,
void *output_) { const void *input_,
void *output_) {
// FIXME: missing convolution attributes as arguments // FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_); const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_); O *output = static_cast<O *>(output_);
...@@ -44,12 +45,12 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, ...@@ -44,12 +45,12 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(std::get<0>(attrs)[0]))); static_cast<float>(strideDims[0])));
// output W size // output W size
const std::size_t oySize = const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) / static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(std::get<0>(attrs)[1]))); static_cast<float>(strideDims[1])));
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -63,16 +64,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, ...@@ -63,16 +64,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx); const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) { for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify); const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy; const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const std::size_t ix = ox * std::get<0>(attrs)[0]; const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * std::get<0>(attrs)[1]; const std::size_t iy = oy * strideDims[1];
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += static_cast<O>( output[oIndexFull] += static_cast<O>(
......
...@@ -30,26 +30,28 @@ namespace Aidge { ...@@ -30,26 +30,28 @@ namespace Aidge {
class BatchNormImpl2DForward_cpu class BatchNormImpl2DForward_cpu
: public Registrable<BatchNormImpl2DForward_cpu, : public Registrable<BatchNormImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType>, std::tuple<DataType, DataType, DataType>,
void(const BatchNorm_Op<2>::Attrs &, void(float,
const std::array<DimSize_t, 4> &, float,
const void *, const std::array<DimSize_t, 4> &,
const void *, const void *,
const void *, const void *,
void *, const void *,
void *, void *,
void *, void *,
const bool)> {}; void *,
const bool)> {};
class BatchNormImpl2DBackward_cpu class BatchNormImpl2DBackward_cpu
: public Registrable<BatchNormImpl2DBackward_cpu, : public Registrable<BatchNormImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType>, std::tuple<DataType, DataType, DataType>,
void(const BatchNorm_Op<2>::Attrs &, void(float,
const std::array<DimSize_t, 4> &, float,
const void *, const std::array<DimSize_t, 4> &,
const void *, const void *,
const void *, const void *,
void *, const void *,
void *, void *,
void *)> {}; void *,
void *)> {};
class BatchNormImpl2D_cpu : public OperatorImpl { class BatchNormImpl2D_cpu : public OperatorImpl {
public: public:
......
...@@ -38,7 +38,7 @@ namespace Aidge { ...@@ -38,7 +38,7 @@ namespace Aidge {
* @param output_ Output Tensor. * @param output_ Output Tensor.
*/ */
template <class I, class P, class O> template <class I, class P, class O>
void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims,
const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
// FIXME: missing convolution attributes as arguments // FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_); const I *input = static_cast<const I *>(input_);
...@@ -53,12 +53,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con ...@@ -53,12 +53,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
const DimSize_t featureMapSize = dims[2]*dims[3]; const DimSize_t featureMapSize = dims[2]*dims[3];
if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) { if ((freeze == true) || (momentum == 0.0f)) {
for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t batch = 0; batch < nbBatch; ++batch) {
for (std::size_t ch = 0; ch < nbChannels; ++ch) { for (std::size_t ch = 0; ch < nbChannels; ++ch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs))); const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
for (std::size_t feature = 0; feature<featureMapSize; ++feature) { for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
...@@ -82,10 +82,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con ...@@ -82,10 +82,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
const I inputMean = sum / static_cast<I>(nbDataPerChannel); const I inputMean = sum / static_cast<I>(nbDataPerChannel);
const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean;
batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs); batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum;
batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs); batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum;
const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs))); const P var = std::sqrt(inputVar + static_cast<P>(epsilon));
for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t batch = 0; batch < nbBatch; ++batch) {
const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
for (std::size_t feature = 0; feature<featureMapSize; ++feature) { for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
......
...@@ -30,13 +30,27 @@ namespace Aidge { ...@@ -30,13 +30,27 @@ namespace Aidge {
class ConvDepthWiseImpl2DForward_cpu class ConvDepthWiseImpl2DForward_cpu
: public Registrable<ConvDepthWiseImpl2DForward_cpu, : public Registrable<ConvDepthWiseImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>, std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void(const std::array<DimSize_t, 2>&,
const void *, const void *, void *)> {}; const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
bool,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)> {};
class ConvDepthWiseImpl2DBackward_cpu class ConvDepthWiseImpl2DBackward_cpu
: public Registrable<ConvDepthWiseImpl2DBackward_cpu, : public Registrable<ConvDepthWiseImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>, std::tuple<DataType, DataType, DataType, DataType>,
void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void(const std::array<DimSize_t, 2>&,
const void *, const void *, void *)> {}; const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
bool,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)> {};
class ConvDepthWiseImpl2D_cpu : public OperatorImpl { class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
public: public:
......
...@@ -37,8 +37,16 @@ namespace Aidge { ...@@ -37,8 +37,16 @@ namespace Aidge {
* @param output_ Output Tensor. * @param output_ Output Tensor.
*/ */
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &inputDims, void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const void *input_, const void *weights_, const void *biases_, void *output_) { const std::array<DimSize_t, 2>& /*dilationDims*/,
const std::array<DimSize_t, 2>& kernelDims,
bool noBias,
const std::array<DimSize_t, 4> &inputDims,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments // FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_); const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_); const W *weights = static_cast<const W *>(weights_);
...@@ -48,12 +56,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at ...@@ -48,12 +56,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(std::get<0>(attrs)[0]))); static_cast<float>(strideDims[0])));
// output W size // output W size
const std::size_t oySize = const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(std::get<0>(attrs)[1]))); static_cast<float>(strideDims[1])));
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -64,36 +72,36 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at ...@@ -64,36 +72,36 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize; const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
B biasVal = ((!std::get<3>(attrs)) && biases != nullptr) ? biases[ch] : B(0); B biasVal = ((!noBias) && biases != nullptr) ? biases[ch] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = ch * std::get<2>(attrs)[0] * std::get<2>(attrs)[1]; const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx); const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) { for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > std::get<2>(attrs)[1] ? std::get<2>(attrs)[1] : inputDims[3] + dify); const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy; const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]); const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else { } else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) { for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<2>(attrs)[1] + sy] * output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
} }
} }
...@@ -110,7 +118,7 @@ static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DFor ...@@ -110,7 +118,7 @@ static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DFor
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
{DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>); Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
{DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
......
...@@ -30,13 +30,28 @@ namespace Aidge { ...@@ -30,13 +30,28 @@ namespace Aidge {
class ConvImpl2DForward_cpu class ConvImpl2DForward_cpu
: public Registrable<ConvImpl2DForward_cpu, : public Registrable<ConvImpl2DForward_cpu,
std::tuple<DataType, DataType, DataType, DataType>, std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, DimSize_t, const void *, void(const std::array<DimSize_t, 2>&,
const void *, const void *, void *)> {}; const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
bool,
const std::array<DimSize_t, 4> &,
DimSize_t,
const void *,
const void *,
const void *,
void *)> {};
class ConvImpl2DBackward_cpu class ConvImpl2DBackward_cpu
: public Registrable<ConvImpl2DBackward_cpu, : public Registrable<ConvImpl2DBackward_cpu,
std::tuple<DataType, DataType, DataType, DataType>, std::tuple<DataType, DataType, DataType, DataType>,
void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void(const std::array<DimSize_t, 2>&,
const void *, const void *, void *)> {}; const std::array<DimSize_t, 2>&,
const std::array<DimSize_t, 2>&,
bool,
const std::array<DimSize_t, 4> &,
const void *,
const void *,
const void *,
void *)> {};
class ConvImpl2D_cpu : public OperatorImpl { class ConvImpl2D_cpu : public OperatorImpl {
public: public:
......
...@@ -37,8 +37,17 @@ namespace Aidge { ...@@ -37,8 +37,17 @@ namespace Aidge {
* @param output_ Output Tensor. * @param output_ Output Tensor.
*/ */
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &inputDims, DimSize_t outChannels, void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const void *input_, const void *weights_, const void *biases_, void *output_) { const std::array<DimSize_t, 2>& /*dilationDims*/,
const std::array<DimSize_t, 2>& kernelDims,
bool noBias,
const std::array<DimSize_t, 4> &inputDims,
DimSize_t outChannels,
const void *input_,
const void *weights_,
const void *biases_,
void *output_)
{
// FIXME: missing convolution attributes as arguments // FIXME: missing convolution attributes as arguments
const I *input = static_cast<const I *>(input_); const I *input = static_cast<const I *>(input_);
const W *weights = static_cast<const W *>(weights_); const W *weights = static_cast<const W *>(weights_);
...@@ -47,12 +56,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar ...@@ -47,12 +56,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
/* /*
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(static_cast<float>(inputDims[0] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) / static_cast<std::size_t>(static_cast<float>(inputDims[0] - kernelDims[0] + strideDims[0]) /
static_cast<float>(std::get<0>(attrs)[0])); static_cast<float>(strideDims[0]));
// output W size // output W size
const std::size_t oySize = const std::size_t oySize =
static_cast<std::size_t>(static_cast<float>(inputDims[1] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) / static_cast<std::size_t>(static_cast<float>(inputDims[1] - kernelDims[1] + strideDims[1]) /
static_cast<float>(std::get<0>(attrs)[1])); static_cast<float>(strideDims[1]));
// TODO: kernel computation // TODO: kernel computation
// output (Xout, Yout, outCh, batch) // output (Xout, Yout, outCh, batch)
...@@ -61,8 +70,8 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar ...@@ -61,8 +70,8 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// does not take Dilation attribute into account // does not take Dilation attribute into account
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
for (std::size_t oy = 0; oy < oySize; ++oy) { for (std::size_t oy = 0; oy < oySize; ++oy) {
const std::size_t ix = ox * std::get<0>(attrs)[0]; const std::size_t ix = ox * strideDims[0];
const std::size_t iy = oy * std::get<0>(attrs)[1]; const std::size_t iy = oy * strideDims[1];
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox)); const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox));
...@@ -71,10 +80,10 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar ...@@ -71,10 +80,10 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
output[oIndex + batch] = biasVal; output[oIndex + batch] = biasVal;
} }
for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) { for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) {
for (std::size_t sx = 0; sx < std::get<2>(attrs)[0]; ++sx) { for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) {
for (std::size_t sy = 0; sy < std::get<2>(attrs)[1]; ++sy) { for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) {
const std::size_t wIndex = const std::size_t wIndex =
outCh + outChannels * (inCh + inputDims[2] * (sy + std::get<2>(attrs)[1] * sx)); outCh + outChannels * (inCh + inputDims[2] * (sy + kernelDims[1] * sx));
std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx))); std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx)));
for (std::size_t batch = 0; batch < inputDims[3]; ++batch) { for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
...@@ -90,12 +99,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar ...@@ -90,12 +99,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
static_cast<float>(std::get<0>(attrs)[0]))); static_cast<float>(strideDims[0])));
// output W size // output W size
const std::size_t oySize = const std::size_t oySize =
static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - std::get<2>(attrs)[1] + std::get<0>(attrs)[1]) / static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
static_cast<float>(std::get<0>(attrs)[1]))); static_cast<float>(strideDims[1])));
// TODO: kernel computation // TODO: kernel computation
// output (batch, outCh, Xout, Yout) // output (batch, outCh, Xout, Yout)
...@@ -107,37 +116,37 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar ...@@ -107,37 +116,37 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize; const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
// If NoBias or bias = nullptr, set B(0) // If NoBias or bias = nullptr, set B(0)
B biasVal = ((!std::get<3>(attrs)) && biases != nullptr) ? biases[outCh] : B(0); B biasVal = ((!noBias) && biases != nullptr) ? biases[outCh] : B(0);
std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
const std::size_t wIndex = (inCh + outCh*inputDims[1]) * std::get<2>(attrs)[0] * std::get<2>(attrs)[1]; const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t ox = 0; ox < oxSize; ++ox) {
const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx); const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
for (std::size_t oy = 0; oy < oySize; ++oy) { for (std::size_t oy = 0; oy < oySize; ++oy) {
const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > std::get<2>(attrs)[1] ? std::get<2>(attrs)[1] : inputDims[3] + dify); const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
const std::size_t oIndexFull = oIndex + ox*oySize + oy; const std::size_t oIndexFull = oIndex + ox*oySize + oy;
const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]); const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]); const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
output[oIndexFull] += (weights[wIndex + 0*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 0*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 0*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 1*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 1*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 1*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
weights[wIndex + 2*std::get<2>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
weights[wIndex + 2*std::get<2>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
weights[wIndex + 2*std::get<2>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
} else { } else {
for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
for (std::size_t sy = syMin; sy < syMax; ++sy) { for (std::size_t sy = syMin; sy < syMax; ++sy) {
output[oIndexFull] += weights[wIndex + sx*std::get<2>(attrs)[1] + sy] * output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
} }
} }
......
...@@ -12,14 +12,15 @@ ...@@ -12,14 +12,15 @@
#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_ #ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
#define AIDGE_CPU_OPERATOR_FCIMPL_H_ #define AIDGE_CPU_OPERATOR_FCIMPL_H_
#include <memory>
#include <vector>
#include <array>
#include "aidge/backend/OperatorImpl.hpp" #include "aidge/backend/OperatorImpl.hpp"
#include "aidge/operator/FC.hpp" #include "aidge/operator/FC.hpp"
#include "aidge/utils/Registrar.hpp" #include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h" #include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <memory>
#include <vector>
#include <array>
namespace Aidge { namespace Aidge {
// class FC_Op; // class FC_Op;
...@@ -30,29 +31,29 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu, ...@@ -30,29 +31,29 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
DataType, DataType,
DataType, DataType,
DataType>, DataType>,
void(const FC_Op::Attrs&, void(const bool,
const DimSize_t, const DimSize_t,
const DimSize_t, const DimSize_t,
const DimSize_t, const DimSize_t,
const void *, const void *,
const void *, const void *,
const void *, const void *,
void *)> {}; void *)> {};
// Registry of CPU backward kernels for the FC (fully-connected) operator,
// keyed on the (input, weight, bias, output) data types.
// Kernel contract: the flattened `noBias` attribute, the three sizes
// (batch, input-feature, output-feature), three read-only raw buffers and
// three writable gradient buffers. Buffers are opaque void* pointers that
// the registered kernel casts to its concrete element types.
class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
                                              std::tuple<DataType,
                                                         DataType,
                                                         DataType,
                                                         DataType>,
                                              void(const bool,        // noBias
                                                   const DimSize_t,   // batchSize
                                                   const DimSize_t,   // inputFeatureSize
                                                   const DimSize_t,   // outputFeatureSize
                                                   const void *,
                                                   const void *,
                                                   const void *,
                                                   void *,
                                                   void *,
                                                   void *)> {};
class FCImpl_cpu : public OperatorImpl { class FCImpl_cpu : public OperatorImpl {
public: public:
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
namespace Aidge { namespace Aidge {
template <class I, class O, class W, class B> template <class I, class O, class W, class B>
void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, void FCImpl_cpu_backward_kernel(const bool noBias,
const DimSize_t batchSize, const DimSize_t batchSize,
const DimSize_t inputFeatureSize, const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize, const DimSize_t outputFeatureSize,
...@@ -40,7 +40,7 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, ...@@ -40,7 +40,7 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs,
// bias grad // bias grad
if (std::get<0>(attrs)) { // no bias if (noBias) { // no bias
std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0)); std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0));
} else { } else {
for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
......
...@@ -83,16 +83,21 @@ namespace Aidge { ...@@ -83,16 +83,21 @@ namespace Aidge {
// } // }
template <class I, class W, class B, class O> template <class I, class W, class B, class O>
void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t inputFeatureSize, void FCImpl_cpu_forward_kernel(const bool noBias,
const DimSize_t outputFeatureSize, const DimSize_t batchSize,
const void* input_, const void* weights_, const void* biases_, void* output_) { const DimSize_t inputFeatureSize,
const DimSize_t outputFeatureSize,
const void* input_,
const void* weights_,
const void* biases_,
void* output_) {
// FIXME: missing FC attributes as arguments // FIXME: missing FC attributes as arguments
const I* input = static_cast<const I*>(input_); const I* input = static_cast<const I*>(input_);
const W* weights = static_cast<const W*>(weights_); const W* weights = static_cast<const W*>(weights_);
const B* biases = static_cast<const B*>(biases_); const B* biases = static_cast<const B*>(biases_);
O* output = static_cast<O*>(output_); O* output = static_cast<O*>(output_);
if (std::get<0>(attrs)) { if (noBias) {
std::fill(output, output+(batchSize*outputFeatureSize), B(0)); std::fill(output, output+(batchSize*outputFeatureSize), B(0));
} }
else { else {
......
...@@ -25,11 +25,19 @@ ...@@ -25,11 +25,19 @@
namespace Aidge { namespace Aidge {
// compute kernel registry for forward and backward // compute kernel registry for forward and backward
// Registry of CPU forward kernels for LeakyReLU, keyed on the
// (input, output) data types. The attribute tuple has been flattened:
// kernels now receive the negative slope directly as a float.
class LeakyReLUImplForward_cpu
    : public Registrable<LeakyReLUImplForward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const float,   // negativeSlope
                              std::size_t,   // number of elements
                              const void*,   // input buffer
                              void*)> {};    // output buffer
// Registry of CPU backward kernels for LeakyReLU, keyed on the
// (input, output) data types. Same flattened signature as the forward
// registry: negative slope as a plain float plus raw buffers.
class LeakyReLUImplBackward_cpu
    : public Registrable<LeakyReLUImplBackward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const float,   // negativeSlope
                              std::size_t,   // number of elements
                              const void*,   // input buffer
                              void*)> {};    // output buffer
class LeakyReLUImpl_cpu : public OperatorImpl { class LeakyReLUImpl_cpu : public OperatorImpl {
public: public:
......
...@@ -18,17 +18,17 @@ ...@@ -18,17 +18,17 @@
namespace Aidge { namespace Aidge {
template <class I, class O>
/**
 * @brief LeakyReLU backward kernel on CPU.
 * Element-wise: passes strictly-positive values through unchanged and
 * scales non-positive values by the negative slope.
 * @param negativeSlope_ Slope applied to non-positive elements (converted to I).
 * @param inputLenght    Number of elements to process.
 * @param input_         Raw pointer to the input buffer (elements of type I).
 * @param output_        Raw pointer to the output buffer (elements of type O).
 */
void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
                                       std::size_t inputLenght,
                                       const void* input_,
                                       void* output_) {
    const I* const in = static_cast<const I*>(input_);
    O* const out = static_cast<O*>(output_);
    // Convert once, outside the loop, to the input element type.
    const I slope = static_cast<const I>(negativeSlope_);

    for (std::size_t idx = 0; idx < inputLenght; ++idx) {
        const I value = in[idx];
        if (value > 0) {
            out[idx] = value;           // positive side: identity
        } else {
            out[idx] = slope * value;   // non-positive side: scaled
        }
    }
}
......
...@@ -18,17 +18,17 @@ ...@@ -18,17 +18,17 @@
namespace Aidge { namespace Aidge {
template <class I, class O>
/**
 * @brief LeakyReLU forward kernel on CPU.
 * Element-wise: keeps non-negative values unchanged and multiplies
 * negative values by the negative slope.
 * @param negativeSlope_ Slope applied to negative elements (converted to I).
 * @param inputLenght    Number of elements to process.
 * @param input_         Raw pointer to the input buffer (elements of type I).
 * @param output_        Raw pointer to the output buffer (elements of type O).
 */
void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
                                      std::size_t inputLenght,
                                      const void* input_,
                                      void* output_) {
    const I* const in = static_cast<const I*>(input_);
    O* const out = static_cast<O*>(output_);
    // Convert once, outside the loop, to the input element type.
    const I slope = static_cast<const I>(negativeSlope_);

    for (std::size_t idx = 0; idx < inputLenght; ++idx) {
        const I value = in[idx];
        if (value >= 0) {
            out[idx] = value;           // non-negative side: identity
        } else {
            out[idx] = value * slope;   // negative side: scaled
        }
    }
}
......
...@@ -29,12 +29,22 @@ namespace Aidge { ...@@ -29,12 +29,22 @@ namespace Aidge {
// compute kernel registry for forward and backward // compute kernel registry for forward and backward
// Registry of CPU forward kernels for 2-D MaxPooling, keyed on the
// (input, output) data types. Attributes are passed flattened:
// stride dims, kernel dims and the ceil-mode flag, followed by the
// 4-D input dimensions and the raw input/output buffers.
class MaxPoolingImpl2DForward_cpu
    : public Registrable<MaxPoolingImpl2DForward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const std::array<DimSize_t, 2>&,   // strideDims
                              const std::array<DimSize_t, 2>&,   // kernelDims
                              const bool,                        // ceilMode
                              const std::array<DimSize_t, 4> &,  // input dims
                              const void *,                      // input buffer
                              void *)> {};                       // output buffer
// Registry of CPU backward kernels for 2-D MaxPooling, keyed on the
// (input, output) data types. Mirrors the forward registry's flattened
// attribute signature (stride dims, kernel dims, ceil-mode flag).
class MaxPoolingImpl2DBackward_cpu
    : public Registrable<MaxPoolingImpl2DBackward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const std::array<DimSize_t, 2>&,   // strideDims
                              const std::array<DimSize_t, 2>&,   // kernelDims
                              const bool,                        // ceilMode
                              const std::array<DimSize_t, 4> &,  // input dims
                              const void *,                      // input buffer
                              void *)> {};                       // output buffer
class MaxPoolingImpl2D_cpu : public OperatorImpl { class MaxPoolingImpl2D_cpu : public OperatorImpl {
public: public:
......
...@@ -12,15 +12,15 @@ ...@@ -12,15 +12,15 @@
#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ #ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp" #include <array>
#include <cmath>
#include <tuple>
#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Data.hpp" #include "aidge/data/Data.hpp"
#include <array> #include "aidge/utils/Registrar.hpp"
#include <tuple> #include "aidge/utils/Types.h"
#include <cmath>
namespace Aidge { namespace Aidge {
/** /**
...@@ -33,17 +33,16 @@ namespace Aidge { ...@@ -33,17 +33,16 @@ namespace Aidge {
* @param output_ Output Tensor. * @param output_ Output Tensor.
*/ */
template <class I, class O> template <class I, class O>
void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs, void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
const std::array<DimSize_t, 4> &dims, const std::array<DimSize_t, 2>& kernelDims,
const void *input_, const bool /*ceilMode*/,
void *output_) { const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_) {
// FIXME: missing convolution parameters as arguments // FIXME: missing convolution parameters as arguments
const I *input = static_cast<const I *>(input_); const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_); O *output = static_cast<O *>(output_);
std::array<DimSize_t, 2> strideDims = std::get<0>(attrs);
std::array<DimSize_t, 2> kernelDims = std::get<1>(attrs);
// output H size // output H size
const std::size_t oxSize = const std::size_t oxSize =
static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
......
...@@ -30,13 +30,21 @@ namespace Aidge { ...@@ -30,13 +30,21 @@ namespace Aidge {
// Registry of CPU forward kernels for 2-D Pad, keyed on the
// (input, output) data types. Attributes are passed flattened:
// the four begin/end border sizes, the border type and the constant
// border value, followed by the 4-D input dims and the raw buffers.
class PadImpl2DForward_cpu
    : public Registrable<PadImpl2DForward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const std::array<DimSize_t, 4>&,   // beginEndBorders
                              const PadBorderType,               // border type
                              const double,                      // border value
                              const std::array<DimSize_t, 4> &,  // input dims
                              const void *,                      // input buffer
                              void *)> {};                       // output buffer
// Registry of CPU backward kernels for 2-D Pad, keyed on the
// (input, output) data types. Mirrors the forward registry's flattened
// attribute signature (borders, border type, border value).
class PadImpl2DBackward_cpu
    : public Registrable<PadImpl2DBackward_cpu,
                         std::tuple<DataType, DataType>,
                         void(const std::array<DimSize_t, 4>&,   // beginEndBorders
                              const PadBorderType,               // border type
                              const double,                      // border value
                              const std::array<DimSize_t, 4> &,  // input dims
                              const void *,                      // input buffer
                              void *)> {};                       // output buffer
class PadImpl2D_cpu : public OperatorImpl { class PadImpl2D_cpu : public OperatorImpl {
public: public:
......
...@@ -12,14 +12,15 @@ ...@@ -12,14 +12,15 @@
#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ #ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
#include "aidge/utils/Registrar.hpp" #include <algorithm> // std::max, std::min
#include <array>
#include <cstddef> // std::size_t
#include <cstdint> // std::int32_t
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl.hpp"
#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h" #include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include <cmath>
#include <array>
#include <algorithm>
namespace Aidge { namespace Aidge {
/** /**
...@@ -32,58 +33,62 @@ namespace Aidge { ...@@ -32,58 +33,62 @@ namespace Aidge {
* @param output_ Output Tensor. * @param output_ Output Tensor.
*/ */
template <class I, class O> template <class I, class O>
void PadImpl2D_cpu_forward_kernel(const Pad_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorders,
const void *input_, void *output_) const PadBorderType borderType,
const double borderValue,
const std::array<DimSize_t, 4> &dims,
const void *input_,
void *output_)
{ {
const I *input = static_cast<const I *>(input_); const I *input = static_cast<const I *>(input_);
O *output = static_cast<O *>(output_); O *output = static_cast<O *>(output_);
const std::size_t oySize = dims[2] + std::get<0>(attrs)[0] + std::get<0>(attrs)[1]; const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
const std::size_t oxSize = dims[3] + std::get<0>(attrs)[2] + std::get<0>(attrs)[3]; const std::size_t oxSize = dims[3] + beginEndBorders[2] + beginEndBorders[3];
for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t batch = 0; batch < dims[0]; ++batch) {
for (std::size_t ch = 0; ch < dims[1]; ++ch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) {
const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
for (unsigned int oy = 0; oy < oySize; ++oy) { for (std::uint32_t oy = 0; oy < oySize; ++oy) {
for (unsigned int ox = 0; ox < oxSize; ++ox) { for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
const std::size_t oIndexFull = oIndex + ox*oySize + oy; const std::size_t oIndexFull = oIndex + ox*oySize + oy;
O outputValue = std::get<2>(attrs); O outputValue = static_cast<O>(borderValue);
if (std::get<1>(attrs) == PadBorderType::Constant) { if (borderType == PadBorderType::Constant) {
int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]); std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]); std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
if (ix >= 0 && ix < static_cast<int>(dims[3]) && iy >= 0 && iy < static_cast<int>(dims[2])) { if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) {
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
} }
} }
else if (std::get<1>(attrs) == PadBorderType::Edge) { else if (borderType == PadBorderType::Edge) {
int ix = std::max(0, std::min(static_cast<int>(dims[3]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]))); std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])));
int iy = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]))); std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])));
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
} }
else if (std::get<1>(attrs) == PadBorderType::Reflect) { else if (borderType == PadBorderType::Reflect) {
int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]); std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]); std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
if (ix < 0) if (ix < 0)
ix = 0 - ix; ix = 0 - ix;
if (iy < 0) if (iy < 0)
iy = 0 - iy; iy = 0 - iy;
if (ix >= static_cast<int>(dims[3])) if (ix >= static_cast<std::int32_t>(dims[3]))
ix = static_cast<int>(dims[3]) - ix; ix = static_cast<std::int32_t>(dims[3]) - ix;
if (iy >= static_cast<int>(dims[2])) if (iy >= static_cast<std::int32_t>(dims[2]))
iy = static_cast<int>(dims[2]) - iy; iy = static_cast<std::int32_t>(dims[2]) - iy;
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
} }
else if (std::get<1>(attrs) == PadBorderType::Wrap) { else if (borderType == PadBorderType::Wrap) {
int ix = (static_cast<int>(dims[3]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])) % static_cast<int>(dims[3]); std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]);
int iy = (static_cast<int>(dims[2]) + static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])) % static_cast<int>(dims[2]); std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]);
outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
} }
...@@ -101,7 +106,7 @@ static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32( ...@@ -101,7 +106,7 @@ static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32(
Aidge::PadImpl2D_cpu_forward_kernel<float, float>); Aidge::PadImpl2D_cpu_forward_kernel<float, float>);
// Register the Int32 Pad forward kernel. Fixed-width std::int32_t is used
// instead of plain int so the element size matches DataType::Int32 on every
// platform.
static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32(
        {DataType::Int32, DataType::Int32},
        Aidge::PadImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>);
// Register the Float64 Pad forward kernel.
static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64(
        {DataType::Float64, DataType::Float64},
        Aidge::PadImpl2D_cpu_forward_kernel<double, double>);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment