diff --git a/aidge_backend_cpu/unit_tests/test_recipes.py b/aidge_backend_cpu/unit_tests/test_recipes.py index 12d8774369af5a46cfbd30d44fc90f4f97ca9821..7c11b92b93eaf04eb83518992c46bf4dec40dfca 100644 --- a/aidge_backend_cpu/unit_tests/test_recipes.py +++ b/aidge_backend_cpu/unit_tests/test_recipes.py @@ -36,7 +36,7 @@ class test_recipes(unittest.TestCase): graph_view = aidge_core.sequential([input_node, conv, bn]) # Add random values to conv and BatchNorm parameters - graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_datatype(aidge_core.dtype.float32) graph_view.set_backend("cpu") np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32) diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py index 0c41d59963c7633151745f2efe1f1fac3ee07815..0aeeb04b74a078f77c57500b959d6ef9fa9af4d0 100644 --- a/aidge_backend_cpu/unit_tests/test_scheduler.py +++ b/aidge_backend_cpu/unit_tests/test_scheduler.py @@ -24,7 +24,7 @@ class test_scheduler(unittest.TestCase): input_node.add_child(relu) - gv.set_datatype(aidge_core.DataType.Int32) + gv.set_datatype(aidge_core.dtype.int32) gv.set_backend("cpu") scheduler = aidge_core.SequentialScheduler(gv) @@ -48,7 +48,7 @@ class test_scheduler(unittest.TestCase): ]) EXPECTED_SCHEDULE = ['0', '1', '2'] - graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_datatype(aidge_core.dtype.float32) graph_view.set_backend("cpu") graph_view.forward_dims() @@ -74,7 +74,7 @@ class test_scheduler(unittest.TestCase): EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid ! - graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_datatype(aidge_core.dtype.float32) graph_view.set_backend("cpu") graph_view.forward_dims() diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index a1417de1517a8212b4b4308e5128a5ee3fce1e39..11f9c264098d5a238d0d1f8e6bc4fac0cc099549 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -32,6 +32,7 @@ #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl.hpp" +#include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index 478a0226f43ccbc64d567a56ab89a558179438c5..94b22dcc7fc8251f8ca907ab0b060b0275309c9d 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -14,6 +14,8 @@ #include "aidge/utils/Registrar.hpp" +#include <cstdint> // std::int32_t, std::int64_t + #include "aidge/backend/cpu/data/Broadcasting.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" @@ -42,10 +44,12 @@ void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const st namespace { static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float32( {DataType::Float32, DataType::Float32}, Aidge::AddImpl_cpu_forward_kernel<float, float>); -static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<int, int>); static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float64( {DataType::Float64, 
DataType::Float64}, Aidge::AddImpl_cpu_forward_kernel<double, double>);
+static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>);
+static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int64(
+        {DataType::Int64, DataType::Int64}, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>);
 } // namespace
 } // namespace Aidge
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
index ce126dc2b870d6ac767c15bc6fbca2deb07e8772..12a5dc334619c16e6ad3a77f0cd76f4db7a87b77 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
@@ -29,12 +29,20 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class AvgPoolingImpl2DForward_cpu
     : public Registrable<AvgPoolingImpl2DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                             const std::array<DimSize_t, 2>&,
+                             const std::array<DimSize_t, 4>&,
+                             const void *,
+                             void *)> {};
 class AvgPoolingImpl2DBackward_cpu
     : public Registrable<AvgPoolingImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                             const std::array<DimSize_t, 2>&,
+                             const std::array<DimSize_t, 4>&,
+                             const void *,
+                             void *)> {};
 
 class AvgPoolingImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
index d6950e11e935a3f6d5548148d1c393a5340af224..c7d9f86235c3bf1d7d01cf429cab29d156592fb5 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
@@ -12,16 +12,16 @@
 #ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/data/Data.hpp"
 #include <array>
 #include <tuple>
 #include <cmath>
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
 namespace Aidge {
 /**
- * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
+ * @brief Forward kernel for 2D AvgPooling on CPU backend.
@@ -33,10 +33,11 @@ namespace Aidge {
 * @param output_ Output Tensor.
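+ * @param strideDims Array of stride dimensions.
+ * @param kernelDims Array of kernel dimensions.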
*/ template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, - const std::array<DimSize_t, 4> &dims, - const void *input_, - void *output_) { +void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, + const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); @@ -44,12 +45,12 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) / - static_cast<float>(std::get<0>(attrs)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) / - static_cast<float>(std::get<0>(attrs)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -63,16 +64,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0); for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); + const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); + const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? 
kernelDims[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * std::get<0>(attrs)[0]; - const std::size_t iy = oy * std::get<0>(attrs)[1]; + const std::size_t ix = ox * strideDims[0]; + const std::size_t iy = oy * strideDims[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { output[oIndexFull] += static_cast<O>( diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index 8bd567dab3d564ccdeffdc581585e404fc4697a4..93bdab2d3f37e3bd8dc1e68ab68a05de8c8015ed 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -30,26 +30,28 @@ namespace Aidge { class BatchNormImpl2DForward_cpu : public Registrable<BatchNormImpl2DForward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Attrs &, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *, - void *, - void *, - const bool)> {}; + void(float, + float, + const std::array<DimSize_t, 4> &, + const void *, + const void *, + const void *, + void *, + void *, + void *, + const bool)> {}; class BatchNormImpl2DBackward_cpu : public Registrable<BatchNormImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Attrs &, - const std::array<DimSize_t, 4> &, - const void *, - const void *, - const void *, - void *, - void *, - void *)> {}; + void(float, + float, + const std::array<DimSize_t, 4> &, + const void *, + const void *, + const void *, + void *, + void *, + void *)> {}; class BatchNormImpl2D_cpu : public OperatorImpl { public: diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp index cfde6ebe7cab8cfe2f793723983c8552bd9747b8..19f232a783bccf0a800d41f2bc566ccf6e04f05e 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp @@ -38,7 +38,7 @@ namespace Aidge { * @param output_ Output Tensor. 
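+ * @param epsilon Epsilon value added to the variance to avoid a division by zero.
+ * @param momentum Momentum used to update the running mean and variance (0.0f keeps the batch statistics frozen).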
*/ template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, +void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims, const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); @@ -53,12 +53,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con const DimSize_t featureMapSize = dims[2]*dims[3]; - if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) { + if ((freeze == true) || (momentum == 0.0f)) { for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t ch = 0; ch < nbChannels; ++ch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs))); + const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon)); for (std::size_t feature = 0; feature<featureMapSize; ++feature) { output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; @@ -82,10 +82,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con const I inputMean = sum / static_cast<I>(nbDataPerChannel); const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; - batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs); - batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs); + batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum; + batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum; - const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs))); + const P var = std::sqrt(inputVar + static_cast<P>(epsilon)); for (std::size_t batch = 0; batch < nbBatch; ++batch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; for (std::size_t feature = 0; feature<featureMapSize; ++feature) { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index a61a7299ed6bd5c5a3e41c09e9d5b5f1f7ae3326..ec886a310dd2edc616ced6ee447665eab3ce301a 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -25,18 +25,60 @@ namespace Aidge { // class ConvDepthWise_Op; +// compute kernel registry for forward and backward +class ConvDepthWiseImpl1DForward_cpu + : public Registrable<ConvDepthWiseImpl1DForward_cpu, + std::tuple<DataType, DataType, DataType, DataType>, + void(const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 3>&, + const void *, + const void *, + const void *, + void *)> {}; + +class ConvDepthWiseImpl1D_cpu : public OperatorImpl { +public: + ConvDepthWiseImpl1D_cpu(const ConvDepthWise_Op<1> &op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<ConvDepthWiseImpl1D_cpu> create(const ConvDepthWise_Op<1> &op) { + return std::make_unique<ConvDepthWiseImpl1D_cpu>(op); + } + + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void 
forward() override;
+};
+
+namespace {
+// add cpu backend to ConvDepthWise_Op<1> implementation registry
+static Registrar<ConvDepthWise_Op<1>> registrarConvDepthWiseImpl1D_cpu("cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
+} // namespace
 
 // compute kernel registry for forward and backward
 class ConvDepthWiseImpl2DForward_cpu
     : public Registrable<ConvDepthWiseImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              const void *, const void *, void *)> {};
+                         void(const std::array<DimSize_t, 2>&,
+                              const std::array<DimSize_t, 2>&,
+                              const std::array<DimSize_t, 2>&,
+                              const std::array<DimSize_t, 4> &,
+                              const void *,
+                              const void *,
+                              const void *,
+                              void *)> {};
 class ConvDepthWiseImpl2DBackward_cpu
     : public Registrable<ConvDepthWiseImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              const void *, const void *, void *)> {};
+                         void(const std::array<DimSize_t, 2>&,
+                              const std::array<DimSize_t, 2>&,
+                              const std::array<DimSize_t, 2>&,
+                              bool,
+                              const std::array<DimSize_t, 4> &,
+                              const void *,
+                              const void *,
+                              const void *,
+                              void *)> {};
 
 class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index 801bd315f9e5058ffade574fc92179b1e3c513e4..a02aa672b92f089790ef1903af8b804f816f3baa 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -12,17 +12,93 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>  // std::int32_t (used by the Int32 kernel registrar below)
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <cstddef>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D ConvDepthWise on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Array of stride dimensions.
+ * @param dilationDims Array of dilation dimensions (not yet taken into account).
+ * @param kernelDims Array of kernel dimensions.
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                                            const std::array<DimSize_t, 1>& /*dilationDims*/,
+                                            const std::array<DimSize_t, 1>& kernelDims,
+                                            const std::array<DimSize_t, 3>& inputDims,
+                                            const void *input_,
+                                            const void *weights_,
+                                            const void *biases_,
+                                            void *output_) {
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output X size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // output (batch, ch, Xout)
+    // input  (batch, ch, Xin)
+    // weight (ch, 1, kernelX)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
+            const std::size_t wIndex = ch * kernelDims[0];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                const std::size_t oIndexFull = oIndex + ox;
+                const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                    output[oIndexFull] += weights[wIndex + sx] *
+                                          input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>);
+} // namespace
+
+
 /**
- * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
+ * @brief Forward kernel for 2D ConvDepthWise on CPU backend.
 * @tparam I Input data type.
@@ -30,15 +106,22 @@ namespace Aidge {
 * @tparam B Bias data type.
 * @tparam O Output data type.
- * @param params tuple of Attributes from the Operator
- * @param dims Array of input dimensions.
+ * @param inputDims Array of input dimensions.
 * @param input_ const input Tensor.
 * @param weights_ const weight Tensor.
- * @param biases_ const Biais Tensor.
+ * @param biases_ const Bias Tensor.
 * @param output_ Output Tensor.
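+ * @param strideDims Array of stride dimensions.
+ * @param dilationDims Array of dilation dimensions (not yet taken into account).
+ * @param kernelDims Array of kernel dimensions.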
*/ template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *weights_, const void *biases_, void *output_) { +void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, + const std::array<DimSize_t, 2>& /*dilationDims*/, + const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 4>& inputDims, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) +{ // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); @@ -48,12 +131,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) / - static_cast<float>(std::get<0>(attrs)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) / - static_cast<float>(std::get<0>(attrs)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -61,40 +144,40 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at // weight (outCh, ch, kernelX, kernelY) // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) { - const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize; - B biasVal = ((!std::get<4>(attrs)) && biases != nullptr) ? biases[ch] : B(0); + for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { + for (std::size_t ch = 0; ch < inputDims[1]; ++ch) { + const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize; + B biasVal = (biases != nullptr) ? biases[ch] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1]; + const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); + const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? 
kernelDims[0] : inputDims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); + const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]); - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]); + const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); + const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + 
static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } } @@ -110,7 +193,7 @@ static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DFor Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>); + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>); static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index e7ce0892a6241009a8e80821e341b3209a19faa4..d7be46c251a82d1b631f4ad50e7175fa2f896d03 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -27,16 +27,63 @@ namespace Aidge { // class Conv_Op; // compute kernel registry for forward and backward +// Conv 1D +class ConvImpl1DForward_cpu + : public Registrable<ConvImpl1DForward_cpu, + std::tuple<DataType, DataType, DataType, DataType>, + void(const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 1>&, + const std::array<DimSize_t, 3> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)> {}; + +class ConvImpl1D_cpu : public OperatorImpl { + public: + ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) { + return std::make_unique<ConvImpl1D_cpu>(op); + } + + public: + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +// add cpu backend to Conv_Op<1> implementation registry +static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create); +} // namespace + +// Conv 2D class ConvImpl2DForward_cpu : public Registrable<ConvImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, - const void *, const void *, void *)> {}; + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 4> &, + DimSize_t, + const void *, + const void *, + const void *, + void *)> {}; class ConvImpl2DBackward_cpu : public Registrable<ConvImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, - const void *, const void *, void *)> {}; + void(const std::array<DimSize_t, 2>&, + const std::array<DimSize_t, 2>&, + 
const std::array<DimSize_t, 2>&,
+                              bool,
+                              const std::array<DimSize_t, 4> &,
+                              const void *,
+                              const void *,
+                              const void *,
+                              void *)> {};
 
 class ConvImpl2D_cpu : public OperatorImpl {
    public:
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index 00d34f6596780f42aa5864058ea543f046f8edb1..88a71c47244788f2da5e576c8ad5170a92561909 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -12,17 +12,100 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdint>  // std::int32_t (used by the Int32 kernel registrar below)
 
-#include "aidge/data/half.hpp"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/data/half.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Array of stride dimensions.
+ * @param dilationDims Array of dilation dimensions (not yet taken into account).
+ * @param kernelDims Array of kernel dimensions.
+ * @param inputDims Array of input dimensions.
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                                   const std::array<DimSize_t, 1>& /*dilationDims*/,
+                                   const std::array<DimSize_t, 1>& kernelDims,
+                                   const std::array<DimSize_t, 3>& inputDims,
+                                   DimSize_t outChannels,
+                                   const void *input_,
+                                   const void *weights_,
+                                   const void *biases_,
+                                   void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output X size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // output (batch, outCh, Xout)
+    // input  (batch, inCh, Xin)
+    // weight (outCh, inCh, kernelX)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                              input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16(
+        {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
+        Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D Convolution on CPU backend.
  * @tparam I Input data type.
@@ -30,15 +113,23 @@ namespace Aidge {
 * @tparam B Bias data type.
 * @tparam O Output data type.
- * @param params tuple of Attributes from the Operator
- * @param dims Array of input dimensions.
+ * @param inputDims Array of input dimensions.
 * @param input_ const input Tensor.
 * @param weights_ const weight Tensor.
- * @param biases_ const Biais Tensor.
+ * @param biases_ const Bias Tensor.
 * @param output_ Output Tensor.
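+ * @param strideDims Array of stride dimensions.
+ * @param dilationDims Array of dilation dimensions (not yet taken into account).
+ * @param kernelDims Array of kernel dimensions.
+ * @param outChannels Number of output channels.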
*/ template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *weights_, const void *biases_, void *output_) { +void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims, + const std::array<DimSize_t, 2>& /*dilationDims*/, + const std::array<DimSize_t, 2>& kernelDims, + const std::array<DimSize_t, 4> &inputDims, + DimSize_t outChannels, + const void *input_, + const void *weights_, + const void *biases_, + void *output_) +{ // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); @@ -47,12 +138,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar /* // output H size const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / - static_cast<float>(std::get<0>(attrs)[0])); + static_cast<std::size_t>(static_cast<float>(inputDims[0] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0])); // output W size const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / - static_cast<float>(std::get<0>(attrs)[1])); + static_cast<std::size_t>(static_cast<float>(inputDims[1] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1])); // TODO: kernel computation // output (Xout, Yout, outCh, batch) @@ -61,22 +152,22 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar // does not take Dilation attribute into account for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * std::get<0>(attrs)[0]; - const std::size_t iy = oy * std::get<0>(attrs)[1]; + const std::size_t ix = ox * strideDims[0]; + const std::size_t iy = oy * strideDims[1]; - for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { - const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox)); + for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { + const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox)); B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); - for (std::size_t batch = 0; batch < dims[3]; ++batch) { + for (std::size_t batch = 0; batch < inputDims[3]; ++batch) { output[oIndex + batch] = biasVal; } - for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { - for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) { - for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) { + for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) { + for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) { + for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) { const std::size_t wIndex = - outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx)); - std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); - for (std::size_t batch = 0; batch < dims[3]; ++batch) { + outCh + outChannels * (inCh + inputDims[2] * (sy + kernelDims[1] * sx)); + std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx))); + for (std::size_t batch = 0; batch < inputDims[3]; ++batch) { output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; } } @@ -90,12 +181,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / - static_cast<float>(std::get<0>(attrs)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) / + static_cast<float>(strideDims[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / - static_cast<float>(std::get<0>(attrs)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) / + static_cast<float>(strideDims[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) @@ -103,42 +194,42 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar // weight (outCh, inCh, kernelX, kernelY) // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { - const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize; - // If NoBias or bias = nullptr, set B(0) - B biasVal = ((!std::get<5>(attrs)) && biases != nullptr) ? biases[outCh] : B(0); + for (std::size_t batch = 0; batch < inputDims[0]; ++batch) { + for (std::size_t outCh = 0; outCh < outChannels; ++outCh) { + const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize; + // If bias = nullptr, set B(0) + B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1]; + for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) { + const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3]; + const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]); + const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]); + const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]); - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]); + const signedsize ix = static_cast<signedsize>(ox * strideDims[0]); + const signedsize iy = static_cast<signedsize>(oy * strideDims[1]); if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + 
static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } } diff --git a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp index 3cdcefa9e1c865f66b64ed527605d46af31be8af..74db1128c111ae62bedb6fa61682abca62429cdb 100644 --- a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp @@ -14,6 +14,7 @@ #include <numeric> // std::accumulate #include <cstddef> // std::size_t +#include <cstdint> // std::int32_t, std::int64_t #include <functional> // std::multiplies #include "aidge/utils/Registrar.hpp" @@ -76,7 +77,7 @@ static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32( Aidge::DivImpl_cpu_forward_kernel<float, float, float>); static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32( {DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::DivImpl_cpu_forward_kernel<int, int, int>); + Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>); static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64( {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::DivImpl_cpu_forward_kernel<double, double, double>); diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index fedd8b38b2dbee9e5fd288a07d5cd722470723e5..f21cd0ff330f61b942eb55f036c7b23458a5959a 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -12,14 +12,14 @@ #ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_ #define AIDGE_CPU_OPERATOR_FCIMPL_H_ +#include <array> +#include <memory> +#include <vector> + #include "aidge/backend/OperatorImpl.hpp" #include "aidge/operator/FC.hpp" #include "aidge/utils/Registrar.hpp" #include "aidge/utils/Types.h" -#include 
"aidge/backend/cpu/data/GetCPUPtr.h" -#include <memory> -#include <vector> -#include <array> namespace Aidge { // class FC_Op; @@ -30,27 +30,27 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu, DataType, DataType, DataType>, - void(const FC_Op::Attrs&, - const DimSize_t, - const DimSize_t, - const void *, - const void *, - const void *, - void *)> {}; + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + const void *, + const void *, + void *)> {}; class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Attrs&, - const DimSize_t, - const DimSize_t, - const void *, - const void *, - const void *, - void *, - void *, - void *)> {}; + void(const DimSize_t, + const DimSize_t, + const DimSize_t, + const void *, + const void *, + const void *, + void *, + void *, + void *)> {}; class FCImpl_cpu : public OperatorImpl { public: diff --git a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp index 50fb5f49033cccd3c554d692bc336c7d5d677384..c93a44d922dce2dc18df94bf903134ddadf5256f 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp @@ -19,8 +19,16 @@ namespace Aidge { template <class I, class O, class W, class B> -void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, - const void* input_, const void* originalInput_, const void* weight_, void* output_, void* weightGrad_, void* biasesGrad_) { +void FCImpl_cpu_backward_kernel(const DimSize_t batchSize, + const DimSize_t inputFeatureSize, + const DimSize_t outputFeatureSize, + const void* input_, + const void* originalInput_, + const void* weight_, + void* output_, + void* weightGrad_, + void* biasesGrad_) +{ // FIXME: missing FC attributes as arguments const I* input = static_cast<const I*>(input_); const I* originalInput = static_cast<const I*>(originalInput_); @@ -31,37 +39,37 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batch // bias grad - if (std::get<1>(attrs)) { // no bias - std::fill(biasesGrad, biasesGrad + std::get<0>(attrs), B(0)); + if (biasesGrad == nullptr) { // no bias + std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0)); } else { - for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { // nb outputs + for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs B sum{0}; for (std::size_t b = 0; b < batchSize; ++b) { - sum += input[b*std::get<0>(attrs) + o]; + sum += input[b*outputFeatureSize + o]; } biasesGrad[o] = sum; } } // weight grad - for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { - for (std::size_t c = 0; c < oneInputSize; ++c) { + for (std::size_t o = 0; o < outputFeatureSize; ++o) { + for (std::size_t c = 0; c < inputFeatureSize; ++c) { W sum{0}; for (std::size_t b = 0; b < batchSize; ++b) { - sum += originalInput[b*oneInputSize + c]*input[b*std::get<0>(attrs) + o]; + sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o]; } - weightGrad[o*oneInputSize + c] = sum; + weightGrad[o*inputFeatureSize + c] = sum; } } // input grad for (std::size_t b = 0; b < batchSize; ++b) { - for (std::size_t c = 0; c < oneInputSize; ++c) { + for (std::size_t c = 0; c < inputFeatureSize; ++c) { O sum{0}; - for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { - sum += weight[o*oneInputSize + c] * 
input[b*std::get<0>(attrs) + o]; + for (std::size_t o = 0; o < outputFeatureSize; ++o) { + sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o]; } - output[b*oneInputSize + c] = sum; + output[b*inputFeatureSize + c] = sum; } } } diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp index 64f3b3e18f7255b74decad5137cbb5ccd6966123..caeacd1bda2fde086fd649c50a733e790fc2c000 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp @@ -27,9 +27,9 @@ namespace Aidge { // const B* biases = static_cast<const B*>(biases_); // O* output = static_cast<O*>(output_); -// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) { // std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx]; +// const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] = bias; // } @@ -39,10 +39,10 @@ namespace Aidge { // for (std::size_t iy = 0; iy < dims[1]; ++iy) { // for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { // const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { +// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) { // const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) + -// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; +// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize + +// outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; // } @@ -63,9 +63,9 @@ namespace Aidge { // // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] -// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) { // std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx]; +// const B bias = std::get<0>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // output[oIndex + batch] = bias; // } @@ -74,8 +74,8 @@ namespace Aidge { // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // const std::size_t oIndex = dims[1] * batch; // for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { -// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; +// for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) { +// std::size_t wIndex = i * outputFeatureSize + outCh; // (iIndex*outputFeatureSize + oIndex)/dims[3]; // output[oIndex + outCh] += weights[wIndex] * input[i + batch]; // } // } @@ -83,29 +83,34 @@ namespace Aidge { // } template <class I, class W, class B, class O> -void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, - const void* input_, const void* weights_, const void* biases_, void* output_) { +void FCImpl_cpu_forward_kernel(const DimSize_t batchSize, + const DimSize_t inputFeatureSize, + const DimSize_t outputFeatureSize, + const void* input_, + const void* weights_, + const void* biases_, + void* output_) { // FIXME: missing FC attributes as arguments const I* input = static_cast<const I*>(input_); const W* weights = static_cast<const W*>(weights_); const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); - if (std::get<1>(attrs)) { - std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0)); + if (biases == nullptr) { + std::fill(output, output+(batchSize*outputFeatureSize), B(0)); } else { for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs))); + std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize)); } } for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { - output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, - input + (batch + 1)*oneInputSize, - weights + out*oneInputSize, - output[out + batch*std::get<0>(attrs)]); + for (std::size_t out = 0; out < outputFeatureSize; ++out) { + output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize, + input + (batch + 1)*inputFeatureSize, + weights + out*inputFeatureSize, + output[out + batch*outputFeatureSize]); } } } diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index 880a59b3aeae2598f6b1ed5e287af18fd7bcfd6f..c9ad909eee631189a81067eda076c0b8cbb13377 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -25,11 +25,19 @@ namespace Aidge { // compute kernel registry for forward and backward class LeakyReLUImplForward_cpu - : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { -}; + : public Registrable<LeakyReLUImplForward_cpu, + std::tuple<DataType, DataType>, + void(const float, + std::size_t, + const void*, + void*)> {}; class LeakyReLUImplBackward_cpu - : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { -}; + : public Registrable<LeakyReLUImplBackward_cpu, + std::tuple<DataType, DataType>, + void(const float, + 
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
index 880a59b3aeae2598f6b1ed5e287af18fd7bcfd6f..c9ad909eee631189a81067eda076c0b8cbb13377 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
@@ -25,11 +25,19 @@ namespace Aidge {
// compute kernel registry for forward and backward
class LeakyReLUImplForward_cpu
-    : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
-};
+    : public Registrable<LeakyReLUImplForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const float,
+                              std::size_t,
+                              const void*,
+                              void*)> {};
class LeakyReLUImplBackward_cpu
-    : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
-};
+    : public Registrable<LeakyReLUImplBackward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const float,
+                              std::size_t,
+                              const void*,
+                              void*)> {};

class LeakyReLUImpl_cpu : public OperatorImpl {
public:
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp
index 949e6af66a476693b347f38a45edea10e21bc933..e308d940890101ad396c7ed20541bbc4f8b035cf 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp
@@ -18,17 +18,17 @@
namespace Aidge {
template <class I, class O>
-void LeakyReLUImpl_cpu_backward_kernel(const LeakyReLU_Op::Attrs& attrs,
+void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
                                        std::size_t inputLenght,
                                        const void* input_,
                                        void* output_) {

    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
-    I negativeSlope = static_cast<I>(std::get<0>(attrs));
+    const I negativeSlope = static_cast<const I>(negativeSlope_);

    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] > 0 ? input[i] : negativeSlope*input[i];
+        output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i];
    }
}
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
index d10b32e18ee983fc1270bc4a7cce35e18f601071..450d0bf4ace4879f90e0104e14b5bf61366e96c2 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
@@ -18,17 +18,17 @@
namespace Aidge {
template <class I, class O>
-void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs,
+void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
                                      std::size_t inputLenght,
                                      const void* input_,
                                      void* output_) {

    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);
-    const I negativeSlope = static_cast<const I>(std::get<0>(attrs));
+    const I negativeSlope = static_cast<const I>(negativeSlope_);

    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
+        output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope;
    }
}
diff --git a/include/aidge/backend/cpu/operator/LnImpl.hpp b/include/aidge/backend/cpu/operator/LnImpl.hpp
new file mode 100755
index 0000000000000000000000000000000000000000..faa03855a4f881f2a644ebc4023871b7acd6275c
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/LnImpl.hpp
@@ -0,0 +1,54 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_ +#define AIDGE_CPU_OPERATOR_LNIMPL_H_ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Ln.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> + +namespace Aidge { +// class Ln_Op; + +// compute kernel registry for forward and backward +class LnImplForward_cpu + : public Registrable<LnImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { +}; +class LnImplBackward_cpu + : public Registrable<LnImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> { +}; + +class LnImpl_cpu : public OperatorImpl { +public: + LnImpl_cpu(const Ln_Op& op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<LnImpl_cpu> create(const Ln_Op& op) { + return std::make_unique<LnImpl_cpu>(op); + } + + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + + void forward() override final; + + void backward() override final; +}; + +namespace { +static Registrar<Ln_Op> registrarLnImpl_cpu("cpu", Aidge::LnImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp new file mode 100755 index 0000000000000000000000000000000000000000..5fb82e35f8855d9d6e2eb85e9ab380c9f1fc9b90 --- /dev/null +++ b/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp @@ -0,0 +1,50 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ + +#include <cstddef> // std::size_t + +#include "aidge/backend/cpu/operator/LnImpl.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { +template <class I, class GI, class GO> +void LnImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* input_, const void* grad_output_, + void* grad_input_) { + + const I* input = static_cast<const I*>(input_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + const float eps = 1.0e-20f; + + for (std::size_t i = 0; i < inputLenght; ++i) { + if (input[i] > I(eps)) { + grad_input[i] = grad_output[i] / input[i]; + } else { + grad_input[i] = GI(0); + } + } +} + +namespace { +static Registrar<LnImplBackward_cpu> registrarLnImplBackward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::LnImpl_cpu_backward_kernel<float, float, float>); +static Registrar<LnImplBackward_cpu> registrarLnImplBackward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::LnImpl_cpu_backward_kernel<double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp new file mode 100755 index 0000000000000000000000000000000000000000..ebb975512a6e7c0f7225c305372f0ec6e7060786 --- /dev/null +++ b/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp @@ -0,0 +1,47 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_
+
+#include <cmath>    // std::log
+#include <cstddef>  // std::size_t
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/LnImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void LnImpl_cpu_forward_kernel(std::size_t inputLenght,
+                               const void* input_,
+                               void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+    const float eps = 1.0e-20f;
+
+//#pragma omp parallel for if (inputLenght > 1024)
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        if (input[i] > I(eps)) {
+            output[i] = std::log(input[i]);
+        } else {
+            output[i] = std::log(I(eps));
+        }
+    }
+}
+
+namespace {
+static Registrar<LnImplForward_cpu> registrarLnImplForward_cpu_Float32(
+    {DataType::Float32, DataType::Float32}, Aidge::LnImpl_cpu_forward_kernel<float, float>);
+static Registrar<LnImplForward_cpu> registrarLnImplForward_cpu_Float64(
+    {DataType::Float64, DataType::Float64}, Aidge::LnImpl_cpu_forward_kernel<double, double>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_ */
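The eps clamp is worth spelling out: ln(x) diverges as x approaches 0, so both Ln kernels saturate below eps instead of emitting -inf or dividing by zero. A short numeric sketch of the assumed behavior:

    // Forward: Ln(0.0f) returns std::log(1e-20f), roughly -46.05, not -inf.
    // Backward: for input[i] <= eps the kernel writes GI(0) rather than
    // grad_output[i] / input[i], which would blow up as input[i] -> 0.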
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
index d2d30aa7db5b1522712faa846ef33e1b21772d5e..4dd30e1fb939837f6861313eda04d7d05f3c8110 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
@@ -29,12 +29,22 @@ namespace Aidge {
// compute kernel registry for forward and backward
class MaxPoolingImpl2DForward_cpu
    : public Registrable<MaxPoolingImpl2DForward_cpu,
-                        std::tuple<DataType, DataType>,
-                        void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                             const std::array<DimSize_t, 2>&,
+                             const bool,
+                             const std::array<DimSize_t, 4> &,
+                             const void *,
+                             void *)> {};
class MaxPoolingImpl2DBackward_cpu
    : public Registrable<MaxPoolingImpl2DBackward_cpu,
-                        std::tuple<DataType, DataType>,
-                        void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                             const std::array<DimSize_t, 2>&,
+                             const bool,
+                             const std::array<DimSize_t, 4> &,
+                             const void *,
+                             void *)> {};

class MaxPoolingImpl2D_cpu : public OperatorImpl {
public:
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
index c4baccdee5def0be93be42b5657d77d21240328c..79a7bd154f4d4e19a71d719597992466c37c6a9f 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
@@ -12,15 +12,15 @@
#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_

-#include "aidge/utils/Registrar.hpp"
+#include <array>
+#include <cmath>
+#include <tuple>

#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
-#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/data/Data.hpp"
-#include <array>
-#include <tuple>
-#include <cmath>
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"

namespace Aidge {
/**
@@ -33,17 +33,16 @@ namespace Aidge {
 * @param output_ Output Tensor.
 */
template <class I, class O>
-void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs,
-                                         const std::array<DimSize_t, 4> &dims,
-                                         const void *input_,
-                                         void *output_) {
+void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                                         const std::array<DimSize_t, 2>& kernelDims,
+                                         const bool /*ceilMode*/,
+                                         const std::array<DimSize_t, 4> &dims,
+                                         const void *input_,
+                                         void *output_) {
    // FIXME: missing convolution parameters as arguments
    const I *input = static_cast<const I *>(input_);
    O *output = static_cast<O *>(output_);

-    std::array<DimSize_t, 2> strideDims = std::get<0>(attrs);
-    std::array<DimSize_t, 2> kernelDims = std::get<1>(attrs);
-
    // output H size
    const std::size_t oxSize =
        static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
diff --git a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
index e1387768ea02e2a9f35790c64c7674c321a1faa7..c44199ba4797682362f4a7cb223435d6d1585443 100644
--- a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
@@ -14,6 +14,8 @@

#include "aidge/utils/Registrar.hpp"

+#include <cstdint>  // std::int32_t, std::int64_t
+
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/MulImpl.hpp"

@@ -35,13 +37,13 @@ void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
        totalElements *= dimSize;
    }

-    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);

        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
-
+
        output[oIndex] = input_1[idx1] * input_2[idx2];
    }
}

@@ -50,12 +52,15 @@
namespace {
static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float32(
    {DataType::Float32, DataType::Float32, DataType::Float32},
    Aidge::MulImpl_cpu_forward_kernel<float, float, float>);
-static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32(
-    {DataType::Int32, DataType::Int32, DataType::Int32},
-    Aidge::MulImpl_cpu_forward_kernel<int, int, int>);
static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float64(
    {DataType::Float64, DataType::Float64, DataType::Float64},
    Aidge::MulImpl_cpu_forward_kernel<double, double, double>);
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32(
+    {DataType::Int32, DataType::Int32, DataType::Int32},
+    Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int64(
+    {DataType::Int64, DataType::Int64, DataType::Int64},
+    Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>);
} // namespace
} // namespace Aidge
diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
index b3c91a43419e9a5e9e1299f4a2118a51b6b64fc7..c6e41c29fd203fdd80b2acb9ad0dfcac91a0f66c 100644
--- a/include/aidge/backend/cpu/operator/PadImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -25,18 +25,54 @@ namespace Aidge {
// class Pad_Op;

+// compute kernel registry for forward and backward
+class PadImpl1DForward_cpu
+    : public Registrable<PadImpl1DForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const
std::array<DimSize_t, 2>&,
+                              const PadBorderType,
+                              const double,
+                              const std::array<DimSize_t, 3> &,
+                              const void *,
+                              void *)> {};
+
+class PadImpl1D_cpu : public OperatorImpl {
+public:
+    PadImpl1D_cpu(const Pad_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<PadImpl1D_cpu> create(const Pad_Op<1> &op) {
+        return std::make_unique<PadImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Pad_Op<1> implementation registry
+static Registrar<Pad_Op<1>> registrarPadImpl1D_cpu("cpu", Aidge::PadImpl1D_cpu::create);
+} // namespace
+
// compute kernel registry for forward and backward
class PadImpl2DForward_cpu
    : public Registrable<PadImpl2DForward_cpu,
                         std::tuple<DataType, DataType>,
-                        void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                             void *)> {};
+                        void(const std::array<DimSize_t, 4>&,
+                             const PadBorderType,
+                             const double,
+                             const std::array<DimSize_t, 4> &,
+                             const void *,
+                             void *)> {};
class PadImpl2DBackward_cpu
    : public Registrable<PadImpl2DBackward_cpu,
                         std::tuple<DataType, DataType>,
-                        void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                             void *)> {};
+                        void(const std::array<DimSize_t, 4>&,
+                             const PadBorderType,
+                             const double,
+                             const std::array<DimSize_t, 4> &,
+                             const void *,
+                             void *)> {};

class PadImpl2D_cpu : public OperatorImpl {
public:
diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
index f6f00bc4df661921708e605f44056a77bb8125f4..26c873c8fe7f140b09b31d0f1a9d4125acbcf50f 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
@@ -12,16 +12,95 @@
#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_

-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>  // std::max, std::min
+#include <array>
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t

#include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <array>
-#include <algorithm>

namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Padding on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param beginEndBorders Sizes of the borders added before and after the axis.
+ * @param borderType Padding mode (Constant, Edge, Reflect or Wrap).
+ * @param borderValue Value written into the border for Constant padding.
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param output_ Output Tensor.
+ */ +template <class I, class O> +void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders, + const PadBorderType borderType, + const double borderValue, + const std::array<DimSize_t, 3>& dims, + const void *input_, + void *output_) +{ + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + const std::size_t oxSize = dims[2] + beginEndBorders[0] + beginEndBorders[1]; + + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < dims[1]; ++ch) { + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2]; + const std::size_t oIndex = (ch + batch*dims[1]) * oxSize; + + for (unsigned int ox = 0; ox < oxSize; ++ox) { + const std::size_t oIndexFull = oIndex + ox; + + O outputValue = static_cast<O>(borderValue); + + if (borderType == PadBorderType::Constant) { + int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]); + + if (ix >= 0 && ix < static_cast<int>(dims[2])) { + outputValue = input[iIndex + static_cast<std::size_t>(ix)]; + } + } + else if (borderType == PadBorderType::Edge) { + int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]))); + + outputValue = input[iIndex + static_cast<std::size_t>(ix)]; + } + else if (borderType == PadBorderType::Reflect) { + int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]); + + if (ix < 0) + ix = 0 - ix; + if (ix >= static_cast<int>(dims[2])) + ix = static_cast<int>(dims[2]) - ix; + + outputValue = input[iIndex + static_cast<std::size_t>(ix)]; + } + else if (borderType == PadBorderType::Wrap) { + int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])) % static_cast<int>(dims[2]); + + outputValue = input[iIndex + static_cast<std::size_t>(ix)]; + } + + output[oIndexFull] = outputValue; + } + } + } +} + +namespace { +static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, + PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>); +static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>); +static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, + PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>); +} // namespace + + /** * @brief Forward kernel for 2D Padding on CPU backend. * @tparam I Input data type. @@ -32,58 +111,62 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class O> -void PadImpl2D_cpu_forward_kernel(const Pad_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, - const void *input_, void *output_) +void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorders, + const PadBorderType borderType, + const double borderValue, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); - const std::size_t oySize = dims[2] + std::get<0>(attrs)[0] + std::get<0>(attrs)[1]; - const std::size_t oxSize = dims[3] + std::get<0>(attrs)[2] + std::get<0>(attrs)[3]; + const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[1]; + const std::size_t oxSize = dims[3] + beginEndBorders[2] + beginEndBorders[3]; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - for (unsigned int oy = 0; oy < oySize; ++oy) { - for (unsigned int ox = 0; ox < oxSize; ++ox) { + for (std::uint32_t oy = 0; oy < oySize; ++oy) { + for (std::uint32_t ox = 0; ox < oxSize; ++ox) { const std::size_t oIndexFull = oIndex + ox*oySize + oy; - O outputValue = std::get<2>(attrs); + O outputValue = static_cast<O>(borderValue); - if (std::get<1>(attrs) == PadBorderType::Constant) { - int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]); - int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]); + if (borderType == PadBorderType::Constant) { + std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]); + std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]); - if (ix >= 0 && ix < static_cast<int>(dims[3]) && iy >= 0 && iy < static_cast<int>(dims[2])) { + if (ix >= 0 && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0 && iy < static_cast<std::int32_t>(dims[2])) { outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; } } - else if (std::get<1>(attrs) == PadBorderType::Edge) { - int ix = std::max(0, std::min(static_cast<int>(dims[3]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]))); - int iy = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]))); + else if (borderType == PadBorderType::Edge) { + std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]))); + std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]))); outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; } - else if (std::get<1>(attrs) == PadBorderType::Reflect) { - int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]); - int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]); + else if (borderType == PadBorderType::Reflect) { + std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]); + std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]); if (ix < 0) ix = 0 - ix; if (iy < 0) iy = 0 - iy; - if (ix >= static_cast<int>(dims[3])) - ix = static_cast<int>(dims[3]) - ix; - if (iy >= 
static_cast<int>(dims[2])) - iy = static_cast<int>(dims[2]) - iy; + if (ix >= static_cast<std::int32_t>(dims[3])) + ix = static_cast<std::int32_t>(dims[3]) - ix; + if (iy >= static_cast<std::int32_t>(dims[2])) + iy = static_cast<std::int32_t>(dims[2]) - iy; outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; } - else if (std::get<1>(attrs) == PadBorderType::Wrap) { - int ix = (static_cast<int>(dims[3]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])) % static_cast<int>(dims[3]); - int iy = (static_cast<int>(dims[2]) + static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])) % static_cast<int>(dims[2]); + else if (borderType == PadBorderType::Wrap) { + std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]); + std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]); outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)]; } @@ -101,7 +184,7 @@ static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32( Aidge::PadImpl2D_cpu_forward_kernel<float, float>); static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32( {DataType::Int32, DataType::Int32}, - Aidge::PadImpl2D_cpu_forward_kernel<int, int>); + Aidge::PadImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>); static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64( {DataType::Float64, DataType::Float64}, Aidge::PadImpl2D_cpu_forward_kernel<double, double>); diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp index 43a9714ad2d32228fac9bf9c526191f0cec5bfa0..1bd932e43608d98f737cc9046aed74b2fec6abc6 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp @@ -18,15 +18,15 @@ #include "aidge/utils/Registrar.hpp" namespace Aidge { -template <class O, class GI, class GO> +template <class I, class GI, class GO> void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght, - const void* output_, const void* grad_output_, - void* grad_input_) { - const O* output = static_cast<const O*>(output_); + const void* input_, const void* grad_output_, + void* grad_input_) { + const I* input = static_cast<const I*>(input_); const GO* grad_output = static_cast<const GO*>(grad_output_); GI* grad_input = static_cast<GI*>(grad_input_); for (std::size_t i = 0; i < inputLenght; ++i) { - grad_input[i] = (output[i] > GO(0)) ? GI(grad_output[i]) : GI(0); + grad_input[i] = (input[i] > 0) ? grad_output[i] : 0; } } diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp index aa533786d3ce5b6f5cd501b6ba74b1be2823d407..af9c65590c7182185c9d79669dde49e592cbeb5d 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, //#pragma omp parallel for if (inputLenght > 1024) for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = input[i] > 0 ? input[i] : 0; + output[i] = (input[i] > 0) ? 
input[i] : 0; } } diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp index 7355a2bd46f45ab5019a31832001ae3335c1d8e8..8d784c38dc006ea82f040dfe83b4bef05908dd68 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp @@ -28,12 +28,20 @@ namespace Aidge { // Every DIM class ReduceMeanImplForward_cpu : public Registrable<ReduceMeanImplForward_cpu, - std::tuple<DataType, DataType>, - void(const ReduceMean_Op::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; + std::tuple<DataType, DataType>, + void(const std::vector<std::int32_t>&, + DimSize_t, + const std::vector<DimSize_t>&, + const void *, + void *)> {}; class ReduceMeanImpl1DBackward_cpu : public Registrable<ReduceMeanImpl1DBackward_cpu, - std::tuple<DataType, DataType>, - void(const ReduceMean_Op::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {}; + std::tuple<DataType, DataType>, + void(const std::vector<std::int32_t>&, + DimSize_t, + const std::vector<DimSize_t>&, + const void *, + void *)> {}; class ReduceMeanImpl_cpu : public OperatorImpl { public: diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp index 6533f7b19eac07d429cd8c5ed05ea082457b9e7b..bba355e16958bb1a22bde1d24304d992a658ade8 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp @@ -26,15 +26,15 @@ namespace Aidge { template <class I, class O> -void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attrs, - const std::vector<DimSize_t>& inputDims, - const void* input_, - void* output_) { +void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes, + DimSize_t /*keepDims*/, + const std::vector<DimSize_t>& inputDims, + const void* input_, + void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - const std::vector<std::int32_t>& axes = std::get<0>(attrs); const std::size_t nb_dims = inputDims.size(); const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>()); diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp index 66bb42f7fb909ee9b6c91a6321ee3fa32c977626..8590169272818a225fe4299150f873733cdd9cd9 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -26,11 +26,23 @@ namespace Aidge { // compute kernel registry for forward and backward class ScalingImplForward_cpu - : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { -}; + : public Registrable<ScalingImplForward_cpu, + std::tuple<DataType, DataType>, + void(const float, + const std::size_t, + const bool, + std::size_t, + const void*, + void*)> {}; class ScalingImplBackward_cpu - : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { -}; + : public Registrable<ScalingImplBackward_cpu, + std::tuple<DataType, DataType>, + void(const float, + const std::size_t, + const bool, + std::size_t, + const void*, + void*)> {}; class ScalingImpl_cpu : public OperatorImpl { public: diff --git 
a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp index df8e1a7e7b02a4ad032d6f09fae3ae2cd8a42eff..c654265dd6f650129201037976d89da4b0f39d96 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp @@ -73,22 +73,21 @@ O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutput } template <class I, class O> -void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs, - std::size_t inputLenght, - const void* input_, - void* output_) { +void ScalingImpl_cpu_forward_kernel(const float scalingFactor, + const std::size_t quantizedNbBits, + const bool isOutputUnsigned, + std::size_t inputLenght, + const void* input_, + void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - const I& scalingFactor = static_cast<const I&>(std::get<0>(attrs)); - const std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs)); - const bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = input[i] * scalingFactor; + output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor)); if(quantizedNbBits > 0) { - output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned); + output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned); } } } diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp index 2e43023d678c8a4258c80fb91d82d2858fcdf188..34340e6166a48b465c7723e85d91c195bfb42277 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp @@ -28,7 +28,7 @@ class SigmoidImplForward_cpu : public Registrable<SigmoidImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { }; class SigmoidImplBackward_cpu - : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> { + : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> { }; class SigmoidImpl_cpu : public OperatorImpl { @@ -40,7 +40,10 @@ public: } Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - void forward() override; + + void forward() override final; + + void backward() override final; }; namespace { diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4ceb3bd7ed9a3fb739591eee488f8035770fef18 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp @@ -0,0 +1,43 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_
+
+#include <cstddef>  // std::size_t
+
+#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class O, class GI, class GO>
+void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                     const void* output_, const void* grad_output_,
+                                     void* grad_input_) {
+    const O* output = static_cast<const O*>(output_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
+    }
+}
+
+namespace {
+static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float32(
+    {DataType::Float32, DataType::Float32, DataType::Float32},
+    Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>);
+static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float64(
+    {DataType::Float64, DataType::Float64, DataType::Float64},
+    Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
index a53650942540e6368855ffe19e2f7f651ab5b6bc..24ba11a0bca7f3fa15f9ac1e2c13e29f88eaf074 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
@@ -19,15 +19,19 @@
namespace Aidge {
template <class I, class O>
void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                    const void* input_,
-                                    void* output_) {
+                                    const void* input_,
+                                    void* output_) {

    const I* input = static_cast<const I*>(input_);
    O* output = static_cast<O*>(output_);

//#pragma omp parallel for if (inputLenght > 1024)
    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = static_cast<O>(1.0) / (static_cast<O>(1.0) + std::exp(-input[i]));
+        if (input[i] > I(0)) {
+            output[i] = O(1) / (O(1) + std::exp(-input[i]));
+        } else {
+            output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
+        }
    }
}
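The new branch is the standard numerically stable sigmoid: for strongly negative inputs exp(-x) overflows, so the kernel switches to the algebraically identical exp(x) / (1 + exp(x)), where exp(x) merely underflows towards 0. A small sketch of the assumed float behavior:

    // x = -100.0f: exp(-x) = exp(100) overflows float to +inf, so the naive
    // 1 / (1 + exp(-x)) only reaches 0 through inf arithmetic; the rewritten
    // branch computes exp(-100) / (1 + exp(-100)) ≈ 3.7e-44 cleanly.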
diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..61aed1553bfbd2e67fc837ec6ea8d80b26ef3558
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp
@@ -0,0 +1,67 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H__
+#define AIDGE_CPU_OPERATOR_SLICEIMPL_H__
+
+#include <memory>
+#include <vector>
+#include <array>
+#include <cstdint>  // std::int8_t, std::int64_t
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Slice.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+namespace Aidge {
+// class Slice_Op;
+
+// compute kernel registry for forward and backward
+class SliceImplForward_cpu
+    : public Registrable<SliceImplForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const std::vector<std::int64_t>&,
+                              const std::vector<std::int64_t>&,
+                              const std::vector<std::int8_t>&,
+                              const std::vector<std::int64_t>&,
+                              const std::vector<DimSize_t>&,
+                              const void*,
+                              void*)> {};
+class SliceImplBackward_cpu
+    : public Registrable<SliceImplBackward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const std::vector<std::int64_t>&,
+                              const std::vector<std::int64_t>&,
+                              const std::vector<std::int8_t>&,
+                              const std::vector<std::int64_t>&,
+                              const std::vector<DimSize_t>&,
+                              const void*,
+                              void*)> {};
+
+class SliceImpl_cpu : public OperatorImpl {
+public:
+    SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) {
+        return std::make_unique<SliceImpl_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create);
+}
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..31e409369cc640bbda9f54c54652af7f72b509b6
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
@@ -0,0 +1,101 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>   // std::int8_t, std::int64_t
+#include <iterator>
+#include <numeric>   // std::accumulate
+#include <vector>
+
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/backend/cpu/operator/SliceImpl.hpp"
+
+namespace Aidge {
+
+template<class I, class O>
+void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
+                                  const std::vector<std::int64_t>& ends,
+                                  const std::vector<std::int8_t>& axes,
+                                  const std::vector<std::int64_t>& steps,
+                                  const std::vector<DimSize_t>& inputDims,
+                                  const void* input_,
+                                  void* output_)
+{
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    const std::size_t nbDims = inputDims.size();
+    std::vector<DimSize_t> dims = inputDims;
+    DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    const I* inputAccumulation = input;
+    I* outputAccumulation = nullptr;
+    const std::size_t nbAxes = starts.size();
+    for (std::size_t i = 0; i < nbAxes; ++i) {
+        const DimIdx_t axis = axes[i] >= 0 ?
+                                  static_cast<DimIdx_t>(axes[i]) :
+                                  static_cast<DimIdx_t>(axes[i] + static_cast<DimIdx_t>(inputDims.size()));
+        const DimSize_t start = std::min(starts[i] >= 0 ?
+                                             static_cast<DimSize_t>(starts[i]) :
+                                             static_cast<DimSize_t>(starts[i] + static_cast<std::int64_t>(inputDims[axis])),
+                                         dims[axis]-1);
+        const DimSize_t end = ends[i] >= 0 ?
+                                  static_cast<DimSize_t>(ends[i]) :
+                                  static_cast<DimSize_t>(ends[i] + static_cast<std::int64_t>(inputDims[axis]));
+        const std::int64_t step = steps[i];
+
+        const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step)));
+
+        outputAccumulation = new I[totalSize];
+        const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>());
+        const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>());
+        for (std::size_t outer = 0; outer < stride_pre; ++outer)
+        {
+            const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post;
+            const std::size_t idx_out = outer * stride_post * sliceSize;
+            std::size_t addedSlices = 0;
+            for (std::size_t inner = 0; inner < sliceSize; ++inner)
+            {
+                std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post),
+                            stride_post,
+                            std::next(outputAccumulation, idx_out + addedSlices * stride_post));
+                addedSlices++;
+            }
+        }
+        totalSize /= dims[axis];
+        totalSize *= sliceSize;
+        dims[axis] = sliceSize;
+
+        if (inputAccumulation != input) {
+            delete[] inputAccumulation;
+        }
+        inputAccumulation = outputAccumulation;
+
+    }
+    // Copy the final accumulation buffer into the kernel output
+    std::copy_n(inputAccumulation, totalSize, output);
+    if (outputAccumulation) {
+        delete[] outputAccumulation;
+    }
+}
+
+namespace {
+static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32(
+    {DataType::Float32, DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, float>);
+static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32(
+    {DataType::Int32, DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<std::int32_t, std::int32_t>);
+static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
+    {DataType::Float64, DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, double>);
+} // namespace
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
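Since the slicing loop above is dense, a worked example (hypothetical values) helps pin the semantics down: per axis, the kernel keeps sliceSize = ceil((end - start) / step) strided runs, and negative starts, ends, or axes wrap around the corresponding dimension.

    // Slicing {0,1,2,3,4,5} with starts={1}, ends={5}, axes={0}, steps={2}:
    // sliceSize = ceil((5 - 1) / 2) = 2, so output = {1, 3}.
    const float in[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
    float out[2];
    Aidge::SliceImpl_cpu_forward_kernel<float, float>(
        {1}, {5}, {0}, {2}, /*inputDims=*/{6}, in, out);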
diff --git a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
index 19b0bd21de129ed303151987323234364ce5f6f2..10e6f58bb44b63f2d8712dc0aa64e0660f3356b2 100644
--- a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
@@ -14,6 +14,10 @@

#include "aidge/utils/Registrar.hpp"

+#include <cstddef>  // std::size_t
+#include <cstdint>  // std::int32_t, std::int64_t
+#include <vector>
+
#include "aidge/backend/cpu/data/Broadcasting.hpp"
#include "aidge/backend/cpu/operator/SubImpl.hpp"

@@ -36,7 +40,7 @@ void SubImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
        totalElements *= dimSize;
    }

-    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);

        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
@@ -49,12 +53,15 @@
namespace {
static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32(
    {DataType::Float32, DataType::Float32, DataType::Float32},
    Aidge::SubImpl_cpu_forward_kernel<float, float, float>);
-static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
-    {DataType::Int32, DataType::Int32, DataType::Int32},
-    Aidge::SubImpl_cpu_forward_kernel<int, int, int>);
static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64(
    {DataType::Float64, DataType::Float64, DataType::Float64},
    Aidge::SubImpl_cpu_forward_kernel<double, double, double>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
+    {DataType::Int32, DataType::Int32, DataType::Int32},
+    Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int64(
+    {DataType::Int64, DataType::Int64, DataType::Int64},
+    Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>);
} // namespace
} // namespace Aidge
diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp
index 9e44f7bcd2b2392c634421478a096258b3e39795..0bf851e77d94c160c0362301df33d682347daf0c 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp
@@ -28,7 +28,7 @@
class TanhImplForward_cpu
    : public Registrable<TanhImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
};
class TanhImplBackward_cpu
-    : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
+    : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> {
};

class TanhImpl_cpu : public OperatorImpl {
@@ -40,7 +40,10 @@ public:
    }

    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    void forward() override;
+
+    void forward() override final;
+
+    void backward() override final;
};

namespace {
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp
new file mode 100644
index
0000000000000000000000000000000000000000..3a13c2cad21c35822fc6248590550e4716ee046d --- /dev/null +++ b/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp @@ -0,0 +1,43 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ + +#include <cstddef> // std::size_t + +#include "aidge/backend/cpu/operator/TanhImpl.hpp" +#include "aidge/utils/Registrar.hpp" + +namespace Aidge { +template <class O, class GI, class GO> +void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght, + const void* output_, const void* grad_output_, + void* grad_input_) { + const O* output = static_cast<const O*>(output_); + const GO* grad_output = static_cast<const GO*>(grad_output_); + GI* grad_input = static_cast<GI*>(grad_input_); + for (std::size_t i = 0; i < inputLenght; ++i) { + grad_input[i] = (O(1) - output[i] * output[i]) * grad_output[i]; + } +} + +namespace { +static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::TanhImpl_cpu_backward_kernel<float, float, float>); +static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::TanhImpl_cpu_backward_kernel<double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ */ diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 8ba6751bf4068a69ed07e362924f59d0f4aca6c5..feaa7e67a8d0bc726462aed99e557493d3b8d0c6 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -9,17 +9,17 @@ * ********************************************************************************/ -#include <cassert> +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" + +#include <array> #include <numeric> -#include <thread> #include <vector> -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/operator/AvgPooling.hpp" - -#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/AvgPooling.hpp" +#include "aidge/utils/Types.h" Aidge::Elts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -27,15 +27,18 @@ Aidge::Elts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*in } void Aidge::AvgPoolingImpl2D_cpu::forward() { - assert(mOp.getRawInput(0) && "missing input #0"); + const auto& op_ = dynamic_cast<const AvgPooling_Op<2>&>(mOp); + assert(op_.getInput(0) && "missing input #0"); // Find the correct kernel type - auto kernelFunc = - Registrar<AvgPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + auto kernelFunc = Registrar<AvgPoolingImpl2DForward_cpu>::create( + {op_.getInput(0)->dataType(), + 
op_.getOutput(0)->dataType()});

    // Call kernel
-    kernelFunc(dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+               op_.kernelDims(),
+               op_.getInput(0)->template dims<4>(),
+               getCPUPtr(op_.getInput(0)),
+               getCPUPtr(op_.getOutput(0)));
}
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index 96179d11850624f831333c9a4badaddf2221ecff..3046eea9bd241732daf39cce1783b5ee50de01c7 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -9,7 +9,9 @@
 *
 ********************************************************************************/

-#include <cassert>
+#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
+
+
#include <numeric> // std::accumulate
#include <vector>

@@ -17,7 +19,6 @@
#include "aidge/backend/cpu/data/GetCPUPtr.h"
#include "aidge/operator/BatchNorm.hpp"
-#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
#include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp"

Aidge::Elts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
@@ -26,27 +27,29 @@ Aidge::Elts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inp
}

void Aidge::BatchNormImpl2D_cpu::forward() {
-    assert(mOp.getRawInput(0) && "missing input #0");
-    assert(mOp.getRawInput(1) && "missing input #1");
-    assert(mOp.getRawInput(2) && "missing input #2");
-    assert(mOp.getRawInput(3) && "missing input #3");
-    assert(mOp.getRawInput(4) && "missing input #4");
+    const auto& op_ = dynamic_cast<const BatchNorm_Op<2>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(2), "missing input #2 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(3), "missing input #3 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(4), "missing input #4 for BatchNorm Operator");

-    assert(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims() == 4);
+    AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, "BatchNorm output should have 4 dimensions");
    // Find the correct kernel type
    auto kernelFunc =
-        Registrar<BatchNormImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-                                                       std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-                                                       std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        Registrar<BatchNormImpl2DForward_cpu>::create({op_.getInput(0)->dataType(),
+                                                       op_.getInput(1)->dataType(),
+                                                       op_.getOutput(0)->dataType()});

    // Call kernel
-    kernelFunc(dynamic_cast<const BatchNorm_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawInput(1)),
-               getCPUPtr(mOp.getRawInput(2)),
-               getCPUPtr(mOp.getRawInput(3)),
-               getCPUPtr(mOp.getRawInput(4)),
-               getCPUPtr(mOp.getRawOutput(0)),
-               true);
+    kernelFunc(op_.epsilon(),
+               op_.momentum(),
+               op_.getInput(0)->template dims<4>(),
+               getCPUPtr(op_.getRawInput(0)),
+               getCPUPtr(op_.getRawInput(1)),
+               getCPUPtr(op_.getRawInput(2)),
+               getCPUPtr(op_.getRawInput(3)),
+               getCPUPtr(op_.getRawInput(4)),
+               getCPUPtr(op_.getRawOutput(0)),
+               true);
}
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 5c8d2fe307c70bd7ee3f64e14735417f7ffb0c67..591e8a0637d1e52c75193ac1750a210a08815ccc 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -9,18 +9,71 @@
 *
 ********************************************************************************/

-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+
+#include <memory>
#include <vector>

-#include "aidge/utils/Types.h"
#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
+#include "aidge/data/Tensor.hpp"
#include "aidge/operator/ConvDepthWise.hpp"
+#include "aidge/utils/Log.hpp"
+#include "aidge/utils/Types.h"

-#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
-#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
+
+Aidge::Elts_t Aidge::ConvDepthWiseImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp);
+
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
+
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 3-dimensions tensors only");
+
+    // Find the correct kernel type
+    const auto outputDataType = op_.getOutput(0)->dataType();
+    const Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_key registrarKey = {
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvDepthWiseImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(op_.strideDims(),
+               op_.dilationDims(),
+               op_.kernelDims(), // Conv attributes
+               op_.getInput(0)->template dims<3>(), // input dimensions
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+               );
+}
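The exists()/create() pair above is a deliberate two-step dispatch: first look for a kernel matching the exact input/output type combination, otherwise fall back to the homogeneous kernel of the output type and let refCastFrom convert whatever does not match. A sketch of the assumed behavior for a mixed-type graph:

    // Hypothetical case: Int32 weights feeding a Float32 output. No
    // {Float32, Int32, Int32, Float32} kernel is registered, so the
    // {Float32, Float32, Float32, Float32} kernel is fetched and refCastFrom
    // materializes a Float32 copy of the weights in input1Fallback; inputs
    // that already match the output type are aliased without any copy.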
Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
    // this implementation can be in-place
@@ -28,23 +81,37 @@ Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /
}

void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
-    assert(mOp.getRawInput(0) && "missing input #0");
-    assert(mOp.getRawInput(1) && "missing input #1");
-    assert(mOp.getRawInput(2) && "missing input #2");
+    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<2>&>(mOp);

-    assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() == 4) && "support for 4-dimensions tensors only");
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(2), "missing input #2 in ConvDepthWise Operator");
+
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only");

    // Find the correct kernel type
-    auto kernelFunc =
-        Registrar<ConvDepthWiseImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-                                                           std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-                                                           std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
-                                                           std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<ConvDepthWiseImpl2DForward_cpu>::create(
+        {op_.getInput(0)->dataType(),
+         op_.getInput(1)->dataType(),
+         op_.getInput(2)->dataType(),
+         op_.getOutput(0)->dataType()});
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();

    // Call kernel
-    kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawInput(1)),
-               getCPUPtr(mOp.getRawInput(2)),
-               getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+               op_.dilationDims(),
+               op_.kernelDims(),
+               op_.getInput(0)->template dims<4>(),
+               input0.getImpl()->rawPtr(),
+               input1.getImpl()->rawPtr(),
+               op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr,
+               getCPUPtr(op_.getRawOutput(0)));
}
input2.getImpl()->rawPtr() : nullptr,
+               getCPUPtr(op_.getRawOutput(0)));
 }

diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 7457a1a0b75af1f922c5a65ac88aabc813d00069..0be31befe2019d70b628db878443f14b1d622f1c 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -9,18 +9,71 @@
 *
 ********************************************************************************/

+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+
 #include <cassert>
 #include <chrono> // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread> // std::this_thread::sleep_for
 #include <vector>

-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"

-#include "aidge/backend/cpu/operator/ConvImpl.hpp"
-#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
+Aidge::Elts_t Aidge::ConvImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvImpl1D_cpu::forward() {
+    const auto& op_ = static_cast<const Conv_Op<1>&>(mOp);
+
+    // FIXME: uncomment the following code once memory handling will work
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
+
+    // Find the correct kernel type
+    const auto outputDataType = op_.getOutput(0)->dataType();
+    const Registrar<ConvImpl1DForward_cpu>::registrar_key registrarKey = {
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(op_.strideDims(),
+               op_.dilationDims(),
+               op_.kernelDims(),
+               op_.getInput(0)->template dims<3>(), // input dimensions
+               op_.outChannels(), // outChannels
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               op_.getInput(2) ?
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); +} Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -28,19 +81,18 @@ Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx } void Aidge::ConvImpl2D_cpu::forward() { - const auto& opTensor = static_cast<const OperatorTensor&>(mOp); + const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp); // FIXME: uncomment the following code once memory handling will work - assert(mOp.getRawInput(0) && "missing input #0"); - assert(mOp.getRawInput(1) && "missing input #1"); - assert(mOp.getRawInput(2) && "missing input #2"); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator."); + AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator."); // Find the correct kernel type - const auto outputDataType = opTensor.getOutput(0)->dataType(); + const auto outputDataType = op_.getOutput(0)->dataType(); const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = { - opTensor.getInput(0)->dataType(), - opTensor.getInput(1)->dataType(), - opTensor.getInput(2)->dataType(), + op_.getInput(0)->dataType(), + op_.getInput(1)->dataType(), + (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()), outputDataType}; Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc; @@ -59,12 +111,19 @@ void Aidge::ConvImpl2D_cpu::forward() { // call to forward(). We might put the following shared_ptr as members of // this class to avoid that. std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; - const auto& input0 = opTensor.getInput(0)->refCastFrom(input0Fallback, *opTensor.getOutput(0)); - const auto& input1 = opTensor.getInput(1)->refCastFrom(input1Fallback, *opTensor.getOutput(0)); - const auto& input2 = opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0)); + const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)); + const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)); + const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor(); // Call kernel - kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), opTensor.getInput(0)->template dims<4>(), - input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), - getCPUPtr(mOp.getRawOutput(0))); + kernelFunc(op_.strideDims(), + op_.dilationDims(), + op_.kernelDims(), + op_.getInput(0)->template dims<4>(), // input dimensions + dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels + input0.getImpl()->rawPtr(), // input + input1.getImpl()->rawPtr(), // weight + op_.getInput(2) ? 
input2.getImpl()->rawPtr() : nullptr, // bias + getCPUPtr(mOp.getRawOutput(0)) // output + ); } diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index d9edf3a9959c1c80dbe85c93f7a1499260452c4c..f7eebb7b21512fb3b388b6927409fba9a1d92b34 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -29,14 +29,13 @@ void Aidge::FCImpl_cpu::forward() const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp); AIDGE_ASSERT(op_.getInput(0), "missing input #0"); AIDGE_ASSERT(op_.getInput(1), "missing input #1"); - AIDGE_ASSERT(op_.getInput(2), "missing input #2"); // Find the correct kernel type const auto outputDataType = op_.getOutput(0)->dataType(); const Registrar<FCImplForward_cpu>::registrar_key registrarKey = { op_.getInput(0)->dataType(), op_.getInput(1)->dataType(), - op_.getInput(2)->dataType(), + ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()), outputDataType}; Registrar<FCImplForward_cpu>::registrar_type kernelFunc; @@ -57,14 +56,16 @@ void Aidge::FCImpl_cpu::forward() std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback; const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0))); const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0))); - const auto& input2 = op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))); + const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))) : Tensor(); // Call kernel const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1; - kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), - batchSize, - input0.size() / batchSize, - input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(), + kernelFunc(batchSize, + input1.dims()[1], // nb input features + input1.dims()[0], // nb output features + input0.getImpl()->rawPtr(), + input1.getImpl()->rawPtr(), + (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, getCPUPtr(mOp.getRawOutput(0))); } @@ -75,14 +76,13 @@ void Aidge::FCImpl_cpu::backward() AIDGE_ASSERT(fc_grad, "missing ouput #0 gradient"); AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient"); AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient"); - AIDGE_ASSERT(op_.getInput(2)->grad(), "missing input #2 gradient"); // Find the correct kernel type const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = { fc_grad->dataType(), - op_.getInput(0)->grad()->dataType(), op_.getInput(1)->grad()->dataType(), - op_.getInput(2)->grad()->dataType()}; + (op_.getInput(2)) ? op_.getInput(2)->grad()->dataType() : op_.getInput(1)->grad()->dataType(), + op_.getInput(0)->grad()->dataType()}; Registrar<FCImplBackward_cpu>::registrar_type kernelFunc; if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) { @@ -102,17 +102,17 @@ void Aidge::FCImpl_cpu::backward() std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback; const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0))); const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0))); - const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))); + const auto& input2grad = (op_.getInput(2)) ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor(); // Call kernel const auto batchSize = (input0grad.dims().size() > 1) ? 
input0grad.dims()[0] : 1; - kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), - batchSize, - input0grad.size() / batchSize, + kernelFunc(batchSize, + input1grad.dims()[1], // nb input features + input1grad.dims()[0], // nb output features getCPUPtr(fc_grad), getCPUPtr(op_.getInput(0)), getCPUPtr(mOp.getRawInput(1)), input0grad.getImpl()->rawPtr(), input1grad.getImpl()->rawPtr(), - input2grad.getImpl()->rawPtr()); + (op_.getInput(2)) ? input2grad.getImpl()->rawPtr() : nullptr); } diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index 340af3eeaf370988f9b12d8535812c938e47078a..9d4f2a7edcdf263751ec1d9cea10cd4d60055610 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -9,18 +9,19 @@ * ********************************************************************************/ -#include <cassert> +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" + #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/LeakyReLU.hpp" +#include "aidge/utils/Log.hpp" #include "aidge/utils/Types.h" #include "aidge/utils/Registrar.hpp" -#include "aidge/backend/cpu/data/GetCPUPtr.h" -#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" -#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" -#include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp" Aidge::Elts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -29,6 +30,7 @@ Aidge::Elts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIn void Aidge::LeakyReLUImpl_cpu::forward() { const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); @@ -39,7 +41,7 @@ void Aidge::LeakyReLUImpl_cpu::forward() { out0->dataType()}); // Call kernel - kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(), + kernelFunc(op_.negativeSlope(), in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); @@ -58,7 +60,7 @@ void Aidge::LeakyReLUImpl_cpu::backward() { out0->dataType()}); // Call kernel - kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(), + kernelFunc(op_.negativeSlope(), in0->size(), getCPUPtr(in0), getCPUPtr(out0)); diff --git a/src/operator/LnImpl.cpp b/src/operator/LnImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..12885a944be46a977463e900af4047319bb1c8b2 --- /dev/null +++ b/src/operator/LnImpl.cpp @@ -0,0 +1,65 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <chrono> // std::chrono::milliseconds +#include <numeric> // std::accumulate +#include <thread> // std::this_thread::sleep_for +#include <vector> + +#include "aidge/operator/Ln.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" + +#include "aidge/backend/cpu/operator/LnImpl.hpp" +#include "aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp" + +Aidge::Elts_t Aidge::LnImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return Elts_t::DataElts(0); +} + +void Aidge::LnImpl_cpu::forward() { + const Ln_Op& op_ = static_cast<const Ln_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + AIDGE_ASSERT(in0, "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<LnImplForward_cpu>::create({ + in0->dataType(), + out0->dataType()}); + + // Call kernel + kernelFunc(in0->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} + +void Aidge::LnImpl_cpu::backward() { + const Ln_Op& op_ = dynamic_cast<const Ln_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + + // Find the correct kernel type + auto kernelFunc = Registrar<LnImplBackward_cpu>::create({ + in0->dataType(), + gra_int0->dataType(), + gra_out0->dataType() + }); + + // Call kernel + kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); +} diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 94591eaa9848b24aeb7afa1e8b6b87a3e6e2b45f..2e6d67abbdd6776a1f75449a0f4562143cbaae87 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -9,17 +9,16 @@ * ********************************************************************************/ -#include <cassert> -#include <numeric> -#include <thread> +#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" + #include <vector> -#include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp" #include "aidge/operator/MaxPooling.hpp" +#include "aidge/utils/Log.hpp" +#include "aidge/utils/Types.h" -#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" -#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp" Aidge::Elts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -27,15 +26,20 @@ Aidge::Elts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*in } void Aidge::MaxPoolingImpl2D_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator."); // Find the correct kernel type - auto kernelFunc = - Registrar<MaxPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), 
std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<MaxPoolingImpl2DForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()
+    });

     // Call kernel
-    kernelFunc(dynamic_cast<const MaxPooling_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+               op_.kernelDims(),
+               op_.ceilMode(),
+               op_.getInput(0)->template dims<4>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
 }

diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index cd420a6241723c5d3fa5836838f84ce6bfe965d1..b4b52d6be855b6a1f8c0a71a6a9169ee9690f34c 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -9,10 +9,6 @@
 *
 ********************************************************************************/

-#include <cassert>
-#include <chrono> // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread> // std::this_thread::sleep_for
 #include <vector>

 #include "aidge/utils/Types.h"
@@ -22,8 +18,40 @@
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"

-Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
-    assert(inputIdx == 0 && "operator has only one input");
+Aidge::Elts_t Aidge::PadImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0, "input index out of range. "
+                 "{} Operator has only one input", mOp.type());
+    (void) inputIdx;
+
+    // Padding cannot be in-place!
+    // We must ensure that we do not overwrite data that has not been consumed yet.
+    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
+    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
+    return Elts_t::DataElts(outputSize - inputSize);
+}
+
+void Aidge::PadImpl1D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<PadImpl1DForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(op_.beginEndBorders(),
+               op_.borderType(),
+               op_.borderValue(),
+               op_.getInput(0)->template dims<3>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
+}
+
+Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0, "input index out of range. "
+                 "{} Operator has only one input", mOp.type());
     (void) inputIdx;

     // Padding cannot be in-place!
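A note on the in-place constraint restated above: padding produces more elements than it consumes, so if input and output shared a buffer, early writes could clobber input values that have not been read yet; the protected-element count is simply the size difference. A minimal standalone sketch of that bookkeeping (names hypothetical, independent of the Aidge API):

```cpp
#include <cstddef>
#include <iostream>

// Extra output elements a pad must be able to write without clobbering
// unconsumed input when buffers alias: output size minus input size.
std::size_t padProtectedElts(std::size_t inputSize,
                             std::size_t padBegin, std::size_t padEnd) {
    const std::size_t outputSize = padBegin + inputSize + padEnd;
    return outputSize - inputSize;  // == padBegin + padEnd
}

int main() {
    // An 8-element line padded by 2 on each side needs 4 elements of headroom.
    std::cout << padProtectedElts(8, 2, 2) << '\n';  // prints 4
    return 0;
}
```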
@@ -34,15 +62,19 @@ Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) c } void Aidge::PadImpl2D_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + const auto& op_ = dynamic_cast<const Pad_Op<2>&>(mOp); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator."); // Find the correct kernel type - auto kernelFunc = - Registrar<PadImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create({ + op_.getInput(0)->dataType(), + op_.getOutput(0)->dataType()}); // Call kernel - kernelFunc(dynamic_cast<const Pad_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + kernelFunc(op_.beginEndBorders(), + op_.borderType(), + op_.borderValue(), + op_.getInput(0)->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 06859f09db169946175a93140e04f2e2a99e3362..4a0fb9f5d929e2ce731a21b5553e1b9257a32daa 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -28,13 +28,15 @@ Aidge::Elts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t } void Aidge::ReLUImpl_cpu::forward() { - std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0)); + const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type auto kernelFunc = Registrar<ReLUImplForward_cpu>::create({ in0->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + out0->dataType()}); // Call kernel kernelFunc(in0->size(), @@ -43,20 +45,20 @@ void Aidge::ReLUImpl_cpu::forward() { } void Aidge::ReLUImpl_cpu::backward() { - // reversing in and out Tensors - const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); + const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); std::shared_ptr<Tensor> out0 = op_.getOutput(0); - std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); - AIDGE_ASSERT(out0, "current {} operator output#0 has not gradient Tensor.", op_.type()); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); // Find the correct kernel type auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({ - out0->dataType(), - gra_out0->dataType(), - gra_int0->dataType() + in0->dataType(), + gra_int0->dataType(), + gra_out0->dataType() }); // Call kernel - kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); + kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); } diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp index a9f17a28a2a47ec7bc50820d587e8d0f359d2bb3..b4cd8ffa9b46aaa1c1d7a2eca947ed0254947fef 100644 --- a/src/operator/ReduceMeanImpl.cpp +++ b/src/operator/ReduceMeanImpl.cpp @@ -26,10 +26,11 @@ void Aidge::ReduceMeanImpl_cpu::forward() { op_.getOutput(0)->dataType()}); // Call kernel - kernelFunc(op_.getStaticAttributes(), - 
op_.getInput(0)->dims(), - op_.getInput(0)->getImpl()->rawPtr(), - op_.getOutput(0)->getImpl()->rawPtr()); + kernelFunc(op_.axes(), + op_.keepDims(), + op_.getInput(0)->dims(), + op_.getInput(0)->getImpl()->rawPtr(), + op_.getOutput(0)->getImpl()->rawPtr()); } // void Aidge::ReduceMeanImpl1D_cpu::forward() { diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp index d0b58702c73f01fb62114d335f5c2342908542ea..db4670836e702f536243aadec36c5ba85b2344c8 100644 --- a/src/operator/ScalingImpl.cpp +++ b/src/operator/ScalingImpl.cpp @@ -12,6 +12,7 @@ #include <cassert> #include <numeric> // std::accumulate #include <functional> // std::multiplies +#include <vector> #include "aidge/operator/Scaling.hpp" @@ -19,7 +20,6 @@ #include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" #include "aidge/backend/cpu/data/GetCPUPtr.h" -#include <vector> Aidge::Elts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -27,16 +27,19 @@ Aidge::Elts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOInde } void Aidge::ScalingImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + const auto& op_ = dynamic_cast<const Scaling_Op&>(mOp); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Scaling Operator."); // Find the correct kernel type auto kernelFunc = Registrar<ScalingImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + op_.getInput(0)->dataType(), + op_.getOutput(0)->dataType()}); // Call kernel - kernelFunc(dynamic_cast<const Scaling_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - getCPUPtr(mOp.getRawInput(0)), - getCPUPtr(mOp.getRawOutput(0))); + kernelFunc(op_.scalingFactor(), + op_.quantizedNbBits(), + op_.isOutputUnsigned(), + op_.getInput(0)->size(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp index dd7ec26cb36777f79d382c815b60d2381544a0bd..ad69935c02e392d7aa1c9601acb827c5baf8970f 100644 --- a/src/operator/SigmoidImpl.cpp +++ b/src/operator/SigmoidImpl.cpp @@ -21,6 +21,7 @@ #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp" Aidge::Elts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -28,15 +29,36 @@ Aidge::Elts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOInde } void Aidge::SigmoidImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type auto kernelFunc = Registrar<SigmoidImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + in0->dataType(), + out0->dataType()}); // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + kernelFunc(in0->size(), 
getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +void Aidge::SigmoidImpl_cpu::backward() { + const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + + // Find the correct kernel type + auto kernelFunc = Registrar<SigmoidImplBackward_cpu>::create({ + out0->dataType(), + gra_int0->dataType(), + gra_out0->dataType() + }); + + // Call kernel + kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); +} diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ffe4dcdd97b58758885b013d0c1770bd98a83ba --- /dev/null +++ b/src/operator/SliceImpl.cpp @@ -0,0 +1,44 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include "aidge/backend/cpu/operator/SliceImpl.hpp" + +#include <vector> + +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp" +#include "aidge/operator/Slice.hpp" +#include "aidge/utils/Log.hpp" +#include "aidge/utils/Types.h" + +Aidge::Elts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return Elts_t::DataElts(0); +} + +void Aidge::SliceImpl_cpu::forward() { + const auto& op_ = dynamic_cast<const Slice_Op&>(mOp); + AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator."); + + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu>::create({ + op_.getInput(0)->dataType(), + op_.getOutput(0)->dataType()}); + + // Call kernel + kernelFunc(op_.starts(), + op_.ends(), + op_.axes(), + op_.steps(), + op_.getInput(0)->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 240267613e557c20edcc00e81f4bf20d17d9962f..5bc3699e2146e36a63b4a1602ca1cb86e3ff1e2f 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -28,19 +28,18 @@ Aidge::Elts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOInde } void Aidge::SoftmaxImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims()>1); + const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp); + AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty"); // Find the correct kernel type auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + op_.getInput(0)->dataType(), + op_.getOutput(0)->dataType()}); - Softmax_Op::Attrs attr = dynamic_cast<const Softmax_Op&>(mOp).getStaticAttributes(); - const int& axisIdx = static_cast<const int&>(std::get<0>(attr)); + std::int32_t axis = (op_.axis() >= 0) ? 
op_.axis() : op_.getInput(0)->nbDims() + op_.axis(); // Call kernel - kernelFunc(axisIdx, + kernelFunc(static_cast<std::size_t>(axis), // axisIdx std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp index 44e180739ed86e25d4be6d0beb693f73bdadbf35..a2469ed9b83679c0edf8d0a761abf9d3d046db6e 100644 --- a/src/operator/TanhImpl.cpp +++ b/src/operator/TanhImpl.cpp @@ -21,6 +21,7 @@ #include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp" +#include "aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp" Aidge::Elts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place @@ -28,15 +29,37 @@ Aidge::Elts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t } void Aidge::TanhImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); + std::shared_ptr<Tensor> in0 = op_.getInput(0); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + AIDGE_ASSERT(in0, "missing input #0"); // Find the correct kernel type auto kernelFunc = Registrar<TanhImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + in0->dataType(), + out0->dataType()}); // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + kernelFunc(in0->size(), getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } + +void Aidge::TanhImpl_cpu::backward() { + const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp); + std::shared_ptr<Tensor> out0 = op_.getOutput(0); + std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad(); + std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad(); + AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type()); + + // Find the correct kernel type + auto kernelFunc = Registrar<TanhImplBackward_cpu>::create({ + out0->dataType(), + gra_int0->dataType(), + gra_out0->dataType() + }); + + // Call kernel + kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0)); +} + diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp index 43903100a163b4499ed96c44d77ad119534d2eaa..d5f2065b624de431b43edef9a83bf079905129dd 100644 --- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp +++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp @@ -237,7 +237,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling", REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i)); } - REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres)); + REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f)); delete[] array0; delete[] result; diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index aa9a3909619aac2bcd2718ab7aaa0f8f6699ed34..271a1e2f9860d92f840916f6b2e396993b0bea39 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -194,13 +194,19 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { SECTION("LSTM(forward)") { auto pop = Pop(); auto myLSTM = LSTM(32, 64, 0, true, "ltsm"); - 
auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); + auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); - auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph(); - microGraph->save("lstm", false, false); + auto microGraph = op->getMicroGraph(); + microGraph->save("lstm", false, true); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); - REQUIRE(myLSTM->nbData() == 1); + REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); + for (size_t i = 1; i < 9; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param); + } + for (size_t i = 9; i < 17; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam); + } REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( @@ -259,7 +265,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { microGraph->save("lstm", false, false); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); - REQUIRE(myLSTM->nbData() == 1); + REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); + for (size_t i = 1; i < 9; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param); + } + for (size_t i = 9; i < 17; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam); + } REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( @@ -316,7 +328,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator()); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); - REQUIRE(myLSTM->nbData() == 1); + REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); + for (size_t i = 1; i < 9; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param); + } + for (size_t i = 9; i < 17; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam); + } REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( @@ -344,13 +362,12 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myLSTM->input(8).first->getOperator()->setOutput(0, myInitR); auto g = getConnectedGraphView(myLSTM); - g->setDataType(DataType::Float32); - g->setBackend("cpu"); + g->compile("cpu", DataType::Float32); g->save("lstm_seq", true, true); auto scheduler = SequentialScheduler(g); - scheduler.forward(true); + scheduler.forward(); scheduler.saveSchedulingDiagram("lstm_seq_schedule"); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( @@ -378,7 +395,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->add(pop); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); - REQUIRE(myLSTM->nbData() == 1); + REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); + for (size_t i = 1; i < 9; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param); + } + for (size_t i = 9; i < 17; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam); + } REQUIRE(myLSTM->nbOutputs() == 2); std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( @@ -441,7 +464,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { myGraph->add(pop); REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); - REQUIRE(myLSTM->nbData() == 1); + REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data); + for (size_t i = 1; i < 9; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param); + } + for (size_t i = 9; i < 17; ++i) { + REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam); + } REQUIRE(myLSTM->nbOutputs() == 2); 
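The former nbData() == 1 assertion is replaced throughout this file by per-input category checks: input #0 is Data, inputs #1..#8 are Param, and #9..#16 are OptionalParam. Since the same three-part check recurs in every LSTM section, it could plausibly be factored into a predicate; a sketch, assuming InputCategory is reachable through the Node header (helper name hypothetical, not part of the patch):

```cpp
#include <cstddef>
#include <memory>
#include "aidge/graph/Node.hpp"

// Hypothetical helper: true iff the node shows the LSTM input layout
// asserted above (1 Data, 8 Param, 8 OptionalParam).
static bool hasLSTMInputLayout(const std::shared_ptr<Aidge::Node>& node) {
    if (node->inputCategory(0) != Aidge::InputCategory::Data) return false;
    for (std::size_t i = 1; i < 9; ++i)
        if (node->inputCategory(i) != Aidge::InputCategory::Param) return false;
    for (std::size_t i = 9; i < 17; ++i)
        if (node->inputCategory(i) != Aidge::InputCategory::OptionalParam) return false;
    return true;
}
```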
std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b9f89e62c09c04a7f848c362336418ef62aecce --- /dev/null +++ b/unit_tests/operator/Test_SliceImpl.cpp @@ -0,0 +1,279 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Slice.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { + SECTION("1D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> { + {0, 1, -2,-3, 4,-5,-6, 7, 8, 9} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,3> { + {0, 1, -2} + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,1>{{0}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,1>{{3}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,1>{{0}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("2D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> { + { + {-5,-6, 7}, + {-5,-6, 7} + } + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0,5}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2,8}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0,1}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("3D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 
2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> { + { + { + { 4,-5,-6} + } + } + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,3>{{0,1,4}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,3>{{1,2,7}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,3>{{0,1,2}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // mySlice->getOperator()->output(0).print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("4D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }, + { + { + { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3,11,-5,-6, 7,-1,10} + } + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }, + { + { + { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3,11,-5,-6, 7,-1,10} + } + } + } + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,4>{{0,0,0,0}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,4>{{2,2,2,10}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,4>{{0,1,2,3}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("Attributes instead of inputs") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }, + { + { + { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3,11,-5,-6, 7,-1,10} + } + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,1,1,5> { + { + { + { + { 0, 1, 2,-3, 4} + 
} + } + } + }); + + std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,5}, {0,1,2,3}, {1,1,1,1}); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("Different Steps") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,4,2,8> { + { + { + { 0, 1, 2,-3, 4,-5,-6,7}, + {-5, 4, 2,-3, 4,-5,-6,-7} + }, + { + { 10, 11, 12,-13, 14,-15,-16,17}, + {-15, 14, 12,-13, 14,-15,-16,-17} + }, + { + { 20, 21, 22,-23, 24,-25,-26,27}, + {-25, 24, 22,-23, 24,-25,-26,-27} + }, + { + { 30, 31, 32,-33, 34,-35,-36,37}, + {-35, 34, 32,-33, 34,-35,-36,-37} + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,1,3> { + { + { + { 7, 4, 1} + }, + { + { 27, 24, 21} + } + } + }); + + std::shared_ptr<Node> mySlice = Slice({0,0,7}, {4,1,0}, {0,1,2}, {2,1,-3}); + // Steps are 2,1,-3 so the slice will be: + // on Axis 0: from 0 to 4 by step of 2 + // on Axis 1: from 0 to 1 by step of 1 + // on Axis 2: from 7 to 0 by step of -3 (reverse the order of elements) + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } +} diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp index 2c10cdf369d7d37ea67b70b9dfe3e76018da2a32..7c127548417492141c3ea1eeb9374042befe75d2 100644 --- a/unit_tests/recipies/Test_HorizontalTiling.cpp +++ b/unit_tests/recipies/Test_HorizontalTiling.cpp @@ -174,7 +174,7 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") { REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput); GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv); - g->compile("cpu", DataType::Int32); + g->compile("cpu", DataType::Int32, 0, {{2,3,5,5}}); // changes myInput DataType from Int32 to Float32. Why?????? 
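On the compile() call above: judging from the calls used elsewhere in this patch, it appears to bundle data-type and backend selection with dimension propagation, the extra arguments being the device index and the per-input shapes. A rough, assumed decomposition (a sketch, not a verbatim copy of GraphView::compile):

```cpp
#include <memory>
#include "aidge/data/Data.hpp"
#include "aidge/graph/GraphView.hpp"

// Assumed equivalent of g->compile("cpu", DataType::Int32, 0, {{2,3,5,5}}),
// expressed with the setDataType/setBackend/forwardDims calls this patch
// uses elsewhere.
void compileLike(const std::shared_ptr<Aidge::GraphView>& g) {
    g->setDataType(Aidge::DataType::Int32);
    g->setBackend("cpu", /*device=*/0);
    g->forwardDims({{2, 3, 5, 5}});  // propagate the provided input shape
}
```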
s.resetScheduling(); s.forward(); diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 01ccd37c319ee64deb15240b30cc369b37c9e47d..16112628053a35ef71d5819a53aacc85425da88d 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -416,7 +416,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward auto myProd = Producer(inputTensor, "prod"); myProd -> addChild(gv); gv -> compile("cpu", DataType::Float32); - compile_gradient(gv); + SequentialScheduler scheduler(gv); scheduler.forward(); auto outNode = gv->getOrderedOutputs()[0].first; @@ -432,7 +432,6 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward {6.0f, 6.0f, 6.0f, 6.0f, 6.0f}, {6.0f, 6.0f, 6.0f, 7.0f, 7.0f}, {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}}); - predictedOutput->initGrad(); predictedOutput->setGrad(targetOutput); REQUIRE_NOTHROW(scheduler.backward()); } diff --git a/version.txt b/version.txt index ee1372d33a29e27945406f0527f8af8e6ee119c9..7179039691ce07a214e7a815893fee97a97b1422 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.2 +0.2.3
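Finally, the dispatch pattern repeated across the Conv, ConvDepthWise and FC changes in this patch: build the registrar key from the actual input/output data types, substitute the weight type when the optional bias is absent, and fall back to a kernel keyed entirely on the output type. A condensed, self-contained sketch of that control flow with hypothetical names (a std::map standing in for Aidge's Registrar):

```cpp
#include <array>
#include <functional>
#include <iostream>
#include <map>
#include <string>

// Key = {input, weight, bias, output} type tags; the real code uses Aidge
// DataType values and Registrar<>::exists()/create().
using Key    = std::array<std::string, 4>;
using Kernel = std::function<void()>;
static std::map<Key, Kernel> registry;

Kernel resolve(const std::string& in, const std::string& weight,
               const std::string* bias,  // nullptr when the optional bias is absent
               const std::string& out) {
    // Optional bias: substitute the weight type, as the patch does.
    const Key key{in, weight, bias ? *bias : weight, out};
    const auto it = registry.find(key);
    if (it != registry.end())
        return it->second;                        // exact match on all four types
    return registry.at(Key{out, out, out, out});  // fallback: everything == output type
}

int main() {
    registry[Key{"f32", "f32", "f32", "f32"}] = [] { std::cout << "f32 kernel\n"; };
    resolve("i32", "f32", nullptr, "f32")();  // no exact match -> f32 fallback
    return 0;
}
```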