diff --git a/aidge_backend_cpu/unit_tests/test_recipes.py b/aidge_backend_cpu/unit_tests/test_recipes.py
index 0e58a6b122326997f5eb8cbb39ca85fd3c261036..7c11b92b93eaf04eb83518992c46bf4dec40dfca 100644
--- a/aidge_backend_cpu/unit_tests/test_recipes.py
+++ b/aidge_backend_cpu/unit_tests/test_recipes.py
@@ -36,7 +36,7 @@ class test_recipes(unittest.TestCase):
         graph_view = aidge_core.sequential([input_node, conv, bn])
 
         # Add random values to conv and BatchNorm parameters
-        graph_view.set_datatype(aidge_core.DataType.float32)
+        graph_view.set_datatype(aidge_core.dtype.float32)
         graph_view.set_backend("cpu")
 
         np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32)
diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py
index a90e38f0c8e5c6750d658ec59783fb47602dd85d..0aeeb04b74a078f77c57500b959d6ef9fa9af4d0 100644
--- a/aidge_backend_cpu/unit_tests/test_scheduler.py
+++ b/aidge_backend_cpu/unit_tests/test_scheduler.py
@@ -24,7 +24,7 @@ class test_scheduler(unittest.TestCase):
         input_node.add_child(relu)
 
-        gv.set_datatype(aidge_core.DataType.int32)
+        gv.set_datatype(aidge_core.dtype.int32)
         gv.set_backend("cpu")
 
         scheduler = aidge_core.SequentialScheduler(gv)
@@ -48,7 +48,7 @@ class test_scheduler(unittest.TestCase):
         ])
         EXPECTED_SCHEDULE = ['0', '1', '2']
 
-        graph_view.set_datatype(aidge_core.DataType.float32)
+        graph_view.set_datatype(aidge_core.dtype.float32)
         graph_view.set_backend("cpu")
 
         graph_view.forward_dims()
@@ -74,7 +74,7 @@ class test_scheduler(unittest.TestCase):
         EXPECTED_SCHEDULE = [['0', '1', '3', '2'], ['0', '3', '1', '2']] # Both scheduling are valid !
 
-        graph_view.set_datatype(aidge_core.DataType.float32)
+        graph_view.set_datatype(aidge_core.dtype.float32)
         graph_view.set_backend("cpu")
 
         graph_view.forward_dims()
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
index 0a78564707ee13d5ec5e55902e1b52c1cf9c13d4..ec886a310dd2edc616ced6ee447665eab3ce301a 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
@@ -25,6 +25,35 @@ namespace Aidge {
 // class ConvDepthWise_Op;
 
+// compute kernel registry for forward and backward
+class ConvDepthWiseImpl1DForward_cpu
+    : public Registrable<ConvDepthWiseImpl1DForward_cpu,
+                         std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const std::array<DimSize_t, 1>&,
+                              const std::array<DimSize_t, 1>&,
+                              const std::array<DimSize_t, 1>&,
+                              const std::array<DimSize_t, 3>&,
+                              const void *,
+                              const void *,
+                              const void *,
+                              void *)> {};
+
+class ConvDepthWiseImpl1D_cpu : public OperatorImpl {
+public:
+    ConvDepthWiseImpl1D_cpu(const ConvDepthWise_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ConvDepthWiseImpl1D_cpu> create(const ConvDepthWise_Op<1> &op) {
+        return std::make_unique<ConvDepthWiseImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to ConvDepthWise_Op<1> implementation registry
+static Registrar<ConvDepthWise_Op<1>> registrarConvDepthWiseImpl1D_cpu("cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
+} // namespace
 
 // compute kernel registry for forward and backward
 class ConvDepthWiseImpl2DForward_cpu
@@ -33,7 +62,6 @@ class ConvDepthWiseImpl2DForward_cpu
                          void(const std::array<DimSize_t, 2>&,
                               const std::array<DimSize_t, 2>&,
                               const std::array<DimSize_t, 2>&,
-                             bool,
                              const std::array<DimSize_t, 4> &,
                              const void *,
                              const void *,
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index 4f05ff9a2fb53f174b4131d4913858f4afe7c691..a02aa672b92f089790ef1903af8b804f816f3baa 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -23,6 +23,82 @@
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D depthwise convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims, dilationDims, kernelDims Attributes from the Operator.
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                                            const std::array<DimSize_t, 1>& /*dilationDims*/,
+                                            const std::array<DimSize_t, 1>& kernelDims,
+                                            const std::array<DimSize_t, 3>& inputDims,
+                                            const void *input_,
+                                            const void *weights_,
+                                            const void *biases_,
+                                            void *output_) {
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+
+    // output X size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, ch, Xout)
+    // input  (batch, ch, Xin)
+    // weight (ch, kernelX)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
+            const std::size_t wIndex = ch * kernelDims[0];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                const std::size_t oIndexFull = oIndex + ox;
+                const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                    output[oIndexFull] += weights[wIndex + sx] *
+                                          input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
  * @tparam I Input data type.
@@ -40,8 +116,7 @@ template <class I, class W, class B, class O>
 void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                                             const std::array<DimSize_t, 2>& /*dilationDims*/,
                                             const std::array<DimSize_t, 2>& kernelDims,
-                                            bool noBias,
-                                            const std::array<DimSize_t, 4> &inputDims,
+                                            const std::array<DimSize_t, 4>& inputDims,
                                             const void *input_,
                                             const void *weights_,
                                             const void *biases_,
@@ -72,7 +147,7 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
     for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
         for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
             const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
-            B biasVal = ((!noBias) && biases != nullptr) ? biases[ch] : B(0);
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
             const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
             const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index f27faa4d90e133a1dcdd25607760a311fe8abdde..d7be46c251a82d1b631f4ad50e7175fa2f896d03 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -27,13 +27,45 @@ namespace Aidge {
 // class Conv_Op;
 
 // compute kernel registry for forward and backward
+// Conv 1D
+class ConvImpl1DForward_cpu
+    : public Registrable<ConvImpl1DForward_cpu,
+                         std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const std::array<DimSize_t, 1>&,
+                              const std::array<DimSize_t, 1>&,
+                              const std::array<DimSize_t, 1>&,
+                              const std::array<DimSize_t, 3> &,
+                              DimSize_t,
+                              const void *,
+                              const void *,
+                              const void *,
+                              void *)> {};
+
+class ConvImpl1D_cpu : public OperatorImpl {
+  public:
+    ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) {
+        return std::make_unique<ConvImpl1D_cpu>(op);
+    }
+
+  public:
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Conv_Op<1> implementation registry
+static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create);
+} // namespace
+
+// Conv 2D
 class ConvImpl2DForward_cpu
     : public Registrable<ConvImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
                          void(const std::array<DimSize_t, 2>&,
                               const std::array<DimSize_t, 2>&,
                               const std::array<DimSize_t, 2>&,
-                             bool,
                              const std::array<DimSize_t, 4> &,
                              DimSize_t,
                              const void *,
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index 312344e4ea381602eb4368cb937596825caf9651..88a71c47244788f2da5e576c8ad5170a92561909 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -12,17 +12,100 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+#include <array>
+#include <cmath>
 
-#include "aidge/data/half.hpp"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/data/half.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims, dilationDims, kernelDims Attributes from the Operator.
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                                   const std::array<DimSize_t, 1>& /*dilationDims*/,
+                                   const std::array<DimSize_t, 1>& kernelDims,
+                                   const std::array<DimSize_t, 3>& inputDims,
+                                   DimSize_t outChannels,
+                                   const void *input_,
+                                   const void *weights_,
+                                   const void *biases_,
+                                   void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output X size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout)
+    // input  (batch, inCh, Xin)
+    // weight (outCh, inCh, kernelX)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                              input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16(
+        {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
+        Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D Convolution on CPU backend.
  * @tparam I Input data type.
@@ -40,7 +123,6 @@ template <class I, class W, class B, class O>
 void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
                                    const std::array<DimSize_t, 2>& /*dilationDims*/,
                                    const std::array<DimSize_t, 2>& kernelDims,
-                                   bool noBias,
                                    const std::array<DimSize_t, 4> &inputDims,
                                    DimSize_t outChannels,
                                    const void *input_,
@@ -115,8 +197,8 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
     for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
         for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
             const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
-            // If NoBias or bias = nullptr, set B(0)
-            B biasVal = ((!noBias) && biases != nullptr) ? biases[outCh] : B(0);
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
             for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
                 const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp
index 0ba500c036d6b7ad926086517abfbdd075143d1f..f21cd0ff330f61b942eb55f036c7b23458a5959a 100644
--- a/include/aidge/backend/cpu/operator/FCImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl.hpp
@@ -12,15 +12,14 @@
 #ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
 #define AIDGE_CPU_OPERATOR_FCIMPL_H_
 
+#include <array>
 #include <memory>
 #include <vector>
-#include <array>
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/FC.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // class FC_Op;
@@ -31,8 +30,7 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
                                               DataType,
                                               DataType,
                                               DataType>,
-                                              void(const bool,
-                                                   const DimSize_t,
+                                              void(const DimSize_t,
                                                    const DimSize_t,
                                                    const DimSize_t,
                                                    const void *,
@@ -44,8 +42,7 @@ class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
                                                DataType,
                                                DataType,
                                                DataType>,
-                                               void(const bool,
-                                                    const DimSize_t,
+                                               void(const DimSize_t,
                                                     const DimSize_t,
                                                     const DimSize_t,
                                                     const void *,
diff --git a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
index 9cb4c6f870375aad41e13c9ff65f4ab6250e9c78..c93a44d922dce2dc18df94bf903134ddadf5256f 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
@@ -19,8 +19,7 @@ namespace Aidge {
 template <class I, class O, class W, class B>
-void FCImpl_cpu_backward_kernel(const bool noBias,
-                                const DimSize_t batchSize,
+void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
                                 const DimSize_t inputFeatureSize,
                                 const DimSize_t outputFeatureSize,
                                 const void* input_,
@@ -40,7 +39,7 @@ void FCImpl_cpu_backward_kernel(const bool noBias,
 
     // bias grad
-    if (noBias) { // no bias
+    if (biasesGrad == nullptr) { // no bias
         std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0));
     } else {
         for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
index 2f00af0906fe9f23f804dfa6a2e5cb3aff7c7988..caeacd1bda2fde086fd649c50a733e790fc2c000 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -83,8 +83,7 @@ namespace Aidge {
 // }
 
 template <class I, class W, class B, class O>
-void FCImpl_cpu_forward_kernel(const bool noBias,
-                               const DimSize_t batchSize,
+void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
                                const DimSize_t inputFeatureSize,
                                const DimSize_t outputFeatureSize,
                                const void* input_,
@@ -97,7 +96,7 @@ void FCImpl_cpu_forward_kernel(const bool noBias,
     const B* biases = static_cast<const B*>(biases_);
     O* output = static_cast<O*>(output_);
 
-    if (noBias) {
+    if (biases == nullptr) {
         std::fill(output, output+(batchSize*outputFeatureSize), B(0));
     }
     else {
diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
index b90e6a4f23bbf5f5eeed800a2f47230a38c90e78..c6e41c29fd203fdd80b2acb9ad0dfcac91a0f66c 100644
--- a/include/aidge/backend/cpu/operator/PadImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -25,6 +25,34 @@ namespace Aidge {
 // class Pad_Op;
 
+// compute kernel registry for forward and backward
+class PadImpl1DForward_cpu
+    : public Registrable<PadImpl1DForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const std::array<DimSize_t, 2>&,
+                              const PadBorderType,
+                              const double,
+                              const std::array<DimSize_t, 3> &,
+                              const void *,
+                              void *)> {};
+
+class PadImpl1D_cpu : public OperatorImpl {
+public:
+    PadImpl1D_cpu(const Pad_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<PadImpl1D_cpu> create(const Pad_Op<1> &op) {
+        return std::make_unique<PadImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Pad_Op<1> implementation registry
+static Registrar<Pad_Op<1>> registrarPadImpl1D_cpu("cpu", Aidge::PadImpl1D_cpu::create);
+} // namespace
+
 // compute kernel registry for forward and backward
 class PadImpl2DForward_cpu
diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
index 268c8d7fce8c5f10a85aaf102b42310158115dc4..26c873c8fe7f140b09b31d0f1a9d4125acbcf50f 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
@@ -17,12 +17,90 @@
 #include <cstddef>  // std::size_t
 #include <cstdint>  // std::int32_t
 
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Padding on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param beginEndBorders, borderType, borderValue Attributes from the Operator.
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class O>
+void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders,
+                                  const PadBorderType borderType,
+                                  const double borderValue,
+                                  const std::array<DimSize_t, 3>& dims,
+                                  const void *input_,
+                                  void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    const std::size_t oxSize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
+
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2];
+            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize;
+
+            for (unsigned int ox = 0; ox < oxSize; ++ox) {
+                const std::size_t oIndexFull = oIndex + ox;
+
+                O outputValue = static_cast<O>(borderValue);
+
+                if (borderType == PadBorderType::Constant) {
+                    int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]);
+
+                    if (ix >= 0 && ix < static_cast<int>(dims[2])) {
+                        outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                    }
+                }
+                else if (borderType == PadBorderType::Edge) {
+                    int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])));
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+                else if (borderType == PadBorderType::Reflect) {
+                    int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]);
+
+                    if (ix < 0)
+                        ix = 0 - ix;
+                    if (ix >= static_cast<int>(dims[2]))
+                        ix = static_cast<int>(dims[2]) - ix;
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+                else if (borderType == PadBorderType::Wrap) {
+                    int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])) % static_cast<int>(dims[2]);
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+
+                output[oIndexFull] = outputValue;
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32},
+        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>);
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>);
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D Padding on CPU backend.
  * @tparam I Input data type.
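
The new 1D convolution kernels introduced above (Conv and ConvDepthWise) all derive the output length from the same expression, floor((inSize - kernelSize + stride) / stride), i.e. a "valid" convolution with no padding and the dilation argument ignored. A minimal stand-alone sketch of that computation (not part of the patch; the helper name outLength1D is made up for illustration):

// Stand-alone sketch of the output-length formula used by the 1D kernels above
// ("valid" convolution, no padding, dilation ignored). Not Aidge API.
#include <cstddef>
#include <iostream>

static std::size_t outLength1D(std::size_t inSize, std::size_t kernelSize, std::size_t stride) {
    // integer division floors, matching std::floor on the float expression in the kernels
    return (inSize - kernelSize + stride) / stride;
}

int main() {
    std::cout << outLength1D(5, 3, 1) << '\n';  // 3 output positions
    std::cout << outLength1D(5, 3, 2) << '\n';  // 2 output positions
    return 0;
}

For inSize >= kernelSize this is the usual (inSize - kernelSize) / stride + 1, which is why the kernels can pre-fill the whole output range [oIndex, oIndex + oxSize) with the bias value before accumulating products.
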
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 9c2ad5e4e691ffd304c1bff37222e3c6383fba4e..591e8a0637d1e52c75193ac1750a210a08815ccc 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -22,6 +22,59 @@
 #include "aidge/utils/Types.h"
 
+Aidge::Elts_t Aidge::ConvDepthWiseImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp);
+
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
+
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 3-dimensional tensors only");
+
+    // Find the correct kernel type
+    const auto outputDataType = op_.getOutput(0)->dataType();
+    const Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_key registrarKey = {
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvDepthWiseImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(op_.strideDims(),
+               op_.dilationDims(),
+               op_.kernelDims(), // Conv attributes
+               op_.getInput(0)->template dims<3>(), // input dimensions
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+               );
+}
+
 Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return Elts_t::DataElts(0);
@@ -43,14 +96,22 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
             op_.getInput(2)->dataType(),
             op_.getOutput(0)->dataType()});
 
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
     // Call kernel
     kernelFunc(op_.strideDims(),
                op_.dilationDims(),
                op_.kernelDims(),
-               op_.noBias(),
                op_.getInput(0)->template dims<4>(),
-               getCPUPtr(op_.getRawInput(0)),
-               getCPUPtr(op_.getRawInput(1)),
-               getCPUPtr(op_.getRawInput(2)),
+               input0.getImpl()->rawPtr(),
+               input1.getImpl()->rawPtr(),
+               op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr,
                getCPUPtr(op_.getRawOutput(0)));
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index f38481b317d2f10e2b7570aea3818570f8cd8a8f..0be31befe2019d70b628db878443f14b1d622f1c 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -22,6 +22,59 @@
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Types.h"
 
+Aidge::Elts_t Aidge::ConvImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvImpl1D_cpu::forward() {
+    const auto& op_ = static_cast<const Conv_Op<1>&>(mOp);
+
+    // FIXME: uncomment the following code once memory handling will work
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
+
+    // Find the correct kernel type
+    const auto outputDataType = op_.getOutput(0)->dataType();
+    const Registrar<ConvImpl1DForward_cpu>::registrar_key registrarKey = {
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(op_.strideDims(),
+               op_.dilationDims(),
+               op_.kernelDims(),
+               op_.getInput(0)->template dims<3>(), // input dimensions
+               dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+               );
+}
+
 Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return Elts_t::DataElts(0);
@@ -33,14 +86,13 @@ void Aidge::ConvImpl2D_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
-    AIDGE_ASSERT(op_.getInput(2), "missing input #2 in Conv Operator.");
 
     // Find the correct kernel type
     const auto outputDataType = op_.getOutput(0)->dataType();
     const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = {
         op_.getInput(0)->dataType(),
         op_.getInput(1)->dataType(),
-        op_.getInput(2)->dataType(),
+        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
         outputDataType};
 
     Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc;
@@ -61,18 +113,17 @@ void Aidge::ConvImpl2D_cpu::forward() {
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
     const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
     const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
-    const auto& input2 = op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
 
     // Call kernel
     kernelFunc(op_.strideDims(),
                op_.dilationDims(),
                op_.kernelDims(),
-               op_.noBias(), // Conv attributes
                op_.getInput(0)->template dims<4>(), // input dimensions
                dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels
                input0.getImpl()->rawPtr(), // input
                input1.getImpl()->rawPtr(), // weight
-               input2.getImpl()->rawPtr(), // bias
+               op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
                getCPUPtr(mOp.getRawOutput(0)) // output
                );
 }
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index b8f1cfe3bd9d4005fd1130c64efb7ed51fcd9dff..f7eebb7b21512fb3b388b6927409fba9a1d92b34 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -29,14 +29,13 @@ void Aidge::FCImpl_cpu::forward()
     const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1");
-    AIDGE_ASSERT(op_.getInput(2), "missing input #2");
 
     // Find the correct kernel type
     const auto outputDataType = op_.getOutput(0)->dataType();
     const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
-        outputDataType,
-        outputDataType,
-        outputDataType,
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
         outputDataType};
 
     Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
@@ -57,15 +56,16 @@ void Aidge::FCImpl_cpu::forward()
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
     const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
     const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
-    const auto& input2 = op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0)));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))) : Tensor();
 
     // Call kernel
     const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
-    kernelFunc(op_.noBias(),
-        batchSize,
+    kernelFunc(batchSize,
         input1.dims()[1], // nb input features
         input1.dims()[0], // nb output features
-        input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
+        input0.getImpl()->rawPtr(),
+        input1.getImpl()->rawPtr(),
+        (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
         getCPUPtr(mOp.getRawOutput(0)));
 }
 
@@ -76,14 +76,13 @@ void Aidge::FCImpl_cpu::backward()
     AIDGE_ASSERT(fc_grad, "missing ouput #0 gradient");
     AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
     AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");
-    AIDGE_ASSERT(op_.getInput(2)->grad(), "missing input #2 gradient");
 
     // Find the correct kernel type
     const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
         fc_grad->dataType(),
-        op_.getInput(0)->grad()->dataType(),
         op_.getInput(1)->grad()->dataType(),
-        op_.getInput(2)->grad()->dataType()};
+        (op_.getInput(2)) ? op_.getInput(2)->grad()->dataType() : op_.getInput(1)->grad()->dataType(),
+        op_.getInput(0)->grad()->dataType()};
 
     Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
     if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
@@ -103,12 +102,11 @@ void Aidge::FCImpl_cpu::backward()
     std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
     const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
     const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
-    const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));
+    const auto& input2grad = (op_.getInput(2)) ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor();
 
     // Call kernel
     const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
-    kernelFunc(op_.noBias(),
-        batchSize,
+    kernelFunc(batchSize,
         input1grad.dims()[1], // nb input features
         input1grad.dims()[0], // nb output features
         getCPUPtr(fc_grad),
@@ -116,5 +114,5 @@
         getCPUPtr(mOp.getRawInput(1)),
         input0grad.getImpl()->rawPtr(),
         input1grad.getImpl()->rawPtr(),
-        input2grad.getImpl()->rawPtr());
+        (op_.getInput(2)) ? input2grad.getImpl()->rawPtr() : nullptr);
 }
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index 8ab812188127f989270068427402b40c1ff5ea51..b4b52d6be855b6a1f8c0a71a6a9169ee9690f34c 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -18,8 +18,40 @@
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"
 
-Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
-    assert(inputIdx == 0 && "operator has only one input");
+Aidge::Elts_t Aidge::PadImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0, "input index out of range."
+        "{} Operator has only one input", mOp.type());
+    (void) inputIdx;
+
+
+    // Padding cannot be in-place!
+    // We must ensure that we do not override data that has not been consumed yet.
+    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
+    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
+    return Elts_t::DataElts(outputSize - inputSize);
+}
+
+void Aidge::PadImpl1D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<PadImpl1DForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(op_.beginEndBorders(),
+               op_.borderType(),
+               op_.borderValue(),
+               op_.getInput(0)->template dims<3>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
+}
+
+Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0, "input index out of range."
+        "{} Operator has only one input", mOp.type());
     (void) inputIdx;
 
     // Padding cannot be in-place!
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index 56005f31d9f133bc7248adc3f71ce03015e8829c..271a1e2f9860d92f840916f6b2e396993b0bea39 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -200,7 +200,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     microGraph->save("lstm", false, true);
 
     REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-    REQUIRE(myLSTM->nbData() == 1);
+    REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+    for (size_t i = 1; i < 9; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+    }
+    for (size_t i = 9; i < 17; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+    }
     REQUIRE(myLSTM->nbOutputs() == 2);
 
     std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -259,7 +265,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     microGraph->save("lstm", false, false);
 
     REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-    REQUIRE(myLSTM->nbData() == 1);
+    REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+    for (size_t i = 1; i < 9; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+    }
+    for (size_t i = 9; i < 17; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+    }
     REQUIRE(myLSTM->nbOutputs() == 2);
 
     std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -316,7 +328,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
     REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-    REQUIRE(myLSTM->nbData() == 1);
+    REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+    for (size_t i = 1; i < 9; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+    }
+    for (size_t i = 9; i < 17; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+    }
     REQUIRE(myLSTM->nbOutputs() == 2);
 
     std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -377,7 +395,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     myGraph->add(pop);
 
     REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-    REQUIRE(myLSTM->nbData() == 1);
+    REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+    for (size_t i = 1; i < 9; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+    }
+    for (size_t i = 9; i < 17; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+    }
     REQUIRE(myLSTM->nbOutputs() == 2);
 
     std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -440,7 +464,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     myGraph->add(pop);
 
     REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-    REQUIRE(myLSTM->nbData() == 1);
+    REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+    for (size_t i = 1; i < 9; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+    }
+    for (size_t i = 9; i < 17; ++i) {
+        REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+    }
     REQUIRE(myLSTM->nbOutputs() == 2);
 
     std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index 01ccd37c319ee64deb15240b30cc369b37c9e47d..16112628053a35ef71d5819a53aacc85425da88d 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -416,7 +416,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward
     auto myProd = Producer(inputTensor, "prod");
     myProd -> addChild(gv);
     gv -> compile("cpu", DataType::Float32);
-    compile_gradient(gv);
+
     SequentialScheduler scheduler(gv);
     scheduler.forward();
     auto outNode = gv->getOrderedOutputs()[0].first;
@@ -432,7 +432,6 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward
                {6.0f, 6.0f, 6.0f, 6.0f, 6.0f},
                {6.0f, 6.0f, 6.0f, 7.0f, 7.0f},
                {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}});
-    predictedOutput->initGrad();
     predictedOutput->setGrad(targetOutput);
     REQUIRE_NOTHROW(scheduler.backward());
 }
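
Taken together, these changes replace the explicit noBias flag with a single convention: the bias input (#2) is optional, and a null bias pointer handed to a kernel means "no bias". A simplified, self-contained illustration of that convention (an assumed reduction of the FC forward kernel above, not the actual Aidge code):

// Simplified sketch: a null bias pointer selects the "no bias" path,
// mirroring the updated FC/Conv kernels. fcForward is a made-up name.
#include <cstddef>
#include <iostream>
#include <vector>

template <class I, class W, class B, class O>
void fcForward(std::size_t batchSize, std::size_t inSize, std::size_t outSize,
               const I* input, const W* weights, const B* biases, O* output) {
    for (std::size_t b = 0; b < batchSize; ++b) {
        for (std::size_t o = 0; o < outSize; ++o) {
            // treat a missing bias tensor as a zero bias
            O acc = (biases != nullptr) ? static_cast<O>(biases[o]) : O(0);
            for (std::size_t i = 0; i < inSize; ++i) {
                acc += static_cast<O>(weights[o * inSize + i] * input[b * inSize + i]);
            }
            output[b * outSize + o] = acc;
        }
    }
}

int main() {
    const std::vector<float> x{1.f, 2.f};            // 1 batch, 2 input features
    const std::vector<float> w{1.f, 1.f, 2.f, 0.f};  // 2 outputs x 2 inputs
    std::vector<float> y(2);
    fcForward<float, float, float, float>(1, 2, 2, x.data(), w.data(), nullptr, y.data());
    std::cout << y[0] << ' ' << y[1] << '\n';        // 3 2 (no bias added)
    return 0;
}

The same convention drives the operator-side changes above: input #2 is no longer asserted, the registrar key falls back to the weight data type when no bias tensor exists, and nullptr is passed to the kernel in that case.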