diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index 1d3b29d43678e8d97e05b9b169a98f7e757838d8..7a1497a2f4a2ae0e6005897ae504502505bbe60a 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -40,7 +40,7 @@ public: return std::make_unique<AddImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index 1b62de7e145dfab02e78319600c1b30b29fd715b..ce126dc2b870d6ac767c15bc6fbca2deb07e8772 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -44,7 +44,7 @@ public: return std::make_unique<AvgPoolingImpl2D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index 3743c40a706156c45e6b1e7bf5dfdd50f40ed195..8bd567dab3d564ccdeffdc581585e404fc4697a4 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -59,7 +59,7 @@ public: return std::make_unique<BatchNormImpl2D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/ConcatImpl.hpp b/include/aidge/backend/cpu/operator/ConcatImpl.hpp index 559d5026d3b7430489ffb1cf08ef143df013c4c4..a997ffa9860f87fe0d9bc4e64239a656053416a6 100644 --- a/include/aidge/backend/cpu/operator/ConcatImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConcatImpl.hpp @@ -48,18 +48,6 @@ public: } public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - - void updateConsummerProducer() override final; - void forward() override; void backward() override; diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index 470e189d3a9a8ce52dd067794cfd1bf6a7404696..a61a7299ed6bd5c5a3e41c09e9d5b5f1f7ae3326 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -46,7 +46,7 @@ public: return std::make_unique<ConvDepthWiseImpl2D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index 5e739b06118e788f716f6e5d6a41a58cab9b5203..e7ce0892a6241009a8e80821e341b3209a19faa4 100644 --- 
a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -47,7 +47,7 @@ class ConvImpl2D_cpu : public OperatorImpl { } public: - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp index 06a1ae49ffacf3fbf0ae923081d8d9cf1a5a40d6..3a19d7303464e3543bd1ce83e334c4a6bdb713a2 100644 --- a/include/aidge/backend/cpu/operator/DivImpl.hpp +++ b/include/aidge/backend/cpu/operator/DivImpl.hpp @@ -40,7 +40,7 @@ public: return std::make_unique<DivImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override final; }; diff --git a/include/aidge/backend/cpu/operator/ErfImpl.hpp b/include/aidge/backend/cpu/operator/ErfImpl.hpp index 1402868ea5b8cb441c12dbefaad17304fdfdc749..6864803a542e4beed0259be9c4722d4215bec449 100644 --- a/include/aidge/backend/cpu/operator/ErfImpl.hpp +++ b/include/aidge/backend/cpu/operator/ErfImpl.hpp @@ -38,7 +38,7 @@ public: return std::make_unique<ErfImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/GatherImpl.hpp b/include/aidge/backend/cpu/operator/GatherImpl.hpp index fce777d0ac4d53134aa65689b6ac2ec02b805d98..2164f6c4f26dca64c672f62bc8fdc0895c642ae4 100644 --- a/include/aidge/backend/cpu/operator/GatherImpl.hpp +++ b/include/aidge/backend/cpu/operator/GatherImpl.hpp @@ -38,7 +38,6 @@ public: return std::make_unique<GatherImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp index 2bb78b2f4ccacfa1080203efcbc6f9896e464661..81f10975cc107a23448da3df14b88f6b31d55146 100644 --- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_forward_kernels.hpp @@ -49,12 +49,12 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel( for (DimSize_t channel = 0; channel < dims[1]; ++channel) { const I *filter_start = std::next( input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems)); - I sum = 0; + I mean = 0; for (size_t i = 0; i < in_channel_nb_elems; ++i) { - sum += filter_start[i]; + // Single-pass, numerically stable incremental mean using fmaf + mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean); } - output[batch * out_batch_nb_elems + channel] = - sum / static_cast<I>(in_channel_nb_elems); + output[batch * out_batch_nb_elems + channel] = mean; } } } diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index 42116c52d829a8b4ba27311b3ab2d35fcea37e8b..880a59b3aeae2598f6b1ed5e287af18fd7bcfd6f 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<LeakyReLUImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const
override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override final; diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp index 15629b59b31f6f2228802861f6ae0d7d70b2bff9..d2d30aa7db5b1522712faa846ef33e1b21772d5e 100644 --- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp @@ -44,7 +44,7 @@ public: return std::make_unique<MaxPoolingImpl2D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/MemorizeImpl.hpp b/include/aidge/backend/cpu/operator/MemorizeImpl.hpp index 10d18d780e1e450d1a2c58faa932e9d851a41f19..5ea0c9d4f3802490e5b41b5ea1c8454c87c65b28 100644 --- a/include/aidge/backend/cpu/operator/MemorizeImpl.hpp +++ b/include/aidge/backend/cpu/operator/MemorizeImpl.hpp @@ -29,8 +29,8 @@ public: return std::make_unique<MemorizeImpl_cpu>(op); } - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(const Aidge::IOIndex_t outputIdx, + Elts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; + Elts_t getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const override final; void updateConsummerProducer() override final; void forward() override; diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp index 230094475088c6f7802f8a8af75986ded55e9137..2d42194c417bd7d57c00f4325a4585cf59d95b24 100644 --- a/include/aidge/backend/cpu/operator/MulImpl.hpp +++ b/include/aidge/backend/cpu/operator/MulImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<MulImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp index a1efb0f699beb7a45cc104e7c6ab723c1952a5b1..b3c91a43419e9a5e9e1299f4a2118a51b6b64fc7 100644 --- a/include/aidge/backend/cpu/operator/PadImpl.hpp +++ b/include/aidge/backend/cpu/operator/PadImpl.hpp @@ -46,7 +46,7 @@ public: return std::make_unique<PadImpl2D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/PopImpl.hpp b/include/aidge/backend/cpu/operator/PopImpl.hpp index 29272f5d759b5b39c6bfd704ab1e84b0777e33c5..19d5903973da378ce003daf4de9e1ae54d7b1b0e 100644 --- a/include/aidge/backend/cpu/operator/PopImpl.hpp +++ b/include/aidge/backend/cpu/operator/PopImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<PopImpl_cpu>(op); } - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp index f82b3dfd91ad6e1ea6f732105963c1ee07b08367..514e63af5ae5d1d1d00f7f328f5367df2bfa163d 100644 --- a/include/aidge/backend/cpu/operator/PowImpl.hpp +++ 
b/include/aidge/backend/cpu/operator/PowImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<PowImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; void backward() override; }; diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp index 1c87fe6d80b3d571c55e4355d8b5ef703a2133e4..cef82482813757312c638aebac9f2afd738493db 100644 --- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp @@ -41,7 +41,7 @@ public: return std::make_unique<ReLUImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override final; diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp index e2b7288320e3e57495044381c34c5b1be1d3c243..7355a2bd46f45ab5019a31832001ae3335c1d8e8 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp @@ -44,7 +44,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { } public: - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; @@ -87,7 +86,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { // } // public: -// NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; // void forward() override; // }; @@ -100,7 +98,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { // } // public: -// NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; // void forward() override; // }; @@ -113,7 +110,6 @@ class ReduceMeanImpl_cpu : public OperatorImpl { // } // public: -// NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; // void forward() override; // }; namespace { diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp index d7a967e84f53924a4b050ed79d1220f9bc79232e..6533f7b19eac07d429cd8c5ed05ea082457b9e7b 100644 --- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp @@ -47,22 +47,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr for (std::size_t post = 0; post < stride_post; ++post) { const std::size_t idx_i = pre * dim_i * stride_post + post; const std::size_t idx_o = pre * stride_post + post; - output[idx_o] = input[idx_i]; - for (std::size_t i = 1; i < dim_i; ++i) { - output[idx_o] += input[idx_i + i*stride_post]; + O mean = 0; + for (std::size_t i = 0; i < dim_i; ++i) { + // Single-pass, numerically stable incremental mean using fmaf + mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean); } - output[idx_o] /= dim_i; + output[idx_o] = mean; } } } else { std::size_t outputElements = totalElements; - std::size_t *stride_post = new std::size_t[nb_dims]; + auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]); stride_post[nb_dims - 1] = 1; for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) { stride_post[i] = stride_post[i+1]*inputDims[i+1]; } - std::size_t *stride_pre = new std::size_t[nb_dims]; + auto stride_pre = std::unique_ptr<std::size_t[]>(new
std::size_t[nb_dims]); stride_pre[0] = 1; for (std::size_t i = 1; i < nb_dims; ++i) { stride_pre[i] = stride_pre[i-1]*inputDims[i-1]; @@ -80,13 +81,15 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr for (std::size_t post = 0; post < stride_post[a]; ++post) { const std::size_t idx_i = pre * dim_i * stride_post[a] + post; const std::size_t idx_o = pre * stride_post[a] + post; - outputAccumulation[idx_o] = inputAccumulation[idx_i]; - for (std::size_t i = 1; i < dim_i; ++i) { - outputAccumulation[idx_o] += inputAccumulation[idx_i + i*stride_post[a]]; + I mean = 0; + for (std::size_t i = 0; i < dim_i; ++i) { + // Single-pass, numerically stable incremental mean using fmaf + mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean); } + outputAccumulation[idx_o] = mean; } } - std::for_each(stride_pre+a+1, stride_pre+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); + std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; }); if (inputAccumulation != input) { delete[] inputAccumulation; } @@ -94,14 +97,10 @@ void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attr } - // Copy elements from inputAccumulation to output while dividing by divisor - I divisor = totalElements / outputElements; - std::transform(inputAccumulation, inputAccumulation + outputElements, output, - [divisor](I element) { return element / divisor; }); + // Means are already computed in place above; simply copy the accumulated values to the output + std::copy(inputAccumulation, inputAccumulation + outputElements, output); if (outputAccumulation) { delete[] outputAccumulation; } - delete[] stride_post; - delete[] stride_pre; } } diff --git a/include/aidge/backend/cpu/operator/ReshapeImpl.hpp b/include/aidge/backend/cpu/operator/ReshapeImpl.hpp index d2d819e8d56df59437904aa9b4ae91185c8288f2..1dc5fa2a09533494568ffea78153887d01368a7d 100644 --- a/include/aidge/backend/cpu/operator/ReshapeImpl.hpp +++ b/include/aidge/backend/cpu/operator/ReshapeImpl.hpp @@ -38,7 +38,7 @@ public: return std::make_unique<ReshapeImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp index 088625e963b158811aad85665a25b68bf2892bb9..66bb42f7fb909ee9b6c91a6321ee3fa32c977626 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -40,7 +40,7 @@ public: return std::make_unique<ScalingImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp index f54a6c84aa83414cbe8a7a1713f36dd3311dda3f..2e43023d678c8a4258c80fb91d82d2858fcdf188 100644 --- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp +++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<SigmoidImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp index
72d6105388924dc1553cbeba2124da66d804980f..1583435c12a243ef5861299434a7fc1409307538 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -46,14 +46,6 @@ public: return std::make_unique<SliceImpl_cpu>(op); } - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - void forward() override; void backward() override; }; diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp index 9eb5323702358650f3af91b46a8a1a0872b02675..2b2fab485656efdc37ee134cb4ae574b6b403405 100644 --- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<SoftmaxImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp index 33fa7b5bc802005112a2b47357312883706e43e9..1691d951678509274736d558360c8110958820a9 100644 --- a/include/aidge/backend/cpu/operator/SqrtImpl.hpp +++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp @@ -40,7 +40,7 @@ public: return std::make_unique<SqrtImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override final; diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp index 2d957aa67b3061994f7fb2bf9550e4d5338d3967..15c028ae6289f39e0b6e6fd74e51e138b1f2675c 100644 --- a/include/aidge/backend/cpu/operator/SubImpl.hpp +++ b/include/aidge/backend/cpu/operator/SubImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<SubImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp index 4169b1a533a8b2382644246ea295a683e6f83f1d..9e44f7bcd2b2392c634421478a096258b3e39795 100644 --- a/include/aidge/backend/cpu/operator/TanhImpl.hpp +++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp @@ -39,7 +39,7 @@ public: return std::make_unique<TanhImpl_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/include/aidge/backend/cpu/operator/TransposeImpl.hpp b/include/aidge/backend/cpu/operator/TransposeImpl.hpp index 3c6913dd71d6642d8b76198a272d64bfaba833e8..8bdcc612ea434e266a97724d45aaeefc8e033bf0 100644 --- a/include/aidge/backend/cpu/operator/TransposeImpl.hpp +++ b/include/aidge/backend/cpu/operator/TransposeImpl.hpp @@ -63,7 +63,6 @@ public: return std::make_unique<TransposeImpl2D_cpu>(op); 
} - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; class TransposeImpl3D_cpu : public OperatorImpl { @@ -74,7 +73,6 @@ public: return std::make_unique<TransposeImpl3D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; class TransposeImpl4D_cpu : public OperatorImpl { @@ -85,7 +83,6 @@ public: return std::make_unique<TransposeImpl4D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; class TransposeImpl5D_cpu : public OperatorImpl { @@ -96,7 +93,6 @@ public: return std::make_unique<TransposeImpl5D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; class TransposeImpl6D_cpu : public OperatorImpl { @@ -107,7 +103,6 @@ public: return std::make_unique<TransposeImpl6D_cpu>(op); } - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; void forward() override; }; diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index abd40bd6af06c52945815fd6245e661710fa1127..d6d75a608e4da7d8b9ed8a28912ff2eb1751e042 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/utils/Types.h" #include "aidge/utils/ErrorHandling.hpp" -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::AddImpl_cpu::forward() { diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index 9e0a77e3285c1e3701142828c74898cb9da5b405..8ba6751bf4068a69ed07e362924f59d0f4aca6c5 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -21,9 +21,9 @@ #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::AvgPoolingImpl2D_cpu::forward() { diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index c84f2cb6b09c707f68ed83cc7554624fc6489b84..96179d11850624f831333c9a4badaddf2221ecff 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -20,9 +20,9 @@ #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::BatchNormImpl2D_cpu::forward() { diff --git a/src/operator/ConcatImpl.cpp b/src/operator/ConcatImpl.cpp index e142b79a8aad5a99a65fdf38de630f3b5668c804..605f4a19ff3856924593b0e6d7815d5de1579c01 100644 --- a/src/operator/ConcatImpl.cpp +++ b/src/operator/ConcatImpl.cpp @@ -21,46 +21,6 @@ #include "aidge/backend/cpu/operator/ConcatImpl.hpp" #include "aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp" -Aidge::NbElts_t 
Aidge::ConcatImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getRawInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims(); - return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! - return 0; -} - -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const { - assert(inputIdx < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; -} - -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const { - assert(outputIdx < mNbProducedData.size()); - return mNbProducedData[outputIdx]; -} - -void Aidge::ConcatImpl_cpu::updateConsummerProducer() { - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); - -} - void Aidge::ConcatImpl_cpu::forward() { assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input in Concat operator"); DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index 1b4262e394f78ab0bda4a36440ac7b9cb15c164c..5c8d2fe307c70bd7ee3f64e14735417f7ffb0c67 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::ConvDepthWiseImpl2D_cpu::forward() { diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index 34ea7b37ec9929908192bde6f31d84ae581640a2..7457a1a0b75af1f922c5a65ac88aabc813d00069 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::ConvImpl2D_cpu::forward() { diff --git a/src/operator/DivImpl.cpp 
b/src/operator/DivImpl.cpp index 8e2118e9e78fd364189769ead2eb01f1c55b3c58..098b20776888c6d72110e4bc4c0c3e191febd41c 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -19,9 +19,9 @@ #include "aidge/data/Tensor.hpp" #include "aidge/utils/Types.h" -Aidge::NbElts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::DivImpl_cpu::forward() { diff --git a/src/operator/ErfImpl.cpp b/src/operator/ErfImpl.cpp index 55752e4f5b9f798a6901e108ddcba2f61fdf9774..ace098468c05b80c4116e6f85d00b5fabaf754cd 100644 --- a/src/operator/ErfImpl.cpp +++ b/src/operator/ErfImpl.cpp @@ -19,9 +19,9 @@ #include "aidge/operator/Erf.hpp" #include "aidge/utils/Types.h" -Aidge::NbElts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::ErfImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::ErfImpl_cpu::forward() { diff --git a/src/operator/GatherImpl.cpp b/src/operator/GatherImpl.cpp index d80b53e7e864faf3fca289f94aba4f511bcba161..5384f64536955b7cb2ed85af81e52697e9b84a2a 100644 --- a/src/operator/GatherImpl.cpp +++ b/src/operator/GatherImpl.cpp @@ -20,11 +20,6 @@ #include "aidge/operator/Gather.hpp" #include "aidge/utils/Types.h" -Aidge::NbElts_t Aidge::GatherImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} - void Aidge::GatherImpl_cpu::forward() { const Gather_Op& op = static_cast<const Gather_Op&>(mOp); diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index 67847429eb06b24eac9ac43893a0bc24b934f655..340af3eeaf370988f9b12d8535812c938e47078a 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp" -Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::LeakyReLUImpl_cpu::forward() { diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index e21dab07df4c20eb7253e680146042f205bc210b..94591eaa9848b24aeb7afa1e8b6b87a3e6e2b45f 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -21,9 +21,9 @@ #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::MaxPoolingImpl2D_cpu::forward() { diff --git a/src/operator/MemorizeImpl.cpp b/src/operator/MemorizeImpl.cpp index b2956231ec29784158ea27c68d4ec21a8c4ccc64..8a23bd35585c03c91567c0da5b0727fe1323b754 100644 --- a/src/operator/MemorizeImpl.cpp +++ b/src/operator/MemorizeImpl.cpp @@ -21,7 
+21,7 @@ #include "aidge/backend/cpu/operator/MemorizeImpl.hpp" -Aidge::DimSize_t Aidge::MemorizeImpl_cpu::getNbRequiredData( +Aidge::Elts_t Aidge::MemorizeImpl_cpu::getNbRequiredData( Aidge::IOIndex_t inputIdx) const { const Memorize_Op& op = dynamic_cast<const Memorize_Op&>(mOp); @@ -30,18 +30,18 @@ Aidge::DimSize_t Aidge::MemorizeImpl_cpu::getNbRequiredData( if (scheduleStep == 0 && inputIdx == 0) { // No data input is required for the initial step. // Initialization data is required however. - return 0; + return Elts_t::NoneElts(); } else if (scheduleStep > 0 && inputIdx == 1) { // No initialization data is required after the initial step. - return 0; + return Elts_t::NoneElts(); } else { return OperatorImpl::getNbRequiredData(inputIdx); } } -Aidge::NbElts_t Aidge::MemorizeImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, +Aidge::Elts_t Aidge::MemorizeImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const { assert(mOp.getRawOutput(outputIdx) && "requires valid output"); @@ -50,10 +50,10 @@ Aidge::NbElts_t Aidge::MemorizeImpl_cpu::getRequiredMemory(const Aidge::IOIndex_ const unsigned int endStep = op.template getAttr<MemorizeAttr::EndStep>(); if (endStep > 0 && outputIdx == 1 && scheduleStep >= endStep) { - return 0; + return Elts_t::NoneElts(); } else { - return std::static_pointer_cast<Tensor>(mOp.getRawOutput(outputIdx))->size(); + return Elts_t::DataElts(std::static_pointer_cast<Tensor>(mOp.getRawOutput(outputIdx))->size()); } } diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp index 87d180b013e44a49cb887ce722533c50206f3889..d7feb9b76e25a0e874b3682cdc5b3e53bf8e9228 100644 --- a/src/operator/MulImpl.cpp +++ b/src/operator/MulImpl.cpp @@ -23,9 +23,9 @@ #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::MulImpl_cpu::forward() { diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp index 219bf425fa34cdaaa378c49dd7c9837f9d94d97e..cd420a6241723c5d3fa5836838f84ce6bfe965d1 100644 --- a/src/operator/PadImpl.cpp +++ b/src/operator/PadImpl.cpp @@ -22,7 +22,7 @@ #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const { +Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const { assert(inputIdx == 0 && "operator has only one input"); (void) inputIdx; @@ -30,7 +30,7 @@ Aidge::NbElts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) // We must ensure that we do not override data that has not been consummed yet. 
const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(); const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(); - return (outputSize - inputSize); + return Elts_t::DataElts(outputSize - inputSize); } void Aidge::PadImpl2D_cpu::forward() { diff --git a/src/operator/PopImpl.cpp b/src/operator/PopImpl.cpp index 86850610c75f827d9c29e6a0506397c5a844cb00..02bbddbaed6d9d89e729d6c778a1765fcbab4b4f 100644 --- a/src/operator/PopImpl.cpp +++ b/src/operator/PopImpl.cpp @@ -21,11 +21,11 @@ #include "aidge/backend/cpu/operator/PopImpl.hpp" -Aidge::NbElts_t Aidge::PopImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { +Aidge::Elts_t Aidge::PopImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getRawInput(inputIdx) && "requires valid input"); - return std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->size() - / std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims()[0]; + return Elts_t::DataElts(std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->size() + / std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims()[0]); } void Aidge::PopImpl_cpu::forward() { diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp index de79e1978c61387019c7f5fa69932e4bbd52b5bc..811d13804cffdd2477fc830f1779b0fb6271eb0b 100644 --- a/src/operator/PowImpl.cpp +++ b/src/operator/PowImpl.cpp @@ -23,9 +23,9 @@ #include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::PowImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::PowImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::PowImpl_cpu::forward() { diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 005521461a40cf36547953ae9bcf5dbb2b0e1094..4bba09b6fbeea1552bf5b7cc7e491291345fca45 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp" #include "aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp" -Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::ReLUImpl_cpu::forward() { diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp index 82f96f112016d0498d241ee9ed14989066cbc979..a9f17a28a2a47ec7bc50820d587e8d0f359d2bb3 100644 --- a/src/operator/ReduceMeanImpl.cpp +++ b/src/operator/ReduceMeanImpl.cpp @@ -18,23 +18,6 @@ #include "aidge/operator/ReduceMean.hpp" #include "aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ReduceMeanImpl_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} -// Aidge::NbElts_t Aidge::ReduceMeanImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { -// // this implementation can be in-place -// return 0; -// } -// Aidge::NbElts_t Aidge::ReduceMeanImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { -// // this implementation can be in-place -// return 0; -// } -// Aidge::NbElts_t 
Aidge::ReduceMeanImpl3D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { -// // this implementation can be in-place -// return 0; -// } - void Aidge::ReduceMeanImpl_cpu::forward() { const ReduceMean_Op& op_ = dynamic_cast<const ReduceMean_Op&>(mOp); // Find the correct kernel type diff --git a/src/operator/ReshapeImpl.cpp b/src/operator/ReshapeImpl.cpp index 11df6f663d9a78476103d9671d9d428719c0126d..69c1c3135ce9f32d536bfd2c41b90eb55f7d8986 100644 --- a/src/operator/ReshapeImpl.cpp +++ b/src/operator/ReshapeImpl.cpp @@ -17,9 +17,9 @@ #include "aidge/utils/Types.h" #include "aidge/utils/ErrorHandling.hpp" -Aidge::NbElts_t Aidge::ReshapeImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::ReshapeImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::ReshapeImpl_cpu::forward() { diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp index 6b9aab31a9d61d2d7a5ff89961de3fa6a2b5ebd2..d0b58702c73f01fb62114d335f5c2342908542ea 100644 --- a/src/operator/ScalingImpl.cpp +++ b/src/operator/ScalingImpl.cpp @@ -21,9 +21,9 @@ #include "aidge/backend/cpu/data/GetCPUPtr.h" #include <vector> -Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::ScalingImpl_cpu::forward() { diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp index 7322e08ba01bfb931382cf17691e705dfaeeb6c1..dd7ec26cb36777f79d382c815b60d2381544a0bd 100644 --- a/src/operator/SigmoidImpl.cpp +++ b/src/operator/SigmoidImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::SigmoidImpl_cpu::forward() { diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp index c1a6480c1e7c0d681abef12f06a57e140d1e9efd..47b13c4694cea22421811c889b5627e9f1362ac0 100644 --- a/src/operator/SliceImpl.cpp +++ b/src/operator/SliceImpl.cpp @@ -22,42 +22,6 @@ #include <cassert> #include <tuple> -Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } - -Aidge::NbElts_t Aidge::SliceImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), 
static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} - -void Aidge::SliceImpl_cpu::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - void Aidge::SliceImpl_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 5f5d7411b7bb28ae28480b39c8bfdf5674f877ed..240267613e557c20edcc00e81f4bf20d17d9962f 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::SoftmaxImpl_cpu::forward() { diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp index cb635cce517ef0fc6494e7570bad66e19da89aa2..edb8858fc4ac07fa5725d24688b22d64134afb0e 100644 --- a/src/operator/SqrtImpl.cpp +++ b/src/operator/SqrtImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp" #include "aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp" -Aidge::NbElts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::SqrtImpl_cpu::forward() { diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp index 475f8cb8704739e091f0b8f01ffce680fd851e1f..ffddb59ee3373c4a0a6c2653747744a43fd471d9 100644 --- a/src/operator/SubImpl.cpp +++ b/src/operator/SubImpl.cpp @@ -23,9 +23,9 @@ #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::SubImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::SubImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void Aidge::SubImpl_cpu::forward() { diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp index c4658440ab00086be6a469c19d5ea89771857fb1..44e180739ed86e25d4be6d0beb693f73bdadbf35 100644 --- a/src/operator/TanhImpl.cpp +++ b/src/operator/TanhImpl.cpp @@ -22,9 +22,9 @@ #include "aidge/backend/cpu/operator/TanhImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::Elts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // this implementation can be in-place - return 0; + return Elts_t::DataElts(0); } void 
Aidge::TanhImpl_cpu::forward() { diff --git a/src/operator/TransposeImpl.cpp b/src/operator/TransposeImpl.cpp index 1fc4458ccb85e4776228a2bf9e1c73589c201a35..710e67b4f5aaa5261a111a8e131a0dd740694a4b 100644 --- a/src/operator/TransposeImpl.cpp +++ b/src/operator/TransposeImpl.cpp @@ -21,27 +21,6 @@ #include "aidge/backend/cpu/operator/TransposeImpl.hpp" #include "aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::TransposeImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} -Aidge::NbElts_t Aidge::TransposeImpl3D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} -Aidge::NbElts_t Aidge::TransposeImpl4D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} -Aidge::NbElts_t Aidge::TransposeImpl5D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} -Aidge::NbElts_t Aidge::TransposeImpl6D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; -} - void Aidge::TransposeImpl2D_cpu::forward() { // Find the correct kernel type auto kernelFunc = diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp index a1a749d805a45361c671544f5c94aed3421e557d..8c8c1dff3d74c2fce97abd8c3d88bf9840706ee4 100644 --- a/unit_tests/operator/Test_BatchNormImpl.cpp +++ b/unit_tests/operator/Test_BatchNormImpl.cpp @@ -14,6 +14,7 @@ #include "aidge/data/Tensor.hpp" #include "aidge/operator/BatchNorm.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/backend/cpu.hpp" diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp index c0e9be1c6062eaf311d5eaf2515df2b4fd2b8a9e..63a11d19a025b5560075c4b85123d645522da09e 100644 --- a/unit_tests/operator/Test_MetaOperator.cpp +++ b/unit_tests/operator/Test_MetaOperator.cpp @@ -23,6 +23,8 @@ #include "aidge/operator/MetaOperatorDefs.hpp" #include "aidge/operator/Pad.hpp" #include "aidge/operator/Pop.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/ParallelScheduler.hpp" using namespace Aidge; @@ -206,7 +208,7 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( Array2D<float, 16, 32>{}); std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( - Array2D<float, 1, 64>{}); + Array2D<float, 32, 64>{}); std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( Array2D<float, 64, 32>{}); std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( @@ -233,7 +235,7 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { g->setBackend("cpu"); auto scheduler = SequentialScheduler(g); - scheduler.forward(true, true); + scheduler.forward(true); g->save("lstm_outside_dims", true, true); @@ -243,10 +245,10 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler(); microGraphScheduler->saveSchedulingDiagram("lstm_scheduling"); - REQUIRE(op->getNbConsumedData(0) == 512); - REQUIRE(op->getNbConsumedData(1) == 32768); - REQUIRE(op->getNbProducedData(0) == 1088); - REQUIRE(op->getNbProducedData(1) == 1088); + REQUIRE(op->getNbConsumedData(0).data == 512); + REQUIRE(op->getNbConsumedData(1).data == 32768); + 
REQUIRE(op->getNbProducedData(0).data == 34816); + REQUIRE(op->getNbProducedData(1).data == 34816); REQUIRE(microGraphScheduler->getStaticScheduling(0).size() == 26); REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24); REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24); @@ -350,7 +352,7 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { g->save("lstm_seq", true, true); auto scheduler = SequentialScheduler(g); - scheduler.forward(true, true); + scheduler.forward(true); scheduler.saveSchedulingDiagram("lstm_seq_schedule"); std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( @@ -365,7 +367,7 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); } - SECTION("LSTM(forward_values_seq_flatten)") { + SECTION("LSTM(forward_values_seq_flatten)(sequential)") { auto pop = Pop(); auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); @@ -418,8 +420,72 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") { {0.67162132, 0.67162132, 0.67162132}}}); auto scheduler = SequentialScheduler(myGraph); - scheduler.forward(true, true); - scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule"); + scheduler.generateScheduling(); + scheduler.saveStaticSchedulingDiagram("lstm_static_schedule"); + scheduler.forward(true); + scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule_seq"); + + op->getOutput(0)->print(); + myHiddenState->print(); + + REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState)); + } + SECTION("LSTM(forward_values_seq_flatten)(parallel)") { + auto pop = Pop(); + auto myLSTM = LSTM(2, 3, 2, true, "ltsm"); + auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator()); + + // Here we test the LSTM as if it were flattened in the graph. + // We simply borrow its micro-graph into our larger myGraph graph.
+ auto myGraph = std::make_shared<GraphView>(); + pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0); + myGraph->add(op->getMicroGraph()); + myGraph->add(pop); + + REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8); + REQUIRE(myLSTM->nbData() == 1); + REQUIRE(myLSTM->nbOutputs() == 2); + + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>( + Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}}); + std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>( + Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}}); + std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>( + Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}}); + std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>( + Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}}); + + pop->getOperator()->associateInput(0, myInput); + op->associateInput(17, myInit); + op->associateInput(18, myInit); + + // Weights X + auto prodX = Producer(myInitW); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1); + prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1); + // Weights H + auto prodH = Producer(myInitR); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1); + prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1); + myGraph->add({prodX, prodH}); + + myGraph->setDataType(DataType::Float32); + myGraph->setBackend("cpu"); + myGraph->save("lstm_seq_flatten", true, true); + + std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>( + Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372}, + {0.49801484, 0.49801484, 0.49801484}, + {0.67162132, 0.67162132, 0.67162132}}}); + + auto scheduler = ParallelScheduler(myGraph); + scheduler.generateScheduling(); + scheduler.forward(true); + scheduler.saveSchedulingDiagram("lstm_seq_flatten_schedule_par"); op->getOutput(0)->print(); myHiddenState->print(); diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp index 03a592e52b7d057065353a7d99c088d9831c67c7..b7584ad069336a270ed07c32d4c07552888b6587 100644 --- a/unit_tests/operator/Test_PaddedConv.cpp +++ b/unit_tests/operator/Test_PaddedConv.cpp @@ -16,6 +16,7 @@ #include "aidge/data/Tensor.hpp" #include "aidge/operator/MetaOperator.hpp" #include "aidge/operator/MetaOperatorDefs.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/backend/cpu.hpp" diff --git a/unit_tests/recipies/Test_ConstantFolding.cpp b/unit_tests/recipies/Test_ConstantFolding.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c4866b1258702b93a1bce80501d9acd094a65741 --- /dev/null +++ b/unit_tests/recipies/Test_ConstantFolding.cpp @@ -0,0 +1,85 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/recipes/Recipes.hpp" +#include "aidge/operator/Add.hpp" +#include "aidge/operator/MatMul.hpp" +#include "aidge/operator/Producer.hpp" +#include "aidge/graph/OpArgs.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/utils/TensorUtils.hpp" +#include <cstddef> + +using namespace Aidge; + +TEST_CASE("[ConstantFolding] test") { + // generate the original GraphView + auto matmul0 = MatMul("matmul0"); + auto add0 = Add(2, "add0"); + auto matmul1 = MatMul("matmul1"); + auto add1 = Add(2, "add1"); + + auto b0 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B0", true); + auto w0 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}}}), "W0", true); + auto b1 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B1", true); + auto w1 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}),"W1", true); + auto input = Producer(std::make_shared<Tensor>(Array2D<float,2,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}), "input", true); + + input->addChild(matmul0, 0, 0); + w0->addChild(matmul0, 0, 1); + + matmul0->addChild(add0, 0, 0); + b0->addChild(add0, 0, 1); + + add0->addChild(matmul1, 0, 0); + w1->addChild(matmul1, 0, 1); + + matmul1->addChild(add1, 0, 0); + b1->addChild(add1, 0, 1); + + auto g = std::make_shared<GraphView>(); + g->add({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}); + g->setBackend("cpu"); + g->forwardDims(); + + // Check original graph + REQUIRE(g->getNodes() == + std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1})); + REQUIRE(((matmul0->getParent(0) == input) && (matmul0->getParent(1) == w0))); + REQUIRE(((add0->getParent(0) == matmul0) && (add0->getParent(1) == b0))); + REQUIRE(((matmul1->getParent(0) == add0) && (matmul1->getParent(1) == w1))); + REQUIRE(((add1->getParent(0) == matmul1) && (add1->getParent(1) == b1))); + + auto scheduler = SequentialScheduler(g); + scheduler.forward(); + + const std::shared_ptr<Tensor> result = std::make_shared<Tensor>(Array2D<float,2,5>{{ + { 1201.000000, 1532.000000, 1863.000000, 2194.000000, 785.000000}, + { 2501.000000, 3207.000000, 3913.000000, 4619.000000, 1735.000000} + }}); + + auto add1Op = std::static_pointer_cast<Add_Op>(add1->getOperator()); + REQUIRE(approxEq<float>(*(add1Op->getOutput(0)), *result)); + + // Transform GraphView inplace + constantFolding(g); + + // Check new GraphView + std::set<std::shared_ptr<Node>> newNodes = g->getNodes(); + REQUIRE(newNodes != std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1})); + REQUIRE(newNodes.size() == 1); + REQUIRE((*newNodes.cbegin())->type() == "Producer"); + + auto prodOp = std::static_pointer_cast<Producer_Op>((*newNodes.cbegin())->getOperator()); + REQUIRE(approxEq<float>(*(prodOp->getOutput(0)), *result)); +} diff --git a/unit_tests/recipies/Test_FuseBatchNorm.cpp b/unit_tests/recipies/Test_FuseBatchNorm.cpp index 82eec7f0c248b51b8447706168675f19116dbdf8..68a01541894ba25a8841343d2b3943ccc08c7a9d 100644 --- a/unit_tests/recipies/Test_FuseBatchNorm.cpp +++ b/unit_tests/recipies/Test_FuseBatchNorm.cpp @@ -19,7 +19,7 @@ #include "aidge/operator/BatchNorm.hpp" #include "aidge/operator/Producer.hpp" 
#include "aidge/recipes/Recipes.hpp" -#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/data/Tensor.hpp" diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp index 5141e4386d46c181a1adc6f65c4820a60fafed85..a8a384f611a8cf99a0aa94c58e9bcd5955f698c4 100644 --- a/unit_tests/recipies/Test_HorizontalTiling.cpp +++ b/unit_tests/recipies/Test_HorizontalTiling.cpp @@ -17,7 +17,7 @@ #include "aidge/operator/Conv.hpp" #include "aidge/operator/ReLU.hpp" #include "aidge/recipes/Recipes.hpp" -#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/operator/Concat.hpp" diff --git a/unit_tests/scheduler/Test_CastMove.cpp b/unit_tests/scheduler/Test_CastMove.cpp index 1c46ee3b760644b1aa71a75900a1c198660cfa43..5ca2cd9de4dcc9dab2c78f7ae1e1bf3090db8f2b 100644 --- a/unit_tests/scheduler/Test_CastMove.cpp +++ b/unit_tests/scheduler/Test_CastMove.cpp @@ -18,7 +18,7 @@ #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" -#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" #include "aidge/recipes/Recipes.hpp" #include "aidge/backend/cpu.hpp" diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 525dbf43fe9d80550fbd5e089efa4f2cf56cf5f1..0bbe59643df050759c209878135da67a0c94d6ce 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -17,7 +17,8 @@ #include "aidge/graph/Node.hpp" #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" -#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/scheduler/SequentialScheduler.hpp" +#include "aidge/scheduler/ParallelScheduler.hpp" #include "aidge/backend/cpu.hpp" #include "aidge/recipes/GraphViewHelper.hpp" @@ -206,7 +207,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { SECTION("Test Residual graph") { } - SECTION("Test Recurrent graph") { + SECTION("Test Recurrent graph (sequential)") { std::shared_ptr<Tensor> in = std::make_shared<Tensor>( Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>( @@ -233,9 +234,54 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { g->setDataType(Aidge::DataType::Int32); g->save("graphRecurrent"); g->forwardDims(); + SequentialScheduler scheduler(g); - REQUIRE_NOTHROW(scheduler.forward(true, true)); - scheduler.saveSchedulingDiagram("schedulingRecurrent"); + REQUIRE_NOTHROW(scheduler.forward(true)); + scheduler.saveStaticSchedulingDiagram("static_schedule"); + scheduler.saveSchedulingDiagram("schedulingRecurrent_seq"); + + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( + Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}}); + std::shared_ptr<Tensor> result = + std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0)); + result->print(); + expectedOutput->print(); + bool equal = (*result == *expectedOutput); + REQUIRE(equal); + } + + + SECTION("Test Recurrent graph (parallel)") { + std::shared_ptr<Tensor> in = std::make_shared<Tensor>( + Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}}); + std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>( + Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}}); + std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>( + Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}}); + + auto add1 = Add(2, "add1"); + auto mem = Memorize(3, 
"mem1"); + auto add2 = Add(2, "add2"); + auto bias = Producer(biasTensor, "bias"); + auto init = Producer(initTensor, "init"); + auto input = Producer(in, "input"); + + std::shared_ptr<GraphView> g = Sequential({add1, mem, add2}); + init->addChild(mem, 0, 1); + mem->addChild(add1, 1, 1); + bias->addChild(add2, 0, 1); + input->addChild(add1, 0, 0); + // Update GraphView inputs/outputs following previous connections: + g->add({mem, add1, add2, init, bias, input}); + + g->setBackend("cpu"); + g->setDataType(Aidge::DataType::Int32); + g->save("graphRecurrent"); + g->forwardDims(); + + ParallelScheduler scheduler(g); + REQUIRE_NOTHROW(scheduler.forward(true)); + scheduler.saveSchedulingDiagram("schedulingRecurrent_par"); std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>( Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}}); @@ -300,7 +346,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { SequentialScheduler scheduler(g); std::vector<std::shared_ptr<Aidge::Tensor>> dataIn = {inputTensor}; - REQUIRE_NOTHROW(scheduler.forward(true, false, dataIn)); + REQUIRE_NOTHROW(scheduler.forward(true, dataIn)); scheduler.saveSchedulingDiagram("schedulingSequential");