diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
index ed838a94cc0c0238a870427c3b774b29f7818b09..d5e5561d02aacd8532f74d2bfd4ee2fb5a5b5dc3 100644
--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
@@ -25,6 +25,40 @@
 
 
 namespace Aidge {
+
+template <typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+stableMean(const T* vec, size_t size) {
+  T mean = 0;
+  for (size_t i = 0; i < size; ++i) {
+    mean = std::fma<T>(vec[i] - mean, static_cast<T>(1.0) / (i + 1), mean);
+  }
+  return mean;
+}
+
+// Overload for integer types: perform the mean computation in double
+template <typename T>
+typename std::enable_if<!std::is_floating_point<T>::value, double>::type
+stableMean(const T* vec, size_t size) {
+  double mean = 0;
+  for (size_t i = 0; i < size; ++i) {
+    mean = std::fma<double>(vec[i] - mean, 1.0 / (i + 1), mean);
+  }
+  return mean;
+}
+
+template <typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+castFromFloat(T value) {
+  return value;
+}
+
+template <typename T>
+typename std::enable_if<!std::is_floating_point<T>::value, T>::type
+castFromFloat(double value) {
+  return static_cast<T>(std::nearbyint(value));
+}
+
 template <class I, class O>
 void GlobalAveragePoolingImpl_cpu_forward_kernel(
     const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
@@ -49,12 +83,7 @@ void GlobalAveragePoolingImpl_cpu_forward_kernel(
     for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
       const I *filter_start = std::next(
           input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
-      I mean = 0;
-      for (size_t i = 0; i < in_channel_nb_elems; ++i) {
-        // Single pass numerically stable mean, using the fmaf
-        mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean);
-      }
-      output[batch * out_batch_nb_elems + channel] = mean;
+      output[batch * out_batch_nb_elems + channel] = castFromFloat<O>(stableMean<I>(filter_start, in_channel_nb_elems));
     }
   }
 }
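
Why the fma-based running mean: a naive sum-then-divide builds large partial
sums that drop the low-order bits of later addends, while stableMean() applies
the Welford-style update mean += (vec[i] - mean) / (i + 1), fused into a single
rounding per step by std::fma. A minimal standalone sketch (not part of this
patch; the loop and sizes are illustrative) showing the failure mode on float:

    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t n = 1u << 25; // 2^25 ones; a float sum stalls at 2^24
        float naive = 0.0f, running = 0.0f;
        for (std::size_t i = 0; i < n; ++i) {
            naive += 1.0f; // stops growing once the sum reaches 2^24
            running = std::fma(1.0f - running, 1.0f / (i + 1), running);
        }
        // the naive mean comes out near 0.5, the running mean stays at 1.0
        std::printf("naive: %f  running: %f\n", naive / n, running);
    }
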
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
index 5a143164d7e4fa2585ea72c38eaaa123f215d21a..864b89c4fa4667b70e43ed7436382e30bc150745 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
@@ -25,6 +25,40 @@
 #include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
+
+template <typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+stableMean(const T* vec, size_t len, size_t stride) {
+  T mean = 0;
+  for (size_t i = 0; i < len; ++i) {
+    mean = std::fma<T>(vec[i * stride] - mean, static_cast<T>(1.0) / (i + 1), mean);
+  }
+  return mean;
+}
+
+// Overload for integer types: perform the mean computation in double
+template <typename T>
+typename std::enable_if<!std::is_floating_point<T>::value, double>::type
+stableMean(const T* vec, size_t len, size_t stride) {
+  double mean = 0;
+  for (size_t i = 0; i < len; ++i) {
+    mean = std::fma<double>(vec[i * stride] - mean, 1.0 / (i + 1), mean);
+  }
+  return mean;
+}
+
+template <typename T>
+typename std::enable_if<std::is_floating_point<T>::value, T>::type
+castFromFloat(T value) {
+  return value;
+}
+
+template <typename T>
+typename std::enable_if<!std::is_floating_point<T>::value, T>::type
+castFromFloat(double value) {
+  return static_cast<T>(std::nearbyint(value));
+}
+
 template <class I, class O>
 void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
                                     DimSize_t /*keepDims*/,
@@ -50,12 +84,7 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
             for (std::size_t post = 0; post < stride_post; ++post) {
                 const std::size_t idx_i = pre * dim_i * stride_post + post;
                 const std::size_t idx_o = pre * stride_post + post;
-                O mean = 0;
-                for (std::size_t i = 0; i < dim_i; ++i) {
-                    // Single pass numerically stable mean, using the fmaf
-                    mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean);
-                }
-                output[idx_o]  = mean;
+                output[idx_o] = castFromFloat<O>(stableMean(input + idx_i, dim_i, stride_post));
             }
         }
     } else {
@@ -72,8 +101,9 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
             stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
         }
 
-        const I* inputAccumulation = input;
-        I* outputAccumulation = nullptr;
+        // Accumulate in the return type of stableMean<I>(), which is always floating point
+        const decltype(stableMean<I>(input, 0, 0))* inputAccumulation = nullptr;
+        decltype(stableMean<I>(input, 0, 0))* outputAccumulation = nullptr;
 
         for (const auto& axisInt : axes) {
             const std::size_t a = static_cast<std::size_t>(axisInt);
@@ -84,23 +114,23 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
                 for (std::size_t post = 0; post < stride_post[a]; ++post) {
                     const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
                     const std::size_t idx_o = pre * stride_post[a] + post;
-                    I mean = 0;
-                    for (std::size_t i = 0; i < dim_i; ++i) {
-                        // Single pass numerically stable mean, using the fmaf
-                        mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean);
+                    if (inputAccumulation == nullptr) {
+                        outputAccumulation[idx_o] = stableMean<I>(input + idx_i, dim_i, stride_post[a]);
+                    }
+                    else {
+                        outputAccumulation[idx_o] = stableMean(inputAccumulation + idx_i, dim_i, stride_post[a]);
                     }
-                    outputAccumulation[idx_o] = mean;
                 }
             }
             std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
-            if (inputAccumulation != input) {
+            if (inputAccumulation != nullptr) {
                 delete[] inputAccumulation;
             }
             inputAccumulation = outputAccumulation;
         }
 
-        // Copy elements from inputAccumulation to output while dividing by divisor
-        std::copy(inputAccumulation, inputAccumulation + outputElements, output);
+        std::transform(inputAccumulation, inputAccumulation + outputElements, output,
+            [](auto value) { return castFromFloat<O>(value); });
         if (outputAccumulation) {
             delete[] outputAccumulation;
         }
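
The double return type and castFromFloat() exist so that integer tensors are
averaged without intermediate truncation: stableMean() accumulates in double
(including across the chained scratch buffers of the multi-axis path above),
and the single conversion back to the integer output type rounds via
std::nearbyint() instead of being cut off by an integer assignment. A
standalone sketch of that contract (plain C++, not the Aidge API; the values
are illustrative):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::int8_t vec[3] = {1, 2, 2}; // true mean = 5/3 ~ 1.667
        double mean = 0.0;
        for (int i = 0; i < 3; ++i) {
            // same update as stableMean(): one fused rounding per step
            mean = std::fma(vec[i] - mean, 1.0 / (i + 1), mean);
        }
        // rounds to 2; an integer accumulator or a plain cast would yield 1
        const auto out = static_cast<std::int8_t>(std::nearbyint(mean));
        std::printf("mean = %f -> %d\n", mean, out);
    }
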
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index 271a1e2f9860d92f840916f6b2e396993b0bea39..23bacda590dfed82eca623016787388e56ceed79 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -9,70 +9,79 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
 #include <cmath>
 #include <cstdlib>
 #include <memory>
+#include <random>
+
+#include <catch2/catch_test_macros.hpp>
 
-#include "aidge/utils/TensorUtils.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/data/Tensor.hpp"
+#include "aidge/filler/Filler.hpp"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/operator/FC.hpp"
+#include "aidge/operator/Identity.hpp"
 #include "aidge/operator/MetaOperator.hpp"
 #include "aidge/operator/MetaOperatorDefs.hpp"
 #include "aidge/operator/Pad.hpp"
 #include "aidge/operator/Pop.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
+#include "aidge/operator/Stack.hpp"
 #include "aidge/scheduler/ParallelScheduler.hpp"
+#include "aidge/scheduler/SequentialScheduler.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
-  SECTION("PaddedConv(forward)") {
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(
-            Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
-                                          {1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
-                                          {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}},
-                                         {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01},
-                                          {6.12586558e-01, 8.09918671e-02, 8.40649383e-01},
-                                          {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}},
-                                         {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01},
-                                          {1.36960611e-01, 4.64806728e-01, 4.85150000e-01},
-                                          {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}},
-
-                                        {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01},
-                                          {1.56806559e-01, 6.22280998e-01, 3.15827594e-01},
-                                          {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}},
-                                         {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01},
-                                          {1.67925807e-01, 2.68356150e-01, 6.28875602e-01},
-                                          {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}},
-                                         {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01},
-                                          {7.63047502e-01, 5.12539506e-02, 9.77400493e-01},
-                                          {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}},
-
-                                        {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01},
-                                          {7.10897067e-02, 5.02579011e-01, 3.35236224e-01},
-                                          {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}},
-                                         {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01},
-                                          {1.59875539e-01, 9.13163381e-01, 3.59806060e-01},
-                                          {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}},
-                                         {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01},
-                                          {5.46298282e-01, 2.89698587e-01, 2.62612651e-01},
-                                          {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}},
-
-                                        {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01},
-                                          {8.67878485e-01, 2.93263422e-01, 8.03912714e-01},
-                                          {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}},
-                                         {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01},
-                                          {5.80538620e-01, 6.63031275e-01, 2.07247191e-01},
-                                          {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}},
-                                         {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01},
-                                          {7.34639028e-01, 2.84957200e-02, 9.70225217e-01},
-                                          {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}});
-    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(
-            Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}});
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{
+    SECTION("PaddedConv(forward)") {
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<double, 4, 3, 3, 3>{
+                {{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
+                   {1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
+                   {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}},
+                  {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01},
+                   {6.12586558e-01, 8.09918671e-02, 8.40649383e-01},
+                   {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}},
+                  {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01},
+                   {1.36960611e-01, 4.64806728e-01, 4.85150000e-01},
+                   {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}},
+
+                 {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01},
+                   {1.56806559e-01, 6.22280998e-01, 3.15827594e-01},
+                   {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}},
+                  {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01},
+                   {1.67925807e-01, 2.68356150e-01, 6.28875602e-01},
+                   {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}},
+                  {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01},
+                   {7.63047502e-01, 5.12539506e-02, 9.77400493e-01},
+                   {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}},
+
+                 {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01},
+                   {7.10897067e-02, 5.02579011e-01, 3.35236224e-01},
+                   {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}},
+                  {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01},
+                   {1.59875539e-01, 9.13163381e-01, 3.59806060e-01},
+                   {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}},
+                  {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01},
+                   {5.46298282e-01, 2.89698587e-01, 2.62612651e-01},
+                   {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}},
+
+                 {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01},
+                   {8.67878485e-01, 2.93263422e-01, 8.03912714e-01},
+                   {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}},
+                  {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01},
+                   {5.80538620e-01, 6.63031275e-01, 2.07247191e-01},
+                   {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}},
+                  {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01},
+                   {7.34639028e-01, 2.84957200e-02, 9.70225217e-01},
+                   {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<double, 4>{
+                {0.16884905, 0.27994487, 0.57227465, 0.06435205}});
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{
             // NCHW
             {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127},
                {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607},
@@ -108,93 +117,107 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
                {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958},
                {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177},
                {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434},
-               {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}});
-
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
-            Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273},
-                {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567},
-                {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523},
-                {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136},
-                {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}},
-
-                {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890},
-                {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475},
-                {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442},
-                {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438},
-                {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}},
-
-                {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092},
-                {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575},
-                {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146},
-                {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581},
-                {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}},
-
-                {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740},
-                {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107},
-                {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523},
-                {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123},
-                {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}},
-
-
-                {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229},
-                {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444},
-                {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241},
-                {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706},
-                {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}},
-
-                {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648},
-                {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705},
-                {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404},
-                {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069},
-                {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}},
-
-                {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888},
-                {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179},
-                {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316},
-                {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387},
-                {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}},
-
-                {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038},
-                {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408},
-                {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357},
-                {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303},
-                {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}});
-
-    std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv");
-    auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
-
-    std::shared_ptr<Node> myPad =
+               {0.71426058, 0.85032931, 0.90750818, 0.28768431,
+                0.4401146}}}}});
+
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<double, 2, 4, 5, 5>{
+            {{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273},
+               {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567},
+               {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523},
+               {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136},
+               {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}},
+
+              {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890},
+               {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475},
+               {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442},
+               {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438},
+               {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}},
+
+              {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092},
+               {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575},
+               {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146},
+               {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581},
+               {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}},
+
+              {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740},
+               {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107},
+               {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523},
+               {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123},
+               {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}},
+
+             {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229},
+               {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444},
+               {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241},
+               {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706},
+               {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}},
+
+              {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648},
+               {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705},
+               {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404},
+               {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069},
+               {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}},
+
+              {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888},
+               {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179},
+               {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316},
+               {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387},
+               {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}},
+
+              {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038},
+               {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408},
+               {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357},
+               {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303},
+               {3.16612267, 4.38248920, 5.23248482, 4.21292210,
+                2.86031270}}}}});
+
+        std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv");
+        auto convOp =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
+
+        std::shared_ptr<Node> myPad =
             Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0);
-    auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
-
-    convOp->setInput(1, myWeights);
-    convOp->setInput(2, myBias);
-
-    myPad->addChild(myConv, 0, 0);
-    padOp->setInput(0, myInput);
-
-    padOp->setDataType(DataType::Float64);
-    padOp->setBackend("cpu");
-    convOp->setDataType(DataType::Float64);
-    convOp->setBackend("cpu");
-
-    myPad->forward();
-    myConv->forward();
-    convOp -> getOutput(0) -> print();
-
-    double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr());
-    double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i < myOutput->size(); ++i) {
-        REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5);
-    }
+        auto padOp =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+
+        convOp->setInput(1, myWeights);
+        convOp->setInput(2, myBias);
+
+        myPad->addChild(myConv, 0, 0);
+        padOp->setInput(0, myInput);
+
+        padOp->setDataType(DataType::Float64);
+        padOp->setBackend("cpu");
+        convOp->setDataType(DataType::Float64);
+        convOp->setBackend("cpu");
+
+        myPad->forward();
+        myConv->forward();
+        convOp->getOutput(0)->print();
+
+        double *computedOutput =
+            static_cast<double *>(convOp->getOutput(0)->getImpl()->rawPtr());
+        double *expectedOutput =
+            static_cast<double *>(myOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < myOutput->size(); ++i) {
+            REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5);
+        }
 
-    std::shared_ptr<Node> myPaddedConv =
+        std::shared_ptr<Node> myPaddedConv =
             PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1});
-  }
+    }
     SECTION("LSTM(forward)") {
+
         auto pop = Pop();
         auto myLSTM = LSTM(32, 64, 0, true, "ltsm");
-        auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
+        auto op =
+            std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
         auto microGraph = op->getMicroGraph();
         microGraph->save("lstm", false, true);
@@ -209,14 +232,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array2D<float, 16, 32>{});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 32, 64>{});
-        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
-            Array2D<float, 64, 32>{});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 64, 64>{});
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array2D<float, 16, 32>{});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 32, 64>{});
+        std::shared_ptr<Tensor> myInitW =
+            std::make_shared<Tensor>(Array2D<float, 64, 32>{});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 64, 64>{});
 
         pop->addChild(myLSTM, 0, 0);
         pop->getOperator()->associateInput(0, myInput);
@@ -246,7 +269,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         microGraph->save("lstm_dims", true, true);
         REQUIRE(op->dimsForwarded());
 
-        auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
+        auto microGraphScheduler =
+            std::dynamic_pointer_cast<MetaOperator_Op>(op)
+                ->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_scheduling");
 
         REQUIRE(op->getNbConsumedData(0).data == 512);
@@ -257,11 +282,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(microGraphScheduler->getStaticScheduling(1).size() == 24);
         REQUIRE(microGraphScheduler->getStaticScheduling(15).size() == 24);
     }
+
     SECTION("LSTM(forward_values)") {
         auto myLSTM = LSTM(2, 3, 0, true, "ltsm");
-        auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
-        auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
+        auto microGraph =
+            std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
         microGraph->save("lstm", false, false);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
@@ -276,12 +304,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         op->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -308,12 +338,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         microGraph->save("lstm_values_dims", false, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
-                                     {0.25606447, 0.25606447, 0.25606447},
-                                     {0.40323776, 0.40323776, 0.40323776}}});
+            Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
+                                  {0.25606447, 0.25606447, 0.25606447},
+                                  {0.40323776, 0.40323776, 0.40323776}}});
 
-
-        auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
+        auto microGraphScheduler =
+            std::dynamic_pointer_cast<MetaOperator_Op>(op)
+                ->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling");
 
         op->getOutput(0)->print();
@@ -321,11 +352,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
     }
+
     SECTION("LSTM(forward_values_seq)") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
         auto myGraph = Sequential({pop, myLSTM});
-        auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
         REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
@@ -338,13 +371,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
+                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -371,9 +407,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         scheduler.saveSchedulingDiagram("lstm_seq_schedule");
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
-                                     {0.49801484, 0.49801484, 0.49801484},
-                                     {0.67162132, 0.67162132, 0.67162132}}});
+            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                  {0.49801484, 0.49801484, 0.49801484},
+                                  {0.67162132, 0.67162132, 0.67162132}}});
 
         myGraph->save("lstm_seq_mygraph", true, true);
 
@@ -382,10 +418,12 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
     }
+
     SECTION("LSTM(forward_values_seq_flatten)(sequential)") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
-        auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
         // Here we test LSTM as it is was flatten in the graph.
         // We just borrow its micro-graph into our larger myGraph graph.
@@ -405,13 +443,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
+                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -419,16 +460,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         // Weights X
         auto prodX = Producer(myInitW);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first,
+                        0,
+                        1);
         // Weights H
         auto prodH = Producer(myInitR);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first,
+                        0,
+                        1);
         myGraph->add({prodX, prodH});
 
         myGraph->setDataType(DataType::Float32);
@@ -436,9 +493,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myGraph->save("lstm_seq_flatten", true, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
-                                     {0.49801484, 0.49801484, 0.49801484},
-                                     {0.67162132, 0.67162132, 0.67162132}}});
+            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                  {0.49801484, 0.49801484, 0.49801484},
+                                  {0.67162132, 0.67162132, 0.67162132}}});
 
         auto scheduler = SequentialScheduler(myGraph);
         scheduler.generateScheduling();
@@ -454,7 +511,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     SECTION("LSTM(forward_values_seq_flatten)(parallel)") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
-        auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
         // Here we test LSTM as it is was flatten in the graph.
         // We just borrow its micro-graph into our larger myGraph graph.
@@ -474,13 +532,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
+                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -488,16 +549,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         // Weights X
         auto prodX = Producer(myInitW);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first,
+                        0,
+                        1);
         // Weights H
         auto prodH = Producer(myInitR);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first,
+                        0,
+                        1);
         myGraph->add({prodX, prodH});
 
         myGraph->setDataType(DataType::Float32);
@@ -505,9 +582,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myGraph->save("lstm_seq_flatten", true, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
-                                     {0.49801484, 0.49801484, 0.49801484},
-                                     {0.67162132, 0.67162132, 0.67162132}}});
+            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                  {0.49801484, 0.49801484, 0.49801484},
+                                  {0.67162132, 0.67162132, 0.67162132}}});
 
         auto scheduler = ParallelScheduler(myGraph);
         scheduler.generateScheduling();
@@ -519,4 +596,308 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         REQUIRE(approxEq<float>(*(op->getOutput(0)), *myHiddenState));
     }
-}
\ No newline at end of file
+
+    SECTION("Leaky(forward)(fixed)") {
+
+        constexpr auto inChannels = 10;
+        constexpr auto outChannels = 5;
+
+        constexpr auto beta = 0.95;
+        constexpr auto threshold = 1.0;
+        constexpr auto nbTimeSteps = 2;
+
+        auto myWeights =
+            std::make_shared<Tensor>(Array2D<float, outChannels, inChannels>{{
+                {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
+                {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1},
+                {0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 0.1, 0.2, 0.3, 0.4},
+                {0.4, 0.3, 0.2, 0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5},
+                {0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0},
+            }});
+
+        auto myWeights2 =
+            std::make_shared<Tensor>(Array2D<float, inChannels, outChannels>{{
+                {0.1, 0.2, 0.3, 0.4, 0.5},
+                {0.6, 0.7, 0.8, 0.9, 1.0},
+                {1.0, 0.9, 0.8, 0.7, 0.6},
+                {0.5, 0.4, 0.3, 0.2, 0.1},
+                {0.5, 0.6, 0.7, 0.8, 0.9},
+                {1.0, 0.1, 0.2, 0.3, 0.4},
+                {0.4, 0.3, 0.2, 0.1, 0.0},
+                {0.1, 0.2, 0.3, 0.4, 0.5},
+                {0.9, 0.8, 0.7, 0.6, 0.5},
+                {0.4, 0.3, 0.2, 0.1, 0.0},
+            }});
+
+        auto myInput = std::make_shared<Tensor>(Array2D<float, 2, 10>{{
+            {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0},
+            {1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1},
+        }});
+
+        // snnTorch reference: LIF membrane potential after fc1, time step 1
+        auto expectedOutputlif1ts1 =
+            std::make_shared<Tensor>(Array2D<float, 2, 5>{{
+                {3.850, 2.2000, 2.6500, 1.5000, 1.6500},
+                {2.200, 3.8500, 3.4000, 1.2500, 3.3000},
+            }});
+
+        auto expectedOutputfc2ts1 =
+            std::make_shared<Tensor>(Array2D<float, 2, 10>{{
+                {1.5000, 4.0000, 4.0000, 1.5000, 3.5000,
+                 2.0000, 1.0000, 1.5000, 3.5000, 1.0000},
+                {1.5000, 4.0000, 4.0000, 1.5000, 3.5000,
+                 2.0000, 1.0000, 1.5000, 3.5000, 1.0000},
+            }});
+
+        auto expectedOutputlif1ts2 =
+            std::make_shared<Tensor>(Array2D<float, 2, 5>{{
+                {6.5075, 3.2900, 4.1675, 1.9250, 2.2175},
+                {3.2900, 6.5075, 5.6300, 1.4375, 5.4350},
+            }});
+
+        // NOTE: Same output as before, because every channel's membrane
+        // potential is above the threshold, so the LIF neuron fires at
+        // every timestep for every channel.
+        auto expectedOutputfc2ts2 =
+            std::make_shared<Tensor>(Array2D<float, 2, 10>{{
+                {1.5000, 4.0000, 4.0000, 1.5000, 3.5000,
+                 2.0000, 1.0000, 1.5000, 3.5000, 1.0000},
+                {1.5000, 4.0000, 4.0000, 1.5000, 3.5000,
+                 2.0000, 1.0000, 1.5000, 3.5000, 1.0000},
+            }});
+
+        auto init = std::make_shared<Tensor>(Array2D<float, 2, 5>{});
+        uniformFiller<float>(init, 0.0, 0.0);
+
+        auto fc1 = FC(inChannels, outChannels, true, "myfc");
+        auto fc2 = FC(outChannels, inChannels, true, "fc2");
+        // NOTE: Account for init step by adding 1 to the max timestep
+        // parameter.
+        auto lif1 = Leaky(nbTimeSteps + 1, beta, threshold, "leaky");
+
+        // associateInput() does not work here, so set the weights via setOutput()
+        fc1->input(1).first->getOperator()->setOutput(0, myWeights);
+        fc2->input(1).first->getOperator()->setOutput(0, myWeights2);
+
+        auto fc1Op =
+            std::static_pointer_cast<OperatorTensor>(fc1->getOperator());
+        auto lif1Op =
+            std::static_pointer_cast<MetaOperator_Op>(lif1->getOperator());
+        auto fc2Op =
+            std::static_pointer_cast<OperatorTensor>(fc2->getOperator());
+
+        fc1Op->associateInput(0, myInput);
+        lif1Op->associateInput(1, init);
+        lif1Op->associateInput(2, init);
+
+        fc1->addChild(lif1, 0, 0);
+        lif1->addChild(fc2, 1, 0);
+
+        auto g = std::make_shared<GraphView>();
+        g->add({fc1, lif1, fc2});
+        g->compile("cpu", DataType::Float32);
+        auto scheduler = SequentialScheduler(g);
+
+        // Forward 1 (simulate timestep 0)
+        scheduler.forward(true);
+        REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)),
+                                *(expectedOutputlif1ts1)));
+        REQUIRE(
+            approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts1)));
+
+        // Forward 2 (simulate timestep 1)
+        scheduler.forward(true);
+        REQUIRE(approxEq<float>(*(lif1Op->getOutput(0)),
+                                *(expectedOutputlif1ts2)));
+        REQUIRE(
+            approxEq<float>(*(fc2Op->getOutput(0)), *(expectedOutputfc2ts2)));
+    }
+
+    SECTION("Leaky(forward)") {
+
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_real_distribution<float> valueDist(
+            0.1f,
+            1.1f); // Random float distribution between 0.1 and 1.1
+        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                               std::size_t(4));
+        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(3),
+                                                              std::size_t(3));
+        std::uniform_int_distribution<int> boolDist(0, 1);
+        std::uniform_real_distribution<float> betaDist(0.0f, 1.0f);
+
+        const std::size_t nbDims = nbDimsDist(gen);
+        Log::info("Nbdims : {}", nbDims);
+        std::vector<std::size_t> dims;
+        for (std::size_t i = 0; i < nbDims; ++i) {
+            dims.push_back(dimSizeDist(gen));
+        }
+        Log::info("timesteps : {}", dims[0]);
+        Log::info("dimensions : ");
+        for (auto dim : dims) {
+            Log::info("{}", dim);
+        }
+
+        const auto nbTimeSteps = dims[0];
+        const auto beta = betaDist(gen);
+
+        auto myLeaky = Leaky(nbTimeSteps, beta, 1.0, "leaky");
+        auto op =
+            std::static_pointer_cast<MetaOperator_Op>(myLeaky->getOperator());
+        auto mem_rec = Stack(nbTimeSteps, "mem_rec");
+        auto spk_rec = Stack(nbTimeSteps, "spk_rec");
+        auto pop = Pop("popinput");
+
+        // Here we test Leaky as if it were flattened in the graph.
+        // We just borrow its micro-graph into our larger myGraph graph.
+        auto myGraph = std::make_shared<GraphView>();
+
+        pop->addChild(op->getMicroGraph()->getOrderedInputs()[0].first, 0, 0);
+        // 0 for mem 1 for stack
+        op->getMicroGraph()->getOrderedOutputs()[1].first->addChild(mem_rec,
+                                                                    0,
+                                                                    0);
+        op->getMicroGraph()->getOrderedOutputs()[0].first->addChild(spk_rec,
+                                                                    0,
+                                                                    0);
+        for (auto node : op->getMicroGraph()->getOrderedOutputs()) {
+            Log::info("name  of output {}", node.first->name());
+        }
+
+        myGraph->add(pop);
+        myGraph->add(op->getMicroGraph());
+        myGraph->add(mem_rec);
+        myGraph->add(spk_rec);
+        myGraph->save("mg", true, true);
+
+        // 3 inputs
+        REQUIRE(myLeaky->nbInputs() == 3);
+        REQUIRE(myLeaky->inputCategory(0) == InputCategory::Data);
+        // Two spike outputs connected to nothing, plus the real Add node outputs
+        REQUIRE(myLeaky->nbOutputs() == 4);
+
+        // Generate input
+        std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
+        T0->setDataType(DataType::Float32);
+        T0->setBackend("cpu");
+
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>();
+        expectedOutput->setDataType(DataType::Float32);
+        expectedOutput->setBackend("cpu");
+
+        const auto nb_elements =
+            std::accumulate(dims.cbegin(),
+                            dims.cend(),
+                            std::size_t(1),
+                            std::multiplies<std::size_t>());
+        float *input = new float[nb_elements];
+        float *result = new float[nb_elements];
+
+        for (std::size_t i = 0; i < nb_elements; ++i) {
+            input[i] = valueDist(gen);
+        }
+        T0->resize(dims);
+        T0->getImpl()->setRawPtr(input, nb_elements);
+        T0->print();
+
+        // Elements popped at each time step
+        auto nbElementsPerTimeStep = nb_elements / dims[0];
+
+        // Init
+        for (std::size_t i = 0; i < nbElementsPerTimeStep; ++i) {
+            result[i] = input[i];
+        }
+
+        // Recurrence: mem[t] = beta * mem[t-1] + in[t] - reset
+        for (std::size_t i = 1; i < dims[0]; ++i) {
+            auto offset = nbElementsPerTimeStep * i;
+            auto prev = nbElementsPerTimeStep * (i - 1);
+            for (std::size_t j = 0; j < nbElementsPerTimeStep; ++j) {
+                auto reset = (result[prev + j] > 1.0 ? 1 : 0);
+                result[offset + j] =
+                    result[prev + j] * beta + input[offset + j] - reset;
+            }
+        }
+
+        expectedOutput->resize(dims);
+        expectedOutput->getImpl()->setRawPtr(result, nb_elements);
+        Log::info("Expected ouptut : ");
+        expectedOutput->print();
+
+        auto initMemdims =
+            std::vector<std::size_t>(dims.begin() + 1, dims.end());
+        Log::info("dimensions : ");
+        for (auto dim : initMemdims) {
+            Log::info("{}", dim);
+        }
+
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(initMemdims);
+        myInitR->setDataType(DataType::Float32);
+        myInitR->setBackend("cpu");
+        uniformFiller<float>(myInitR, 0, 0);
+
+        pop->getOperator()->associateInput(0, T0);
+        op->associateInput(1, myInitR);
+        op->associateInput(2, myInitR);
+
+        myGraph->compile("cpu", DataType::Float32);
+
+        auto scheduler = SequentialScheduler(myGraph);
+        REQUIRE_NOTHROW(scheduler.generateScheduling());
+        REQUIRE_NOTHROW(scheduler.forward(true));
+
+        auto memOp =
+            std::static_pointer_cast<OperatorTensor>(spk_rec->getOperator());
+        REQUIRE(approxEq<float>(*(memOp->getOutput(0)), *(expectedOutput)));
+    }
+}
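
For reference, the hand-rolled expected-output loop in "Leaky(forward)" is the
standard hard-reset leaky integrate-and-fire recurrence: the membrane decays by
beta, integrates the input, and loses 1.0 whenever the previous step's
potential exceeded the threshold. Reduced to a single neuron it looks like this
sketch (plain C++, not the Aidge API; beta, threshold and the inputs are
illustrative):

    #include <cstdio>

    int main() {
        const float beta = 0.95f, threshold = 1.0f;
        const float in[4] = {0.8f, 0.8f, 0.8f, 0.8f};
        float mem = 0.0f;
        for (int t = 0; t < 4; ++t) {
            // the reset (spike) is driven by the previous step's potential
            const float reset = (mem > threshold) ? 1.0f : 0.0f;
            mem = beta * mem + in[t] - reset;
            std::printf("t=%d mem=%.4f reset=%.0f\n", t, mem, reset);
        }
    }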