diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 694275067b8b9708bab868da83688716f34e4fae..02f64e3c1da175543f61bf7845ff7a45c6ccea1b 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -19,13 +19,12 @@
 
 #include "aidge/backend/cpu/operator/ArgMaxImpl.hpp"
 #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
-#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
 #include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
 #include "aidge/backend/cpu/operator/ClipImpl.hpp"
+#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
-#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
@@ -34,21 +33,21 @@
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/LnImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PowImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
 #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
-#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
-#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
+#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
 
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
 
 #endif /* AIDGE_CPU_IMPORTS_H_ */
-
diff --git a/include/aidge/backend/cpu/data/Broadcasting.hpp b/include/aidge/backend/cpu/data/Broadcasting.hpp
index cb969cb54806a204072763a1672ee5266fb6347e..bd648b0c2c05bd888588808896221f49ae079a51 100644
--- a/include/aidge/backend/cpu/data/Broadcasting.hpp
+++ b/include/aidge/backend/cpu/data/Broadcasting.hpp
@@ -16,33 +16,43 @@
 
 namespace Aidge {
 
-// Function to broadCast an input dims vector into the same size as an outputDims vector
-
-    /**
-     * @brief  Broadcast an input dims vector into the same size as an outputDims vector
-     * @details The missing dimensions would be completed by 1
-     * @param outputDims The vector of dimensions to follow 
-     * @param dimsToBroadcast The vecotr of dimensions to braodcast
-     * @return std::vector<std::size_t> a broadcasted vector by addding 1 on the missing dimensions.
-     */
-    std::vector<std::size_t> getBroadcastedDims(const std::vector<std::size_t>& outputDims, const std::vector<std::size_t>& dimsToBroadcast);
-
-    /**
-     * @brief Get a vector of indexes along the dimensions vector from a flattened index
-     * @param dimensions The vector of dimensions we want the indexes on
-     * @param idx The flattened index
-     * @return std::vector<std::size_t> vector of indexes along dimensions.
-     */
-    std::vector<std::size_t> getMultiDimIndices(const std::vector<std::size_t>& dimensions, std::size_t idx);
-
-    // Function to get a flattened index from multi-dimensional indices
-    /**
-     * @brief Get a flattened index the dimensions vector from a given vector of indices on a broadcasted vector
-     * @param dimensions The vector of dimensions we want the flattened index on
-     * @param indices The vector of indices we want to flatten
-     * @return std::size_t The flattened index on the dimensions vector
-     */
-    std::size_t getFlattenedIndex(const std::vector<std::size_t>& dimensions, const std::vector<std::size_t>& indices);
+// Function to broadcast an input dims vector into the same size as an
+// outputDims vector
+
+/**
+ * @brief Broadcast an input dims vector into the same size as an outputDims
+ * vector
+ * @details The missing dimensions are filled with 1
+ * @param outputDims The vector of dimensions to follow
+ * @param dimsToBroadcast The vector of dimensions to broadcast
+ * @return std::vector<std::size_t> the broadcast vector, padded with 1 on
+ * the missing dimensions.
+ */
+std::vector<std::size_t>
+getBroadcastedDims(const std::vector<std::size_t> &outputDims,
+                   const std::vector<std::size_t> &dimsToBroadcast);
+
+/**
+ * @brief Get the vector of indices along each dimension corresponding to a
+ * flattened index
+ * @param dimensions The vector of dimensions we want the indices on
+ * @param idx The flattened index
+ * @return std::vector<std::size_t> vector of indices along each dimension.
+ */
+std::vector<std::size_t>
+getMultiDimIndices(const std::vector<std::size_t> &dimensions,
+                   std::size_t idx);
+
+// Function to get a flattened index from multi-dimensional indices
+/**
+ * @brief Get a flattened index into the dimensions vector from a given
+ * vector of indices on a broadcast vector
+ * @param dimensions The vector of dimensions we want the flattened index on
+ * @param indices The vector of indices we want to flatten
+ * @return std::size_t The flattened index on the dimensions vector
+ */
+std::size_t getFlattenedIndex(const std::vector<std::size_t> &dimensions,
+                              const std::vector<std::size_t> &indices);
 
 } // namespace Aidge
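// A minimal sketch of how the two index helpers declared above behave,
// inferred from their Doxygen contracts and from how the element-wise
// kernels later in this patch use them. The real definitions live in the
// backend's source files and may differ in detail; the names carry a
// "Sketch" suffix to make that explicit.

#include <cstddef>
#include <vector>

// Decompose a flat index into per-dimension indices (last dim fastest).
std::vector<std::size_t>
getMultiDimIndicesSketch(const std::vector<std::size_t> &dims,
                         std::size_t idx) {
    std::vector<std::size_t> indices(dims.size());
    for (std::size_t d = dims.size(); d-- > 0;) {
        indices[d] = idx % dims[d];
        idx /= dims[d];
    }
    return indices;
}

// Re-flatten, mapping broadcast (size-1) dimensions back to index 0 so a
// broadcast input re-reads the same element along the expanded axis.
std::size_t
getFlattenedIndexSketch(const std::vector<std::size_t> &dims,
                        const std::vector<std::size_t> &indices) {
    std::size_t flat = 0;
    for (std::size_t d = 0; d < dims.size(); ++d) {
        flat = flat * dims[d] + ((dims[d] == 1) ? 0 : indices[d]);
    }
    return flat;
}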
 
diff --git a/include/aidge/backend/cpu/operator/AbsImpl.hpp b/include/aidge/backend/cpu/operator/AbsImpl.hpp
index 8233d47c4d1e2dc7bf724600ec083bcaa0d667e9..046fc709d019d0fe8c97ea95c1cc3267c2a8b35e 100644
--- a/include/aidge/backend/cpu/operator/AbsImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AbsImpl.hpp
@@ -21,11 +21,11 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using AbsImpl_cpu = OperatorImpl_cpu<Abs_Op,
-    void(const std::size_t, const void*, void*)>;
+using AbsImpl_cpu =
+    OperatorImpl_cpu<Abs_Op, void(const std::size_t, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Abs_Op, "cpu", Aidge::AbsImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ABSIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp
index 16e5f9dee26a6f8b760e14a1ad66a40d8f0f7e93..2af3a73dd7f1b23bee494756b677a1b26d5a8164 100644
--- a/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AbsImpl_kernels.hpp
@@ -21,11 +21,11 @@
 namespace Aidge {
 template <class I, class O>
 void AbsImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                const void *input_,
+                                void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = std::abs(input[i]);
@@ -34,14 +34,20 @@ void AbsImpl_cpu_forward_kernel(std::size_t inputLenght,
 
 // Kernels registration to implementation entry point
 REGISTRAR(AbsImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::AbsImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(AbsImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::AbsImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(AbsImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::AbsImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::AbsImpl_cpu_forward_kernel<std::int32_t, std::int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ABSIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp
index 5e795922a67be178dde588e8e5e346ec268efe86..756bd9bed412e1f24ea1238ff9bfa2e3e5d6dc0e 100644
--- a/include/aidge/backend/cpu/operator/AddImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl.hpp
@@ -12,8 +12,8 @@
 #ifndef AIDGE_CPU_OPERATOR_ADDIMPL_H_
 #define AIDGE_CPU_OPERATOR_ADDIMPL_H_
 
-#include <cstddef>  // std::size_t
-#include <memory>   // std::unique_ptr, std::make_unique
+#include <cstddef> // std::size_t
+#include <memory>  // std::unique_ptr, std::make_unique
 #include <string>
 #include <vector>
 
@@ -24,11 +24,16 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using AddImpl_cpu = OperatorImpl_cpu<Add_Op,
-    void(const std::vector<const void*>, const std::vector<std::vector<std::size_t>>&, const std::size_t, const std::vector<std::size_t>&, void*)>;
+using AddImpl_cpu =
+    OperatorImpl_cpu<Add_Op,
+                     void(const std::vector<const void *>,
+                          const std::vector<std::vector<std::size_t>> &,
+                          const std::size_t,
+                          const std::vector<std::size_t> &,
+                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Add_Op, "cpu", Aidge::AddImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp
index 4a4ba2a8999c4dc33fc743b5a3a7dad023f9e0dd..a1d15f024acbf48d23bb5659b04c36f918f2c601 100644
--- a/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl_kernels.hpp
@@ -14,7 +14,7 @@
 
 #include "aidge/utils/Registrar.hpp"
 
-#include <cstdint>     // std::int32_t, std::int64_t
+#include <cstdint> // std::int32_t, std::int64_t
 
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
@@ -22,38 +22,52 @@
 namespace Aidge {
 
 template <class I, class O>
-void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const std::vector<std::vector<std::size_t>>& inputDims, const std::size_t outputLength, const std::vector<std::size_t>& outDims, void* output_) {
+void AddImpl_cpu_forward_kernel(
+    const std::vector<const void *> inputs_,
+    const std::vector<std::vector<std::size_t>> &inputDims,
+    const std::size_t outputLength,
+    const std::vector<std::size_t> &outDims,
+    void *output_) {
     // FIXME: missing Add attributes as arguments
-    std::vector<const I*> inputs;
-    for (const auto& input_ : inputs_) {
-        inputs.push_back(static_cast<const I*>(input_));
+    std::vector<const I *> inputs;
+    for (const auto &input_ : inputs_) {
+        inputs.push_back(static_cast<const I *>(input_));
     }
-    O* output = static_cast<O*>(output_);
+    O *output = static_cast<O *>(output_);
 
-	for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex)
-	{
+    for (std::size_t oIndex = 0; oIndex < outputLength; ++oIndex) {
         output[oIndex] = 0;
-		std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex);
-		for(std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
-			std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
+        std::vector<size_t> indexes = getMultiDimIndices(outDims, oIndex);
+        for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) {
+            std::size_t idx = getFlattenedIndex(inputDims[iIndex], indexes);
             output[oIndex] += inputs[iIndex][idx];
-		}
-	}
+        }
+    }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<float, float>, nullptr});
+          {ImplSpec::IOSpec{DataType::Any},
+           ImplSpec::IOSpec{DataType::Float32}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<double, double>, nullptr});
+          {ImplSpec::IOSpec{DataType::Any},
+           ImplSpec::IOSpec{DataType::Float64}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
+          {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>,
+           nullptr});
 REGISTRAR(AddImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
-    {ProdConso::inPlaceModel, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>, nullptr});
-}  // namespace Aidge
+          {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int64}},
+          {ProdConso::inPlaceModel,
+           Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_KERNELS_H_ */
\ No newline at end of file
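// A small worked example of the broadcast indexing performed by the Add
// kernel above (numbers chosen arbitrarily). With outDims = {2, 3} and one
// input already broadcast to inputDims = {1, 3}:
//   oIndex = 4  ->  getMultiDimIndices({2, 3}, 4) = {1, 1}
//   getFlattenedIndex({1, 3}, {1, 1}) = 1   // the size-1 dim maps to 0
// so output element (1, 1) accumulates input element (0, 1): the single row
// of the {1, 3} operand is reused for every row of the output.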
diff --git a/include/aidge/backend/cpu/operator/AndImpl.hpp b/include/aidge/backend/cpu/operator/AndImpl.hpp
index 316a2fb922596642088d133a7fec49c988739bb7..7ce44a45a5763a80c0499642dec93d4313358b10 100644
--- a/include/aidge/backend/cpu/operator/AndImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AndImpl.hpp
@@ -12,21 +12,26 @@
 #ifndef AIDGE_CPU_OPERATOR_ANDIMPL_H_
 #define AIDGE_CPU_OPERATOR_ANDIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/And.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using AndImpl_cpu = OperatorImpl_cpu<And_Op,
-    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>;
+                                     void(const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const void *,
+                                          const void *,
+                                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(And_Op, "cpu", Aidge::AndImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ANDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp
index 197e829f3527ce2f36c3ef5ee812a26477633703..5c3a0152023fb6bde45f88b377a003b565d511c8 100644
--- a/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AndImpl_kernels.hpp
@@ -18,28 +18,27 @@
 
 namespace Aidge {
 template <class I1, class I2, class O>
-void AndImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
-                                const std::vector<std::size_t>& input2Dims,
-                                const std::vector<std::size_t>& outputDims,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+void AndImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims,
+                                const std::vector<std::size_t> &input2Dims,
+                                const std::vector<std::size_t> &outputDims,
+                                const void *input1_,
+                                const void *input2_,
+                                void *output_) {
 
-    const I1* input_1 = static_cast<const I1*>(input1_);
-    const I2* input_2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
+    const I1 *input_1 = static_cast<const I1 *>(input1_);
+    const I2 *input_2 = static_cast<const I2 *>(input2_);
+    O *output = static_cast<O *>(output_);
 
     size_t totalElements = 1;
     for (size_t dimSize : outputDims) {
         totalElements *= dimSize;
     }
 
-	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
-	{
-		std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
+        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
 
-		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
-		std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
 
         output[oIndex] = static_cast<O>(input_1[idx1] == input_2[idx2]);
     }
@@ -47,17 +46,29 @@ void AndImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
 
 // Kernels registration to implementation entry point
 REGISTRAR(AndImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<float, float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::AndImpl_cpu_forward_kernel<float, float, float>,
+           nullptr});
 REGISTRAR(AndImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<double, double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::AndImpl_cpu_forward_kernel<double, double, double>,
+           nullptr});
 REGISTRAR(AndImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::AndImpl_cpu_forward_kernel<std::int32_t,
+                                             std::int32_t,
+                                             std::int32_t>,
+           nullptr});
 REGISTRAR(AndImpl_cpu,
-    {DataType::Int64},
-    {ProdConso::inPlaceModel, Aidge::AndImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int64},
+          {ProdConso::inPlaceModel,
+           Aidge::AndImpl_cpu_forward_kernel<std::int64_t,
+                                             std::int64_t,
+                                             std::int64_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ANDIMPL_KERNELS_H_ */
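// One hedged observation on the kernel above: the element-wise operation is
// input_1[idx1] == input_2[idx2], i.e. equality, not logical conjunction,
// and for 0/1 inputs the two disagree (0 == 0 yields 1, whereas 0 AND 0 is
// 0). If And_Op is meant to be a boolean AND, the conjunction form of the
// same line would be:
//
//   output[oIndex] = static_cast<O>(input_1[idx1] && input_2[idx2]);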
diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp
index b1a2d5168013e4f9595f4275b98143cfc3509629..57b9adde7745d2582929121dfbdb56587a3d2503 100644
--- a/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ArgMaxImpl.hpp
@@ -25,14 +25,14 @@
 namespace Aidge {
 // Operator implementation entry point for the backend
 using ArgMaxImpl_cpu = OperatorImpl_cpu<ArgMax_Op,
-    void(std::int32_t,
-        DimSize_t,
-        const std::vector<DimSize_t>&,
-        const void *,
-        void *)>;
+                                        void(std::int32_t,
+                                             DimSize_t,
+                                             const std::vector<DimSize_t> &,
+                                             const void *,
+                                             void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(ArgMax_Op, "cpu", Aidge::ArgMaxImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp
index 1bedec701766fc59fac233a1c400df1042369c5a..9cd65b1c74990dac6635391eead0add743f8efd9 100644
--- a/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp
@@ -12,13 +12,13 @@
 #ifndef AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_
 
-#include <algorithm>   // std::for_each
-#include <cstddef>     // std::size_t
-#include <cstdint>     // std::int32_t
-#include <functional>  //std::multiplies
-#include <numeric>     //std::accumulate
-#include <vector>
+#include <algorithm>  // std::for_each
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t
+#include <functional> //std::multiplies
 #include <limits>
+#include <numeric>    //std::accumulate
+#include <vector>
 
 #include "aidge/backend/cpu/operator/ArgMaxImpl.hpp"
 #include "aidge/data/Data.hpp"
@@ -28,13 +28,13 @@
 namespace Aidge {
 template <class I, class O>
 void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_,
-                                    DimSize_t select_last_index,
-                                    const std::vector<DimSize_t>& inputDims,
-                                    const void* input_,
-                                    void* output_) {
+                                   DimSize_t select_last_index,
+                                   const std::vector<DimSize_t> &inputDims,
+                                   const void *input_,
+                                   void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     const std::size_t axis = static_cast<std::size_t>(axis_);
 
@@ -53,14 +53,13 @@ void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_,
             const std::size_t idx_o = pre * stride_post + post;
             I max = std::numeric_limits<I>::min();
             for (std::size_t i = 0; i < dim_i; ++i) {
-                I curr_value = input[idx_i + i*stride_post];
+                I curr_value = input[idx_i + i * stride_post];
                 if (select_last_index) {
-                    if (curr_value>=max) {
+                    if (curr_value >= max) {
                         output[idx_o] = i;
                         max = curr_value;
                     }
-                }
-                else {
+                } else {
                     if (curr_value > max) {
                         output[idx_o] = i;
                         max = curr_value;
@@ -69,19 +68,24 @@ void ArgMaxImpl_cpu_forward_kernel(std::int32_t axis_,
             }
         }
     }
-
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(ArgMaxImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::defaultModel,
+           Aidge::ArgMaxImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(ArgMaxImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::defaultModel,
+           Aidge::ArgMaxImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(ArgMaxImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::defaultModel, Aidge::ArgMaxImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::defaultModel,
+           Aidge::ArgMaxImpl_cpu_forward_kernel<std::int32_t, std::int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ARGMAXIMPL_KERNELS_H_ */
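// A sketch of the indexing scheme the ArgMax kernel relies on: for a tensor
// of shape dims reduced along axis a, every element factors as
// (pre, i, post) with
//   flat = (pre * dims[a] + i) * stride_post + post,
//   stride_post = prod(dims[a+1..]),
// so the kernel scans i with (pre, post) fixed. One hedged caveat: the scan
// is seeded with std::numeric_limits<I>::min(), which for floating-point I
// is the smallest *positive* value, so an all-negative slice would never
// update output. std::numeric_limits<I>::lowest() is the conventional seed,
// as in this standalone variant of the inner scan:

#include <cstddef>
#include <limits>

template <class I>
std::size_t argMaxScanSketch(const I *input,
                             std::size_t dim_i,       // dims[axis]
                             std::size_t stride_post, // prod of trailing dims
                             std::size_t idx_i) {     // base offset of slice
    std::size_t best = 0;
    I max = std::numeric_limits<I>::lowest(); // lowest(), not min()
    for (std::size_t i = 0; i < dim_i; ++i) {
        const I v = input[idx_i + i * stride_post];
        if (v > max) {
            best = i;
            max = v;
        }
    }
    return best;
}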
diff --git a/include/aidge/backend/cpu/operator/AtanImpl.hpp b/include/aidge/backend/cpu/operator/AtanImpl.hpp
index 2f1b4bf0ad666ff9856c24fa675b70d6f830b07c..83527b38509f30e305078adb28aafdaf998a7ce9 100644
--- a/include/aidge/backend/cpu/operator/AtanImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AtanImpl.hpp
@@ -12,22 +12,23 @@
 #ifndef AIDGE_CPU_OPERATOR_ATAN_H_
 #define AIDGE_CPU_OPERATOR_ATAN_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Atan.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using AtanImpl_cpu = OperatorImpl_cpu<Atan_Op,
-    void(const std::size_t, const void*, void*),
-    void(const std::size_t, const void*, const void*, void*)>;
+using AtanImpl_cpu = OperatorImpl_cpu<
+    Atan_Op,
+    void(const std::size_t, const void *, void *),
+    void(const std::size_t, const void *, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Atan_Op, "cpu", Aidge::AtanImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ATAN_H_ */
diff --git a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp
index 2a786339503354514416705b61cfedfcc0b7c321..916c974f4e4860ca805f3de4037e46a86b110ed6 100644
--- a/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AtanImpl_kernels.hpp
@@ -15,46 +15,49 @@
 #include "aidge/utils/Registrar.hpp"
 
 #include "aidge/backend/cpu/operator/AtanImpl.hpp"
-#include <cmath>  // For atan()
-
+#include <cmath> // For atan()
 
 namespace Aidge {
 template <class I, class O>
 void AtanImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                    const void* input_,
-                                    void* output_) {
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+                                 const void *input_,
+                                 void *output_) {
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (size_t i = 0; i < inputLenght; ++i) {
         output[i] = static_cast<O>(atan(input[i]));
     }
-
 }
 
 template <class O, class GI, class GO>
 void AtanImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                     const void* output_, const void* grad_output_,
-				     void* grad_input_) {
-    const O* output = static_cast<const O*>(output_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
+                                  const void *output_,
+                                  const void *grad_output_,
+                                  void *grad_input_) {
+    const O *output = static_cast<const O *>(output_);
+    const GO *grad_output = static_cast<const GO *>(grad_output_);
+    GI *grad_input = static_cast<GI *>(grad_input_);
 
     // Apply the derivative of atan for each element in the input array
     for (size_t i = 0; i < inputLenght; ++i) {
         // dx = dy * (1 / (1 + x^2))
-        grad_input[i] = grad_output[i] * static_cast<O>(1.0 / (1.0 + output[i] * output[i]));
+        grad_input[i] = grad_output[i] *
+                        static_cast<O>(1.0 / (1.0 + output[i] * output[i]));
     }
 }
 
-
 // Kernels registration to implementation entry point
 REGISTRAR(AtanImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<float, float>, Aidge::AtanImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::AtanImpl_cpu_forward_kernel<float, float>,
+           Aidge::AtanImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(AtanImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::AtanImpl_cpu_forward_kernel<double, double>, Aidge::AtanImpl_cpu_backward_kernel<double, double, double>});
-}  // namespace Aidge
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::AtanImpl_cpu_forward_kernel<double, double>,
+           Aidge::AtanImpl_cpu_backward_kernel<double, double, double>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ATANIMPL_KERNELS_H_ */
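// A hedged note on the backward kernel above: it receives the forward
// output y = atan(x) but applies the factor 1 / (1 + y^2), which is the
// derivative formula evaluated at y rather than at x. Expressed purely in
// terms of the saved output, the exact derivative is
//   d(atan(x))/dx = 1 / (1 + x^2) = cos^2(y)   (since x = tan(y)),
// so, assuming output_ really is the forward result, an exact gradient step
// from y alone would look like:
//
//   const O c = std::cos(output[i]);
//   grad_input[i] = grad_output[i] * static_cast<GI>(c * c);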
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
index adea96ca43a1ad9d2a49777426913ca4676e4f32..cc1aaad220f738798e228658c65cf39d7b372891 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
@@ -17,24 +17,25 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/AvgPooling.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using AvgPooling2D_Op = AvgPooling_Op<2>;
-using AvgPoolingImpl2D_cpu = OperatorImpl_cpu<AvgPooling_Op<2>,
-    void(const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 4>&,
-        const void *,
-        void *)>;
+using AvgPoolingImpl2D_cpu =
+    OperatorImpl_cpu<AvgPooling_Op<2>,
+                     void(const std::array<DimSize_t, 2> &,
+                          const std::array<DimSize_t, 2> &,
+                          const std::array<DimSize_t, 4> &,
+                          const void *,
+                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(AvgPooling2D_Op, "cpu", Aidge::AvgPoolingImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
index f6da9dcb026101b93de862499d42ae8734532d52..7010f87fccaff0b0738b8006d1f3a0dc0e9a9de1 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_kernels.hpp
@@ -13,8 +13,8 @@
 #define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_
 
 #include <array>
-#include <tuple>
 #include <cmath>
+#include <tuple>
 
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
@@ -33,24 +33,24 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
-                                        const std::array<DimSize_t, 2>& kernelDims,
-                                        const std::array<DimSize_t, 4> &dims,
-                                        const void *input_,
-                                        void *output_) {
+void AvgPoolingImpl2D_cpu_forward_kernel(
+    const std::array<DimSize_t, 2> &strideDims,
+    const std::array<DimSize_t, 2> &kernelDims,
+    const std::array<DimSize_t, 4> &dims,
+    const void *input_,
+    void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
-
     // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
+    const std::size_t oxSize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
+        static_cast<float>(strideDims[0])));
     // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
-                                static_cast<float>(strideDims[1])));
+    const std::size_t oySize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
+        static_cast<float>(strideDims[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -60,40 +60,61 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
+            const std::size_t oIndex =
+                (ch + batch * dims[1]) * oxSize * oySize;
+            const std::size_t iIndex =
+                (ch + batch * dims[1]) * dims[2] * dims[3];
+            std::fill(output + oIndex, output + (oIndex + oxSize * oySize), 0);
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
+                const signedsize difx =
+                    static_cast<signedsize>(-ox * strideDims[0]);
+                const std::size_t sxMin =
+                    static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax =
+                    (static_cast<signedsize>(dims[2]) + difx) < 0
+                        ? 0
+                        : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0]
+                                                            : dims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
-                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const signedsize dify =
+                        static_cast<signedsize>(-oy * strideDims[1]);
+                    const std::size_t syMin = static_cast<std::size_t>(
+                        std::max(dify, signedsize(0)));
+                    const std::size_t syMax =
+                        (static_cast<signedsize>(dims[3]) + dify) < 0
+                            ? 0
+                            : ((dims[3] + dify) > kernelDims[1]
+                                   ? kernelDims[1]
+                                   : dims[3] + dify);
+                    const std::size_t oIndexFull = oIndex + ox * oySize + oy;
                     const std::size_t ix = ox * strideDims[0];
                     const std::size_t iy = oy * strideDims[1];
 
                     if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] += static_cast<O>(
-                                               input[iIndex + (ix+0)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+0)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+0)*dims[3] + (iy+2)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+2)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9);
+                        output[oIndexFull] +=
+                            static_cast<O>(
+                                input[iIndex + (ix + 0) * dims[3] + (iy + 0)] +
+                                input[iIndex + (ix + 0) * dims[3] + (iy + 1)] +
+                                input[iIndex + (ix + 0) * dims[3] + (iy + 2)] +
+                                input[iIndex + (ix + 1) * dims[3] + (iy + 0)] +
+                                input[iIndex + (ix + 1) * dims[3] + (iy + 1)] +
+                                input[iIndex + (ix + 1) * dims[3] + (iy + 2)] +
+                                input[iIndex + (ix + 2) * dims[3] + (iy + 0)] +
+                                input[iIndex + (ix + 2) * dims[3] + (iy + 1)] +
+                                input[iIndex + (ix + 2) * dims[3] +
+                                      (iy + 2)]) /
+                            O(9);
                     } else {
                         for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                             for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+                                output[oIndexFull] +=
+                                    input[iIndex + (ix + sx) * dims[3] +
+                                          (iy + sy)];
                             }
                         }
                         // padding not used
-                        output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin);
+                        output[oIndexFull] /=
+                            (sxMax - sxMin) * (syMax - syMin);
                     }
                 }
             }
@@ -103,14 +124,23 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
 
 // Kernels registration to implementation entry point
 REGISTRAR(AvgPoolingImpl2D_cpu,
-    {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr});
-REGISTRAR(AvgPoolingImpl2D_cpu,
+          {{DataType::Float32, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>,
+           nullptr});
+REGISTRAR(
+    AvgPoolingImpl2D_cpu,
     {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
+    {ProdConso::inPlaceModel,
+     Aidge::AvgPoolingImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>,
+     nullptr});
 REGISTRAR(AvgPoolingImpl2D_cpu,
-    {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr});
-}  // namespace Aidge
+          {{DataType::Float64, DataFormat::NCHW},
+           {DataType::Float64, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_KERNELS_H_ */
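// A quick numeric check of the output-size formula used above (arbitrary
// numbers): for input height H = 7, kernel k = 3, stride s = 2,
//   oxSize = floor((H - k + s) / s) = floor((7 - 3 + 2) / 2) = 3,
// i.e. windows starting at rows 0, 2 and 4, which matches the usual "valid"
// pooling count floor((H - k) / s) + 1 written with a single division. Note
// also that the unrolled fast path is gated on sxMax == 3 && syMax == 3, so
// it only ever fires for a fully-inside 3x3 kernel; every other
// configuration takes the generic accumulation loop.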
diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
index 36a100b21edc6cd63a0176c89f2f1e57c10001c7..d96aa0904792820acf54c9a3cf01357762296c71 100644
--- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
@@ -17,29 +17,30 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using BatchNorm2D_Op = BatchNorm_Op<2>;
-using BatchNormImpl2D_cpu = OperatorImpl_cpu<BatchNorm_Op<2>,
-    void(float,
-        float,
-        const std::array<DimSize_t, 4> &,
-        const void *,
-        const void *,
-        const void *,
-        void *,
-        void *,
-        void *,
-        const bool)>;
+using BatchNormImpl2D_cpu =
+    OperatorImpl_cpu<BatchNorm_Op<2>,
+                     void(float,
+                          float,
+                          const std::array<DimSize_t, 4> &,
+                          const void *,
+                          const void *,
+                          const void *,
+                          void *,
+                          void *,
+                          void *,
+                          const bool)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(BatchNorm2D_Op, "cpu", Aidge::BatchNormImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp
index ec71e3b8e37e344c551fd643dc7b3957bdddcb67..543a9667737eb5270d929a7a4dd40562fd813fe9 100644
--- a/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp
@@ -14,12 +14,12 @@
 
 #include "aidge/utils/Registrar.hpp"
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <algorithm>
 #include <array>
 #include <cmath>
-#include <algorithm>
 
 namespace Aidge {
 /**
@@ -38,8 +38,16 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class P, class O>
-void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
+void BatchNormImpl2D_cpu_forward_kernel(float epsilon,
+                                        float momentum,
+                                        const std::array<DimSize_t, 4> &dims,
+                                        const void *input_,
+                                        const void *scale_,
+                                        const void *shift_,
+                                        void *batchMean_,
+                                        void *batchVar_,
+                                        void *output_,
+                                        const bool freeze) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const P *scale = static_cast<const P *>(scale_);
@@ -50,18 +58,24 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std
 
     const DimSize_t nbBatch = dims[0];
     const DimSize_t nbChannels = dims[1];
-    const DimSize_t featureMapSize = dims[2]*dims[3];
-
+    const DimSize_t featureMapSize = dims[2] * dims[3];
 
     if ((freeze == true) || (momentum == 0.0f)) {
         for (std::size_t batch = 0; batch < nbBatch; ++batch) {
             for (std::size_t ch = 0; ch < nbChannels; ++ch) {
-                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
-                std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
-                const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
+                const std::size_t ioIndex =
+                    (ch + batch * nbChannels) * featureMapSize;
+                std::fill(output + ioIndex,
+                          output + ioIndex + featureMapSize,
+                          shift[ch]);
+                const P var =
+                    std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
 
-                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
-                    output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
+                for (std::size_t feature = 0; feature < featureMapSize;
+                     ++feature) {
+                    output[ioIndex + feature] +=
+                        scale[ch] *
+                        (input[ioIndex + feature] - batchMean[ch]) / var;
                 }
             }
         }
@@ -71,25 +85,40 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std
             I sum = I(0);
             I sumSquare = I(0);
             for (std::size_t batch = 0; batch < nbBatch; ++batch) {
-                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
-                std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
+                const std::size_t ioIndex =
+                    (ch + batch * nbChannels) * featureMapSize;
+                std::fill(output + ioIndex,
+                          output + ioIndex + featureMapSize,
+                          shift[ch]);
 
-                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
+                for (std::size_t feature = 0; feature < featureMapSize;
+                     ++feature) {
                     sum += input[ioIndex + feature];
-                    sumSquare += input[ioIndex + feature] * input[ioIndex + feature];
+                    sumSquare +=
+                        input[ioIndex + feature] * input[ioIndex + feature];
                 }
             }
             const I inputMean = sum / static_cast<I>(nbDataPerChannel);
-            const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel)  - inputMean*inputMean;
+            const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) -
+                               inputMean * inputMean;
 
-            batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum;
-            batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum;
+            batchMean[ch] =
+                batchMean[ch] * (1 - momentum) + inputMean * momentum;
+            batchVar[ch] = batchVar[ch] * (1 - momentum) +
+                           inputVar *
+                               (static_cast<I>(nbDataPerChannel) /
+                                static_cast<I>(nbDataPerChannel - 1)) *
+                               momentum;
 
             const P var = std::sqrt(inputVar + static_cast<P>(epsilon));
             for (std::size_t batch = 0; batch < nbBatch; ++batch) {
-                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
-                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
-                    output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var;
+                const std::size_t ioIndex =
+                    (ch + batch * nbChannels) * featureMapSize;
+                for (std::size_t feature = 0; feature < featureMapSize;
+                     ++feature) {
+                    output[ioIndex + feature] +=
+                        scale[ch] * (input[ioIndex + feature] - inputMean) /
+                        var;
                 }
             }
         }
@@ -98,8 +127,11 @@ void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std
 
 // Kernels registration to implementation entry point
 REGISTRAR(BatchNormImpl2D_cpu,
-    {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>, nullptr});
-}  // namespace Aidge
+          {{DataType::Float32, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_KERNELS_H_ */
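// The running-statistics update above, written out. With momentum m and
// n = nbDataPerChannel:
//   mean_run <- (1 - m) * mean_run + m * mean_batch
//   var_run  <- (1 - m) * var_run  + m * var_batch * n / (n - 1)
// The n / (n - 1) factor is Bessel's correction: the batch variance is
// computed as the biased estimate E[x^2] - E[x]^2, while the stored running
// variance is conventionally unbiased. When freeze == true or m == 0 the
// stored statistics are used unchanged, which matches inference behaviour.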
diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp
index 6da67bb7dd4469b6ca609c5aea1ae70dfca3f939..6603566456dab4d3c8fa833f0d4e17a0ce50c101 100644
--- a/include/aidge/backend/cpu/operator/BitShiftImpl.hpp
+++ b/include/aidge/backend/cpu/operator/BitShiftImpl.hpp
@@ -12,27 +12,28 @@
 #ifndef AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_
 #define AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/BitShift.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using BitShiftImpl_cpu = OperatorImpl_cpu<BitShift_Op,
-    void(const BitShift_Op::BitShiftDirection,
-    const std::vector<std::size_t>&, 
-    const std::vector<std::size_t>&, 
-    const std::vector<std::size_t>&, 
-    const void*, 
-    const void*,
-    void*)>;
-    
-    // Implementation entry point registration to Operator
-    REGISTRAR(BitShift_Op,"cpu",Aidge::BitShiftImpl_cpu::create);
-}  // namespace Aidge
+using BitShiftImpl_cpu =
+    OperatorImpl_cpu<BitShift_Op,
+                     void(const BitShift_Op::BitShiftDirection,
+                          const std::vector<std::size_t> &,
+                          const std::vector<std::size_t> &,
+                          const std::vector<std::size_t> &,
+                          const void *,
+                          const void *,
+                          void *)>;
+
+// Implementation entry point registration to Operator
+REGISTRAR(BitShift_Op, "cpu", Aidge::BitShiftImpl_cpu::create);
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_BITSHIFTIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp
index f815e946ea2e4abaff48a6e5155368d564e88e8c..0d37fc267d13bd6c0edd855d08c22cc0bad559f0 100644
--- a/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp
@@ -14,57 +14,62 @@
 
 #include "aidge/utils/Registrar.hpp"
 
-#include <cstdint>     // std::int32_t, std::int64_t
 #include "aidge/operator/BitShift.hpp"
+#include <cstdint> // std::int32_t, std::int64_t
 
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
 
-
-
 namespace Aidge {
 template <class I1, class I2, class O>
 void BitShiftImpl_cpu_forward_kernel(
-                                const BitShift_Op::BitShiftDirection direction,
-                                const std::vector<std::size_t>& input1Dims,
-                                const std::vector<std::size_t>& input2Dims,
-                                const std::vector<std::size_t>& outputDims,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_
-                                ) {
+    const BitShift_Op::BitShiftDirection direction,
+    const std::vector<std::size_t> &input1Dims,
+    const std::vector<std::size_t> &input2Dims,
+    const std::vector<std::size_t> &outputDims,
+    const void *input1_,
+    const void *input2_,
+    void *output_) {
+
+    const I1 *input_1 = static_cast<const I1 *>(input1_);
+    const I2 *input_2 = static_cast<const I2 *>(input2_);
+    O *output = static_cast<O *>(output_);
 
-    const I1* input_1 = static_cast<const I1*>(input1_);
-    const I2* input_2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
+    const size_t totalElements =
+        std::accumulate(outputDims.begin(),
+                        outputDims.end(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
 
-    const size_t totalElements = std::accumulate(outputDims.begin(), outputDims.end(), std::size_t(1), std::multiplies<std::size_t>());
-    
-    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
-    {
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
         std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
         std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
         std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
-        if(direction == BitShift_Op::BitShiftDirection::right)
+        if (direction == BitShift_Op::BitShiftDirection::right)
 
         {
-                output[oIndex]= input_1[idx1] >> input_2[idx2];
-        }
-        else
-        {
-                output[oIndex] = input_1[idx1] << input_2[idx2];
+            output[oIndex] = input_1[idx1] >> input_2[idx2];
+        } else {
+            output[oIndex] = input_1[idx1] << input_2[idx2];
         }
     }
 }
 
 REGISTRAR(BitShiftImpl_cpu,
-{DataType::Int32},
-{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>,nullptr});
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::BitShiftImpl_cpu_forward_kernel<std::int32_t,
+                                                  std::int32_t,
+                                                  std::int32_t>,
+           nullptr});
 REGISTRAR(BitShiftImpl_cpu,
-{DataType::Int64},
-{ProdConso::inPlaceModel,Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>,nullptr});
-
+          {DataType::Int64},
+          {ProdConso::inPlaceModel,
+           Aidge::BitShiftImpl_cpu_forward_kernel<std::int64_t,
+                                                  std::int64_t,
+                                                  std::int64_t>,
+           nullptr});
 
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_BitShiftIMPL_KERNELS_H_ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu/operator/ClipImpl.hpp b/include/aidge/backend/cpu/operator/ClipImpl.hpp
index c83836d5aa1d6aae27e3fdce1bbb9561b70ec31e..aec09416601314e744048fb48de83a2f9d7dca88 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl.hpp
@@ -12,35 +12,34 @@
 #ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_H_
 #define AIDGE_CPU_OPERATOR_CLIPIMPL_H_
 
-#include <cstddef>  // std::size_t
+#include <algorithm>
+#include <cstddef> // std::size_t
 #include <memory>
-#include <tuple>    // std::tuple
+#include <tuple>   // std::tuple
 #include <vector>
-#include <algorithm>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Clip.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-    using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op,
-    void(float, //Forward Types
-    float, 
-    const void*,
-    const std::size_t, 
-    void*),
-    void(float,//Backward Types
-    float, 
-    const std::size_t,
-    const void*, 
-    const void*,
-    void*)>;
+using ClipImpl_cpu = OperatorImpl_cpu<Clip_Op,
+                                      void(float, // Forward Types
+                                           float,
+                                           const void *,
+                                           const std::size_t,
+                                           void *),
+                                      void(float, // Backward Types
+                                           float,
+                                           const std::size_t,
+                                           const void *,
+                                           const void *,
+                                           void *)>;
 
-    REGISTRAR(Clip_Op,"cpu",Aidge::ClipImpl_cpu::create);
-}  // namespace Aidge
+REGISTRAR(Clip_Op, "cpu", Aidge::ClipImpl_cpu::create);
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
index 1afac4698be2a63790ebac671ecc1e59166c5f94..6438fbed090ea52781da6154d3fe1666a2d93c52 100644
--- a/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ClipImpl_kernels.hpp
@@ -13,65 +13,67 @@
 #ifndef AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_
 
-#include "aidge/utils/Registrar.hpp"
 #include "aidge/backend/cpu/operator/ClipImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
 template <class I, class O>
-void ClipImpl_cpu_forward_kernel(
-        float min_,
-        float max_,
-        const void* input_,
-        const std::size_t length,
-        void* output_) 
-{
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+void ClipImpl_cpu_forward_kernel(float min_,
+                                 float max_,
+                                 const void *input_,
+                                 const std::size_t length,
+                                 void *output_) {
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (std::size_t i = 0; i < length; ++i) {
-        output[i] = std::min(std::max(static_cast<float>(input[i]), min_), max_);
+        output[i] =
+            std::min(std::max(static_cast<float>(input[i]), min_), max_);
     }
 }
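+
+// Example: with min_ = 0.f and max_ = 1.f, the input {-2.f, 0.5f, 3.f}
+// yields {0.f, 0.5f, 1.f}. Integer instantiations clamp in float and
+// convert back to O on assignment.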
 
 template <class I, class GI, class GO>
-void ClipImpl_cpu_backward_kernel(
-        float min_,
-        float max_,
-        const std::size_t length,
-        const void* input_, 
-        const void* grad_output_,
-		void* grad_input_)           
-{
-    const I* input = static_cast<const I*>(input_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
+void ClipImpl_cpu_backward_kernel(float min_,
+                                  float max_,
+                                  const std::size_t length,
+                                  const void *input_,
+                                  const void *grad_output_,
+                                  void *grad_input_) {
+    const I *input = static_cast<const I *>(input_);
+    const GO *grad_output = static_cast<const GO *>(grad_output_);
+    GI *grad_input = static_cast<GI *>(grad_input_);
 
     for (std::size_t i = 0; i < length; ++i) {
-        grad_input[i] = ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
+        grad_input[i] =
+            ((input[i] > min_) && (input[i] < max_)) ? grad_output[i] : 0;
     }
 }
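+
+// Note: the comparisons above are strict, so the gradient only flows for
+// inputs strictly inside (min_, max_); values equal to either bound get a
+// zero gradient.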
 
 REGISTRAR(ClipImpl_cpu,
-{DataType::Float32},
-{ProdConso::inPlaceModel,
-Aidge::ClipImpl_cpu_forward_kernel<float,float>,
-Aidge::ClipImpl_cpu_backward_kernel<float,float,float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::ClipImpl_cpu_forward_kernel<float, float>,
+           Aidge::ClipImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(ClipImpl_cpu,
-{DataType::Float64},
-{ProdConso::inPlaceModel,
-Aidge::ClipImpl_cpu_forward_kernel<double,double>,
-Aidge::ClipImpl_cpu_backward_kernel<double,double,double>});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::ClipImpl_cpu_forward_kernel<double, double>,
+           Aidge::ClipImpl_cpu_backward_kernel<double, double, double>});
 REGISTRAR(ClipImpl_cpu,
-{DataType::Int32},
-{ProdConso::inPlaceModel,
-Aidge::ClipImpl_cpu_forward_kernel<std::int32_t,std::int32_t>,
-Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,std::int32_t,std::int32_t>});
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::ClipImpl_cpu_forward_kernel<std::int32_t, std::int32_t>,
+           Aidge::ClipImpl_cpu_backward_kernel<std::int32_t,
+                                               std::int32_t,
+                                               std::int32_t>});
 REGISTRAR(ClipImpl_cpu,
-{DataType::Int64},
-{ProdConso::inPlaceModel,
-Aidge::ClipImpl_cpu_forward_kernel<std::int64_t,std::int64_t>,
-Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,std::int64_t,std::int64_t>});
+          {DataType::Int64},
+          {ProdConso::inPlaceModel,
+           Aidge::ClipImpl_cpu_forward_kernel<std::int64_t, std::int64_t>,
+           Aidge::ClipImpl_cpu_backward_kernel<std::int64_t,
+                                               std::int64_t,
+                                               std::int64_t>});
 
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CLIPIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp
index 83e7e030f526e0db3cff4741eabe39e287130562..8e77611e859c20827edec377e8458795d7f90daa 100644
--- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp
@@ -23,12 +23,12 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<ConstantOfShape_Op,
-    void(const std::vector<DimSize_t>, const Tensor&, void *)>;
+using ConstantOfShapeImpl_cpu = OperatorImpl_cpu<
+    ConstantOfShape_Op,
+    void(const std::vector<DimSize_t>, const Tensor &, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(ConstantOfShape_Op, "cpu", Aidge::ConstantOfShapeImpl_cpu::create);
 } // namespace Aidge
 
 #endif /* _AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_H_ */
-
diff --git a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp
index 18ab9c0a77c4545c955fc4fe1f1fc1cbcb763bf7..0c3b631d79120145a3cfb16300a950629c905c96 100644
--- a/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConstantOfShapeImpl_kernels.hpp
@@ -30,42 +30,62 @@
 namespace Aidge {
 template <class O>
 void ConstantOfShapeimpl_cpu_forward_kernel(
-    const std::vector<DimSize_t> output_dims, const Tensor &value,
+    const std::vector<DimSize_t> output_dims,
+    const Tensor &value,
     void *output_) {
 
-  O *output = static_cast<O *>(output_);
-  O val;
-  std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
-            static_cast<O *>(value.getImpl()->hostPtr()) +
-                static_cast<NbElts_t>(1),
-            &val);
-  const size_t output_size = std::accumulate(
-      output_dims.begin(), output_dims.end(), 1, std::multiplies<DimSize_t>());
-  for (size_t i = 0; i < output_size; ++i) {
-    output[i] = val;
-  }
+    O *output = static_cast<O *>(output_);
+    O val;
+    std::copy(static_cast<O *>(value.getImpl()->hostPtr()),
+              static_cast<O *>(value.getImpl()->hostPtr()) +
+                  static_cast<NbElts_t>(1),
+              &val);
+    const size_t output_size = std::accumulate(output_dims.begin(),
+                                               output_dims.end(),
+                                               1,
+                                               std::multiplies<DimSize_t>());
+    for (size_t i = 0; i < output_size; ++i) {
+        output[i] = val;
+    }
 }
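+
+// Example: output_dims = {2, 3} with a value tensor holding 0.5f writes
+// 0.5f to all 6 output elements; only the first element of `value` is read.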
 
 // Kernels registration to implementation entry point
 REGISTRAR(ConstantOfShapeImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float16}},
-    {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>, nullptr});
+          {ImplSpec::IOSpec{DataType::Int64},
+           ImplSpec::IOSpec{DataType::Float16}},
+          {ProdConso::defaultModel,
+           Aidge::ConstantOfShapeimpl_cpu_forward_kernel<half_float::half>,
+           nullptr});
 REGISTRAR(ConstantOfShapeImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float32}},
-    {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>, nullptr});
+          {ImplSpec::IOSpec{DataType::Int64},
+           ImplSpec::IOSpec{DataType::Float32}},
+          {ProdConso::defaultModel,
+           Aidge::ConstantOfShapeimpl_cpu_forward_kernel<float>,
+           nullptr});
 REGISTRAR(ConstantOfShapeImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Float64}},
-    {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>, nullptr});
+          {ImplSpec::IOSpec{DataType::Int64},
+           ImplSpec::IOSpec{DataType::Float64}},
+          {ProdConso::defaultModel,
+           Aidge::ConstantOfShapeimpl_cpu_forward_kernel<double>,
+           nullptr});
 REGISTRAR(ConstantOfShapeImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int16}},
-    {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>, nullptr});
+          {ImplSpec::IOSpec{DataType::Int64},
+           ImplSpec::IOSpec{DataType::Int16}},
+          {ProdConso::defaultModel,
+           Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int16_t>,
+           nullptr});
 REGISTRAR(ConstantOfShapeImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int32}},
-    {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>, nullptr});
+          {ImplSpec::IOSpec{DataType::Int64},
+           ImplSpec::IOSpec{DataType::Int32}},
+          {ProdConso::defaultModel,
+           Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int32_t>,
+           nullptr});
 REGISTRAR(ConstantOfShapeImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Int64}, ImplSpec::IOSpec{DataType::Int64}},
-    {ProdConso::defaultModel, Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>, nullptr});
+          {ImplSpec::IOSpec{DataType::Int64},
+           ImplSpec::IOSpec{DataType::Int64}},
+          {ProdConso::defaultModel,
+           Aidge::ConstantOfShapeimpl_cpu_forward_kernel<std::int64_t>,
+           nullptr});
 } // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONSTANTOFSHAPEIMPL_KERNELS_H_ */
-
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
index 5b985accfb7b9778993b557524de7b60060ad437..82d86874649ea521493eee40ec61cef1caaaf304 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
@@ -17,39 +17,41 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/ConvDepthWise.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using ConvDepthWise1D_Op = ConvDepthWise_Op<1>;
-using ConvDepthWiseImpl1D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<1>,
-    void(const std::array<DimSize_t, 1>&,
-        const std::array<DimSize_t, 1>&,
-        const std::array<DimSize_t, 1>&,
-        const std::array<DimSize_t, 3>&,
-        const void *,
-        const void *,
-        const void *,
-        void *)>;
+using ConvDepthWiseImpl1D_cpu =
+    OperatorImpl_cpu<ConvDepthWise_Op<1>,
+                     void(const std::array<DimSize_t, 1> &,
+                          const std::array<DimSize_t, 1> &,
+                          const std::array<DimSize_t, 1> &,
+                          const std::array<DimSize_t, 3> &,
+                          const void *,
+                          const void *,
+                          const void *,
+                          void *)>;
 
 using ConvDepthWise2D_Op = ConvDepthWise_Op<2>;
-using ConvDepthWiseImpl2D_cpu = OperatorImpl_cpu<ConvDepthWise_Op<2>,
-    void(const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 4> &,
-        const void *,
-        const void *,
-        const void *,
-        void *)>;
+using ConvDepthWiseImpl2D_cpu =
+    OperatorImpl_cpu<ConvDepthWise_Op<2>,
+                     void(const std::array<DimSize_t, 2> &,
+                          const std::array<DimSize_t, 2> &,
+                          const std::array<DimSize_t, 2> &,
+                          const std::array<DimSize_t, 4> &,
+                          const void *,
+                          const void *,
+                          const void *,
+                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(ConvDepthWise1D_Op, "cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
 REGISTRAR(ConvDepthWise2D_Op, "cpu", Aidge::ConvDepthWiseImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp
index 28ed8969aa415ab4151d038869594376480eba43..146e3ed18a8ca9f497f2a606ce22a615d0ea269c 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_kernels.hpp
@@ -37,27 +37,27 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
-                            const std::array<DimSize_t, 1>& dilationDims,
-                            const std::array<DimSize_t, 1>& kernelDims,
-                            const std::array<DimSize_t, 3>& inputDims,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_) {
+void ConvDepthWiseImpl1D_cpu_forward_kernel(
+    const std::array<DimSize_t, 1> &strideDims,
+    const std::array<DimSize_t, 1> &dilationDims,
+    const std::array<DimSize_t, 1> &kernelDims,
+    const std::array<DimSize_t, 3> &inputDims,
+    const void *input_,
+    const void *weights_,
+    const void *biases_,
+    void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
     const B *biases = static_cast<const B *>(biases_);
     O *output = static_cast<O *>(output_);
 
-
     // output H size
-    const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
-
+    const DimSize_t dilated_kernel_x =
+        dilationDims[0] * (kernelDims[0] - 1) + 1;
+    const std::size_t oxSize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
+        static_cast<float>(strideDims[0])));
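+    // e.g. inputDims[2] = 10, kernelDims[0] = 3, dilationDims[0] = 1 and
+    // strideDims[0] = 2 give dilated_kernel_x = 3 and
+    // oxSize = floor((10 - 3 + 2) / 2) = 4.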
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -67,23 +67,33 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
         for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
-            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
+            const std::size_t oIndex = (ch + batch * inputDims[1]) * oxSize;
             B biasVal = (biases != nullptr) ? biases[ch] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
-            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
+            std::fill(output + oIndex, output + (oIndex + oxSize), biasVal);
+            const std::size_t iIndex =
+                (ch + batch * inputDims[1]) * inputDims[2];
             const std::size_t wIndex = ch * kernelDims[0];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                // const signedsize difx =
+                //     static_cast<signedsize>(-ox * strideDims[0]);
+                // const std::size_t sxMin =
+                //     static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                // const std::size_t sxMax =
+                //     (static_cast<signedsize>(inputDims[2]) + difx) < 0
+                //         ? 0
+                //         : ((inputDims[2] + difx) > kernelDims[0]
+                //                ? kernelDims[0]
+                //                : inputDims[2] + difx);
                 const std::size_t sxMin = 0;
                 const std::size_t sxMax = dilated_kernel_x;
                 const std::size_t oIndexFull = oIndex + ox;
-                const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                const signedsize ix =
+                    static_cast<signedsize>(ox * strideDims[0]);
 
-                for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
-                    output[oIndexFull] += weights[wIndex + sx] *
-                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
+                for (std::size_t sx = sxMin; sx * dilationDims[0] < sxMax;
+                     ++sx) {
+                    output[oIndexFull] +=
+                        weights[wIndex + sx] *
+                        input[iIndex + static_cast<std::size_t>(
+                                           ix + static_cast<signedsize>(
+                                                    sx * dilationDims[0]))];
                 }
             }
         }
@@ -91,16 +101,30 @@ void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& stri
 }
 
 // Kernels registration to implementation entry point
-REGISTRAR(ConvDepthWiseImpl1D_cpu,
+REGISTRAR(
+    ConvDepthWiseImpl1D_cpu,
     {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
+    {ProdConso::inPlaceModel,
+     Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>,
+     nullptr});
 REGISTRAR(ConvDepthWiseImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Int32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t,
+                                                         std::int32_t,
+                                                         std::int32_t,
+                                                         std::int32_t>,
+           nullptr});
 REGISTRAR(ConvDepthWiseImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
-
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float64, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double,
+                                                         double,
+                                                         double,
+                                                         double>,
+           nullptr});
 
 /**
  * @brief Forward kernel for 2D depthwise convolution (ConvDepthWise) on CPU backend.
@@ -116,33 +140,34 @@ REGISTRAR(ConvDepthWiseImpl1D_cpu,
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
-                            const std::array<DimSize_t, 2>& dilationDims,
-                            const std::array<DimSize_t, 2>& kernelDims,
-                            const std::array<DimSize_t, 4>& inputDims,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_)
-{
+void ConvDepthWiseImpl2D_cpu_forward_kernel(
+    const std::array<DimSize_t, 2> &strideDims,
+    const std::array<DimSize_t, 2> &dilationDims,
+    const std::array<DimSize_t, 2> &kernelDims,
+    const std::array<DimSize_t, 4> &inputDims,
+    const void *input_,
+    const void *weights_,
+    const void *biases_,
+    void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
     const B *biases = static_cast<const B *>(biases_);
     O *output = static_cast<O *>(output_);
 
-
     // output H size
-    const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
+    const DimSize_t dilated_kernel_x =
+        dilationDims[0] * (kernelDims[0] - 1) + 1;
+    const std::size_t oxSize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
+        static_cast<float>(strideDims[0])));
 
     // output W size
-    const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
-                                static_cast<float>(strideDims[1])));
+    const DimSize_t dilated_kernel_y =
+        dilationDims[1] * (kernelDims[1] - 1) + 1;
+    const std::size_t oySize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
+        static_cast<float>(strideDims[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -150,43 +175,72 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
     // weight (outCh, ch, kernelX, kernelY)
     // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
-    const std::size_t outChannels_s =  oxSize * oySize;
+    const std::size_t outChannels_s = oxSize * oySize;
 
-    if (dilated_kernel_x ==3 && dilated_kernel_y == 3) {
+    if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
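+        // Fast path: the nine 3x3 taps are fully unrolled below, with a
+        // dedicated variant for unit strides that walks input rows directly.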
         for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
             for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
 
                 B biasVal = (biases != nullptr) ? biases[ch] : B(0);
 
-                std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                std::size_t iIndex =
+                    (ch + batch * inputDims[1]) * inputDims[2] * inputDims[3];
                 const std::size_t wIndex = ch * 9;
 
-                if (strideDims[0] == 1 && strideDims[1]==1) {
-                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
+                if (strideDims[0] == 1 && strideDims[1] == 1) {
+                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize;
+                         ++ox, oIndex += oySize, iIndex -= inputDims[3]) {
                         for (std::size_t oy = 0; oy < oySize; ++oy) {
-                            output[oIndex + oy] = biasVal + weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
+                            output[oIndex + oy] =
+                                biasVal +
+                                weights[wIndex + 0] * input[iIndex + oy] +
+                                weights[wIndex + 1] * input[iIndex + oy + 1] +
+                                weights[wIndex + 2] * input[iIndex + oy + 2];
                         }
-                        iIndex+=inputDims[3];
+                        iIndex += inputDims[3];
                         for (std::size_t oy = 0; oy < oySize; ++oy) {
-                            output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
+                            output[oIndex + oy] +=
+                                weights[wIndex + 3] * input[iIndex + oy] +
+                                weights[wIndex + 4] * input[iIndex + oy + 1] +
+                                weights[wIndex + 5] * input[iIndex + oy + 2];
                         }
-                        iIndex+=inputDims[3];
+                        iIndex += inputDims[3];
                         for (std::size_t oy = 0; oy < oySize; ++oy) {
-                            output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
+                            output[oIndex + oy] +=
+                                weights[wIndex + 6] * input[iIndex + oy] +
+                                weights[wIndex + 7] * input[iIndex + oy + 1] +
+                                weights[wIndex + 8] * input[iIndex + oy + 2];
                         }
                     }
                 } else {
-                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) {
+                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox,
+                                     oIndex += oySize,
+                                     iIndex -= strideDims[0] * inputDims[3]) {
                         for (std::size_t oy = 0; oy < oySize; ++oy) {
-                            output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2];
+                            output[oIndex + oy] +=
+                                weights[wIndex + 0] * input[iIndex + oy] +
+                                weights[wIndex + 1] *
+                                    input[iIndex + oy + strideDims[0]] +
+                                weights[wIndex + 2] *
+                                    input[iIndex + oy + strideDims[0] * 2];
                         }
-                        iIndex+=strideDims[0]*inputDims[3];
+                        iIndex += strideDims[0] * inputDims[3];
                         for (std::size_t oy = 0; oy < oySize; ++oy) {
-                            output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2];
+                            output[oIndex + oy] +=
+                                weights[wIndex + 3] * input[iIndex + oy] +
+                                weights[wIndex + 4] *
+                                    input[iIndex + oy + strideDims[0]] +
+                                weights[wIndex + 5] *
+                                    input[iIndex + oy + strideDims[0] * 2];
                         }
-                        iIndex+=strideDims[0]*inputDims[3];
+                        iIndex += strideDims[0] * inputDims[3];
                         for (std::size_t oy = 0; oy < oySize; ++oy) {
-                            output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2];
+                            output[oIndex + oy] +=
+                                weights[wIndex + 6] * input[iIndex + oy] +
+                                weights[wIndex + 7] *
+                                    input[iIndex + oy + strideDims[0]] +
+                                weights[wIndex + 8] *
+                                    input[iIndex + oy + strideDims[0] * 2];
                         }
                     }
                 }
@@ -200,19 +254,25 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
 
                 B biasVal = (biases != nullptr) ? biases[ch] : B(0);
 
-                const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t iIndex =
+                    (ch + batch * inputDims[1]) * inputDims[2] * inputDims[3];
                 const std::size_t wIndex = ch;
 
                 if (strideDims[0] == 1 && strideDims[1] == 1) {
-                    for (; index < iIndex + oxSize*oySize; ++index) {
-                        output[index] = biasVal + weights[wIndex] * input[index];
+                    for (; index < iIndex + oxSize * oySize; ++index) {
+                        output[index] =
+                            biasVal + weights[wIndex] * input[index];
                     }
-                } else  {
-                    std::size_t oIndex =  (ch + batch*inputDims[1]) * oxSize * oySize;
-                    for (std::size_t ox = 0; ox < oxSize; ++ox, oIndex+=oySize) {
-                        index = iIndex + strideDims[0]*inputDims[3];
-                        for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
-                            output[oIndex + oy] += weights[wIndex]*input[index+iy];
+                } else {
+                    std::size_t oIndex =
+                        (ch + batch * inputDims[1]) * oxSize * oySize;
+                    for (std::size_t ox = 0; ox < oxSize;
+                         ++ox, oIndex += oySize) {
+                        index = iIndex + strideDims[0] * inputDims[3];
+                        for (std::size_t oy = 0, iy = 0; oy < oySize;
+                             ++oy, iy += strideDims[1]) {
+                            output[oIndex + oy] +=
+                                weights[wIndex] * input[index + iy];
                         }
                     }
                 }
@@ -223,22 +283,37 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
             for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
 
                 B biasVal = (biases != nullptr) ? biases[ch] : B(0);
-                std::fill(output, output+outChannels_s, biasVal);
+                std::fill(output, output + outChannels_s, biasVal);
 
-                const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t iIndex =
+                    (ch + batch * inputDims[1]) * inputDims[2] * inputDims[3];
                 const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
 
                 for (std::size_t ox = 0; ox < oxSize; ++ox) {
                     for (std::size_t oy = 0; oy < oySize; ++oy) {
 
-                        const std::size_t oIndexFull = ox*oySize + oy;
-                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
-                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
+                        const std::size_t oIndexFull = ox * oySize + oy;
+                        const signedsize ix =
+                            static_cast<signedsize>(ox * strideDims[0]);
+                        const signedsize iy =
+                            static_cast<signedsize>(oy * strideDims[1]);
 
-                        for (std::size_t sx = 0; sx*dilationDims[0] < dilated_kernel_x; ++sx) {
-                            for (std::size_t sy = 0; sy*dilationDims[1] < dilated_kernel_y; ++sy) {
-                                output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
-                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy*dilationDims[1]))];
+                        for (std::size_t sx = 0;
+                             sx * dilationDims[0] < dilated_kernel_x;
+                             ++sx) {
+                            for (std::size_t sy = 0;
+                                 sy * dilationDims[1] < dilated_kernel_y;
+                                 ++sy) {
+                                output[oIndexFull] +=
+                                    weights[wIndex + sx * kernelDims[1] + sy] *
+                                    input[iIndex +
+                                          static_cast<std::size_t>(
+                                              ix + static_cast<signedsize>(
+                                                       sx * dilationDims[0])) *
+                                              inputDims[3] +
+                                          static_cast<std::size_t>(
+                                              iy + static_cast<signedsize>(
+                                                       sy * dilationDims[1]))];
                             }
                         }
                     }
@@ -249,17 +324,31 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& stri
     }
 }
 
-
 // Kernels registration to implementation entry point
-REGISTRAR(ConvDepthWiseImpl2D_cpu,
+REGISTRAR(
+    ConvDepthWiseImpl2D_cpu,
     {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
+    {ProdConso::inPlaceModel,
+     Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>,
+     nullptr});
 REGISTRAR(ConvDepthWiseImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>, nullptr});
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Int32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t,
+                                                         std::int32_t,
+                                                         std::int32_t,
+                                                         std::int32_t>,
+           nullptr});
 REGISTRAR(ConvDepthWiseImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
-}  // namespace Aidge
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float64, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double,
+                                                         double,
+                                                         double,
+                                                         double>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index c06d0912f419909013f930867ce3c3238c1a5555..5980554238240ce91ab75e739c14cd32e12d888d 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -17,41 +17,41 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using Conv1D_Op = Conv_Op<1>;
 using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
-    void(const std::array<DimSize_t, 1>&,
-        const std::array<DimSize_t, 1>&,
-        const std::array<DimSize_t, 1>&,
-        const std::array<DimSize_t, 3> &,
-        DimSize_t,
-        const void *,
-        const void *,
-        const void *,
-        void *)>;
+                                        void(const std::array<DimSize_t, 1> &,
+                                             const std::array<DimSize_t, 1> &,
+                                             const std::array<DimSize_t, 1> &,
+                                             const std::array<DimSize_t, 3> &,
+                                             DimSize_t,
+                                             const void *,
+                                             const void *,
+                                             const void *,
+                                             void *)>;
 
 using Conv2D_Op = Conv_Op<2>;
 using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
-    void(const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 4> &,
-        DimSize_t,
-        const void *,
-        const void *,
-        const void *,
-        void *)>;
+                                        void(const std::array<DimSize_t, 2> &,
+                                             const std::array<DimSize_t, 2> &,
+                                             const std::array<DimSize_t, 2> &,
+                                             const std::array<DimSize_t, 4> &,
+                                             DimSize_t,
+                                             const void *,
+                                             const void *,
+                                             const void *,
+                                             void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
 REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
index b4abac19143d8222cf632757f1c9d4a532cb3661..745cd474dd4b6319910355fa3849169520e18e91 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
@@ -17,12 +17,12 @@
 #include <tuple>
 #include <vector>
 
-#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 /**
@@ -39,16 +39,16 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
-                            const std::array<DimSize_t, 1>& dilationDims,
-                            const std::array<DimSize_t, 1>& kernelDims,
-                            const std::array<DimSize_t, 3>& inputDims,
-                            DimSize_t outChannels,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_)
-{
+void ConvImpl1D_cpu_forward_kernel(
+    const std::array<DimSize_t, 1> &strideDims,
+    const std::array<DimSize_t, 1> &dilationDims,
+    const std::array<DimSize_t, 1> &kernelDims,
+    const std::array<DimSize_t, 3> &inputDims,
+    DimSize_t outChannels,
+    const void *input_,
+    const void *weights_,
+    const void *biases_,
+    void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
@@ -56,10 +56,13 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
     O *output = static_cast<O *>(output_);
 
     // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilationDims[0]*(kernelDims[0] - 1) - 1 + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
-    const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
+    const std::size_t oxSize = static_cast<std::size_t>(
+        std::floor(static_cast<float>(inputDims[2] -
+                                      dilationDims[0] * (kernelDims[0] - 1) -
+                                      1 + strideDims[0]) /
+                   static_cast<float>(strideDims[0])));
+    const DimSize_t dilated_kernel_x =
+        dilationDims[0] * (kernelDims[0] - 1) + 1;
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -69,25 +72,37 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
         for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
-            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            const std::size_t oIndex = (outCh + batch * outChannels) * oxSize;
             // If bias = nullptr, set B(0)
             B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            std::fill(output + oIndex, output + (oIndex + oxSize), biasVal);
             for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
-                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                const std::size_t iIndex =
+                    (inCh + batch * inputDims[1]) * inputDims[2];
+                const std::size_t wIndex =
+                    (inCh + outCh * inputDims[1]) * kernelDims[0];
                 for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    // const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                    // const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    // const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    // const signedsize difx =
+                    //     static_cast<signedsize>(-ox * strideDims[0]);
+                    // const std::size_t sxMin =
+                    //     static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    // const std::size_t sxMax =
+                    //     (static_cast<signedsize>(inputDims[2]) + difx) < 0
+                    //         ? 0
+                    //         : ((inputDims[2] + difx) > kernelDims[0]
+                    //                ? kernelDims[0]
+                    //                : inputDims[2] + difx);
                     const std::size_t sxMin = 0;
                     const std::size_t sxMax = dilated_kernel_x;
                     const std::size_t oIndexFull = oIndex + ox;
-                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                    const signedsize ix =
+                        static_cast<signedsize>(ox * strideDims[0]);
 
-                    for (std::size_t sx = sxMin; sx*dilationDims[0] < sxMax; ++sx) {
-                        output[oIndexFull] += weights[wIndex + sx] *
-                                                input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx*dilationDims[0]))];
+                    for (std::size_t sx = sxMin; sx * dilationDims[0] < sxMax;
+                         ++sx) {
+                        output[oIndexFull] +=
+                            weights[wIndex + sx] *
+                            input[iIndex +
+                                  static_cast<std::size_t>(
+                                      ix + static_cast<signedsize>(
+                                               sx * dilationDims[0]))];
                     }
                 }
             }
@@ -97,18 +112,32 @@ void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
 
 // Kernels registration to implementation entry point
 REGISTRAR(ConvImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
-REGISTRAR(ConvImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>,
+           nullptr});
 REGISTRAR(ConvImpl1D_cpu,
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float16, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half,
+                                                half_float::half,
+                                                half_float::half,
+                                                half_float::half>,
+           nullptr});
+REGISTRAR(
+    ConvImpl1D_cpu,
     {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
-REGISTRAR(ConvImpl1D_cpu,
+    {ProdConso::inPlaceModel,
+     Aidge::ConvImpl1D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>,
+     nullptr});
+REGISTRAR(
+    ConvImpl1D_cpu,
     {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
-
+    {ProdConso::inPlaceModel,
+     Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>,
+     nullptr});
 
 /**
  * @brief Forward kernel for 2D Convolution on CPU backend.
@@ -124,16 +153,16 @@ REGISTRAR(ConvImpl1D_cpu,
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
-                            const std::array<DimSize_t, 2>& dilationDims,
-                            const std::array<DimSize_t, 2>& kernelDims,
-                            const std::array<DimSize_t, 4> &inputDims,
-                            DimSize_t outChannels,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_)
-{
+void ConvImpl2D_cpu_forward_kernel(
+    const std::array<DimSize_t, 2> &strideDims,
+    const std::array<DimSize_t, 2> &dilationDims,
+    const std::array<DimSize_t, 2> &kernelDims,
+    const std::array<DimSize_t, 4> &inputDims,
+    DimSize_t outChannels,
+    const void *input_,
+    const void *weights_,
+    const void *biases_,
+    void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
@@ -141,23 +170,24 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
     O *output = static_cast<O *>(output_);
 
     // output H size
-    const DimSize_t dilated_kernel_x = dilationDims[0]*(kernelDims[0] - 1) + 1;
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
+    const DimSize_t dilated_kernel_x =
+        dilationDims[0] * (kernelDims[0] - 1) + 1;
+    const std::size_t oxSize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(inputDims[2] - dilated_kernel_x + strideDims[0]) /
+        static_cast<float>(strideDims[0])));
     // output W size
-    const DimSize_t dilated_kernel_y = dilationDims[1]*(kernelDims[1] - 1) + 1;
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
-                                static_cast<float>(strideDims[1])));
-
+    const DimSize_t dilated_kernel_y =
+        dilationDims[1] * (kernelDims[1] - 1) + 1;
+    const std::size_t oySize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(inputDims[3] - dilated_kernel_y + strideDims[1]) /
+        static_cast<float>(strideDims[1])));
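+    // e.g. inputDims[3] = 7, kernelDims[1] = 3, dilationDims[1] = 2 and
+    // strideDims[1] = 1 give dilated_kernel_y = 5 and
+    // oySize = floor((7 - 5 + 1) / 1) = 3.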
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
     // input  (batch, inCh, Xin, Yin)
     // weight (outCh, inCh, kernelX, kernelY)
     // does not take Dilation attribute into account
-    const std::size_t outChannels_s =  oxSize * oySize;
+    const std::size_t outChannels_s = oxSize * oySize;
     using signedsize = std::make_signed<std::size_t>::type;
 
     if (dilated_kernel_x == 3 && dilated_kernel_y == 3) {
@@ -165,36 +195,73 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
             for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
                 // If bias = nullptr, set B(0)
                 B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-                std::fill(output, output+outChannels_s, biasVal);
+                std::fill(output, output + outChannels_s, biasVal);
                 for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                    std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
-                    const std::size_t wIndex = (inCh + outCh*inputDims[1]) * 9;
-                    if (strideDims[0] == 1 && strideDims[1]==1) {
-                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=inputDims[3]) {
+                    std::size_t iIndex = (inCh + batch * inputDims[1]) *
+                                         inputDims[2] * inputDims[3];
+                    const std::size_t wIndex =
+                        (inCh + outCh * inputDims[1]) * 9;
+                    if (strideDims[0] == 1 && strideDims[1] == 1) {
+                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize;
+                             ++ox, oIndex += oySize, iIndex -= inputDims[3]) {
                             for (std::size_t oy = 0; oy < oySize; ++oy) {
-                                output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+1]+weights[wIndex+2]*input[iIndex+oy+2];
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 0] * input[iIndex + oy] +
+                                    weights[wIndex + 1] *
+                                        input[iIndex + oy + 1] +
+                                    weights[wIndex + 2] *
+                                        input[iIndex + oy + 2];
                             }
-                            iIndex+=inputDims[3];
+                            iIndex += inputDims[3];
                             for (std::size_t oy = 0; oy < oySize; ++oy) {
-                                output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+1]+weights[wIndex+5]*input[iIndex+oy+2];
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 3] * input[iIndex + oy] +
+                                    weights[wIndex + 4] *
+                                        input[iIndex + oy + 1] +
+                                    weights[wIndex + 5] *
+                                        input[iIndex + oy + 2];
                             }
-                            iIndex+=inputDims[3];
+                            iIndex += inputDims[3];
                             for (std::size_t oy = 0; oy < oySize; ++oy) {
-                                output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+1]+weights[wIndex+8]*input[iIndex+oy+2];
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 6] * input[iIndex + oy] +
+                                    weights[wIndex + 7] *
+                                        input[iIndex + oy + 1] +
+                                    weights[wIndex + 8] *
+                                        input[iIndex + oy + 2];
                             }
                         }
                     } else {
-                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex-=strideDims[0]*inputDims[3]) {
+                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize;
+                             ++ox,
+                             oIndex += oySize,
+                             iIndex -= strideDims[0] * inputDims[3]) {
                             for (std::size_t oy = 0; oy < oySize; ++oy) {
-                                output[oIndex + oy] += weights[wIndex+0]*input[iIndex+oy]+weights[wIndex+1]*input[iIndex+oy+strideDims[0]]+weights[wIndex+2]*input[iIndex+oy+strideDims[0]*2];
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 0] * input[iIndex + oy] +
+                                    weights[wIndex + 1] *
+                                        input[iIndex + oy + strideDims[0]] +
+                                    weights[wIndex + 2] *
+                                        input[iIndex + oy + strideDims[0] * 2];
                             }
-                            iIndex+=strideDims[0]*inputDims[3];
+                            iIndex += strideDims[0] * inputDims[3];
                             for (std::size_t oy = 0; oy < oySize; ++oy) {
-                                output[oIndex + oy] += weights[wIndex+3]*input[iIndex+oy]+weights[wIndex+4]*input[iIndex+oy+strideDims[0]]+weights[wIndex+5]*input[iIndex+oy+strideDims[0]*2];
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 3] * input[iIndex + oy] +
+                                    weights[wIndex + 4] *
+                                        input[iIndex + oy + strideDims[0]] +
+                                    weights[wIndex + 5] *
+                                        input[iIndex + oy + strideDims[0] * 2];
                             }
-                            iIndex+=strideDims[0]*inputDims[3];
+                            iIndex += strideDims[0] * inputDims[3];
                             for (std::size_t oy = 0; oy < oySize; ++oy) {
-                                output[oIndex + oy] += weights[wIndex+6]*input[iIndex+oy]+weights[wIndex+7]*input[iIndex+oy+strideDims[0]]+weights[wIndex+8]*input[iIndex+oy+strideDims[0]*2];
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 6] * input[iIndex + oy] +
+                                    weights[wIndex + 7] *
+                                        input[iIndex + oy + strideDims[0]] +
+                                    weights[wIndex + 8] *
+                                        input[iIndex + oy + strideDims[0] * 2];
                             }
                         }
                     }
@@ -207,18 +274,26 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
             for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
                 // If bias = nullptr, set B(0)
                 B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-                std::fill(output, output+outChannels_s, biasVal);
+                std::fill(output, output + outChannels_s, biasVal);
                 for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                    std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
-                    const std::size_t wIndex = (inCh + outCh*inputDims[1]);
+                    std::size_t iIndex = (inCh + batch * inputDims[1]) *
+                                         inputDims[2] * inputDims[3];
+                    const std::size_t wIndex = (inCh + outCh * inputDims[1]);
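+                    // 1x1 kernel: a single weight per (outCh, inCh) pair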
                     if (strideDims[0] == 1 && strideDims[1] == 1) {
-                        for (std::size_t oIndex = 0; oIndex < oxSize*oySize; ++oIndex, ++iIndex) {
+                        for (std::size_t oIndex = 0; oIndex < oxSize * oySize;
+                             ++oIndex, ++iIndex) {
                             output[oIndex] += weights[wIndex] * input[iIndex];
                         }
-                    } else  {
-                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex+=inputDims[3]*strideDims[0]) {
-                            for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
-                                output[oIndex + oy] += weights[wIndex+0]*input[iIndex+iy];
+                    } else {
+                        for (std::size_t ox = 0, oIndex = 0; ox < oxSize;
+                             ++ox,
+                                         oIndex += oySize,
+                                         iIndex +=
+                                         inputDims[3] * strideDims[0]) {
+                            for (std::size_t oy = 0, iy = 0; oy < oySize;
+                                 ++oy, iy += strideDims[1]) {
+                                output[oIndex + oy] +=
+                                    weights[wIndex + 0] * input[iIndex + iy];
                             }
                         }
                     }
@@ -231,21 +306,36 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
             for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
                 // If bias = nullptr, set B(0)
                 B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-                std::fill(output, output+outChannels_s, biasVal);
+                std::fill(output, output + outChannels_s, biasVal);
                 for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                    std::size_t iIndex_channel = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
-                    const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
+                    std::size_t iIndex_channel =
+                        (inCh + batch * inputDims[1]) * inputDims[2] *
+                        inputDims[3];
+                    const std::size_t wIndex = (inCh + outCh * inputDims[1]) *
+                                               kernelDims[0] * kernelDims[1];
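+                    // generic path: arbitrary kernel size, stride and
+                    // dilation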
 
                     // loop over each output line
-                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize; ++ox, oIndex+=oySize, iIndex_channel+=inputDims[3]*strideDims[0]) {
+                    for (std::size_t ox = 0, oIndex = 0; ox < oxSize;
+                         ++ox,
+                                     oIndex += oySize,
+                                     iIndex_channel +=
+                                     inputDims[3] * strideDims[0]) {
                         // loop over associated input line
-                        for (std::size_t ky = 0, ix = 0; ky < kernelDims[0]; ++ky, ix += inputDims[3]*dilationDims[0]) {
+                        for (std::size_t ky = 0, ix = 0; ky < kernelDims[0];
+                             ++ky, ix += inputDims[3] * dilationDims[0]) {
                             // loop over the entire line
-                            for (std::size_t oy = 0, iy = 0; oy < oySize; ++oy, iy+=strideDims[1]) {
-                                const std::size_t iIndex = iIndex_channel + ix + iy;
-                                // loop over elements assosicated with one output
-                                for (std::size_t kx = 0;  kx < kernelDims[0]; ++kx) {
-                                    output[oIndex + oy] += weights[wIndex+kernelDims[0]*ky+kx]*input[iIndex+kx*dilationDims[1]];
+                            for (std::size_t oy = 0, iy = 0; oy < oySize;
+                                 ++oy, iy += strideDims[1]) {
+                                const std::size_t iIndex =
+                                    iIndex_channel + ix + iy;
+                                // loop over elements associated with one
+                                // output
+                                for (std::size_t kx = 0; kx < kernelDims[0];
+                                     ++kx) {
+                                    output[oIndex + oy] +=
+                                        weights[wIndex + kernelDims[0] * ky +
+                                                kx] *
+                                        input[iIndex + kx * dilationDims[1]];
                                 }
                             }
                         }
@@ -257,21 +347,34 @@ void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
     }
 }
 
-
-
 // Kernels registration to implementation entry point
 REGISTRAR(ConvImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
-REGISTRAR(ConvImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>,
+           nullptr});
 REGISTRAR(ConvImpl2D_cpu,
+          {{DataType::Any, DataFormat::NCHW},
+           {DataType::Float16, DataFormat::NCHW}},
+          {ProdConso::inPlaceModel,
+           Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half,
+                                                half_float::half,
+                                                half_float::half,
+                                                half_float::half>,
+           nullptr});
+REGISTRAR(
+    ConvImpl2D_cpu,
     {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, nullptr});
-REGISTRAR(ConvImpl2D_cpu,
+    {ProdConso::inPlaceModel,
+     Aidge::ConvImpl2D_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>,
+     nullptr});
+REGISTRAR(
+    ConvImpl2D_cpu,
     {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
-}  // namespace Aidge
+    {ProdConso::inPlaceModel,
+     Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>,
+     nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp
index 40c1b678a78713d6c3b27629ae898c715797b9b2..eb6a4715a37ed48662a6827b03ddc0ebacb60fe4 100644
--- a/include/aidge/backend/cpu/operator/DivImpl.hpp
+++ b/include/aidge/backend/cpu/operator/DivImpl.hpp
@@ -24,10 +24,15 @@
 namespace Aidge {
 // Operator implementation entry point for the backend
 using DivImpl_cpu = OperatorImpl_cpu<Div_Op,
-    void(const std::size_t, const std::size_t, const std::size_t, const void*, const void*,void*)>;
+                                     void(const std::size_t,
+                                          const std::size_t,
+                                          const std::size_t,
+                                          const void *,
+                                          const void *,
+                                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Div_Op, "cpu", Aidge::DivImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp
index ed6e55a79acbe23a689a67c22477f64f785a3aef..16e36f1945edfb2fea4586429fd563a367a93e82 100644
--- a/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/DivImpl_kernels.hpp
@@ -12,10 +12,10 @@
 #ifndef AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_
 
-#include <numeric>     // std::accumulate
-#include <cstddef>     // std::size_t
-#include <cstdint>     // std::int32_t, std::int64_t
-#include <functional>  // std::multiplies
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t, std::int64_t
+#include <functional> // std::multiplies
+#include <numeric>    // std::accumulate
 
 #include "aidge/utils/Registrar.hpp"
 
@@ -35,11 +35,13 @@ namespace Aidge {
 //     const I2* input_2 = static_cast<const I2*>(input2_);
 //     O* output = static_cast<O*>(output_);
 
-//     const std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+//     const std::size_t totalElements = std::accumulate(
+//         outputDims.cbegin(), outputDims.cend(), std::size_t(1),
+//         std::multiplies<std::size_t>());
 
 // 	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
 // 	{
-// 		std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+// 		std::vector<std::size_t> indexes =
+// 		    getMultiDimIndices(outputDims, oIndex);
 
 // 		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
 // 		std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
@@ -51,17 +53,17 @@ namespace Aidge {
 
 template <class I1, class I2, class O>
 constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
-                                const std::size_t input2size_,
-                                const std::size_t output1size_,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+                                          const std::size_t input2size_,
+                                          const std::size_t output1size_,
+                                          const void *input1_,
+                                          const void *input2_,
+                                          void *output_) {
 
-    const I1* input_1 = static_cast<const I1*>(input1_);
-    const I2* input_2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
+    const I1 *input_1 = static_cast<const I1 *>(input1_);
+    const I2 *input_2 = static_cast<const I2 *>(input2_);
+    O *output = static_cast<O *>(output_);
 
-// suppose values are contiguous in memory
+    // assume values are contiguous in memory
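+    // an input of size 1 is broadcast: index 0 is reused against every
+    // element of the other input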
     for (std::size_t i = 0; i < output1size_; ++i) {
         const std::size_t in1_id = (input1size_ != 1) ? i : 0;
         const std::size_t in2_id = (input2size_ != 1) ? i : 0;
@@ -71,14 +73,22 @@ constexpr void DivImpl_cpu_forward_kernel(const std::size_t input1size_,
 
 // Kernels registration to implementation entry point
 REGISTRAR(DivImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<float, float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::DivImpl_cpu_forward_kernel<float, float, float>,
+           nullptr});
 REGISTRAR(DivImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<double, double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::DivImpl_cpu_forward_kernel<double, double, double>,
+           nullptr});
 REGISTRAR(DivImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::DivImpl_cpu_forward_kernel<std::int32_t,
+                                             std::int32_t,
+                                             std::int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_DIVIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/ErfImpl.hpp b/include/aidge/backend/cpu/operator/ErfImpl.hpp
index 3d2835600367e81499cbe6af81a8475a0cd1b61e..bec2031c0947759f27b77101c5e0ea64dc9fb6da 100644
--- a/include/aidge/backend/cpu/operator/ErfImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ErfImpl.hpp
@@ -21,11 +21,11 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using ErfImpl_cpu = OperatorImpl_cpu<Erf_Op,
-    void(const std::size_t, const void*, void*)>;
+using ErfImpl_cpu =
+    OperatorImpl_cpu<Erf_Op, void(const std::size_t, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Erf_Op, "cpu", Aidge::ErfImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ERFIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp
index 02041f55ce9a1b2476db575b40340b1bb6517ce1..386837b58a61c2f5748cdca4faad9904fce33aa4 100644
--- a/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ErfImpl_kernels.hpp
@@ -21,11 +21,11 @@
 namespace Aidge {
 template <class I, class O>
 void ErfImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                const void *input_,
+                                void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = std::erf(input[i]);
@@ -34,14 +34,20 @@ void ErfImpl_cpu_forward_kernel(std::size_t inputLenght,
 
 // Kernels registration to implementation entry point
 REGISTRAR(ErfImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::ErfImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(ErfImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::ErfImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(ErfImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::ErfImpl_cpu_forward_kernel<std::int32_t, std::int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_ERFIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp
index e82352d9cba60440efef87faf97dfd4ed66565b6..6b42aff87edcd0ade8e866f3595d93d250fb0544 100644
--- a/include/aidge/backend/cpu/operator/FCImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl.hpp
@@ -24,25 +24,25 @@
 namespace Aidge {
 // Operator implementation entry point for the backend
 using FCImpl_cpu = OperatorImpl_cpu<FC_Op,
-    void(const DimSize_t,
-        const DimSize_t,
-        const DimSize_t,
-        const void *,
-        const void *,
-        const void *,
-        void *),
-    void(const DimSize_t,
-        const DimSize_t,
-        const DimSize_t,
-        const void *,
-        const void *,
-        const void *,
-        void *,
-        void *,
-        void *)>;
+                                    void(const DimSize_t,
+                                         const DimSize_t,
+                                         const DimSize_t,
+                                         const void *,
+                                         const void *,
+                                         const void *,
+                                         void *),
+                                    void(const DimSize_t,
+                                         const DimSize_t,
+                                         const DimSize_t,
+                                         const void *,
+                                         const void *,
+                                         const void *,
+                                         void *,
+                                         void *,
+                                         void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(FC_Op, "cpu", Aidge::FCImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp
index c57f86e6ac6e74acebb48f471991e7181920f7c3..28f09794a7b44fed9cd950c040bda50f9dbf6aee 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_kernels.hpp
@@ -19,8 +19,10 @@
 
 namespace Aidge {
 // template <class I, class W, class B, class O>
-// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims,
-//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
+// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs,
+//                                const std::array<DimSize_t, 4>& dims,
+//                                const void* input_, const void* weights_,
+//                                const void* biases_, void* output_) {
 //     // FIXME: missing FC attributes as arguments
 //     const I* input = static_cast<const I*>(input_);
 //     const W* weights = static_cast<const W*>(weights_);
@@ -38,13 +40,18 @@ namespace Aidge {
 //     for (std::size_t ix = 0; ix < dims[0]; ++ix) {
 //         for (std::size_t iy = 0; iy < dims[1]; ++iy) {
 //             for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-//                 const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
-//                 for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
+//                 const std::size_t iIndex =
+//                     dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
+//                 for (std::size_t outCh = 0; outCh < outputFeatureSize;
+//                      ++outCh) {
 //                     const std::size_t oIndex = dims[3] * outCh;
+//                     const std::size_t wIndex =
+//                         (inCh + dims[2] * (iy + dims[1] * ix)) *
+//                             outputFeatureSize + outCh;
+//                     // (iIndex*outputFeatureSize + oIndex)/dims[3]
+//                                           (iIndex*outputFeatureSize +
+//                                           oIndex)/dims[3];
 //                     for (std::size_t batch = 0; batch < dims[3]; ++batch) {
-//                         output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
+//                         output[oIndex + batch] += weights[wIndex] *
+//                         input[iIndex + batch];
 //                     }
 //                 }
 //             }
@@ -53,8 +60,10 @@ namespace Aidge {
 // }
 
 // template <class I, class W, class B, class O>
-// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims,
-//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
+// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs,
+//                                const std::array<DimSize_t, 2>& dims,
+//                                const void* input_, const void* weights_,
+//                                const void* biases_, void* output_) {
 //     // FIXME: missing FC attributes as arguments
 //     const I* input = static_cast<const I*>(input_);
 //     const W* weights = static_cast<const W*>(weights_);
@@ -74,9 +83,11 @@ namespace Aidge {
 //     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //         const std::size_t oIndex = dims[1] * batch;
 //         for (std::size_t i = 0; i < dims[1]; ++i) {
-//             for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
-//                 std::size_t wIndex = i * outputFeatureSize + outCh;  // (iIndex*outputFeatureSize + oIndex)/dims[3];
-//                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
+//             for (std::size_t outCh = 0; outCh < outputFeatureSize;
+//                  ++outCh) {
+//                 std::size_t wIndex = i * outputFeatureSize + outCh;
+//                 // (iIndex*outputFeatureSize + oIndex)/dims[3]
+//                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
 //             }
 //         }
 //     }
@@ -84,33 +95,35 @@ namespace Aidge {
 
 template <class I, class W, class B, class O>
 void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
-                            const DimSize_t inputFeatureSize,
-                            const DimSize_t outputFeatureSize,
-                            const void* input_,
-                            const void* weights_,
-                            const void* biases_,
-                            void* output_) {
+                               const DimSize_t inputFeatureSize,
+                               const DimSize_t outputFeatureSize,
+                               const void *input_,
+                               const void *weights_,
+                               const void *biases_,
+                               void *output_) {
     // FIXME: missing FC attributes as arguments
-    const I* input = static_cast<const I*>(input_);
-    const W* weights = static_cast<const W*>(weights_);
-    const B* biases = static_cast<const B*>(biases_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
 
     if (biases == nullptr) {
-        std::fill(output, output+(batchSize*outputFeatureSize), B(0));
-    }
-    else {
+        std::fill(output, output + (batchSize * outputFeatureSize), B(0));
+    } else {
         for (std::size_t batch = 0; batch < batchSize; ++batch) {
-            std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
+            std::copy(biases,
+                      biases + outputFeatureSize,
+                      output + (batch * outputFeatureSize));
         }
     }
 
     for (std::size_t batch = 0; batch < batchSize; ++batch) {
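+        // out[b][o] = bias[o] + dot(input row b, weights row o); the value
+        // already in output (bias or 0) seeds std::inner_product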
         for (std::size_t out = 0; out < outputFeatureSize; ++out) {
-            output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
-                                                        input + (batch + 1)*inputFeatureSize,
-                                                        weights + out*inputFeatureSize,
-                                                        output[out + batch*outputFeatureSize]);
+            output[out + batch * outputFeatureSize] =
+                std::inner_product(input + batch * inputFeatureSize,
+                                   input + (batch + 1) * inputFeatureSize,
+                                   weights + out * inputFeatureSize,
+                                   output[out + batch * outputFeatureSize]);
         }
     }
 }
@@ -119,30 +132,28 @@ template <class I, class O, class W, class B>
 void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
                                 const DimSize_t inputFeatureSize,
                                 const DimSize_t outputFeatureSize,
-                                const void* input_,
-                                const void* originalInput_,
-                                const void* weight_,
-                                void* output_,
-                                void* weightGrad_,
-                                void* biasesGrad_)
-{
+                                const void *input_,
+                                const void *originalInput_,
+                                const void *weight_,
+                                void *output_,
+                                void *weightGrad_,
+                                void *biasesGrad_) {
     // FIXME: missing FC attributes as arguments
-    const I* input  = static_cast<const I*>(input_);
-    const I* originalInput  = static_cast<const I*>(originalInput_);
-    const W* weight = static_cast<const W*>(weight_);
-    O* output       = static_cast<O*>(output_);
-    W* weightGrad   = static_cast<W*>(weightGrad_);
-    B* biasesGrad   = static_cast<B*>(biasesGrad_);
-
+    const I *input = static_cast<const I *>(input_);
+    const I *originalInput = static_cast<const I *>(originalInput_);
+    const W *weight = static_cast<const W *>(weight_);
+    O *output = static_cast<O *>(output_);
+    W *weightGrad = static_cast<W *>(weightGrad_);
+    B *biasesGrad = static_cast<B *>(biasesGrad_);
 
     // bias grad
-    if (biasesGrad == nullptr) { // no bias
-        std::fill(biasesGrad, biasesGrad + outputFeatureSize, B(0));
-    } else {
+    if (biasesGrad != nullptr) { // no bias, no bias gradient to compute
         for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
             B sum{0};
             for (std::size_t b = 0; b < batchSize; ++b) {
-                sum += input[b*outputFeatureSize + o];
+                sum += input[b * outputFeatureSize + o];
             }
             biasesGrad[o] = sum;
         }
@@ -153,9 +164,10 @@ void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
         for (std::size_t c = 0; c < inputFeatureSize; ++c) {
             W sum{0};
             for (std::size_t b = 0; b < batchSize; ++b) {
-                sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
+                sum += originalInput[b * inputFeatureSize + c] *
+                       input[b * outputFeatureSize + o];
             }
-            weightGrad[o*inputFeatureSize + c] = sum;
+            weightGrad[o * inputFeatureSize + c] = sum;
         }
     }
 
@@ -164,23 +176,33 @@ void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
         for (std::size_t c = 0; c < inputFeatureSize; ++c) {
             O sum{0};
             for (std::size_t o = 0; o < outputFeatureSize; ++o) {
-                sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
+                sum += weight[o * inputFeatureSize + c] *
+                       input[b * outputFeatureSize + o];
             }
-            output[b*inputFeatureSize + c] = sum;
+            output[b * inputFeatureSize + c] = sum;
         }
     }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(FCImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float32}},
-    {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>, Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>});
-REGISTRAR(FCImpl_cpu,
-    {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Float64}},
-    {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>, Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>});
+          {ImplSpec::IOSpec{DataType::Any},
+           ImplSpec::IOSpec{DataType::Float32}},
+          {ProdConso::defaultModel,
+           Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>,
+           Aidge::FCImpl_cpu_backward_kernel<float, float, float, float>});
 REGISTRAR(FCImpl_cpu,
+          {ImplSpec::IOSpec{DataType::Any},
+           ImplSpec::IOSpec{DataType::Float64}},
+          {ProdConso::defaultModel,
+           Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>,
+           Aidge::FCImpl_cpu_backward_kernel<double, double, double, double>});
+REGISTRAR(
+    FCImpl_cpu,
     {ImplSpec::IOSpec{DataType::Any}, ImplSpec::IOSpec{DataType::Int32}},
-    {ProdConso::defaultModel, Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>, Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>});
-}  // namespace Aidge
+    {ProdConso::defaultModel,
+     Aidge::FCImpl_cpu_forward_kernel<int32_t, int32_t, int32_t, int32_t>,
+     Aidge::FCImpl_cpu_backward_kernel<int32_t, int32_t, int32_t, int32_t>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_FCIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/FoldImpl.hpp b/include/aidge/backend/cpu/operator/FoldImpl.hpp
index 94ddbdcba8e33e12108968d536037ab1ccab2c8d..ea8f53405aeb5039c3366ee7d605fd5f5fd4fb7b 100644
--- a/include/aidge/backend/cpu/operator/FoldImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FoldImpl.hpp
@@ -17,26 +17,26 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Fold.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using Fold2D_Op = Fold_Op<2>;
 using FoldImpl2D_cpu = OperatorImpl_cpu<Fold_Op<2>,
-    void(const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::array<DimSize_t, 2>&,
-        const std::vector<DimSize_t> &,
-        const void *,
-        void *)>;
+                                        void(const std::array<DimSize_t, 2> &,
+                                             const std::array<DimSize_t, 2> &,
+                                             const std::array<DimSize_t, 2> &,
+                                             const std::array<DimSize_t, 2> &,
+                                             const std::vector<DimSize_t> &,
+                                             const void *,
+                                             void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Fold2D_Op, "cpu", Aidge::FoldImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp b/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp
index 8cced8958f49f1cc4215c7cf463cc3391fb29246..7427c253f10d94ba3a54881e09eb4de83b0449f2 100644
--- a/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FoldImpl_kernels.hpp
@@ -14,38 +14,41 @@
 
 #include "aidge/utils/Registrar.hpp"
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/FoldImpl.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <array>
 #include <algorithm>
+#include <array>
+#include <cmath>
 
 namespace Aidge {
 template <class I, class O>
-void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
-                                    const std::array<DimSize_t, 2>& strideDims,
-                                    const std::array<DimSize_t, 2>& dilationDims,
-                                    const std::array<DimSize_t, 2>& kernelDims,
-                                    const std::vector<DimSize_t> &dims,
-                                    const void *input_, void *output_)
-{
+void FoldImpl2D_cpu_forward_kernel(
+    const std::array<DimSize_t, 2> &outputDims,
+    const std::array<DimSize_t, 2> &strideDims,
+    const std::array<DimSize_t, 2> &dilationDims,
+    const std::array<DimSize_t, 2> &kernelDims,
+    const std::vector<DimSize_t> &dims,
+    const void *input_,
+    void *output_) {
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
     const DimSize_t inHeight = outputDims[0];
     const DimSize_t inWidth = outputDims[1];
 
-    const DimSize_t kernelExtentHeight = dilationDims[0] *
-                                            (kernelDims[0] - 1) + 1;
-    const DimSize_t outHeight = 1 + static_cast<DimSize_t>(
-                    floor(static_cast<float>(inHeight - kernelExtentHeight) /
-                            static_cast<float>(strideDims[0])));
-    const DimSize_t kernelExtentWidth = dilationDims[1] *
-                                            (kernelDims[1] - 1) + 1;
-    const DimSize_t outWidth = 1 + static_cast<DimSize_t>(
-                    floor(static_cast<float>(inWidth - kernelExtentWidth) /
-                            static_cast<float>(strideDims[1])));
+    const DimSize_t kernelExtentHeight =
+        dilationDims[0] * (kernelDims[0] - 1) + 1;
+    const DimSize_t outHeight =
+        1 + static_cast<DimSize_t>(
+                floor(static_cast<float>(inHeight - kernelExtentHeight) /
+                      static_cast<float>(strideDims[0])));
+    const DimSize_t kernelExtentWidth =
+        dilationDims[1] * (kernelDims[1] - 1) + 1;
+    const DimSize_t outWidth =
+        1 + static_cast<DimSize_t>(
+                floor(static_cast<float>(inWidth - kernelExtentWidth) /
+                      static_cast<float>(strideDims[1])));
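+    // i.e. out = 1 + floor((in - (dilation * (k - 1) + 1)) / stride), the
+    // usual convolution output size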
     const DimSize_t outChannels = dims[dims.size() - 2];
     const DimSize_t inChannels = outChannels / kernelDims[0] / kernelDims[1];
 
@@ -58,13 +61,19 @@ void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
             const auto inC = outC / kernelDims[0] / kernelDims[1];
 
             for (DimSize_t outH = 0; outH < outHeight; ++outH) {
-                const auto inH = outH * strideDims[0] + inOffsetH * dilationDims[0];
+                const auto inH =
+                    outH * strideDims[0] + inOffsetH * dilationDims[0];
 
                 for (DimSize_t outW = 0; outW < outWidth; ++outW) {
-                    const auto inW = outW * strideDims[1] + inOffsetW * dilationDims[1];
+                    const auto inW =
+                        outW * strideDims[1] + inOffsetW * dilationDims[1];
 
-                    output[((n * inChannels + inC) * inHeight + inH) * inWidth + inW] +=
-                        input[((n * outChannels + outC) * outHeight + outH) * outWidth + outW];
+                    output[((n * inChannels + inC) * inHeight + inH) *
+                               inWidth +
+                           inW] +=
+                        input[((n * outChannels + outC) * outHeight + outH) *
+                                  outWidth +
+                              outW];
                 }
             }
         }
@@ -73,14 +82,20 @@ void FoldImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& outputDims,
 
 // Kernels registration to implementation entry point
 REGISTRAR(FoldImpl2D_cpu,
-    {DataType::Float32},
-    {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::defaultModel,
+           Aidge::FoldImpl2D_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(FoldImpl2D_cpu,
-    {DataType::Float64},
-    {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::defaultModel,
+           Aidge::FoldImpl2D_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(FoldImpl2D_cpu,
-    {DataType::Int32},
-    {ProdConso::defaultModel, Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::defaultModel,
+           Aidge::FoldImpl2D_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_FOLDIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
index 4e04b1a595a8660b1528e49921e7e3e7a567829a..2c39b8af09f292a3389bfdfe102984d9e5375de7 100644
--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp
@@ -22,11 +22,14 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<GlobalAveragePooling_Op,
+using GlobalAveragePoolingImpl_cpu = OperatorImpl_cpu<
+    GlobalAveragePooling_Op,
     void(const std::vector<DimSize_t> &, const void *, void *)>;
 
 // Implementation entry point registration to Operator
-REGISTRAR(GlobalAveragePooling_Op, "cpu", Aidge::GlobalAveragePoolingImpl_cpu::create);
+REGISTRAR(GlobalAveragePooling_Op,
+          "cpu",
+          Aidge::GlobalAveragePoolingImpl_cpu::create);
 } // namespace Aidge
 
 #endif /* _AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
index ed838a94cc0c0238a870427c3b774b29f7818b09..f4aee4a2cba7bbf0d9051f12e366b1cc86ecb520 100644
--- a/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GlobalAveragePoolingImpl_kernels.hpp
@@ -13,8 +13,8 @@
 #define AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_
 
 #include <cstddef>
-#include <functional>  // std::multiplies
-#include <numeric>     // std::accumulate
+#include <functional> // std::multiplies
+#include <numeric>    // std::accumulate
 #include <vector>
 
 #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
@@ -23,52 +23,64 @@
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
-
 namespace Aidge {
 template <class I, class O>
 void GlobalAveragePoolingImpl_cpu_forward_kernel(
-    const std::vector<DimSize_t> &dims, const void *input_, void *output_) {
-  // error checking
-    AIDGE_ASSERT(dims.size() >= 3,"GlobalAveragePool needs at least a 3 dimensions "
+    const std::vector<DimSize_t> &dims,
+    const void *input_,
+    void *output_) {
+    // error checking
+    AIDGE_ASSERT(dims.size() >= 3,
+                 "GlobalAveragePool needs at least a 3 dimensions "
                  "input, number of input dim : {}",
                  dims.size());
 
-  // computation
-  const I *input = static_cast<const I *>(input_);
-  O *output = static_cast<O *>(output_);
+    // computation
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
-  DimSize_t nb_elems = std::accumulate(dims.begin(), dims.end(), std::size_t(1),
-                                       std::multiplies<std::size_t>());
+    DimSize_t nb_elems = std::accumulate(dims.begin(),
+                                         dims.end(),
+                                         std::size_t(1),
+                                         std::multiplies<std::size_t>());
 
-  const DimSize_t in_batch_nb_elems{nb_elems / dims[0]};
-  const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]};
-  const DimSize_t out_batch_nb_elems{dims[1]};
-  // parse channel by channel and fill each output with the average of the
-  // values in the channel
-  for (DimSize_t batch = 0; batch < dims[0]; ++batch) {
-    for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
-      const I *filter_start = std::next(
-          input, (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
-      I mean = 0;
-      for (size_t i = 0; i < in_channel_nb_elems; ++i) {
-        // Single pass numerically stable mean, using the fmaf
-        mean = fmaf(filter_start[i] - mean, 1.0f/(i+1), mean);
-      }
-      output[batch * out_batch_nb_elems + channel] = mean;
+    const DimSize_t in_batch_nb_elems{nb_elems / dims[0]};
+    const DimSize_t in_channel_nb_elems{in_batch_nb_elems / dims[1]};
+    const DimSize_t out_batch_nb_elems{dims[1]};
+    // parse channel by channel and fill each output with the average of the
+    // values in the channel
+    for (DimSize_t batch = 0; batch < dims[0]; ++batch) {
+        for (DimSize_t channel = 0; channel < dims[1]; ++channel) {
+            const I *filter_start = std::next(
+                input,
+                (batch * in_batch_nb_elems) + (channel * in_channel_nb_elems));
+            I mean = 0;
+            for (size_t i = 0; i < in_channel_nb_elems; ++i) {
+                // Single-pass numerically stable mean, using fmaf
+                mean = fmaf(filter_start[i] - mean, 1.0f / (i + 1), mean);
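+                // i.e. mean_{i+1} = mean_i + (x_i - mean_i) / (i + 1)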
+            }
+            output[batch * out_batch_nb_elems + channel] = mean;
+        }
     }
-  }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(GlobalAveragePoolingImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>, nullptr});
-REGISTRAR(GlobalAveragePoolingImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float32},
+          {ProdConso::defaultModel,
+           Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(GlobalAveragePoolingImpl_cpu,
+          {DataType::Float64},
+          {ProdConso::defaultModel,
+           Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<double, double>,
+           nullptr});
+REGISTRAR(
+    GlobalAveragePoolingImpl_cpu,
     {DataType::Int32},
-    {ProdConso::defaultModel, Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
+    {ProdConso::defaultModel,
+     Aidge::GlobalAveragePoolingImpl_cpu_forward_kernel<int32_t, int32_t>,
+     nullptr});
 } // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_GLOBALAVERAGEPOOLINGIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/GridSampleImpl.hpp b/include/aidge/backend/cpu/operator/GridSampleImpl.hpp
index 697bb35a983bc108c2a5d65db3c08ef462ffcdbd..380757f8181e73002a9a151802cc3be2fc59c883 100644
--- a/include/aidge/backend/cpu/operator/GridSampleImpl.hpp
+++ b/include/aidge/backend/cpu/operator/GridSampleImpl.hpp
@@ -17,22 +17,23 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/GridSample.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using GridSampleImpl_cpu = OperatorImpl_cpu<GridSample_Op,
-    void(const GridSample_Op&,
-        const std::shared_ptr<Tensor>&,
-        const std::shared_ptr<Tensor>&,
-        const std::shared_ptr<Tensor>&)>;
+using GridSampleImpl_cpu =
+    OperatorImpl_cpu<GridSample_Op,
+                     void(const GridSample_Op &,
+                          const std::shared_ptr<Tensor> &,
+                          const std::shared_ptr<Tensor> &,
+                          const std::shared_ptr<Tensor> &)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(GridSample_Op, "cpu", Aidge::GridSampleImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_GRIDSAMPLEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp b/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp
index fa390e4e9585225ab15b39651198cb3aaae77edb..3362c3875d6efdfdcd5d901cb8d848ebdd448cd3 100644
--- a/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/GridSampleImpl_kernels.hpp
@@ -12,10 +12,10 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
 
-#include <algorithm>  // std::max, std::min
-#include <cmath>      // std::fabs, std::trunf, std::nearbyint
-#include <cstddef>    // std::size_t
-#include <cstdint>    // std::int64_t
+#include <algorithm> // std::max, std::min
+#include <cmath>     // std::fabs, std::truncf, std::nearbyintf
+#include <cstddef>   // std::size_t
+#include <cstdint>   // std::int64_t
 
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/GridSampleImpl.hpp"
@@ -23,56 +23,79 @@
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
-static bool in_bound(float coord, float lower_bound, float upper_bound) noexcept {
+static bool
+in_bound(float coord, float lower_bound, float upper_bound) noexcept {
     return (coord > lower_bound) && (coord < upper_bound);
 }
 
-static float unnormalized_coord(float coord, float new_lower_bound, float new_upper_bound) noexcept {
-    return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) + new_lower_bound;
+static float unnormalized_coord(float coord,
+                                float new_lower_bound,
+                                float new_upper_bound) noexcept {
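+    // affine map from [-1, 1] onto [new_lower_bound, new_upper_bound]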
+    return (coord + 1) / 2 * (new_upper_bound - new_lower_bound) +
+           new_lower_bound;
 }
 
 // unused
-// static float normalized_coord(float coord, float prev_lower_bound, float prev_upper_bound) noexcept {
-//     return (coord + prev_lower_bound) / (prev_upper_bound-prev_lower_bound) * 2 - 1;
+// static float normalized_coord(float coord, float prev_lower_bound,
+//                               float prev_upper_bound) noexcept {
+//     return (coord + prev_lower_bound) /
+//                (prev_upper_bound - prev_lower_bound) * 2 - 1;
 // }
 
-static float unnormalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
-    return align_corners ? unnormalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
-                         : unnormalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
+static float unnormalize_grid_sample_coord(float coord,
+                                           std::size_t size,
+                                           bool align_corners) noexcept {
+    return align_corners ? unnormalized_coord(coord,
+                                              0.0f,
+                                              static_cast<float>(size) - 1.0f)
+                         : unnormalized_coord(coord,
+                                              -0.5f,
+                                              static_cast<float>(size) - 0.5f);
 }
 
 // unused
-// static float normalize_grid_sample_coord(float coord, std::size_t size, bool align_corners) noexcept {
-//     return align_corners ? normalized_coord(coord, 0.0f, static_cast<float>(size) - 1.0f)
-//                          : normalized_coord(coord, -0.5f, static_cast<float>(size) - 0.5f);
+// static float normalize_grid_sample_coord(float coord, std::size_t size,
+//                                          bool align_corners) noexcept {
+//     return align_corners
+//                ? normalized_coord(coord, 0.0f,
+//                                   static_cast<float>(size) - 1.0f)
+//                : normalized_coord(coord, -0.5f,
+//                                   static_cast<float>(size) - 0.5f);
 // }
 
-static float update_normalized_coord_with_padding(float coord, Aidge::GridSample_Op::PaddingMode padding_mode) {
+static float update_normalized_coord_with_padding(
+    float coord,
+    Aidge::GridSample_Op::PaddingMode padding_mode) {
     if (!in_bound(coord, -1.0f, 1.0f)) {
         if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
             coord = std::min(std::max(-1.0f, coord), 1.0f);
-        }
-        else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
+        } else if (padding_mode ==
+                   Aidge::GridSample_Op::PaddingMode::Reflection) {
             float abs_coord = std::fabs(coord);
             float int_coord = std::truncf(abs_coord);
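+            // reflect the coordinate back into [-1, 1]; nb_refl counts the
+            // bounces off the borders and determines the sign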
-            std::int32_t nb_refl = static_cast<std::int32_t>((int_coord - 1) / 2);
-            float res = ((nb_refl + 1)*2) - abs_coord;
-            coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res) \
-                            : (nb_refl % 2 == 0 ? -res : res);
+            std::int32_t nb_refl =
+                static_cast<std::int32_t>((int_coord - 1) / 2);
+            float res = ((nb_refl + 1) * 2) - abs_coord;
+            coord = (coord > 0) ? (nb_refl % 2 == 0 ? res : -res)
+                                : (nb_refl % 2 == 0 ? -res : res);
         }
-
     }
     return coord;
 }
 
-static inline std::int64_t update_unnormalized_coord_with_padding(std::int64_t coord, std::int64_t size, Aidge::GridSample_Op::PaddingMode padding_mode) {
+static inline std::int64_t update_unnormalized_coord_with_padding(
+    std::int64_t coord,
+    std::int64_t size,
+    Aidge::GridSample_Op::PaddingMode padding_mode) {
     if (!in_bound(coord, 0, size)) {
         // out of bound. switch padding mode
         if (padding_mode == Aidge::GridSample_Op::PaddingMode::Border) {
-            coord = std::min(std::max(std::int64_t(0), coord), size-std::int64_t(1));
-        } else if (padding_mode == Aidge::GridSample_Op::PaddingMode::Reflection) {
-            const std::int64_t quotient = coord / (size-1);
-            const std::int64_t remainer = std::abs(coord - quotient*(size-1));
+            coord = std::min(std::max(std::int64_t(0), coord),
+                             size - std::int64_t(1));
+        } else if (padding_mode ==
+                   Aidge::GridSample_Op::PaddingMode::Reflection) {
+            const std::int64_t quotient = coord / (size - 1);
+            const std::int64_t remainer =
+                std::abs(coord - quotient * (size - 1));
             coord = (quotient % 2 == 0) ? remainer : size - 1 - remainer;
         }
     }
@@ -91,17 +114,16 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
-                            const std::shared_ptr<Tensor>& in0,
-                            const std::shared_ptr<Tensor>& in1,
-                            const std::shared_ptr<Tensor>& out)
-{
-    const I* const input = static_cast<const I *>(in0->getImpl()->rawPtr());
-    const I* input_ptr = input;
-    float* const grid = static_cast<float*>(in1->getImpl()->rawPtr());
-    float* grid_ptr = grid;
-    O* const output = static_cast<O*>(out->getImpl()->rawPtr());
-    O* output_ptr = output;
+void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op &op,
+                                         const std::shared_ptr<Tensor> &in0,
+                                         const std::shared_ptr<Tensor> &in1,
+                                         const std::shared_ptr<Tensor> &out) {
+    const I *const input = static_cast<const I *>(in0->getImpl()->rawPtr());
+    const I *input_ptr = input;
+    float *const grid = static_cast<float *>(in1->getImpl()->rawPtr());
+    float *grid_ptr = grid;
+    O *const output = static_cast<O *>(out->getImpl()->rawPtr());
+    O *output_ptr = output;
 
     const std::size_t N = in0->dim(0);
     const std::size_t C = in0->dim(1);
@@ -117,23 +139,20 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
     const std::size_t out_C_s = out->stride(1);
     const std::size_t out_H_s = out->stride(2);
 
-    float* grid_ptr_N = grid;
-    const I* input_ptr_N = input;
-    O* output_ptr_N = output;
+    float *grid_ptr_N = grid;
+    const I *input_ptr_N = input;
+    O *output_ptr_N = output;
     for (std::size_t n = 0; n < N; ++n) {
         grid_ptr = grid_ptr_N;
         for (std::size_t grid_x = 0; grid_x < grid_H; ++grid_x) {
-            output_ptr = output_ptr_N + grid_x*out_H_s;
+            output_ptr = output_ptr_N + grid_x * out_H_s;
             /*
-            * change grid_x coord to match padding_mode
-            * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
-            * Handle computation of interpolation
-            *   any value outside bounds is considered 0
-            *   if nearest:
-            *   else if linear:
-            *   else if cubic:
-            *   else : nothing
-            */
+             * change grid_x coord to match padding_mode
+             * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5]
+             * according to align_corners.
+             * Handle computation of the interpolation:
+             *   any value outside bounds is considered 0
+             *   if nearest / linear / cubic: interpolate accordingly,
+             *   else: nothing
+             */
             float x = *grid_ptr;
             x = update_normalized_coord_with_padding(x, op.paddingMode());
             x = unnormalize_grid_sample_coord(x, in_H, op.alignCorners());
@@ -141,7 +160,7 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
                 const std::int64_t x_rounded = std::nearbyintf(x);
 
                 if (in_bound(x_rounded, 0, in_H)) {
-                    input_ptr = input_ptr_N + x_rounded*in_H_s;
+                    input_ptr = input_ptr_N + x_rounded * in_H_s;
                     for (std::size_t c = 0; c < C; ++c) {
                         *output_ptr = *input_ptr;
                         input_ptr += in_C_s;
@@ -154,46 +173,81 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
                     }
                 }
             } else if (op.mode() == GridSample_Op::Mode::Linear) {
-                const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
-                const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
-
-                const I* input_ptr_NC = input_ptr_N;
+                const std::int64_t x_inf =
+                    update_unnormalized_coord_with_padding(
+                        static_cast<std::int64_t>(std::floor(x)),
+                        in_H,
+                        op.paddingMode());
+                const std::int64_t x_sup =
+                    update_unnormalized_coord_with_padding(x_inf + 1,
+                                                           in_H,
+                                                           op.paddingMode());
+
+                const I *input_ptr_NC = input_ptr_N;
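+                // 1D linear interpolation between the two neighbouring
+                // samples; out-of-bound neighbours contribute 0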
                 for (std::size_t c = 0; c < C; ++c) {
-                    const I f_inf = in_bound(x_inf, 0, in_H) ?
-                        input_ptr_NC[static_cast<std::size_t>(x_inf)*in_H_s] : I(0);
-                    const I f_sup = in_bound(x_sup, 0, in_H) ?
-                        input_ptr_NC[static_cast<std::size_t>(x_sup)*in_H_s] : I(0);
-
-                    *output_ptr = static_cast<O>(static_cast<I>(x - x_inf)*f_inf \
-                            + static_cast<I>(x_sup - x)*f_sup);
+                    const I f_inf =
+                        in_bound(x_inf, 0, in_H)
+                            ? input_ptr_NC[static_cast<std::size_t>(x_inf) *
+                                           in_H_s]
+                            : I(0);
+                    const I f_sup =
+                        in_bound(x_sup, 0, in_H)
+                            ? input_ptr_NC[static_cast<std::size_t>(x_sup) *
+                                           in_H_s]
+                            : I(0);
+
+                    *output_ptr =
+                        static_cast<O>(static_cast<I>(x_sup - x) * f_inf +
+                                       static_cast<I>(x - x_inf) * f_sup);
 
                     input_ptr_NC += in_C_s;
                     output_ptr += out_C_s;
                 }
             } else if (op.mode() == GridSample_Op::Mode::Cubic) {
-                const std::int64_t x_inf = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_H, op.paddingMode());
-                const std::int64_t x_sup = update_unnormalized_coord_with_padding(x_inf + 1, in_H, op.paddingMode());
-                const std::int64_t x_inf_inf = update_unnormalized_coord_with_padding(x_inf - 1, in_H, op.paddingMode());
-                const std::int64_t x_sup_sup = update_unnormalized_coord_with_padding(x_sup + 1, in_H, op.paddingMode());
+                const std::int64_t x_inf =
+                    update_unnormalized_coord_with_padding(
+                        static_cast<std::int64_t>(std::floor(x)),
+                        in_H,
+                        op.paddingMode());
+                const std::int64_t x_sup =
+                    update_unnormalized_coord_with_padding(x_inf + 1,
+                                                           in_H,
+                                                           op.paddingMode());
+                const std::int64_t x_inf_inf =
+                    update_unnormalized_coord_with_padding(x_inf - 1,
+                                                           in_H,
+                                                           op.paddingMode());
+                const std::int64_t x_sup_sup =
+                    update_unnormalized_coord_with_padding(x_sup + 1,
+                                                           in_H,
+                                                           op.paddingMode());
 
                 const I x1 = static_cast<I>(x - static_cast<float>(x_inf));
                 const I x2 = x1 * x1;
                 const I x3 = x1 * x2;
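+                // Cubic Hermite (Catmull-Rom) interpolation: m_inf and m_sup
+                // below are central-difference slopes and, with t = x1,
+                // f(t) = f_inf + m_inf*t
+                //        + (3*(f_sup - f_inf) - 2*m_inf - m_sup)*t^2
+                //        + (2*(f_inf - f_sup) + m_inf + m_sup)*t^3.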
 
-                const I* input_ptr_NC = input_ptr_N;
+                const I *input_ptr_NC = input_ptr_N;
                 for (std::size_t c = 0; c < C; ++c) {
-                    const I f_inf_inf = in_bound(x_inf_inf, 0, in_H) ? input_ptr_NC[x_inf_inf*in_H_s] : I(0);
-                    const I f_inf = in_bound(x_inf, 0, in_H) ? input_ptr_NC[x_inf*in_H_s] : I(0);
-                    const I f_sup = in_bound(x_sup, 0, in_H) ? input_ptr_NC[x_sup*in_H_s] : I(0);
-                    const I f_sup_sup = in_bound(x_sup_sup, 0, in_H) ? input_ptr_NC[x_sup_sup*in_H_s] : I(0);
+                    const I f_inf_inf = in_bound(x_inf_inf, 0, in_H)
+                                            ? input_ptr_NC[x_inf_inf * in_H_s]
+                                            : I(0);
+                    const I f_inf = in_bound(x_inf, 0, in_H)
+                                        ? input_ptr_NC[x_inf * in_H_s]
+                                        : I(0);
+                    const I f_sup = in_bound(x_sup, 0, in_H)
+                                        ? input_ptr_NC[x_sup * in_H_s]
+                                        : I(0);
+                    const I f_sup_sup = in_bound(x_sup_sup, 0, in_H)
+                                            ? input_ptr_NC[x_sup_sup * in_H_s]
+                                            : I(0);
 
                     const I m_inf = (f_sup - f_inf_inf) / I(2);
                     const I m_sup = (f_sup_sup - f_inf) / I(2);
 
-                    *output_ptr = f_inf \
-                        + x1 * m_inf \
-                        + x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) \
-                        + x3 * (2*(f_inf - f_sup) + m_inf + m_sup);
+                    *output_ptr =
+                        f_inf + x1 * m_inf +
+                        x2 * (3 * (f_sup - f_inf) - 2 * m_inf - m_sup) +
+                        x3 * (2 * (f_inf - f_sup) + m_inf + m_sup);
 
                     input_ptr_NC += in_C_s;
                     output_ptr += out_C_s;
@@ -212,18 +266,30 @@ void GridSampleImpl1D_cpu_forward_kernel(const GridSample_Op& op,
 // Kernels registration to implementation entry point
 // only accept 1st input with only 1 spatial feat. (nb dims = 1)
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Float16}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<half_float::half,
+                                                      half_float::half>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Float32}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Float64}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl1D_cpu_forward_kernel<double, double>,
+                        <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
+                              0, desc.pool[0]);
+                    const unsigned int syMax = Utils::clamp
+                        <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
+                              0, desc.pool[1]);
-
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}}}, {DataType::Any}},
+           {{DataType::Int32}}},
+                    const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
+                    const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
+           nullptr});
 
 /**
  * @brief Forward kernel for 2D GridSample on CPU backend.
@@ -236,16 +302,15 @@ REGISTRAR(GridSampleImpl_cpu,
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
-                            const std::shared_ptr<Tensor>& in0,
-                            const std::shared_ptr<Tensor>& in1,
-                            const std::shared_ptr<Tensor>& out)
-{
-    const I* input = static_cast<const I *>(in0->getImpl()->rawPtr());
-    const I* input_ptr = input;
-    float* const grid = static_cast<float*>(in0->getImpl()->rawPtr());
-    float* grid_ptr = grid;
-    O* const output = static_cast<O*>(out->getImpl()->rawPtr());
+void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op &op,
+                                         const std::shared_ptr<Tensor> &in0,
+                                         const std::shared_ptr<Tensor> &in1,
+                                         const std::shared_ptr<Tensor> &out) {
+    const I *input = static_cast<const I *>(in0->getImpl()->rawPtr());
+    const I *input_ptr = input;
+    float *const grid = static_cast<float *>(in1->getImpl()->rawPtr());
+    float *grid_ptr = grid;
+    O *const output = static_cast<O *>(out->getImpl()->rawPtr());
 
     const std::size_t N = in0->dim(0);
     const std::size_t C = in0->dim(1);
@@ -267,25 +332,22 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
     const std::size_t out_H_s = out->stride(2);
     const std::size_t out_W_s = out->stride(3);
 
-
-    float* grid_ptr_N = grid;
-    const I* input_ptr_N = input;
-    O* output_ptr_N = output;
+    float *grid_ptr_N = grid;
+    const I *input_ptr_N = input;
+    O *output_ptr_N = output;
     for (std::size_t n = 0; n < N; ++n) {
         for (std::size_t grid_y = 0; grid_y < grid_H; ++grid_y) {
             for (std::size_t grid_x = 0; grid_x < grid_W; ++grid_x) {
-                O* output_ptr = output_ptr_N + grid_y*out_H_s + grid_y*out_W_s;
-                grid_ptr = grid_ptr_N + grid_y*grid_H_s + grid_x*grid_W_s;
+                O *output_ptr =
+                    output_ptr_N + grid_y * out_H_s + grid_x * out_W_s;
+                grid_ptr = grid_ptr_N + grid_y * grid_H_s + grid_x * grid_W_s;
                 /*
-                * change grid_x coord to match padding_mode
-                * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5] according to align_corners
-                * Handle computation of interpolation
-                *   any value outside bounds is considered 0
-                *   if nearest:
-                *   else if linear:
-                *   else if cubic:
-                *   else : nothing
-                */
+                 * change grid_x coord to match padding_mode
+                 * Change range from [-1, 1] to [0, H-1] or [-0.5, H-0.5]
+                 * according to align_corners.
+                 * Handle computation of interpolation:
+                 *   any value outside bounds is considered 0
+                 *   if nearest / linear / cubic: interpolate, else: nothing
+                 */
                 float x = *grid_ptr;
                 float y = grid_ptr[grid_Coord_s];
                 x = update_normalized_coord_with_padding(x, op.paddingMode());
@@ -296,8 +358,10 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
                     const std::int64_t x_rounded = std::nearbyintf(x);
                     const std::int64_t y_rounded = std::nearbyintf(y);
 
-                    if (in_bound(x_rounded, 0, in_W) && in_bound(y_rounded, 0, in_H)) {
-                        input_ptr = input_ptr_N + y_rounded*in_H_s + x_rounded*in_W_s;
+                    if (in_bound(x_rounded, 0, in_W) &&
+                        in_bound(y_rounded, 0, in_H)) {
+                        input_ptr = input_ptr_N + y_rounded * in_H_s +
+                                    x_rounded * in_W_s;
                         for (std::size_t c = 0; c < C; ++c) {
                             *output_ptr = *input_ptr;
                             input_ptr += in_C_s;
@@ -310,97 +374,199 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
                         }
                     }
                 } else if (op.mode() == GridSample_Op::Mode::Linear) {
-                    const std::int64_t x_r = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode()); // right
-                    const std::int64_t x_l = update_unnormalized_coord_with_padding(x_r + 1, in_W, op.paddingMode()); // left
-
-                    const std::int64_t y_t = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode()); // top
-                    const std::int64_t y_b = update_unnormalized_coord_with_padding(y_t + 1, in_H, op.paddingMode()); // bottom
-
-                    const I* input_ptr_NC = input_ptr_N;
+                    const std::int64_t x_l =
+                        update_unnormalized_coord_with_padding(
+                            static_cast<std::int64_t>(std::floor(x)),
+                            in_W,
+                            op.paddingMode()); // left
+                    const std::int64_t x_r =
+                        update_unnormalized_coord_with_padding(
+                            x_l + 1,
+                            in_W,
+                            op.paddingMode()); // right
+
+                    const std::int64_t y_t =
+                        update_unnormalized_coord_with_padding(
+                            static_cast<std::int64_t>(std::floor(y)),
+                            in_H,
+                            op.paddingMode()); // top
+                    const std::int64_t y_b =
+                        update_unnormalized_coord_with_padding(
+                            y_t + 1,
+                            in_H,
+                            op.paddingMode()); // bottom
+
+                    const I *input_ptr_NC = input_ptr_N;
                     for (std::size_t c = 0; c < C; ++c) {
 
-                        const I f_tr = (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H)) ?
-                            input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
-                                         + static_cast<std::size_t>(x_r)*in_W_s]
+                        const I f_tr =
+                            (in_bound(x_r, 0, in_W) && in_bound(y_t, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_t) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_r) *
+                                                   in_W_s]
                                 : I(0);
-                        const I f_tl = (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H)) ?
-                            input_ptr_NC[static_cast<std::size_t>(y_t)*in_H_s
-                                         + static_cast<std::size_t>(x_l)*in_W_s]
+                        const I f_tl =
+                            (in_bound(x_l, 0, in_W) && in_bound(y_t, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_t) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_l) *
+                                                   in_W_s]
                                 : I(0);
-                        const I f_br = (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H)) ?
-                            input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
-                                         + static_cast<std::size_t>(x_r)*in_W_s]
+                        const I f_br =
+                            (in_bound(x_r, 0, in_W) && in_bound(y_b, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_b) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_r) *
+                                                   in_W_s]
                                 : I(0);
-                        const I f_bl = (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H)) ?
-                            input_ptr_NC[static_cast<std::size_t>(y_b)*in_H_s
-                                         + static_cast<std::size_t>(x_l)*in_W_s]
+                        const I f_bl =
+                            (in_bound(x_l, 0, in_W) && in_bound(y_b, 0, in_H))
+                                ? input_ptr_NC[static_cast<std::size_t>(y_b) *
+                                                   in_H_s +
+                                               static_cast<std::size_t>(x_l) *
+                                                   in_W_s]
                                 : I(0);
 
                         // compute weighted sum of the 4 corners
-                        const I w_tr = static_cast<I>((y - static_cast<float>(y_t))*(static_cast<float>(x_r) - x));
-                        const I w_tl = static_cast<I>((y - static_cast<float>(y_t))*(x - static_cast<float>(x_l)));
-                        const I w_br = static_cast<I>((static_cast<float>(y_b) - y)*(static_cast<float>(x_r) - x));
-                        const I w_bl = static_cast<I>((static_cast<float>(y_b) - y)*(x - static_cast<float>(x_l)));
-
-                        *output_ptr = static_cast<O>(w_tr*f_tr + w_tl*f_tl + w_br*f_br + w_bl*f_bl);
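+                        // Bilinear weights: each corner is weighted by the
+                        // area of the cell rectangle opposite to it; since
+                        // x_r - x_l == 1 and y_b - y_t == 1, the four
+                        // weights sum to 1.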
+                        const I w_tr =
+                            static_cast<I>((static_cast<float>(y_b) - y) *
+                                           (x - static_cast<float>(x_l)));
+                        const I w_tl =
+                            static_cast<I>((static_cast<float>(y_b) - y) *
+                                           (static_cast<float>(x_r) - x));
+                        const I w_br =
+                            static_cast<I>((y - static_cast<float>(y_t)) *
+                                           (x - static_cast<float>(x_l)));
+                        const I w_bl =
+                            static_cast<I>((y - static_cast<float>(y_t)) *
+                                           (static_cast<float>(x_r) - x));
+
+                        *output_ptr =
+                            static_cast<O>(w_tr * f_tr + w_tl * f_tl +
+                                           w_br * f_br + w_bl * f_bl);
 
                         input_ptr_NC += in_C_s;
                         output_ptr += out_C_s;
                     }
                 } else if (op.mode() == GridSample_Op::Mode::Cubic) {
                     /*
-                    *  .. .. .. .. .. ..
-                    *  .. 00 01 02 03 ..
-                    *  .. 10 11 12 13 ..
-                    *  .. 20 21 22 23 ..
-                    *  .. 30 31 32 33 ..
-                    *  .. .. .. .. .. ..
-                    */
-                    const std::int64_t x_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(x)), in_W, op.paddingMode());
-                    const std::int64_t x_0 = update_unnormalized_coord_with_padding(x_1 - 1, in_W, op.paddingMode());
-                    const std::int64_t x_2 = update_unnormalized_coord_with_padding(x_1 + 1, in_W, op.paddingMode());
-                    const std::int64_t x_3 = update_unnormalized_coord_with_padding(x_1 + 2, in_W, op.paddingMode());
-
-                    const std::int64_t y_1 = update_unnormalized_coord_with_padding(static_cast<std::int64_t>(std::floor(y)), in_H, op.paddingMode());
-                    const std::int64_t y_0 = update_unnormalized_coord_with_padding(y_1 - 1, in_H, op.paddingMode());
-                    const std::int64_t y_2 = update_unnormalized_coord_with_padding(y_1 + 1, in_H, op.paddingMode());
-                    const std::int64_t y_3 = update_unnormalized_coord_with_padding(y_1 + 2, in_H, op.paddingMode());
-
-                    const I* input_ptr_NC = input_ptr_N;
+                     *  .. .. .. .. .. ..
+                     *  .. 00 01 02 03 ..
+                     *  .. 10 11 12 13 ..
+                     *  .. 20 21 22 23 ..
+                     *  .. 30 31 32 33 ..
+                     *  .. .. .. .. .. ..
+                     */
+                    const std::int64_t x_1 =
+                        update_unnormalized_coord_with_padding(
+                            static_cast<std::int64_t>(std::floor(x)),
+                            in_W,
+                            op.paddingMode());
+                    const std::int64_t x_0 =
+                        update_unnormalized_coord_with_padding(
+                            x_1 - 1,
+                            in_W,
+                            op.paddingMode());
+                    const std::int64_t x_2 =
+                        update_unnormalized_coord_with_padding(
+                            x_1 + 1,
+                            in_W,
+                            op.paddingMode());
+                    const std::int64_t x_3 =
+                        update_unnormalized_coord_with_padding(
+                            x_1 + 2,
+                            in_W,
+                            op.paddingMode());
+
+                    const std::int64_t y_1 =
+                        update_unnormalized_coord_with_padding(
+                            static_cast<std::int64_t>(std::floor(y)),
+                            in_H,
+                            op.paddingMode());
+                    const std::int64_t y_0 =
+                        update_unnormalized_coord_with_padding(
+                            y_1 - 1,
+                            in_H,
+                            op.paddingMode());
+                    const std::int64_t y_2 =
+                        update_unnormalized_coord_with_padding(
+                            y_1 + 1,
+                            in_H,
+                            op.paddingMode());
+                    const std::int64_t y_3 =
+                        update_unnormalized_coord_with_padding(
+                            y_1 + 2,
+                            in_H,
+                            op.paddingMode());
+
+                    const I *input_ptr_NC = input_ptr_N;
 
                     for (std::size_t c = 0; c < C; ++c) {
-                        const I f_00 = in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H) ?
-                                        input_ptr_NC[x_0*in_W_s + y_0*in_H_s] : I(0);
-                        const I f_01 = in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H) ?
-                                        input_ptr_NC[x_0*in_W_s + y_1*in_H_s] : I(0);
-                        const I f_02 = in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H) ?
-                                        input_ptr_NC[x_0*in_W_s + y_2*in_H_s] : I(0);
-                        const I f_03 = in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H) ?
-                                        input_ptr_NC[x_0*in_W_s + y_3*in_H_s] : I(0);
-                        const I f_10 = in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H) ?
-                                        input_ptr_NC[x_1*in_W_s + y_0*in_H_s] : I(0);
-                        const I f_20 = in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H) ?
-                                        input_ptr_NC[x_2*in_W_s + y_0*in_H_s] : I(0);
-                        const I f_30 = in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H) ?
-                                        input_ptr_NC[x_3*in_W_s + y_0*in_H_s] : I(0);
-                        const I f_11 = in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H) ?
-                                        input_ptr_NC[x_1*in_W_s + y_1*in_H_s] : I(0);
-                        const I f_12 = in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H) ?
-                                        input_ptr_NC[x_1*in_W_s + y_2*in_H_s] : I(0);
-                        const I f_13 = in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H) ?
-                                        input_ptr_NC[x_1*in_W_s + y_3*in_H_s] : I(0);
-                        const I f_21 = in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H) ?
-                                        input_ptr_NC[x_2*in_W_s + y_1*in_H_s] : I(0);
-                        const I f_22 = in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H) ?
-                                        input_ptr_NC[x_2*in_W_s + y_2*in_H_s] : I(0);
-                        const I f_23 = in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H) ?
-                                        input_ptr_NC[x_2*in_W_s + y_3*in_H_s] : I(0);
-                        const I f_31 = in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H) ?
-                                        input_ptr_NC[x_3*in_W_s + y_1*in_H_s] : I(0);
-                        const I f_32 = in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H) ?
-                                        input_ptr_NC[x_3*in_W_s + y_2*in_H_s] : I(0);
-                        const I f_33 = in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H) ?
-                                        input_ptr_NC[x_3*in_W_s + y_3*in_H_s] : I(0);
+                        const I f_00 =
+                            in_bound(x_0, 0, in_W) && in_bound(y_0, 0, in_H)
+                                ? input_ptr_NC[x_0 * in_W_s + y_0 * in_H_s]
+                                : I(0);
+                        const I f_01 =
+                            in_bound(x_0, 0, in_W) && in_bound(y_1, 0, in_H)
+                                ? input_ptr_NC[x_0 * in_W_s + y_1 * in_H_s]
+                                : I(0);
+                        const I f_02 =
+                            in_bound(x_0, 0, in_W) && in_bound(y_2, 0, in_H)
+                                ? input_ptr_NC[x_0 * in_W_s + y_2 * in_H_s]
+                                : I(0);
+                        const I f_03 =
+                            in_bound(x_0, 0, in_W) && in_bound(y_3, 0, in_H)
+                                ? input_ptr_NC[x_0 * in_W_s + y_3 * in_H_s]
+                                : I(0);
+                        const I f_10 =
+                            in_bound(x_1, 0, in_W) && in_bound(y_0, 0, in_H)
+                                ? input_ptr_NC[x_1 * in_W_s + y_0 * in_H_s]
+                                : I(0);
+                        const I f_20 =
+                            in_bound(x_2, 0, in_W) && in_bound(y_0, 0, in_H)
+                                ? input_ptr_NC[x_2 * in_W_s + y_0 * in_H_s]
+                                : I(0);
+                        const I f_30 =
+                            in_bound(x_3, 0, in_W) && in_bound(y_0, 0, in_H)
+                                ? input_ptr_NC[x_3 * in_W_s + y_0 * in_H_s]
+                                : I(0);
+                        const I f_11 =
+                            in_bound(x_1, 0, in_W) && in_bound(y_1, 0, in_H)
+                                ? input_ptr_NC[x_1 * in_W_s + y_1 * in_H_s]
+                                : I(0);
+                        const I f_12 =
+                            in_bound(x_1, 0, in_W) && in_bound(y_2, 0, in_H)
+                                ? input_ptr_NC[x_1 * in_W_s + y_2 * in_H_s]
+                                : I(0);
+                        const I f_13 =
+                            in_bound(x_1, 0, in_W) && in_bound(y_3, 0, in_H)
+                                ? input_ptr_NC[x_1 * in_W_s + y_3 * in_H_s]
+                                : I(0);
+                        const I f_21 =
+                            in_bound(x_2, 0, in_W) && in_bound(y_1, 0, in_H)
+                                ? input_ptr_NC[x_2 * in_W_s + y_1 * in_H_s]
+                                : I(0);
+                        const I f_22 =
+                            in_bound(x_2, 0, in_W) && in_bound(y_2, 0, in_H)
+                                ? input_ptr_NC[x_2 * in_W_s + y_2 * in_H_s]
+                                : I(0);
+                        const I f_23 =
+                            in_bound(x_2, 0, in_W) && in_bound(y_3, 0, in_H)
+                                ? input_ptr_NC[x_2 * in_W_s + y_3 * in_H_s]
+                                : I(0);
+                        const I f_31 =
+                            in_bound(x_3, 0, in_W) && in_bound(y_1, 0, in_H)
+                                ? input_ptr_NC[x_3 * in_W_s + y_1 * in_H_s]
+                                : I(0);
+                        const I f_32 =
+                            in_bound(x_3, 0, in_W) && in_bound(y_2, 0, in_H)
+                                ? input_ptr_NC[x_3 * in_W_s + y_2 * in_H_s]
+                                : I(0);
+                        const I f_33 =
+                            in_bound(x_3, 0, in_W) && in_bound(y_3, 0, in_H)
+                                ? input_ptr_NC[x_3 * in_W_s + y_3 * in_H_s]
+                                : I(0);
 
                         const I mx_11 = (f_21 - f_01) / I(2);
                         const I mx_12 = (f_22 - f_02) / I(2);
@@ -412,38 +578,63 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
                         const I my_21 = (f_22 - f_20) / I(2);
                         const I my_22 = (f_23 - f_21) / I(2);
 
-                        const I mxy_11 = (f_22 - f_20 - f_02 - + f_00) / I(4);
-                        const I mxy_12 = (f_23 - f_21 - f_03 - + f_01) / I(4);
-                        const I mxy_21 = (f_32 - f_30 - f_12 - + f_10) / I(4);
-                        const I mxy_22 = (f_33 - f_31 - f_13 - + f_11) / I(4);
+                        const I mxy_11 = (f_22 - f_20 - f_02 + f_00) / I(4);
+                        const I mxy_12 = (f_23 - f_21 - f_03 + f_01) / I(4);
+                        const I mxy_21 = (f_32 - f_30 - f_12 + f_10) / I(4);
+                        const I mxy_22 = (f_33 - f_31 - f_13 + f_11) / I(4);
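+                        // Bicubic Hermite patch: the a_ij below are the 16
+                        // coefficients of sum_{i,j} a_ij * tx^i * ty^j,
+                        // derived from f_11..f_22 and their
+                        // central-difference derivatives mx, my and mxy.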
 
                         const I a_00 = f_11;
                         const I a_10 = mx_11;
-                        const I a_20 = I(3)*(f_21 - f_11) - I(2)*mx_11 - mx_21;
-                        const I a_30 = I(2)*(f_11 - f_21) + mx_11 + mx_21;
+                        const I a_20 =
+                            I(3) * (f_21 - f_11) - I(2) * mx_11 - mx_21;
+                        const I a_30 = I(2) * (f_11 - f_21) + mx_11 + mx_21;
                         const I a_01 = my_11;
                         const I a_11 = mxy_11;
-                        const I a_21 = I(3)*(my_21 - my_11) - I(2)*mxy_11 - mxy_21;
-                        const I a_31 = I(2)*(my_11 - my_21) + mxy_11 + mxy_21;
-                        const I a_02 = I(3)*(f_12 - f_11) - I(2)*my_11 - my_12;
-                        const I a_12 = I(3)*(mx_12 - mx_11) - I(2)*mxy_11 - mxy_12;
-                        const I a_22 = I(9)*(f_11 + f_22 - f_21 - f_12) + I(3)*(I(2)*(mx_11 - mx_12 + my_11 - my_21) + mx_21 - mx_22 + my_12 - my_22) + mxy_22 + I(2)*(mxy_12 + mxy_21 + I(2)*mxy_11);
-                        const I a_32 = - mxy_12 - mxy_22 + I(2)*(my_22 - my_12 - mxy_11 - mxy_21 + I(2)*(my_21 - my_11) + I(3)*(f_21 + f_12 - f_11 - f_22)) + I(3)*(mx_12 + mx_22 - mx_11 - mx_21);
-                        const I a_03 = I(2)*(f_11 - f_12) + my_11 + my_12;
-                        const I a_13 = I(2)*(mx_11 - mx_12) + mxy_11 + mxy_12;
-                        const I a_23 = - mxy_21 - mxy_22 + I(2)*(-mx_21 + mx_22 - mxy_11 - mxy_12 + I(2)*(mx_12 - mx_11) + I(3)*(f_12 + f_21 - f_11 - f_22)) + I(3)*(my_21 + my_22 - my_11 - my_12);
-                        const I a_33 = mxy_11 + mxy_21 + mxy_12 + mxy_22 + I(2)*(mx_11 + mx_21 - mx_12 - mx_22 + my_11 - my_21 + my_12 - my_22 + I(2)*(f_11 - f_21 - f_12 + f_22));
-
-                        const I x2 = static_cast<I>(x*x);
-                        const I x3 = static_cast<I>(x*x*x);
-                        const I y2 = static_cast<I>(y*y);
-                        const I y3 = static_cast<I>(y*y*y);
-
-                        *output_ptr = static_cast<O>( \
-                            a_00 + a_10*x + a_20*x2 + a_30*x3 \
-                            + a_01*y + a_11*x*y + a_21*x2*y + a_31*x3*y \
-                            + a_02*y2 + a_12*x*y2 + a_22*x2*y2 + a_32*x3*y2 \
-                            + a_03*y3 + a_13*x*y3 + a_23*x2*y3 + a_33*x3*y3);
+                        const I a_21 =
+                            I(3) * (my_21 - my_11) - I(2) * mxy_11 - mxy_21;
+                        const I a_31 =
+                            I(2) * (my_11 - my_21) + mxy_11 + mxy_21;
+                        const I a_02 =
+                            I(3) * (f_12 - f_11) - I(2) * my_11 - my_12;
+                        const I a_12 =
+                            I(3) * (mx_12 - mx_11) - I(2) * mxy_11 - mxy_12;
+                        const I a_22 =
+                            I(9) * (f_11 + f_22 - f_21 - f_12) +
+                            I(3) * (I(2) * (mx_11 - mx_12 + my_11 - my_21) +
+                                    mx_21 - mx_22 + my_12 - my_22) +
+                            mxy_22 + I(2) * (mxy_12 + mxy_21 + I(2) * mxy_11);
+                        const I a_32 =
+                            -mxy_12 - mxy_22 +
+                            I(2) * (my_22 - my_12 - mxy_11 - mxy_21 +
+                                    I(2) * (my_21 - my_11) +
+                                    I(3) * (f_21 + f_12 - f_11 - f_22)) +
+                            I(3) * (mx_12 + mx_22 - mx_11 - mx_21);
+                        const I a_03 = I(2) * (f_11 - f_12) + my_11 + my_12;
+                        const I a_13 =
+                            I(2) * (mx_11 - mx_12) + mxy_11 + mxy_12;
+                        const I a_23 =
+                            -mxy_21 - mxy_22 +
+                            I(2) * (-mx_21 + mx_22 - mxy_11 - mxy_12 +
+                                    I(2) * (mx_12 - mx_11) +
+                                    I(3) * (f_12 + f_21 - f_11 - f_22)) +
+                            I(3) * (my_21 + my_22 - my_11 - my_12);
+                        const I a_33 =
+                            mxy_11 + mxy_21 + mxy_12 + mxy_22 +
+                            I(2) * (mx_11 + mx_21 - mx_12 - mx_22 + my_11 -
+                                    my_21 + my_12 - my_22 +
+                                    I(2) * (f_11 - f_21 - f_12 + f_22));
+
+                        // Evaluate at the fractional position inside the
+                        // cell, not at the absolute coordinate.
+                        const I tx =
+                            static_cast<I>(x - static_cast<float>(x_1));
+                        const I ty =
+                            static_cast<I>(y - static_cast<float>(y_1));
+                        const I x2 = tx * tx;
+                        const I x3 = tx * x2;
+                        const I y2 = ty * ty;
+                        const I y3 = ty * y2;
+
+                        *output_ptr = static_cast<O>(
+                            a_00 + a_10 * tx + a_20 * x2 + a_30 * x3 +
+                            a_01 * ty + a_11 * tx * ty + a_21 * x2 * ty +
+                            a_31 * x3 * ty + a_02 * y2 + a_12 * tx * y2 +
+                            a_22 * x2 * y2 + a_32 * x3 * y2 + a_03 * y3 +
+                            a_13 * tx * y3 + a_23 * x2 * y3 + a_33 * x3 * y3);
 
                         input_ptr_NC += in_C_s;
                         output_ptr += out_C_s;
@@ -461,17 +652,34 @@ void GridSampleImpl2D_cpu_forward_kernel(const GridSample_Op& op,
 // Kernels registration to implementation entry point
 // only accept 1st input with only 2 spatial feat. (nb dims = 2)
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float16}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half, half_float::half>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}},
+            {DataType::Any}},
+           {{DataType::Float16}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl2D_cpu_forward_kernel<half_float::half,
+                                                      half_float::half>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float32}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}},
+            {DataType::Any}},
+           {{DataType::Float32}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl2D_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Float64}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>, nullptr});
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}},
+            {DataType::Any}},
+           {{DataType::Float64}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl2D_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(GridSampleImpl_cpu,
-    {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}}, {DataType::Any}}, {{DataType::Int32}}},
-    {ProdConso::defaultModel, Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {{{DataType::Any, DataFormat::Any, {{-1, -1}, {-1, -1}}},
+            {DataType::Any}},
+           {{DataType::Int32}}},
+          {ProdConso::defaultModel,
+           Aidge::GridSampleImpl2D_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
index 1e8c1a14435f53ad7a63b327944e0bb8c70c8661..31fab26f763f5c943ebb212d704c7888796039a9 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
@@ -16,26 +16,21 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/LeakyReLU.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using LeakyReLUImpl_cpu = OperatorImpl_cpu<LeakyReLU_Op,
-    void(const float,
-        std::size_t,
-        const void*,
-        void*),
-    void(const float,
-        std::size_t,
-        const void*,
-        void*)>;
+using LeakyReLUImpl_cpu =
+    OperatorImpl_cpu<LeakyReLU_Op,
+                     void(const float, std::size_t, const void *, void *),
+                     void(const float, std::size_t, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(LeakyReLU_Op, "cpu", Aidge::LeakyReLUImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp
index bc856f703aee8ba422887d43cb96db2132fc4603..546a1f47b4f140e548aac95f6dd1382915b48496 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_kernels.hpp
@@ -19,12 +19,12 @@
 namespace Aidge {
 template <class I, class O>
 void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
-                                     std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                      std::size_t inputLenght,
+                                      const void *input_,
+                                      void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
     const I negativeSlope = static_cast<const I>(negativeSlope_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
@@ -34,29 +34,35 @@ void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
 
 template <class I, class O>
 void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
-                                     std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                       std::size_t inputLenght,
+                                       const void *input_,
+                                       void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
     const I negativeSlope = static_cast<const I>(negativeSlope_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i];
+        output[i] = (input[i] > 0) ? input[i] : negativeSlope * input[i];
     }
 }
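+// Note: the backward kernel applies the same pointwise mapping as the forward
+// one; the analytical gradient of LeakyReLU scales the incoming gradient by
+// (input[i] > 0 ? 1 : negativeSlope).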
 
 // Kernels registration to implementation entry point
 REGISTRAR(LeakyReLUImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>, Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>,
+           Aidge::LeakyReLUImpl_cpu_backward_kernel<float, float>});
 REGISTRAR(LeakyReLUImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>, Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>,
+           Aidge::LeakyReLUImpl_cpu_backward_kernel<double, double>});
 REGISTRAR(LeakyReLUImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::LeakyReLUImpl_cpu_forward_kernel<int32_t, int32_t>,
+           Aidge::LeakyReLUImpl_cpu_backward_kernel<int32_t, int32_t>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/LnImpl.hpp b/include/aidge/backend/cpu/operator/LnImpl.hpp
old mode 100755
new mode 100644
index d48a7ae437d9ed1c7769d3628691993c1e9dcb90..5e9487af367cf0c7edebf2ae599e9eaf40eaa9c8
--- a/include/aidge/backend/cpu/operator/LnImpl.hpp
+++ b/include/aidge/backend/cpu/operator/LnImpl.hpp
@@ -12,22 +12,23 @@
 #ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_
 #define AIDGE_CPU_OPERATOR_LNIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Ln.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using LnImpl_cpu = OperatorImpl_cpu<Ln_Op,
-    void(const std::size_t, const void*, void*),
-    void(const std::size_t, const void*, const void*, void*)>;
+using LnImpl_cpu = OperatorImpl_cpu<
+    Ln_Op,
+    void(const std::size_t, const void *, void *),
+    void(const std::size_t, const void *, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Ln_Op, "cpu", Aidge::LnImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp
old mode 100755
new mode 100644
index b30b05bb806de08d4e70c67e66979fb3138980df..3c89e91ecebdc1711b036ee39028533c1732c9af
--- a/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LnImpl_kernels.hpp
@@ -19,49 +19,54 @@
 namespace Aidge {
 template <class I, class O>
 void LnImpl_cpu_forward_kernel(std::size_t inputLenght,
-                               const void* input_,
-                               void* output_) {
+                               const void *input_,
+                               void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-	const float eps = 1.0e-20f;
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+    const float eps = 1.0e-20f;
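+    // eps floors the argument of std::log so that non-positive inputs yield
+    // log(eps) instead of -inf or NaN.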
 
-//#pragma omp parallel for if (inputLenght > 1024)
+    // #pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
-		if (input[i] > I(eps)) {
-			output[i] = std::log(input[i]);
-		} else {
-			output[i] = std::log(I(eps));
-		}
+        if (input[i] > I(eps)) {
+            output[i] = std::log(input[i]);
+        } else {
+            output[i] = std::log(I(eps));
+        }
     }
 }
 
 template <class I, class GI, class GO>
 void LnImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                const void* input_, const void* grad_output_,
-	                            void* grad_input_) {
-						 
-    const I* input = static_cast<const I*>(input_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
-	const float eps = 1.0e-20f;
-	
+                                const void *input_,
+                                const void *grad_output_,
+                                void *grad_input_) {
+
+    const I *input = static_cast<const I *>(input_);
+    const GO *grad_output = static_cast<const GO *>(grad_output_);
+    GI *grad_input = static_cast<GI *>(grad_input_);
+    const float eps = 1.0e-20f;
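+    // d/dx ln(x) = 1/x; where the forward pass clamped the input at eps, the
+    // gradient is set to 0.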
+
     for (std::size_t i = 0; i < inputLenght; ++i) {
-		if (input[i] > I(eps)) {
-			grad_input[i] = grad_output[i] / input[i];
-		} else {
-			grad_input[i] = GI(0);
-		}
+        if (input[i] > I(eps)) {
+            grad_input[i] = grad_output[i] / input[i];
+        } else {
+            grad_input[i] = GI(0);
+        }
     }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(LnImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<float, float>, Aidge::LnImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::LnImpl_cpu_forward_kernel<float, float>,
+           Aidge::LnImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(LnImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::LnImpl_cpu_forward_kernel<double, double>, Aidge::LnImpl_cpu_backward_kernel<double, double, double>});
-}  // namespace Aidge
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::LnImpl_cpu_forward_kernel<double, double>,
+           Aidge::LnImpl_cpu_backward_kernel<double, double, double>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_LNIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp
index c07aa5f8ffa62f5fffe3ca02638cc3c66cdaeedb..70fc8d450a30868d2a7c29969e441f1c389d0b4b 100644
--- a/include/aidge/backend/cpu/operator/MatMulImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp
@@ -16,20 +16,24 @@
 #include <memory>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/MatMul.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using MatMulImpl_cpu = OperatorImpl_cpu<MatMul_Op,
-    void(const std::size_t, const std::size_t, const std::size_t,
-                              const void *, const void *, void *)>;
+                                        void(const std::size_t,
+                                             const std::size_t,
+                                             const std::size_t,
+                                             const void *,
+                                             const void *,
+                                             void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(MatMul_Op, "cpu", Aidge::MatMulImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp
index 5fc13baf49b1d0606eb4af5a54eec83fa5dce22a..5b5ed930a71d2d0f0184e1a95390f239704844ed 100644
--- a/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MatMulImpl_kernels.hpp
@@ -17,19 +17,24 @@
 namespace Aidge {
 
 template <class I, class O>
-void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, const std::size_t m,
-                                    const void* input1_, const void* input2_, void* __restrict output_) {
+void MatMulImpl_cpu_forward_kernel(const std::size_t n,
+                                   const std::size_t k,
+                                   const std::size_t m,
+                                   const void *input1_,
+                                   const void *input2_,
+                                   void *__restrict output_) {
     // FIXME: missing MatMul parameters as arguments
-    const I* input1 = static_cast<const I*>(input1_);
-    const I* input2 = static_cast<const I*>(input2_);
-    O* __restrict output = static_cast<O* __restrict>(output_);
+    const I *input1 = static_cast<const I *>(input1_);
+    const I *input2 = static_cast<const I *>(input2_);
+    O *__restrict output = static_cast<O *__restrict>(output_);
 
     std::memset(output, O(0), n * m * sizeof(O));
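+    // Loop order (i, l, j): the innermost loop runs contiguously over the
+    // rows of input2 and output, which is cache-friendly for row-major data.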
 
     for (std::size_t i = 0; i < n; ++i) {
         for (std::size_t l = 0; l < k; ++l) {
             for (std::size_t j = 0; j < m; ++j) {
-                output[i*m + j] += static_cast<O>(input1[i*k + l] * input2[l*m + j]);
+                output[i * m + j] +=
+                    static_cast<O>(input1[i * k + l] * input2[l * m + j]);
             }
         }
     }
@@ -37,14 +42,20 @@ void MatMulImpl_cpu_forward_kernel(const std::size_t n, const std::size_t k, con
 
 // Kernels registration to implementation entry point
 REGISTRAR(MatMulImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::defaultModel,
+           Aidge::MatMulImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(MatMulImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::defaultModel,
+           Aidge::MatMulImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(MatMulImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::defaultModel, Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::defaultModel,
+           Aidge::MatMulImpl_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
index 68cc3621514de97d9837e10bcf90218abe559aaa..ceafebf4e6e1ff64ce144f8bbf0ceef88d150f88 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
@@ -17,25 +17,26 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/MaxPooling.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using MaxPooling2D_Op = MaxPooling_Op<2>;
-using MaxPoolingImpl2D_cpu = OperatorImpl_cpu<MaxPooling_Op<2>,
-    void(const std::array<DimSize_t, 2>&,
-                            const std::array<DimSize_t, 2>&,
-                            const bool,
-                            const std::array<DimSize_t, 4> &,
-                            const void *,
-                            void *)>;
+using MaxPoolingImpl2D_cpu =
+    OperatorImpl_cpu<MaxPooling_Op<2>,
+                     void(const std::array<DimSize_t, 2> &,
+                          const std::array<DimSize_t, 2> &,
+                          const bool,
+                          const std::array<DimSize_t, 4> &,
+                          const void *,
+                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(MaxPooling2D_Op, "cpu", Aidge::MaxPoolingImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
index 7b6f04f141eb701849a8d436561bcf9e37471cfa..0d853a010e141c7f77efd29dd42c610f6cfdcbf6 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_kernels.hpp
@@ -16,8 +16,8 @@
 #include <cmath>
 #include <tuple>
 
-#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/data/Data.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -33,24 +33,25 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
-                                        const std::array<DimSize_t, 2>& kernelDims,
-                                        const bool /*ceilMode*/,
-                                        const std::array<DimSize_t, 4> &dims,
-                                        const void *input_,
-                                        void *output_) {
+void MaxPoolingImpl2D_cpu_forward_kernel(
+    const std::array<DimSize_t, 2> &strideDims,
+    const std::array<DimSize_t, 2> &kernelDims,
+    const bool /*ceilMode*/,
+    const std::array<DimSize_t, 4> &dims,
+    const void *input_,
+    void *output_) {
     // FIXME: missing convolution parameters as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
     // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
+    const std::size_t oxSize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
+        static_cast<float>(strideDims[0])));
     // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
-                                static_cast<float>(strideDims[1])));
+    const std::size_t oySize = static_cast<std::size_t>(std::floor(
+        static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
+        static_cast<float>(strideDims[1])));
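+    // Floor mode: e.g. an input of size 32 with kernel 2 and stride 2 gives
+    // floor((32 - 2 + 2) / 2) = 16 output positions.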
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -60,17 +61,32 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
+            const std::size_t oIndex =
+                (ch + batch * dims[1]) * oxSize * oySize;
+            const std::size_t iIndex =
+                (ch + batch * dims[1]) * dims[2] * dims[3];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
+                const signedsize difx =
+                    static_cast<signedsize>(-ox * strideDims[0]);
+                const std::size_t sxMin =
+                    static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax =
+                    (static_cast<signedsize>(dims[2]) + difx) < 0
+                        ? 0
+                        : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0]
+                                                            : dims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
-                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const signedsize dify =
+                        static_cast<signedsize>(-oy * strideDims[1]);
+                    const std::size_t syMin = static_cast<std::size_t>(
+                        std::max(dify, signedsize(0)));
+                    const std::size_t syMax =
+                        (static_cast<signedsize>(dims[3]) + dify) < 0
+                            ? 0
+                            : ((dims[3] + dify) > kernelDims[1]
+                                   ? kernelDims[1]
+                                   : dims[3] + dify);
+                    const std::size_t oIndexFull = oIndex + ox * oySize + oy;
                     const std::size_t ix = ox * strideDims[0];
                     const std::size_t iy = oy * strideDims[1];
 
@@ -78,11 +94,12 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
                     bool valid = false;
 
                     for (unsigned int channel = 0; channel < dims[1];
-                            ++channel){
+                         ++channel) {
                         for (unsigned int sy = syMin; sy < syMax; ++sy) {
-                            for (unsigned int sx = sxMin; sx < sxMax; ++sx)
-                            {
-                                const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+                            for (unsigned int sx = sxMin; sx < sxMax; ++sx) {
+                                const I value =
+                                    input[iIndex + (ix + sx) * dims[3] +
+                                          (iy + sy)];
 
                                 if (!valid || value > poolValue) {
                                     poolValue = value;
@@ -98,7 +115,7 @@ void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideD
     }
 }
 
-//N2D2 version
+// N2D2 version
 /*
 template <class T>
 void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
@@ -127,16 +144,13 @@ void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
                     const unsigned int syMin = (unsigned int)std::max(
                         desc.padding[1] - (int)(oy * desc.stride[1]), 0);
                     const unsigned int sxMax = Utils::clamp
-                        <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
-                              0,
-                              desc.pool[0]);
-                    const unsigned int syMax = Utils::clamp
-                        <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
-                              0,
-                              desc.pool[1]);
+                        <int>(inputs.dimX() + desc.padding[0] -
+                                  ox * desc.stride[0],
+                              0,
+                              desc.pool[0]);
+                    const unsigned int syMax = Utils::clamp
+                        <int>(inputs.dimY() + desc.padding[1] -
+                                  oy * desc.stride[1],
+                              0,
+                              desc.pool[1]);
 
-                    const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
-                    const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
+                    const int ix =
+                        (int)(ox * desc.stride[0]) - desc.padding[0];
+                    const int iy =
+                        (int)(oy * desc.stride[1]) - desc.padding[1];
 
                     T poolValue(0.0);
 
@@ -201,14 +215,20 @@ void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
 
 // Kernels registration to implementation entry point
 REGISTRAR(MaxPoolingImpl2D_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(MaxPoolingImpl2D_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(MaxPoolingImpl2D_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_KERNELS_H_ */
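
Note: with no padding, the kernel's output size is floor((inSize - kernel + stride) / stride), i.e. only windows that fit entirely inside the input produce an output. A minimal standalone sketch of that formula (illustrative only, not part of the patch; assumes inSize >= kernel):

    #include <cassert>
    #include <cmath>
    #include <cstddef>

    // Output size of an unpadded pooling axis, mirroring the floor-based
    // formula used in MaxPoolingImpl2D_cpu_forward_kernel.
    std::size_t poolOutSize(std::size_t in, std::size_t kernel,
                            std::size_t stride) {
        return static_cast<std::size_t>(
            std::floor(static_cast<float>(in - kernel + stride) /
                       static_cast<float>(stride)));
    }

    int main() {
        // in = 5, kernel = 2, stride = 2 -> floor((5 - 2 + 2) / 2) = 2.
        assert(poolOutSize(5, 2, 2) == 2);
        // in = 4, kernel = 2, stride = 2 -> 2 (windows at positions 0 and 2).
        assert(poolOutSize(4, 2, 2) == 2);
        return 0;
    }
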
diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp
index 05fceba17471229d83d9f8738614b2e747121b49..806d73d9b3e2ec1681ccb04b6c7ef14a8f28bc1e 100644
--- a/include/aidge/backend/cpu/operator/MulImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl.hpp
@@ -12,36 +12,36 @@
 #ifndef AIDGE_CPU_OPERATOR_MULIMPL_H_
 #define AIDGE_CPU_OPERATOR_MULIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Mul.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using MulImpl_cpu = OperatorImpl_cpu<Mul_Op,
-    void(const std::vector<std::size_t>&,
-        const std::vector<std::size_t>&, 
-        const std::vector<std::size_t>&, 
-        const void*, 
-        const void*,
-        void*),
-    void(const std::size_t, 
-        const std::size_t, 
-        const std::size_t,
-        const std::vector<std::size_t>,
-        const std::vector<std::size_t>,
-        const void*, 
-        const void*, 
-        const void*, 
-        void*, 
-        void*)>;
+                                     void(const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const void *,
+                                          const void *,
+                                          void *),
+                                     void(const std::size_t,
+                                          const std::size_t,
+                                          const std::size_t,
+                                          const std::vector<std::size_t>,
+                                          const std::vector<std::size_t>,
+                                          const void *,
+                                          const void *,
+                                          const void *,
+                                          void *,
+                                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Mul_Op, "cpu", Aidge::MulImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MULIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
index c015b8f0182608fecd3da94220e9411decfd186c..d1e7caab3359198c87814d31efd906301a99c3bc 100644
--- a/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl_kernels.hpp
@@ -14,73 +14,69 @@
 
 #include "aidge/utils/Registrar.hpp"
 
-#include <cstdint>     // std::int32_t, std::int64_t
+#include <cstdint> // std::int32_t, std::int64_t
 
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 
 namespace Aidge {
 template <class I1, class I2, class O>
-void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
-                                const std::vector<std::size_t>& input2Dims,
-                                const std::vector<std::size_t>& outputDims,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+void MulImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims,
+                                const std::vector<std::size_t> &input2Dims,
+                                const std::vector<std::size_t> &outputDims,
+                                const void *input1_,
+                                const void *input2_,
+                                void *output_) {
 
-    const I1* input_1 = static_cast<const I1*>(input1_);
-    const I2* input_2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
+    const I1 *input_1 = static_cast<const I1 *>(input1_);
+    const I2 *input_2 = static_cast<const I2 *>(input2_);
+    O *output = static_cast<O *>(output_);
 
     size_t totalElements = 1;
     for (size_t dimSize : outputDims) {
         totalElements *= dimSize;
     }
 
-	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
-	{
-		std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
+        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
 
-		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
-		std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
 
         output[oIndex] = input_1[idx1] * input_2[idx2];
     }
 }
 
 template <class I1, class I2, class O>
-void MulImpl_cpu_backward_kernel(const std::size_t input0Length, 
+void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
                                  const std::size_t input1Length,
                                  const std::size_t grad0Length,
                                  const std::vector<std::size_t> input0Dims,
                                  const std::vector<std::size_t> input1Dims,
-                                 const void* input0_, 
-                                 const void* input1_, 
-                                 const void* grad_output_, 
-                                 void* gradientInput0,
-                                 void* gradientInput1)
-{
-    const auto* input0 = static_cast<const I1*>(input0_);
-    const auto* input1 = static_cast<const I1*>(input1_);
-    const auto* grad_output = static_cast<const O*>(grad_output_);
-    auto* grad_input_0 = static_cast<I1*>(gradientInput0);
-    auto* grad_input_1 = static_cast<I2*>(gradientInput1);
-
-
-    if(input0Dims.size() >= input1Dims.size())
-    {
-        AIDGE_ASSERT(input0Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
-
-        for(auto i = 0U; i < input0Length; ++i)
-        {
+                                 const void *input0_,
+                                 const void *input1_,
+                                 const void *grad_output_,
+                                 void *gradientInput0,
+                                 void *gradientInput1) {
+    const auto *input0 = static_cast<const I1 *>(input0_);
+    const auto *input1 = static_cast<const I2 *>(input1_);
+    const auto *grad_output = static_cast<const O *>(grad_output_);
+    auto *grad_input_0 = static_cast<I1 *>(gradientInput0);
+    auto *grad_input_1 = static_cast<I2 *>(gradientInput1);
+
+    if (input0Dims.size() >= input1Dims.size()) {
+        AIDGE_ASSERT(
+            input0Length == grad0Length,
+            "Incorrect dimensions between Mul input and output tensors");
+
+        for (auto i = 0U; i < input0Length; ++i) {
             const auto indices = getMultiDimIndices(input1Dims, i);
             const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
 
             grad_input_0[i] = input1[flattenedIndex] * grad_output[i];
         }
 
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
+        for (std::size_t i = 0; i < grad0Length; ++i) {
             const auto indices = getMultiDimIndices(input1Dims, i);
             const auto flattenedIndex = getFlattenedIndex(input1Dims, indices);
 
@@ -88,18 +84,18 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
         }
 
     } else {
-        AIDGE_ASSERT(input1Length == grad0Length, "Incorrect dimensions between Mul input and output tensors");
+        AIDGE_ASSERT(
+            input1Length == grad0Length,
+            "Incorrect dimensions between Mul input and output tensors");
 
-        for(auto i = 0U; i < input1Length; ++i)
-        {
+        for (auto i = 0U; i < input1Length; ++i) {
             const auto indices = getMultiDimIndices(input0Dims, i);
             const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
 
             grad_input_1[i] = input0[flattenedIndex] * grad_output[i];
         }
 
-        for(std::size_t i = 0 ; i < grad0Length; ++i)
-        {
+        for (std::size_t i = 0; i < grad0Length; ++i) {
             const auto indices = getMultiDimIndices(input0Dims, i);
             const auto flattenedIndex = getFlattenedIndex(input0Dims, indices);
 
@@ -110,17 +106,33 @@ void MulImpl_cpu_backward_kernel(const std::size_t input0Length,
 
 // Kernels registration to implementation entry point
 REGISTRAR(MulImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<float, float, float>, Aidge::MulImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::MulImpl_cpu_forward_kernel<float, float, float>,
+           Aidge::MulImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(MulImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<double, double, double>, Aidge::MulImpl_cpu_backward_kernel<double, double, double>});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::MulImpl_cpu_forward_kernel<double, double, double>,
+           Aidge::MulImpl_cpu_backward_kernel<double, double, double>});
 REGISTRAR(MulImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, Aidge::MulImpl_cpu_backward_kernel<std::int32_t, std::int32_t, std::int32_t>});
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::MulImpl_cpu_forward_kernel<std::int32_t,
+                                             std::int32_t,
+                                             std::int32_t>,
+           Aidge::MulImpl_cpu_backward_kernel<std::int32_t,
+                                              std::int32_t,
+                                              std::int32_t>});
 REGISTRAR(MulImpl_cpu,
-    {DataType::Int64},
-    {ProdConso::inPlaceModel, Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, Aidge::MulImpl_cpu_backward_kernel<std::int64_t, std::int64_t, std::int64_t>});
-}  // namespace Aidge
+          {DataType::Int64},
+          {ProdConso::inPlaceModel,
+           Aidge::MulImpl_cpu_forward_kernel<std::int64_t,
+                                             std::int64_t,
+                                             std::int64_t>,
+           Aidge::MulImpl_cpu_backward_kernel<std::int64_t,
+                                              std::int64_t,
+                                              std::int64_t>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_MULIMPL_KERNELS_H_ */
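
Note: the forward kernel walks every output element, converts the flat output index to multi-dimensional indices, then re-flattens them against each (possibly broadcast) input shape. A self-contained sketch of that indexing scheme, with the two Broadcasting.hpp helpers re-implemented locally for illustration (assumes input dims are already rank-aligned with the output, with size-1 broadcast axes):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Local stand-ins for getMultiDimIndices / getFlattenedIndex, declared in
    // aidge/backend/cpu/data/Broadcasting.hpp; illustrative only.
    static std::vector<std::size_t>
    multiDimIndices(const std::vector<std::size_t> &dims, std::size_t idx) {
        std::vector<std::size_t> indices(dims.size());
        for (std::size_t i = dims.size(); i-- > 0;) {
            indices[i] = idx % dims[i];
            idx /= dims[i];
        }
        return indices;
    }

    static std::size_t flattenedIndex(const std::vector<std::size_t> &dims,
                                      const std::vector<std::size_t> &indices) {
        std::size_t flat = 0;
        for (std::size_t i = 0; i < dims.size(); ++i) {
            // A broadcast axis of size 1 always maps to index 0.
            flat = flat * dims[i] + (dims[i] == 1 ? 0 : indices[i]);
        }
        return flat;
    }

    int main() {
        const std::vector<std::size_t> aDims{2, 3}, bDims{1, 3}, outDims{2, 3};
        const float a[] = {1, 2, 3, 4, 5, 6};
        const float b[] = {10, 20, 30};
        float out[6];
        for (std::size_t o = 0; o < 6; ++o) {
            const auto idx = multiDimIndices(outDims, o);
            out[o] = a[flattenedIndex(aDims, idx)] *
                     b[flattenedIndex(bDims, idx)];
        }
        assert(out[0] == 10.0f && out[5] == 180.0f); // row 1: {40, 100, 180}
        return 0;
    }
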
diff --git a/include/aidge/backend/cpu/operator/OperatorImpl.hpp b/include/aidge/backend/cpu/operator/OperatorImpl.hpp
index 45f099e8140395181d8be1600c61024efaa9c6a7..f3cd825dc1b48c21f09f3e063922ce0b80424bce 100644
--- a/include/aidge/backend/cpu/operator/OperatorImpl.hpp
+++ b/include/aidge/backend/cpu/operator/OperatorImpl.hpp
@@ -12,9 +12,9 @@
 #ifndef AIDGE_CPU_OPERATOR_IMPL_H_
 #define AIDGE_CPU_OPERATOR_IMPL_H_
 
-#include <cstddef>  // std::size_t
+#include <cstddef> // std::size_t
 #include <memory>
-#include <tuple>    // std::tuple
+#include <tuple>   // std::tuple
 #include <vector>
 
 #include "aidge/backend/OperatorImpl.hpp"
@@ -23,30 +23,36 @@
 
 namespace Aidge {
 template <class Op, class FwdFunc, class BwdFunc = void()>
-class OperatorImpl_cpu : public OperatorImpl,
-    public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>>
-{
-public:
-    OperatorImpl_cpu(const Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>> create(const Op& op) {
+class OperatorImpl_cpu
+    : public OperatorImpl,
+      public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>,
+                         ImplSpec,
+                         Impl<FwdFunc, BwdFunc>> {
+  public:
+    OperatorImpl_cpu(const Op &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>>
+    create(const Op &op) {
         return std::make_unique<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>>(op);
     }
 
     virtual std::shared_ptr<ProdConso> getProdConso() const override {
-        const auto impl = Registrar<OperatorImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+        const auto impl = Registrar<OperatorImpl_cpu>::create(
+            getBestMatch(getRequiredSpec()));
         return impl.prodConso(mOp);
     }
 
     virtual std::vector<ImplSpec> getAvailableImplSpecs() const override {
-        // return Registrar<OperatorImpl_cpu>::getKeys(); // Note: cannot return set due to python binding 
-        std::set<ImplSpec> implSpecsSet = Registrar<OperatorImpl_cpu>::getKeys();
+        // return Registrar<OperatorImpl_cpu>::getKeys(); // Note: cannot
+        // return set due to python binding
+        std::set<ImplSpec> implSpecsSet =
+            Registrar<OperatorImpl_cpu>::getKeys();
         return std::vector<ImplSpec>(implSpecsSet.begin(), implSpecsSet.end());
     }
 
     void forward() override;
     void backward() override;
 };
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_IMPL_H_ */
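
Note: getProdConso() shows the dispatch idiom used across this backend: kernels are registered per ImplSpec and looked up at run time via Registrar::create(getBestMatch(getRequiredSpec())). A minimal, self-contained sketch of that registration pattern (all names here are invented for the illustration; the real mechanism lives in aidge/utils/Registrar.hpp):

    #include <cassert>
    #include <functional>
    #include <map>
    #include <string>

    using ForwardFn = std::function<void(const void *, void *)>;

    // Function-local static avoids static-initialization-order issues.
    static std::map<std::string, ForwardFn> &registry() {
        static std::map<std::string, ForwardFn> r;
        return r;
    }

    // A registration object, standing in for the REGISTRAR(...) macro.
    struct Register {
        Register(const std::string &spec, ForwardFn fn) {
            registry()[spec] = fn;
        }
    };

    // "Kernels registration to implementation entry point", in miniature.
    static Register regF32("Float32", [](const void *in, void *out) {
        *static_cast<float *>(out) = *static_cast<const float *>(in) * 2.0f;
    });

    int main() {
        float in = 21.0f, out = 0.0f;
        registry().at("Float32")(&in, &out); // dispatch on the spec key
        assert(out == 42.0f);
        return 0;
    }
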
diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
index bc0bd8cad3b630b89f728d78b59652f31bbcf410..33f3b7aa6dfc9798b02779090c5e722340e3260c 100644
--- a/include/aidge/backend/cpu/operator/PadImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -17,46 +17,47 @@
 #include <tuple>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Pad.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 class Pad_ProdConso_cpu : public ProdConso {
-public:
-    Pad_ProdConso_cpu(const Operator& op): ProdConso(op) {}
+  public:
+    Pad_ProdConso_cpu(const Operator &op) : ProdConso(op) {}
 
-    static std::unique_ptr<ProdConso> defaultModel(const Operator& op) {
+    static std::unique_ptr<ProdConso> defaultModel(const Operator &op) {
         return std::make_unique<Pad_ProdConso_cpu>(op);
     }
 
-    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    Elts_t
+    getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
 };
 
 // Operator implementation entry point for the backend
 using Pad1D_Op = Pad_Op<1>;
 using PadImpl1D_cpu = OperatorImpl_cpu<Pad_Op<1>,
-    void(const std::array<DimSize_t, 2>&,
-                            const PadBorderType,
-                            const double,
-                            const std::array<DimSize_t, 3> &,
-                            const void *,
-                            void *)>;
+                                       void(const std::array<DimSize_t, 2> &,
+                                            const PadBorderType,
+                                            const double,
+                                            const std::array<DimSize_t, 3> &,
+                                            const void *,
+                                            void *)>;
 
 using Pad2D_Op = Pad_Op<2>;
 using PadImpl2D_cpu = OperatorImpl_cpu<Pad_Op<2>,
-    void(const std::array<DimSize_t, 4>&,
-                            const PadBorderType,
-                            const double,
-                            const std::array<DimSize_t, 4> &,
-                            const void *,
-                            void *)>;
+                                       void(const std::array<DimSize_t, 4> &,
+                                            const PadBorderType,
+                                            const double,
+                                            const std::array<DimSize_t, 4> &,
+                                            const void *,
+                                            void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Pad1D_Op, "cpu", Aidge::PadImpl1D_cpu::create);
 REGISTRAR(Pad2D_Op, "cpu", Aidge::PadImpl2D_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
index 6d218cb1d719e8576f6c013ac5a1b9c60a739852..08cb58fd2d137d16028222a8698a6387a9d703f5 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
@@ -12,10 +12,10 @@
 #ifndef AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_
 
-#include <algorithm>  // std::max, std::min
+#include <algorithm> // std::max, std::min
 #include <array>
-#include <cstddef>    // std::size_t
-#include <cstdint>    // std::int32_t
+#include <cstddef>   // std::size_t
+#include <cstdint>   // std::int32_t
 
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/utils/Registrar.hpp"
@@ -32,22 +32,23 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders,
-                                const PadBorderType borderType,
-                                const double borderValue,
-                                const std::array<DimSize_t, 3>& dims,
-                                const void *input_,
-                                void *output_)
-{
+void PadImpl1D_cpu_forward_kernel(
+    const std::array<DimSize_t, 2> &beginEndBorders,
+    const PadBorderType borderType,
+    const double borderValue,
+    const std::array<DimSize_t, 3> &dims,
+    const void *input_,
+    void *output_) {
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
-    const std::size_t oxSize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
+    const std::size_t oxSize =
+        dims[2] + beginEndBorders[0] + beginEndBorders[1];
 
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2];
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize;
+            const std::size_t iIndex = (ch + batch * dims[1]) * dims[2];
+            const std::size_t oIndex = (ch + batch * dims[1]) * oxSize;
 
             for (unsigned int ox = 0; ox < oxSize; ++ox) {
                 const std::size_t oIndexFull = oIndex + ox;
@@ -55,19 +56,24 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder
                 O outputValue = static_cast<O>(borderValue);
 
                 if (borderType == PadBorderType::Constant) {
-                    int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]);
+                    int ix = static_cast<int>(ox) -
+                             static_cast<int>(beginEndBorders[0]);
 
-                    if (ix >= 0  && ix < static_cast<int>(dims[2])) {
-                        outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                    if (ix >= 0 && ix < static_cast<int>(dims[2])) {
+                        outputValue =
+                            input[iIndex + static_cast<std::size_t>(ix)];
                     }
-                }
-                else if (borderType == PadBorderType::Edge) {
-                    int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])));
+                } else if (borderType == PadBorderType::Edge) {
+                    int ix = std::max(
+                        0,
+                        std::min(static_cast<int>(dims[2]) - 1,
+                                 static_cast<int>(ox) -
+                                     static_cast<int>(beginEndBorders[0])));
 
                     outputValue = input[iIndex + static_cast<std::size_t>(ix)];
-                }
-                else if (borderType == PadBorderType::Reflect) {
-                    int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[0]);
+                } else if (borderType == PadBorderType::Reflect) {
+                    int ix = static_cast<int>(ox) -
+                             static_cast<int>(beginEndBorders[0]);
 
                     if (ix < 0)
                         ix = 0 - ix;
@@ -75,9 +81,11 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder
                         ix = static_cast<int>(dims[2]) - ix;
 
                     outputValue = input[iIndex + static_cast<std::size_t>(ix)];
-                }
-                else if (borderType == PadBorderType::Wrap) {
-                    int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[0])) % static_cast<int>(dims[2]);
+                } else if (borderType == PadBorderType::Wrap) {
+                    int ix =
+                        (static_cast<int>(dims[2]) + static_cast<int>(ox) -
+                         static_cast<int>(beginEndBorders[0])) %
+                        static_cast<int>(dims[2]);
 
                     outputValue = input[iIndex + static_cast<std::size_t>(ix)];
                 }
@@ -90,15 +98,26 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder
 
 // Kernels registration to implementation entry point
 REGISTRAR(PadImpl1D_cpu,
-    {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr});
+          {{DataType::Float32, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {Pad_ProdConso_cpu::defaultModel,
+           Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>,
+                                               cpptype_t<DataType::Float32>>,
+           nullptr});
 REGISTRAR(PadImpl1D_cpu,
-    {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr});
+          {{DataType::Float64, DataFormat::NCHW},
+           {DataType::Float64, DataFormat::NCHW}},
+          {Pad_ProdConso_cpu::defaultModel,
+           Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>,
+                                               cpptype_t<DataType::Float64>>,
+           nullptr});
 REGISTRAR(PadImpl1D_cpu,
-    {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr});
-
+          {{DataType::Int32, DataFormat::NCHW},
+           {DataType::Int32, DataFormat::NCHW}},
+          {Pad_ProdConso_cpu::defaultModel,
+           Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>,
+                                               cpptype_t<DataType::Int32>>,
+           nullptr});
 
 /**
  * @brief Forward kernel for 2D Padding on CPU backend.
@@ -110,47 +129,76 @@ REGISTRAR(PadImpl1D_cpu,
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorders,
-                                const PadBorderType borderType,
-                                const double borderValue,
-                                const std::array<DimSize_t, 4> &dims,
-                                const void *input_,
-                                void *output_)
-{
+void PadImpl2D_cpu_forward_kernel(
+    const std::array<DimSize_t, 4> &beginEndBorders,
+    const PadBorderType borderType,
+    const double borderValue,
+    const std::array<DimSize_t, 4> &dims,
+    const void *input_,
+    void *output_) {
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
-    const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[2];
-    const std::size_t oxSize = dims[3] + beginEndBorders[1] + beginEndBorders[3];
+    const std::size_t oySize =
+        dims[2] + beginEndBorders[0] + beginEndBorders[2];
+    const std::size_t oxSize =
+        dims[3] + beginEndBorders[1] + beginEndBorders[3];
 
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
+            const std::size_t iIndex =
+                (ch + batch * dims[1]) * dims[2] * dims[3];
+            const std::size_t oIndex =
+                (ch + batch * dims[1]) * oxSize * oySize;
 
             for (std::uint32_t oy = 0; oy < oySize; ++oy) {
                 for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
-                    const std::size_t oIndexFull = oIndex + oy*oxSize + ox;
+                    const std::size_t oIndexFull = oIndex + oy * oxSize + ox;
 
                     O outputValue = static_cast<O>(borderValue);
 
                     if (borderType == PadBorderType::Constant) {
-                        std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]);
-                        std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]);
+                        std::int32_t ix =
+                            static_cast<std::int32_t>(ox) -
+                            static_cast<std::int32_t>(beginEndBorders[1]);
+                        std::int32_t iy =
+                            static_cast<std::int32_t>(oy) -
+                            static_cast<std::int32_t>(beginEndBorders[0]);
 
-                        if (ix >= 0  && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0  && iy < static_cast<std::int32_t>(dims[2])) {
-                            outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
+                        if (ix >= 0 &&
+                            ix < static_cast<std::int32_t>(dims[3]) &&
+                            iy >= 0 &&
+                            iy < static_cast<std::int32_t>(dims[2])) {
+                            outputValue =
+                                input[iIndex +
+                                      static_cast<std::size_t>(iy) * dims[3] +
+                                      static_cast<std::size_t>(ix)];
                         }
-                    }
-                    else if (borderType == PadBorderType::Edge) {
-                        std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])));
-                        std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])));
+                    } else if (borderType == PadBorderType::Edge) {
+                        std::int32_t ix = std::max(
+                            0,
+                            std::min(static_cast<std::int32_t>(dims[3]) - 1,
+                                     static_cast<std::int32_t>(ox) -
+                                         static_cast<std::int32_t>(
+                                             beginEndBorders[1])));
+                        std::int32_t iy = std::max(
+                            0,
+                            std::min(static_cast<std::int32_t>(dims[2]) - 1,
+                                     static_cast<std::int32_t>(oy) -
+                                         static_cast<std::int32_t>(
+                                             beginEndBorders[0])));
 
-                        outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
-                    }
-                    else if (borderType == PadBorderType::Reflect) {
-                        std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1]);
-                        std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0]);
+                        outputValue =
+                            input[iIndex +
+                                  static_cast<std::size_t>(iy) * dims[3] +
+                                  static_cast<std::size_t>(ix)];
+                    } else if (borderType == PadBorderType::Reflect) {
+                        std::int32_t ix =
+                            static_cast<std::int32_t>(ox) -
+                            static_cast<std::int32_t>(beginEndBorders[1]);
+                        std::int32_t iy =
+                            static_cast<std::int32_t>(oy) -
+                            static_cast<std::int32_t>(beginEndBorders[0]);
 
                         if (ix < 0)
                             ix = 0 - ix;
@@ -161,13 +209,26 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
                         if (iy >= static_cast<std::int32_t>(dims[2]))
                             iy = static_cast<std::int32_t>(dims[2]) - iy;
 
-                        outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
-                    }
-                    else if (borderType == PadBorderType::Wrap) {
-                        std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[3]);
-                        std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[0])) % static_cast<std::int32_t>(dims[2]);
+                        outputValue =
+                            input[iIndex +
+                                  static_cast<std::size_t>(iy) * dims[3] +
+                                  static_cast<std::size_t>(ix)];
+                    } else if (borderType == PadBorderType::Wrap) {
+                        std::int32_t ix =
+                            (static_cast<std::int32_t>(dims[3]) +
+                             static_cast<std::int32_t>(ox) -
+                             static_cast<std::int32_t>(beginEndBorders[1])) %
+                            static_cast<std::int32_t>(dims[3]);
+                        std::int32_t iy =
+                            (static_cast<std::int32_t>(dims[2]) +
+                             static_cast<std::int32_t>(oy) -
+                             static_cast<std::int32_t>(beginEndBorders[0])) %
+                            static_cast<std::int32_t>(dims[2]);
 
-                        outputValue = input[iIndex + static_cast<std::size_t>(iy)*dims[3] + static_cast<std::size_t>(ix)];
+                        outputValue =
+                            input[iIndex +
+                                  static_cast<std::size_t>(iy) * dims[3] +
+                                  static_cast<std::size_t>(ix)];
                     }
 
                     output[oIndexFull] = outputValue;
@@ -179,14 +240,26 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
 
 // Kernels registration to implementation entry point
 REGISTRAR(PadImpl2D_cpu,
-    {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr});
+          {{DataType::Float32, DataFormat::NCHW},
+           {DataType::Float32, DataFormat::NCHW}},
+          {Pad_ProdConso_cpu::defaultModel,
+           Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>,
+                                               cpptype_t<DataType::Float32>>,
+           nullptr});
 REGISTRAR(PadImpl2D_cpu,
-    {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr});
+          {{DataType::Float64, DataFormat::NCHW},
+           {DataType::Float64, DataFormat::NCHW}},
+          {Pad_ProdConso_cpu::defaultModel,
+           Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>,
+                                               cpptype_t<DataType::Float64>>,
+           nullptr});
 REGISTRAR(PadImpl2D_cpu,
-    {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr});
-}  // namespace Aidge
+          {{DataType::Int32, DataFormat::NCHW},
+           {DataType::Int32, DataFormat::NCHW}},
+          {Pad_ProdConso_cpu::defaultModel,
+           Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>,
+                                               cpptype_t<DataType::Int32>>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ */
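
Note: the four PadBorderType branches differ only in how an out-of-range input index is remapped. A standalone 1D sketch restating the branches above (illustrative; returns -1 where Constant falls back to borderValue, and Wrap assumes the offset magnitude is below the input length, as in the kernel):

    #include <algorithm>
    #include <cassert>

    enum class Mode { Constant, Edge, Reflect, Wrap };

    // Remap a padded output position ox to an input index, mirroring
    // PadImpl1D_cpu_forward_kernel.
    int remap(int ox, int padBegin, int len, Mode mode) {
        int ix = ox - padBegin;
        switch (mode) {
        case Mode::Constant:
            return (ix >= 0 && ix < len) ? ix : -1; // filled with borderValue
        case Mode::Edge:
            return std::max(0, std::min(len - 1, ix)); // clamp to the edge
        case Mode::Reflect:
            if (ix < 0) ix = -ix;         // mirror below 0
            if (ix >= len) ix = len - ix; // as in the kernel above
            return ix;
        case Mode::Wrap:
            return (len + ix) % len;
        }
        return -1;
    }

    int main() {
        // len = 4 input, padBegin = 2: output position 0 is two left of input.
        assert(remap(0, 2, 4, Mode::Constant) == -1);
        assert(remap(0, 2, 4, Mode::Edge) == 0);    // clamped to index 0
        assert(remap(0, 2, 4, Mode::Reflect) == 2); // -2 mirrors to 2
        assert(remap(0, 2, 4, Mode::Wrap) == 2);    // (4 + -2) % 4 = 2
        return 0;
    }
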
diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp
index cfbb8173d1f83162519016a8f2b3c3166977a5b7..973fe7fe553d60cca81f0e028bd26d58a2084cac 100644
--- a/include/aidge/backend/cpu/operator/PowImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl.hpp
@@ -12,23 +12,34 @@
 #ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_
 #define AIDGE_CPU_OPERATOR_POWIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Pow.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using PowImpl_cpu = OperatorImpl_cpu<Pow_Op,
-    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*),
-    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, const void*, void*, void*)>;
-
+                                     void(const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const void *,
+                                          const void *,
+                                          void *),
+                                     void(const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const void *,
+                                          const void *,
+                                          const void *,
+                                          void *,
+                                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
index ab9b2ccc7b823842decd044b90a5c6364cedc9c9..78ca9a3086f34fd248cd4b3eb444184aedfa90b0 100644
--- a/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
@@ -20,76 +20,100 @@
 
 namespace Aidge {
 template <class I1, class I2, class O>
-void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
-                                const std::vector<std::size_t>& input2Dims,
-                                const std::vector<std::size_t>& outputDims,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+void PowImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims,
+                                const std::vector<std::size_t> &input2Dims,
+                                const std::vector<std::size_t> &outputDims,
+                                const void *input1_,
+                                const void *input2_,
+                                void *output_) {
 
-    const I1* input_1 = static_cast<const I1*>(input1_);
-    const I2* input_2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
+    const I1 *input_1 = static_cast<const I1 *>(input1_);
+    const I2 *input_2 = static_cast<const I2 *>(input2_);
+    O *output = static_cast<O *>(output_);
 
-    std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) 
-	{
-		std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+    std::size_t totalElements =
+        std::accumulate(outputDims.cbegin(),
+                        outputDims.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
+        std::vector<std::size_t> indexes =
+            getMultiDimIndices(outputDims, oIndex);
+
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
 
-		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
-		std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
-		
         output[oIndex] = std::pow(input_1[idx1], input_2[idx2]);
-	}
+    }
 }
 
 template <class I1, class I2, class O>
-void PowImpl_cpu_backward_kernel(const std::vector<std::size_t>& input0Dims,
-                                const std::vector<std::size_t>& input1Dims,
-                                const std::vector<std::size_t>& outputDims,
-                                const void* input0_,
-                                const void* input1_,
-                                const void* gradOutput_,
-                                void* gradientInput0_,
-                                void* gradientInput1_) {
-	const I1* input0 = static_cast<const I1*>(input0_);
-	I1* grad0 = static_cast<I1*>(gradientInput0_);
-    const I2* input1 = static_cast<const I2*>(input1_);
-    I2* grad1 = static_cast<I2*>(gradientInput1_);
-    const O* gradOut = static_cast<const O*>(gradOutput_);
+void PowImpl_cpu_backward_kernel(const std::vector<std::size_t> &input0Dims,
+                                 const std::vector<std::size_t> &input1Dims,
+                                 const std::vector<std::size_t> &outputDims,
+                                 const void *input0_,
+                                 const void *input1_,
+                                 const void *gradOutput_,
+                                 void *gradientInput0_,
+                                 void *gradientInput1_) {
+    const I1 *input0 = static_cast<const I1 *>(input0_);
+    I1 *grad0 = static_cast<I1 *>(gradientInput0_);
+    const I2 *input1 = static_cast<const I2 *>(input1_);
+    I2 *grad1 = static_cast<I2 *>(gradientInput1_);
+    const O *gradOut = static_cast<const O *>(gradOutput_);
 
     // Fill input grads with zeros
-	std::size_t input0Elements = std::accumulate(input0Dims.cbegin(), input0Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-	std::fill(grad0, grad0 + input0Elements, I1(0));
-	std::size_t input1Elements = std::accumulate(input1Dims.cbegin(), input1Dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-	std::fill(grad1, grad1 + input1Elements, I2(0));
+    std::size_t input0Elements =
+        std::accumulate(input0Dims.cbegin(),
+                        input0Dims.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
+    std::fill(grad0, grad0 + input0Elements, I1(0));
+    std::size_t input1Elements =
+        std::accumulate(input1Dims.cbegin(),
+                        input1Dims.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
+    std::fill(grad1, grad1 + input1Elements, I2(0));
 
-	std::size_t totalElements = std::accumulate(outputDims.cbegin(), outputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-    for (size_t oIndex = 0; oIndex < totalElements; ++oIndex)
-    {
+    std::size_t totalElements =
+        std::accumulate(outputDims.cbegin(),
+                        outputDims.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
+    for (size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
         // Compute indexes in inputs 0 and 1 to support broadcasting
-        std::vector<std::size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+        std::vector<std::size_t> indexes =
+            getMultiDimIndices(outputDims, oIndex);
         std::size_t idx0 = getFlattenedIndex(input0Dims, indexes);
         std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
 
         // grad0 = grad_output * (input1 * pow(input0, (input1 -1)))
-        grad0[idx0] += gradOut[oIndex]*input1[idx1]* std::pow(input0[idx0], input1[idx1]-1);
+        grad0[idx0] += gradOut[oIndex] * input1[idx1] *
+                       std::pow(input0[idx0], input1[idx1] - 1);
 
         // grad1 = grad_output * (output * ln(input0))
-        grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) * std::log(input0[idx0]);
+        grad1[idx1] += gradOut[oIndex] * std::pow(input0[idx0], input1[idx1]) *
+                       std::log(input0[idx0]);
     }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(PowImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, Aidge::PowImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::PowImpl_cpu_forward_kernel<float, float, float>,
+           Aidge::PowImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(PowImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, Aidge::PowImpl_cpu_backward_kernel<double, double, double>});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::PowImpl_cpu_forward_kernel<double, double, double>,
+           Aidge::PowImpl_cpu_backward_kernel<double, double, double>});
 REGISTRAR(PowImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>, Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::PowImpl_cpu_forward_kernel<int32_t, int32_t, int32_t>,
+           Aidge::PowImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */
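
Note: the backward kernel applies the two partials d(x^y)/dx = y * x^(y-1) and d(x^y)/dy = x^y * ln(x), accumulated through the broadcast index maps. A scalar finite-difference check of those formulas (illustrative only; the kernel additionally broadcasts and accumulates):

    #include <cassert>
    #include <cmath>

    int main() {
        const double x = 1.7, y = 2.3, eps = 1e-6;

        // Analytic partials, as in PowImpl_cpu_backward_kernel.
        const double dfdx = y * std::pow(x, y - 1.0);
        const double dfdy = std::pow(x, y) * std::log(x);

        // Central finite differences.
        const double numX =
            (std::pow(x + eps, y) - std::pow(x - eps, y)) / (2.0 * eps);
        const double numY =
            (std::pow(x, y + eps) - std::pow(x, y - eps)) / (2.0 * eps);

        assert(std::abs(dfdx - numX) < 1e-5);
        assert(std::abs(dfdy - numY) < 1e-5);
        return 0;
    }
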
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
index 5b900618abce83ff1c3822d4f61cc62c93f5081f..366b81f20cc1ac3ea7757a9edebc73267b46c661 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
@@ -12,9 +12,9 @@
 #ifndef AIDGE_CPU_OPERATOR_RELUIMPL_H_
 #define AIDGE_CPU_OPERATOR_RELUIMPL_H_
 
-#include <cstddef>  // std::size_t
+#include <cstddef> // std::size_t
 #include <memory>
-#include <tuple>    // std::tuple
+#include <tuple>   // std::tuple
 #include <vector>
 
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
@@ -24,12 +24,13 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using ReLUImpl_cpu = OperatorImpl_cpu<ReLU_Op,
-    void(const std::size_t, const void*, void*),
-    void(const std::size_t, const void*, const void*, void*)>;
+using ReLUImpl_cpu = OperatorImpl_cpu<
+    ReLU_Op,
+    void(const std::size_t, const void *, void *),
+    void(const std::size_t, const void *, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(ReLU_Op, "cpu", Aidge::ReLUImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
index e39e9b7decd91e392c5db7e9e9bc4ed0f366829d..246063a7295b0c31f1598700fe7da0d641ec1e46 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
@@ -12,9 +12,9 @@
 #ifndef AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
 
-#include <cstddef>  // std::size_t
+#include <cstddef> // std::size_t
 #include <memory>
-#include <tuple>    // std::tuple
+#include <tuple>   // std::tuple
 #include <vector>
 
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
@@ -27,13 +27,13 @@ namespace Aidge {
 // Kernels
 template <class I, class O>
 void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                 const void *input_,
+                                 void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
-//#pragma omp parallel for if (inputLenght > 1024)
+    // #pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = (input[i] > 0) ? input[i] : 0;
     }
@@ -41,11 +41,12 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
 
 template <class I, class GI, class GO>
 void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                  const void* input_, const void* grad_output_,
-				  void* grad_input_) {
-    const I* input = static_cast<const I*>(input_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
+                                  const void *input_,
+                                  const void *grad_output_,
+                                  void *grad_input_) {
+    const I *input = static_cast<const I *>(input_);
+    const GO *grad_output = static_cast<const GO *>(grad_output_);
+    GI *grad_input = static_cast<GI *>(grad_input_);
     for (std::size_t i = 0; i < inputLenght; ++i) {
         grad_input[i] = (input[i] > 0) ? grad_output[i] : 0;
     }
@@ -53,14 +54,20 @@ void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
 
 // Kernels registration to implementation entry point
 REGISTRAR(ReLUImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::ReLUImpl_cpu_forward_kernel<float, float>,
+           Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(ReLUImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::ReLUImpl_cpu_forward_kernel<double, double>,
+           Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>});
 REGISTRAR(ReLUImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::ReLUImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::ReLUImpl_cpu_forward_kernel<int32_t, int32_t>,
+           Aidge::ReLUImpl_cpu_backward_kernel<int32_t, int32_t, int32_t>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ */
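
Note: the backward kernel masks the incoming gradient by the positivity of the forward *input*, not of the output. A minimal check of the forward/backward pair (illustrative only):

    #include <cassert>
    #include <cstddef>

    int main() {
        const float in[4] = {-1.0f, 0.0f, 2.0f, 3.0f};
        const float gradOut[4] = {1.0f, 1.0f, 1.0f, 1.0f};
        float out[4], gradIn[4];
        for (std::size_t i = 0; i < 4; ++i) {
            out[i] = (in[i] > 0) ? in[i] : 0;         // forward
            gradIn[i] = (in[i] > 0) ? gradOut[i] : 0; // backward mask
        }
        assert(out[0] == 0.0f && out[2] == 2.0f);
        assert(gradIn[1] == 0.0f && gradIn[3] == 1.0f);
        return 0;
    }
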
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
index 1c50805d5af768dfc160488fda1e8fadfa798454..97f0cedc268fa825c068b90513a3bc8fc66d6532 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
@@ -24,15 +24,16 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using ReduceMeanImpl_cpu = OperatorImpl_cpu<ReduceMean_Op,
-    void(const std::vector<std::int32_t>&,
-                            DimSize_t,
-                            const std::vector<DimSize_t>&,
-                            const void *,
-                            void *)>;
+using ReduceMeanImpl_cpu =
+    OperatorImpl_cpu<ReduceMean_Op,
+                     void(const std::vector<std::int32_t> &,
+                          DimSize_t,
+                          const std::vector<DimSize_t> &,
+                          const void *,
+                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(ReduceMean_Op, "cpu", Aidge::ReduceMeanImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */
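
Note: the single-axis branch of the kernels file below keeps a numerically stable single-pass mean, mean <- mean + (x_i - mean) / (i + 1), with the multiply-add fused via fmaf. A standalone sketch of the recurrence (illustrative only):

    #include <cassert>
    #include <cmath>
    #include <cstddef>

    int main() {
        const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
        float mean = 0.0f;
        for (std::size_t i = 0; i < 4; ++i) {
            // mean_{i+1} = mean_i + (x_i - mean_i) / (i + 1), one rounding step.
            mean = std::fmaf(x[i] - mean, 1.0f / (i + 1), mean);
        }
        assert(std::fabs(mean - 2.5f) < 1e-6f);
        return 0;
    }
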
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
index 5a143164d7e4fa2585ea72c38eaaa123f215d21a..339db53ad260f8b56fa2dbda71b8ce18460724c1 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
@@ -12,11 +12,11 @@
 #ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_
 
-#include <algorithm>   // std::for_each
-#include <cstddef>     // std::size_t
-#include <cstdint>     // std::int32_t
-#include <functional>  //std::multiplies
-#include <numeric>     //std::accumulate
+#include <algorithm>  // std::for_each
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t
+#include <functional> //std::multiplies
+#include <numeric>    //std::accumulate
 #include <vector>
 
 #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
@@ -26,24 +26,35 @@
 
 namespace Aidge {
 template <class I, class O>
-void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
-                                    DimSize_t /*keepDims*/,
-                                    const std::vector<DimSize_t>& inputDims,
-                                    const void* input_,
-                                    void* output_) {
+void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t> &axes,
+                                       DimSize_t /*keepDims*/,
+                                       const std::vector<DimSize_t> &inputDims,
+                                       const void *input_,
+                                       void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     const std::size_t nb_dims = inputDims.size();
-    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
+    const std::size_t totalElements =
+        std::accumulate(inputDims.cbegin(),
+                        inputDims.cend(),
+                        1,
+                        std::multiplies<std::size_t>());
 
-    if (axes.empty()){
-        std::copy_n(input,totalElements, output);
-    }
-    else if (axes.size() == 1) {
-        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
-        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>());
+    if (axes.empty()) {
+        std::copy_n(input, totalElements, output);
+    } else if (axes.size() == 1) {
+        const std::size_t stride_pre =
+            std::accumulate(inputDims.cbegin(),
+                            inputDims.cbegin() + axes[0],
+                            1,
+                            std::multiplies<std::size_t>());
+        const std::size_t stride_post =
+            std::accumulate(inputDims.crbegin(),
+                            inputDims.crbegin() + nb_dims - 1 - axes[0],
+                            1,
+                            std::multiplies<std::size_t>());
 
         const std::size_t dim_i = inputDims[axes[0]];
         for (std::size_t pre = 0; pre < stride_pre; ++pre) {
@@ -53,54 +64,69 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
                 O mean = 0;
                 for (std::size_t i = 0; i < dim_i; ++i) {
                    // Numerically stable single-pass mean using fmaf
-                    mean = fmaf(input[idx_i + i*stride_post] - mean, 1.0f/(i+1), mean);
+                    mean = fmaf(input[idx_i + i * stride_post] - mean,
+                                1.0f / (i + 1),
+                                mean);
                 }
-                output[idx_o]  = mean;
+                output[idx_o] = mean;
             }
         }
     } else {
         std::size_t outputElements = totalElements;
 
-        auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
+        auto stride_post =
+            std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
         stride_post[nb_dims - 1] = 1;
-        for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
-            stride_post[i] = stride_post[i+1]*inputDims[i+1];
+        for (std::size_t i = nb_dims - 2; i != static_cast<std::size_t>(-1);
+             --i) {
+            stride_post[i] = stride_post[i + 1] * inputDims[i + 1];
         }
-        auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
+        auto stride_pre =
+            std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
         stride_pre[0] = 1;
         for (std::size_t i = 1; i < nb_dims; ++i) {
-            stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
+            stride_pre[i] = stride_pre[i - 1] * inputDims[i - 1];
         }
 
-        const I* inputAccumulation = input;
-        I* outputAccumulation = nullptr;
+        const I *inputAccumulation = input;
+        I *outputAccumulation = nullptr;
 
-        for (const auto& axisInt : axes) {
+        for (const auto &axisInt : axes) {
             const std::size_t a = static_cast<std::size_t>(axisInt);
             outputElements /= inputDims[a];
             outputAccumulation = new I[outputElements];
             const std::size_t dim_i = inputDims[a];
             for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
                 for (std::size_t post = 0; post < stride_post[a]; ++post) {
-                    const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
+                    const std::size_t idx_i =
+                        pre * dim_i * stride_post[a] + post;
                     const std::size_t idx_o = pre * stride_post[a] + post;
                     I mean = 0;
                     for (std::size_t i = 0; i < dim_i; ++i) {
                        // Numerically stable single-pass mean using fmaf
-                        mean = fmaf(inputAccumulation[idx_i + i*stride_post[a]] - mean, 1.0f/(i+1), mean);
+                        mean = fmaf(
+                            inputAccumulation[idx_i + i * stride_post[a]] -
+                                mean,
+                            1.0f / (i + 1),
+                            mean);
                     }
                     outputAccumulation[idx_o] = mean;
                 }
             }
-            std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
+            std::for_each(stride_pre.get() + a + 1,
+                          stride_pre.get() + nb_dims,
+                          [dim_i](std::size_t &val) { val /= dim_i; });
             if (inputAccumulation != input) {
                 delete[] inputAccumulation;
             }
             inputAccumulation = outputAccumulation;
         }
 
-        // Copy elements from inputAccumulation to output while dividing by divisor
-        std::copy(inputAccumulation, inputAccumulation + outputElements, output);
+        // Copy the final per-axis means from inputAccumulation to output
+        std::copy(inputAccumulation,
+                  inputAccumulation + outputElements,
+                  output);
         if (outputAccumulation) {
             delete[] outputAccumulation;
         }
@@ -109,14 +135,20 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
 
 // Kernels registration to implementation entry point
 REGISTRAR(ReduceMeanImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(ReduceMeanImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(ReduceMeanImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::ReduceMeanImpl_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ */
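
For context, the forward kernel above computes the mean in a single pass with the update mean_{i+1} = mean_i + (x_i - mean_i)/(i+1), expressed as one fused multiply-add. Note that fmaf is the single-precision fused multiply-add, so the double instantiation registered above effectively updates in float precision. A minimal sketch of the update:

    // Running-mean update as used by the ReduceMean kernel:
    //   mean_{i+1} = mean_i + (x_i - mean_i) / (i + 1)
    // which avoids accumulating a large sum before dividing.
    #include <cmath>
    #include <cstdio>

    int main() {
        const float x[4] = {1.f, 2.f, 3.f, 4.f};
        float mean = 0.f;
        for (int i = 0; i < 4; ++i)
            mean = std::fmaf(x[i] - mean, 1.0f / (i + 1), mean);
        std::printf("%f\n", mean); // 2.5, same as (1+2+3+4)/4
        return 0;
    }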
diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
index 4138c62c24149c15cfad5e85e8f50889b2b6a433..7e01ef5a55c088d5f8e674c9142ea3a92b553927 100644
--- a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
@@ -24,15 +24,16 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using ReduceSumImpl_cpu = OperatorImpl_cpu<ReduceSum_Op,
-    void(const std::vector<std::int32_t>&,
-                            DimSize_t,
-                            const std::vector<DimSize_t>&,
-                            const void *,
-                            void *)>;
+using ReduceSumImpl_cpu =
+    OperatorImpl_cpu<ReduceSum_Op,
+                     void(const std::vector<std::int32_t> &,
+                          DimSize_t,
+                          const std::vector<DimSize_t> &,
+                          const void *,
+                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(ReduceSum_Op, "cpu", Aidge::ReduceSumImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp
index 72671421796a0d5e799e6f762dfcaf02457220f3..773d099ec7c2747a2d6fcf5c0b6e95c03d5ad605 100644
--- a/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp
@@ -12,11 +12,11 @@
 #ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_
 
-#include <algorithm>   // std::for_each
-#include <cstddef>     // std::size_t
-#include <cstdint>     // std::int32_t
-#include <functional>  //std::multiplies
-#include <numeric>     //std::accumulate
+#include <algorithm>  // std::for_each
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t
+#include <functional> // std::multiplies
+#include <numeric>    // std::accumulate
 #include <vector>
 
 #include "aidge/backend/cpu/operator/ReduceSumImpl.hpp"
@@ -26,24 +26,35 @@
 
 namespace Aidge {
 template <class I, class O>
-void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
-                                    DimSize_t /*keepDims*/,
-                                    const std::vector<DimSize_t>& inputDims,
-                                    const void* input_,
-                                    void* output_) {
+void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t> &axes,
+                                      DimSize_t /*keepDims*/,
+                                      const std::vector<DimSize_t> &inputDims,
+                                      const void *input_,
+                                      void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     const std::size_t nb_dims = inputDims.size();
-    const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
+    const std::size_t totalElements =
+        std::accumulate(inputDims.cbegin(),
+                        inputDims.cend(),
+                        1,
+                        std::multiplies<std::size_t>());
 
-    if (axes.empty()){
-        std::copy_n(input,totalElements, output);
-    }
-    else if (axes.size() == 1) {
-        const std::size_t stride_pre = std::accumulate(inputDims.cbegin(), inputDims.cbegin() + axes[0], 1, std::multiplies<std::size_t>());
-        const std::size_t stride_post = std::accumulate(inputDims.crbegin(), inputDims.crbegin() + nb_dims -1 - axes[0], 1, std::multiplies<std::size_t>());
+    if (axes.empty()) {
+        std::copy_n(input, totalElements, output);
+    } else if (axes.size() == 1) {
+        const std::size_t stride_pre =
+            std::accumulate(inputDims.cbegin(),
+                            inputDims.cbegin() + axes[0],
+                            1,
+                            std::multiplies<std::size_t>());
+        const std::size_t stride_post =
+            std::accumulate(inputDims.crbegin(),
+                            inputDims.crbegin() + nb_dims - 1 - axes[0],
+                            1,
+                            std::multiplies<std::size_t>());
 
         const std::size_t dim_i = inputDims[axes[0]];
         for (std::size_t pre = 0; pre < stride_pre; ++pre) {
@@ -52,53 +63,62 @@ void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
                 const std::size_t idx_o = pre * stride_post + post;
                 O sum = 0;
                 for (std::size_t i = 0; i < dim_i; ++i) {
-                    sum +=input[idx_i + i*stride_post];
+                    sum += input[idx_i + i * stride_post];
                 }
-                output[idx_o]  = sum;
+                output[idx_o] = sum;
             }
         }
     } else {
         std::size_t outputElements = totalElements;
 
-        auto stride_post = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
+        auto stride_post =
+            std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
         stride_post[nb_dims - 1] = 1;
-        for (std::size_t i = nb_dims-2; i != static_cast<std::size_t>(-1); --i) {
-            stride_post[i] = stride_post[i+1]*inputDims[i+1];
+        for (std::size_t i = nb_dims - 2; i != static_cast<std::size_t>(-1);
+             --i) {
+            stride_post[i] = stride_post[i + 1] * inputDims[i + 1];
         }
-        auto stride_pre = std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
+        auto stride_pre =
+            std::unique_ptr<std::size_t[]>(new std::size_t[nb_dims]);
         stride_pre[0] = 1;
         for (std::size_t i = 1; i < nb_dims; ++i) {
-            stride_pre[i] = stride_pre[i-1]*inputDims[i-1];
+            stride_pre[i] = stride_pre[i - 1] * inputDims[i - 1];
         }
 
-        const I* inputAccumulation = input;
-        I* outputAccumulation = nullptr;
+        const I *inputAccumulation = input;
+        I *outputAccumulation = nullptr;
 
-        for (const auto& axisInt : axes) {
+        for (const auto &axisInt : axes) {
             const std::size_t a = static_cast<std::size_t>(axisInt);
             outputElements /= inputDims[a];
             outputAccumulation = new I[outputElements];
             const std::size_t dim_i = inputDims[a];
             for (std::size_t pre = 0; pre < stride_pre[a]; ++pre) {
                 for (std::size_t post = 0; post < stride_post[a]; ++post) {
-                    const std::size_t idx_i = pre * dim_i * stride_post[a] + post;
+                    const std::size_t idx_i =
+                        pre * dim_i * stride_post[a] + post;
                     const std::size_t idx_o = pre * stride_post[a] + post;
                     I sum = 0;
                     for (std::size_t i = 0; i < dim_i; ++i) {
-                        sum += inputAccumulation[idx_i + i*stride_post[a]];
+                        sum += inputAccumulation[idx_i + i * stride_post[a]];
                     }
                     outputAccumulation[idx_o] = sum;
                 }
             }
-            std::for_each(stride_pre.get()+a+1, stride_pre.get()+nb_dims, [dim_i] (std::size_t& val) { val /= dim_i; });
+            std::for_each(stride_pre.get() + a + 1,
+                          stride_pre.get() + nb_dims,
+                          [dim_i](std::size_t &val) { val /= dim_i; });
             if (inputAccumulation != input) {
                 delete[] inputAccumulation;
             }
             inputAccumulation = outputAccumulation;
         }
 
-        // Copy elements from inputAccumulation to output while dividing by divisor
-        std::copy(inputAccumulation, inputAccumulation + outputElements, output);
+        // Copy the final accumulated sums from inputAccumulation to output
+        std::copy(inputAccumulation,
+                  inputAccumulation + outputElements,
+                  output);
         if (outputAccumulation) {
             delete[] outputAccumulation;
         }
@@ -107,14 +127,20 @@ void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
 
 // Kernels registration to implementation entry point
 REGISTRAR(ReduceSumImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(ReduceSumImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(ReduceSumImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::ReduceSumImpl_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ */
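
For context, the single-axis branch above linearizes a row-major tensor around the reduced axis a: every element's flat index factors as (pre * dims[a] + i) * stride_post + post, where stride_pre is the product of the dimensions before a and stride_post the product of those after it. A minimal sketch of that decomposition:

    // Pre/post stride decomposition for reducing one axis of a
    // row-major tensor, as used by the ReduceSum/ReduceMean kernels.
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        const std::vector<std::size_t> dims = {2, 3, 4}; // reduce axis a = 1
        const std::size_t a = 1;
        std::size_t stride_pre = 1, stride_post = 1;
        for (std::size_t d = 0; d < a; ++d) stride_pre *= dims[d];
        for (std::size_t d = a + 1; d < dims.size(); ++d) stride_post *= dims[d];
        // flatIndex(pre, i, post) = (pre * dims[a] + i) * stride_post + post
        std::printf("stride_pre=%zu stride_post=%zu\n", stride_pre, stride_post); // 2, 4
        return 0;
    }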
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
index c1cc247c548701d43e01b1e92d02f42a11cfc710..86ccc326ee15c47e07bb4870526b81782dc02c8d 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
@@ -12,27 +12,27 @@
 #ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_H__
 #define __AIDGE_CPU_OPERATOR_ScalingIMPL_H__
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Scaling.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <array>
 #include <memory>
 #include <vector>
-#include <array>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using ScalingImpl_cpu = OperatorImpl_cpu<Scaling_Op,
-    void(const float,
-        const std::size_t,
-        const bool,
-        std::size_t,
-        const void*,
-        void*)>;
+                                         void(const float,
+                                              const std::size_t,
+                                              const bool,
+                                              std::size_t,
+                                              const void *,
+                                              void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Scaling_Op, "cpu", Aidge::ScalingImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp
index c758c9cf39e76bb370c6d03c28e3a670c280eefc..532d364c549c2084f8c03f0b1c19f66b215c8fb2 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp
@@ -12,21 +12,20 @@
 #ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
 
+#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include <cmath>
 #include <cstddef>
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 
-//TODO : improve propagate, n2d2 :
+// TODO: improve the propagate kernel; reference N2D2 implementation below:
 /*
 template<typename T>
-void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& output,
-                                          std::size_t batchSize, std::size_t nbChannels,
-                                          std::size_t height, std::size_t width,
-                                          bool isClipped,
-                                          const std::vector<Float_T>& clippingFactorPerChannel,
-                                          const std::vector<Float_T>& scalingFactorPerChannel,
-                                          std::size_t quantizedNbBits, bool isOutputUnsigned)
+void N2D2::floatingPointScaling_propagate(
+    const Tensor<T>& input, Tensor<T>& output,
+    std::size_t batchSize, std::size_t nbChannels,
+    std::size_t height, std::size_t width,
+    bool isClipped,
+    const std::vector<Float_T>& clippingFactorPerChannel,
+    const std::vector<Float_T>& scalingFactorPerChannel,
+    std::size_t quantizedNbBits, bool isOutputUnsigned)
 {
     std::size_t index = 0;
     for (std::size_t batch = 0; batch < batchSize; batch++) {
@@ -34,12 +33,13 @@ void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& out
             for(std::size_t y = 0; y < height; y++) {
                 for(std::size_t x = 0; x < width; x++) {
 
-                    T res = isClipped ? Clip(input(index), clippingFactorPerChannel[ch])
-                                    : input(index);
-                    res = Scale(res, scalingFactorPerChannel[ch]);
+                    T res = isClipped
+                                ? Clip(input(index), clippingFactorPerChannel[ch])
+                                : input(index);
+                    res = Scale(res, scalingFactorPerChannel[ch]);
 
                     if(quantizedNbBits > 0) {
-                        res = saturate(std::round(res), quantizedNbBits, isOutputUnsigned);
+                        res = saturate(std::round(res),
+                                       quantizedNbBits,
+                                       isOutputUnsigned);
                     }
                     output(index) = (T) res;
                     index++;
@@ -50,24 +50,22 @@ void N2D2::floatingPointScaling_propagate(const Tensor<T>& input, Tensor<T>& out
 }
 */
 
-
 namespace Aidge {
 
-template <class O>
-const O& clamp(const O& x, const O& min, const O& max)
-{
+template <class O> const O &clamp(const O &x, const O &min, const O &max) {
     return (x < min) ? min : (x > max) ? max : x;
 }
 
-template<class O>
-O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) {
+template <class O>
+O saturate(const O value,
+           const std::size_t quantizedNbBits,
+           const bool isOutputUnsigned) {
     // TODO: no assertions in kernel
     assert(quantizedNbBits > 0);
 
-    const O min = isOutputUnsigned ? 0 :
-                                  -(1ll << (quantizedNbBits - 1ll));
-    const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll :
-                                   (1ll << (quantizedNbBits - 1ll)) - 1ll;
+    const O min = isOutputUnsigned ? 0 : -(1ll << (quantizedNbBits - 1ll));
+    const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll
+                                   : (1ll << (quantizedNbBits - 1ll)) - 1ll;
 
     return clamp(value, min, max);
 }
@@ -77,31 +75,39 @@ void ScalingImpl_cpu_forward_kernel(const float scalingFactor,
                                     const std::size_t quantizedNbBits,
                                     const bool isOutputUnsigned,
                                     std::size_t inputLenght,
-                                    const void* input_,
-                                    void* output_) {
+                                    const void *input_,
+                                    void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor));
 
-        if(quantizedNbBits > 0) {
-            output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned);
+        if (quantizedNbBits > 0) {
+            output[i] = saturate(std::round(output[i]),
+                                 quantizedNbBits,
+                                 isOutputUnsigned);
         }
     }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(ScalingImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::ScalingImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(ScalingImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::ScalingImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(ScalingImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::ScalingImpl_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ */
\ No newline at end of file
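
For context, saturate() above clamps the scaled value to the representable range of the quantized type: [0, 2^n - 1] when unsigned, [-2^(n-1), 2^(n-1) - 1] when signed. A minimal sketch of those bounds for n = 8:

    // Saturation bounds as computed by the Scaling kernel's saturate(),
    // e.g. for quantizedNbBits = 8: signed -> [-128, 127], unsigned -> [0, 255].
    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t bits = 8;
        const long long smin = -(1ll << (bits - 1));    // -128
        const long long smax = (1ll << (bits - 1)) - 1; //  127
        const long long umax = (1ll << bits) - 1;       //  255
        std::printf("signed [%lld, %lld], unsigned [0, %lld]\n", smin, smax, umax);
        return 0;
    }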
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
index ee1c36edecbe50cc1765da59737509a2b6333caf..cae00b0a662fd948bc0466b541a41deabdb59f14 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
@@ -12,22 +12,23 @@
 #ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
 #define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Sigmoid.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op,
-    void(const std::size_t, const void*, void*),
-    void(const std::size_t, const void*, const void*, void*)>;
+using SigmoidImpl_cpu = OperatorImpl_cpu<
+    Sigmoid_Op,
+    void(const std::size_t, const void *, void *),
+    void(const std::size_t, const void *, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
index dfd71ce0a878efbeb779f3a67ad4ccc762bb8363..6f796fdd0bd0ab59b2b0ad930a796c6a7b2e4bf0 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
@@ -19,29 +19,30 @@
 namespace Aidge {
 template <class I, class O>
 void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                    const void* input_,
-                                    void* output_) {
+                                    const void *input_,
+                                    void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
-//#pragma omp parallel for if (inputLenght > 1024)
+    // #pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
-		if (input[i] > I(0)) {
-			output[i] = O(1) / (O(1) + std::exp(-input[i]));
-		} else {
-			output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
-		}
+        if (input[i] > I(0)) {
+            output[i] = O(1) / (O(1) + std::exp(-input[i]));
+        } else {
+            output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
+        }
     }
 }
 
 template <class O, class GI, class GO>
 void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                     const void* output_, const void* grad_output_,
-				     void* grad_input_) {
-    const O* output = static_cast<const O*>(output_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
+                                     const void *output_,
+                                     const void *grad_output_,
+                                     void *grad_input_) {
+    const O *output = static_cast<const O *>(output_);
+    const GO *grad_output = static_cast<const GO *>(grad_output_);
+    GI *grad_input = static_cast<GI *>(grad_input_);
     for (std::size_t i = 0; i < inputLenght; ++i) {
         grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
     }
@@ -49,11 +50,15 @@ void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght,
 
 // Kernels registration to implementation entry point
 REGISTRAR(SigmoidImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::SigmoidImpl_cpu_forward_kernel<float, float>,
+           Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(SigmoidImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>});
-}  // namespace Aidge
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::SigmoidImpl_cpu_forward_kernel<double, double>,
+           Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */
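
For context, the two branches in the forward kernel above are the same function written two ways, sigmoid(x) = 1/(1 + e^-x) = e^x/(1 + e^x); selecting the branch by the sign of x means std::exp only ever sees a non-positive argument and cannot overflow. A minimal sketch:

    // Numerically stable sigmoid: both branches are algebraically equal,
    // but each keeps the exponent non-positive for its half of the domain.
    #include <cmath>
    #include <cstdio>

    double stable_sigmoid(double x) {
        return x > 0.0 ? 1.0 / (1.0 + std::exp(-x))
                       : std::exp(x) / (1.0 + std::exp(x));
    }

    int main() {
        // A naive 1/(1+exp(-x)) would overflow exp() at x = -1000.
        std::printf("%g %g\n", stable_sigmoid(1000.0), stable_sigmoid(-1000.0)); // 1 0
        return 0;
    }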
diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp
index fd98b38d7117eaa14e35fe3cb89abf95b2913997..b224506169078a394f0c577eedf419237a2e3848 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp
@@ -12,29 +12,29 @@
 #ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H__
 #define AIDGE_CPU_OPERATOR_SLICEIMPL_H__
 
+#include <array>
 #include <memory>
 #include <vector>
-#include <array>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Slice.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op,
-    void(const std::vector<std::int64_t>&,
-                            const std::vector<std::int64_t>&,
-                            const std::vector<std::int8_t>&,
-                            const std::vector<std::int64_t>&,
-                            const std::vector<DimSize_t>&,
-                            const void*,
-                            void*)>;
+                                       void(const std::vector<std::int64_t> &,
+                                            const std::vector<std::int64_t> &,
+                                            const std::vector<std::int8_t> &,
+                                            const std::vector<std::int64_t> &,
+                                            const std::vector<DimSize_t> &,
+                                            const void *,
+                                            void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Slice_Op, "cpu", Aidge::SliceImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp
index 1bf4c491723c570fa8bfd1774beca1630d2de9be..5b5396b15de65344a27e5a741b205d71b980431c 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp
@@ -17,57 +17,76 @@
 #include <cstddef>
 #include <iterator>
 
-#include "aidge/utils/Registrar.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
 
-template<class I, class O>
-void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
-                                const std::vector<std::int64_t>& ends,
-                                const std::vector<std::int8_t>& axes,
-                                const std::vector<std::int64_t>& steps,
-                                const std::vector<DimSize_t>& inputDims,
-                                const void* input_,
-                                void* output_)
-{
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+template <class I, class O>
+void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t> &starts,
+                                  const std::vector<std::int64_t> &ends,
+                                  const std::vector<std::int8_t> &axes,
+                                  const std::vector<std::int64_t> &steps,
+                                  const std::vector<DimSize_t> &inputDims,
+                                  const void *input_,
+                                  void *output_) {
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     const std::size_t nbDims = inputDims.size();
     std::vector<DimSize_t> dims = inputDims;
-    DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-    const I* inputAccumulation = input;
-    I* outputAccumulation = nullptr;
+    DimSize_t totalSize = std::accumulate(inputDims.cbegin(),
+                                          inputDims.cend(),
+                                          std::size_t(1),
+                                          std::multiplies<std::size_t>());
+    const I *inputAccumulation = input;
+    I *outputAccumulation = nullptr;
     const std::size_t nbAxes = starts.size();
     for (std::size_t i = 0; i < nbAxes; ++i) {
-        const DimIdx_t axis = axes[i] >= 0 ?
-                                    static_cast<DimIdx_t>(axes[i]) :
-                                    static_cast<DimIdx_t>(axes[i] + static_cast<DimIdx_t>(inputDims.size()));
-        const DimSize_t start = std::min(starts[i] >= 0 ?
-                                                static_cast<DimSize_t>(starts[i]) :
-                                                static_cast<DimSize_t>(starts[i] + static_cast<std::int64_t>(inputDims[axis])),
-                                         dims[axis]-1);
-        const DimSize_t end = ends[i] >= 0 ?
-                                        static_cast<DimSize_t>(ends[i]) :
-                                        static_cast<DimSize_t>(ends[i] + static_cast<std::int64_t>(inputDims[axis]));
+        const DimIdx_t axis =
+            axes[i] >= 0
+                ? static_cast<DimIdx_t>(axes[i])
+                : static_cast<DimIdx_t>(
+                      axes[i] + static_cast<DimIdx_t>(inputDims.size()));
+        const DimSize_t start = std::min(
+            starts[i] >= 0
+                ? static_cast<DimSize_t>(starts[i])
+                : static_cast<DimSize_t>(
+                      starts[i] + static_cast<std::int64_t>(inputDims[axis])),
+            dims[axis] - 1);
+        const DimSize_t end =
+            ends[i] >= 0
+                ? static_cast<DimSize_t>(ends[i])
+                : static_cast<DimSize_t>(
+                      ends[i] + static_cast<std::int64_t>(inputDims[axis]));
         const std::int64_t step = steps[i];
 
-        const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step)));
+        const std::size_t sliceSize = static_cast<std::size_t>(
+            std::ceil((static_cast<float>(end) - static_cast<float>(start)) /
+                      static_cast<float>(step)));
 
         outputAccumulation = new I[totalSize];
-        const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>());
-        const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>());
-        for (std::size_t outer = 0; outer < stride_pre; ++outer)
-        {
-            const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post;
+        const std::size_t stride_pre =
+            std::accumulate(dims.cbegin(),
+                            dims.cbegin() + axis,
+                            1,
+                            std::multiplies<std::size_t>());
+        const std::size_t stride_post =
+            std::accumulate(dims.crbegin(),
+                            dims.crbegin() + nbDims - 1 - axis,
+                            1,
+                            std::multiplies<std::size_t>());
+        for (std::size_t outer = 0; outer < stride_pre; ++outer) {
+            const std::size_t idx_in =
+                outer * stride_post * dims[axis] + start * stride_post;
             const std::size_t idx_out = outer * stride_post * sliceSize;
             std::size_t addedSlices = 0;
-            for (std::size_t inner = 0; inner < sliceSize; ++inner)
-            {
-                std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post),
+            for (std::size_t inner = 0; inner < sliceSize; ++inner) {
+                std::copy_n(std::next(inputAccumulation,
+                                      idx_in + inner * step * stride_post),
                             stride_post,
-                            std::next(outputAccumulation, idx_out + addedSlices * stride_post));
+                            std::next(outputAccumulation,
+                                      idx_out + addedSlices * stride_post));
                 addedSlices++;
             }
         }
@@ -79,7 +98,6 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
             delete[] inputAccumulation;
         }
         inputAccumulation = outputAccumulation;
-
     }
     // Copy the final sliced values from inputAccumulation to output
     std::copy_n(inputAccumulation, totalSize, output);
@@ -89,14 +107,20 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
 }
 
 REGISTRAR(SliceImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::SliceImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(SliceImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::SliceImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(SliceImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::SliceImpl_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ */
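
For context, the slice kernel above follows ONNX-style semantics: a negative axis, start, or end counts back from the end of the corresponding dimension, and each slice yields ceil((end - start)/step) elements along its axis. A minimal sketch of that normalization:

    // ONNX-style slice index normalization, as done per-axis above.
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::int64_t dim = 10, start = -3, end = 10, step = 2;
        const std::int64_t s = start >= 0 ? start : start + dim; // 7
        const std::int64_t e = end >= 0 ? end : end + dim;       // 10
        const std::size_t len = static_cast<std::size_t>(
            std::ceil((double)(e - s) / (double)step));          // ceil(3/2) = 2
        std::printf("slice [%lld, %lld) step %lld -> %zu elements\n",
                    (long long)s, (long long)e, (long long)step, len);
        return 0;
    }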
diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
index ec2c2696ed6e2ba8cad1536519298d9331921c07..56e8d603ef2382b1725f46ba337cd9e65a132277 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
@@ -12,21 +12,22 @@
 #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
 #define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Softmax.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op,
-    void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
+using SoftmaxImpl_cpu = OperatorImpl_cpu<
+    Softmax_Op,
+    void(std::size_t, const std::vector<DimSize_t> &, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
index 07486a48f1b8cf29f6a6ef8aa934a9decdbafef7..ad569c1386f35eadc87707e089bccc802ae48278 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
@@ -12,21 +12,23 @@
 #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_
 
-#include "aidge/utils/Registrar.hpp"
-#include <cstddef>
-#include <cmath>
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/data/Data.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <cmath>
+#include <cstddef>
 
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 
 namespace Aidge {
 template <class I, class O>
-void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
-{
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx,
+                                    const std::vector<DimSize_t> &inputDims,
+                                    const void *input_,
+                                    void *output_) {
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     std::size_t postAxisElems = 1;
     for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
@@ -41,20 +43,23 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
         for (std::size_t j = 0; j < postAxisElems; ++j) {
             I maxVal = input[i * inputDims[axisIdx] * postAxisElems + j];
             for (std::size_t k = 1; k < inputDims[axisIdx]; ++k) {
-                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems +
+                                    k * postAxisElems + j;
                 maxVal = std::max(maxVal, input[inIdx]);
             }
 
             // Calculate sum of exponentials within the axis
             I sumExp = 0;
             for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
-                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems +
+                                    k * postAxisElems + j;
                 sumExp += std::exp(input[inIdx] - maxVal);
             }
 
             // Calculate softmax for the current slice along the axis
-            for (std::size_t  k = 0; k < inputDims[axisIdx]; ++k) {
-                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems + k * postAxisElems + j;
+            for (std::size_t k = 0; k < inputDims[axisIdx]; ++k) {
+                std::size_t inIdx = i * inputDims[axisIdx] * postAxisElems +
+                                    k * postAxisElems + j;
                 output[inIdx] = std::exp(input[inIdx] - maxVal) / sumExp;
             }
         }
@@ -62,14 +67,20 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
 }
 
 REGISTRAR(SoftmaxImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>,
+           nullptr});
 REGISTRAR(SoftmaxImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>,
+           nullptr});
 REGISTRAR(SoftmaxImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::SoftmaxImpl_cpu_forward_kernel<int32_t, int32_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */
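
For context, the kernel above applies the standard max-shift: softmax is invariant under adding a constant to every logit, so subtracting the per-slice maximum keeps every exponent at or below zero and avoids overflow. A minimal sketch:

    // Max-shift softmax: exp(x_k - m) / sum_j exp(x_j - m), m = max_j x_j.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
        const double x[3] = {1000.0, 1001.0, 1002.0}; // naive exp() would overflow
        const double m = *std::max_element(x, x + 3);
        double sum = 0.0;
        for (double v : x) sum += std::exp(v - m);
        for (double v : x) std::printf("%f ", std::exp(v - m) / sum);
        std::printf("\n"); // ~0.090 0.245 0.665
        return 0;
    }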
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp
index dba75d1c58fb19ab2284ee0e98a32bff7ac58557..6f442cf15351c5f5c6da3ccff770fe9e90d241ca 100644
--- a/include/aidge/backend/cpu/operator/SqrtImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_H_
 #define AIDGE_CPU_OPERATOR_SQRTIMPL_H_
 
-#include <cstddef>  // std::size_t
+#include <cstddef> // std::size_t
 #include <memory>
 #include <tuple>
 #include <vector>
@@ -24,12 +24,13 @@
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using SqrtImpl_cpu = OperatorImpl_cpu<Sqrt_Op,
-    void(const std::size_t, const void*, void*),
-    void(const std::size_t, const void*, void*)>;
+using SqrtImpl_cpu =
+    OperatorImpl_cpu<Sqrt_Op,
+                     void(const std::size_t, const void *, void *),
+                     void(const std::size_t, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Sqrt_Op, "cpu", Aidge::SqrtImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
index 0464119cad60742bc58c79da984b30776bc7932f..827383926f6848f1b599e110785299fba053b149 100644
--- a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
@@ -12,8 +12,8 @@
 #ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_
 #define AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_
 
-#include <cmath>    // std::sqrt
-#include <cstddef>  // std::size_t
+#include <cmath>   // std::sqrt
+#include <cstddef> // std::size_t
 
 #include "aidge/utils/Registrar.hpp"
 
@@ -22,11 +22,11 @@
 namespace Aidge {
 template <class I, class O>
 void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                 const void *input_,
+                                 void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i])));
@@ -35,26 +35,33 @@ void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght,
 
 template <class I, class O>
 void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                  const void *input_,
+                                  void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i]))));
+        output[i] =
+            static_cast<O>(0.5 / (std::sqrt(static_cast<float>(input[i]))));
     }
 }
 
 REGISTRAR(SqrtImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<float, float>, Aidge::SqrtImpl_cpu_backward_kernel<float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::SqrtImpl_cpu_forward_kernel<float, float>,
+           Aidge::SqrtImpl_cpu_backward_kernel<float, float>});
 REGISTRAR(SqrtImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<double, double>, Aidge::SqrtImpl_cpu_backward_kernel<double, double>});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::SqrtImpl_cpu_forward_kernel<double, double>,
+           Aidge::SqrtImpl_cpu_backward_kernel<double, double>});
 REGISTRAR(SqrtImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<int32_t, int32_t>, Aidge::SqrtImpl_cpu_backward_kernel<int32_t, int32_t>});
-}  // namespace Aidge
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::SqrtImpl_cpu_forward_kernel<int32_t, int32_t>,
+           Aidge::SqrtImpl_cpu_backward_kernel<int32_t, int32_t>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ */
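
For context, the backward kernel above evaluates the pure derivative d(sqrt(x))/dx = 0.5/sqrt(x) from the forward input; presumably the chain-rule multiplication by the incoming gradient happens elsewhere. A minimal numeric check of the identity:

    // Analytic derivative of sqrt versus a central finite difference.
    #include <cmath>
    #include <cstdio>

    int main() {
        const double x = 4.0;
        const double analytic = 0.5 / std::sqrt(x); // 0.25
        const double h = 1e-6;
        const double numeric = (std::sqrt(x + h) - std::sqrt(x - h)) / (2 * h);
        std::printf("analytic=%f numeric=%f\n", analytic, numeric);
        return 0;
    }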
diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp
index 2bb22bda74edf7db09404fd5613b6714ddcdf513..7fd0f95d0c0083ecee3a8e56b14ba7abefe721f1 100644
--- a/include/aidge/backend/cpu/operator/SubImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl.hpp
@@ -12,21 +12,26 @@
 #ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_
 #define AIDGE_CPU_OPERATOR_SUBIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Sub.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
 using SubImpl_cpu = OperatorImpl_cpu<Sub_Op,
-    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>;
+                                     void(const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const std::vector<std::size_t> &,
+                                          const void *,
+                                          const void *,
+                                          void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
index 0486ed2105b23e95f9cdfcda578e14900fcb2c8e..5db82774af6df10889205dab55d93197845c1f0a 100644
--- a/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
@@ -14,54 +14,64 @@
 
 #include "aidge/utils/Registrar.hpp"
 
-#include <cstddef>     // std::size_t
-#include <cstdint>     // std::int32_t, std::int64_t
+#include <cstddef> // std::size_t
+#include <cstdint> // std::int32_t, std::int64_t
 #include <vector>
 
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 
-
 namespace Aidge {
 template <class I1, class I2, class O>
-void SubImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
-                                const std::vector<std::size_t>& input2Dims,
-                                const std::vector<std::size_t>& outputDims,
-                                const void* input1_,
-                                const void* input2_,
-                                void* output_) {
+void SubImpl_cpu_forward_kernel(const std::vector<std::size_t> &input1Dims,
+                                const std::vector<std::size_t> &input2Dims,
+                                const std::vector<std::size_t> &outputDims,
+                                const void *input1_,
+                                const void *input2_,
+                                void *output_) {
 
-    const I1* input_1 = static_cast<const I1*>(input1_);
-    const I2* input_2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
+    const I1 *input_1 = static_cast<const I1 *>(input1_);
+    const I2 *input_2 = static_cast<const I2 *>(input2_);
+    O *output = static_cast<O *>(output_);
 
     size_t totalElements = 1;
     for (size_t dimSize : outputDims) {
         totalElements *= dimSize;
     }
 
-	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
-	{
-		std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
-		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
-		std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
+    for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) {
+        std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
+        std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
+        std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
         output[oIndex] = input_1[idx1] - input_2[idx2];
-	}
+    }
 }
 
 // Kernels registration to implementation entry point
 REGISTRAR(SubImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, nullptr});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::SubImpl_cpu_forward_kernel<float, float, float>,
+           nullptr});
 REGISTRAR(SubImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr});
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::SubImpl_cpu_forward_kernel<double, double, double>,
+           nullptr});
 REGISTRAR(SubImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
+          {DataType::Int32},
+          {ProdConso::inPlaceModel,
+           Aidge::SubImpl_cpu_forward_kernel<std::int32_t,
+                                             std::int32_t,
+                                             std::int32_t>,
+           nullptr});
 REGISTRAR(SubImpl_cpu,
-    {DataType::Int64},
-    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr});
-}  // namespace Aidge
+          {DataType::Int64},
+          {ProdConso::inPlaceModel,
+           Aidge::SubImpl_cpu_forward_kernel<std::int64_t,
+                                             std::int64_t,
+                                             std::int64_t>,
+           nullptr});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ */
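
For context, the forward kernel above broadcasts by mapping every flat output index to multi-dimensional coordinates, then re-flattening those coordinates against each input's shape, where a dimension of size 1 always maps to index 0. The helpers below are illustrative stand-ins for getMultiDimIndices and getFlattenedIndex, assuming both input shapes have already been padded to the output rank:

    // Illustrative index-based broadcasting (stand-ins, not the Aidge helpers).
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Decompose a flat output index into per-dimension coordinates (row-major).
    std::vector<std::size_t> toCoords(const std::vector<std::size_t>& dims,
                                      std::size_t idx) {
        std::vector<std::size_t> c(dims.size());
        for (std::size_t d = dims.size(); d-- > 0;) {
            c[d] = idx % dims[d];
            idx /= dims[d];
        }
        return c;
    }

    // Flatten coordinates against an input shape; size-1 dims broadcast (index 0).
    std::size_t toFlat(const std::vector<std::size_t>& dims,
                       const std::vector<std::size_t>& c) {
        std::size_t idx = 0;
        for (std::size_t d = 0; d < dims.size(); ++d)
            idx = idx * dims[d] + (dims[d] == 1 ? 0 : c[d]);
        return idx;
    }

    int main() {
        // Output shape {2,3}; input shape {1,3} broadcasts along dim 0.
        const std::vector<std::size_t> out = {2, 3}, in2 = {1, 3};
        for (std::size_t o = 0; o < 6; ++o)
            std::printf("out %zu -> in2 %zu\n", o, toFlat(in2, toCoords(out, o)));
        return 0; // maps 0..5 onto 0,1,2,0,1,2
    }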
diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp
index b1c2217bd29805eca2cf7b7906316756b75a74e0..fd853184435d7e0dfc24fc3d799e4aa7843e490f 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp
@@ -12,22 +12,23 @@
 #ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_
 #define AIDGE_CPU_OPERATOR_TANHIMPL_H_
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Tanh.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include <memory>
 #include <vector>
 
 namespace Aidge {
 // Operator implementation entry point for the backend
-using TanhImpl_cpu = OperatorImpl_cpu<Tanh_Op,
-    void(const std::size_t, const void*, void*),
-    void(const std::size_t, const void*, const void*, void*)>;
+using TanhImpl_cpu = OperatorImpl_cpu<
+    Tanh_Op,
+    void(const std::size_t, const void *, void *),
+    void(const std::size_t, const void *, const void *, void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Tanh_Op, "cpu", Aidge::TanhImpl_cpu::create);
-}  // namespace Aidge
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
index fdcac210484b11f2220dcc2a6813efed503d1913..939237c97d49c26c14e0895c7f1e84b19c8a8ff3 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
@@ -19,13 +19,13 @@
 namespace Aidge {
 template <class I, class O>
 void TanhImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                 const void *input_,
+                                 void *output_) {
 
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
 
-//#pragma omp parallel for if (inputLenght > 1024)
+    // #pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = std::tanh(input[i]);
     }
@@ -33,11 +33,12 @@ void TanhImpl_cpu_forward_kernel(std::size_t inputLenght,
 
 template <class O, class GI, class GO>
 void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                  const void* output_, const void* grad_output_,
-			          void* grad_input_) {
-    const O* output = static_cast<const O*>(output_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
+                                  const void *output_,
+                                  const void *grad_output_,
+                                  void *grad_input_) {
+    const O *output = static_cast<const O *>(output_);
+    const GO *grad_output = static_cast<const GO *>(grad_output_);
+    GI *grad_input = static_cast<GI *>(grad_input_);
     for (std::size_t i = 0; i < inputLenght; ++i) {
         grad_input[i] = (O(1) - output[i] * output[i]) * grad_output[i];
     }
@@ -45,11 +46,15 @@ void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght,
 
 // Kernels registration to implementation entry point
 REGISTRAR(TanhImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<float, float>, Aidge::TanhImpl_cpu_backward_kernel<float, float, float>});
+          {DataType::Float32},
+          {ProdConso::inPlaceModel,
+           Aidge::TanhImpl_cpu_forward_kernel<float, float>,
+           Aidge::TanhImpl_cpu_backward_kernel<float, float, float>});
 REGISTRAR(TanhImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<double, double>, Aidge::TanhImpl_cpu_backward_kernel<double, double, double>});
-}  // namespace Aidge
+          {DataType::Float64},
+          {ProdConso::inPlaceModel,
+           Aidge::TanhImpl_cpu_forward_kernel<double, double>,
+           Aidge::TanhImpl_cpu_backward_kernel<double, double, double>});
+} // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ */
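
The backward kernel above uses the identity tanh'(x) = 1 - tanh²(x), so the
gradient is formed from the forward output alone. A quick numeric check of
that formula against a central finite difference:

    // Sketch: grad_input = (1 - y*y) * grad_output matches d/dx tanh(x).
    #include <cassert>
    #include <cmath>
    #include <iostream>

    int main() {
        const double x = 0.7, gradOut = 1.0, eps = 1e-6;
        const double y = std::tanh(x);
        const double analytic = (1.0 - y * y) * gradOut; // kernel formula
        const double numeric =
            (std::tanh(x + eps) - std::tanh(x - eps)) / (2.0 * eps);
        std::cout << analytic << " vs " << numeric << '\n';
        assert(std::abs(analytic - numeric) < 1e-8);
    }
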
diff --git a/include/aidge/utils/sys_info/CpuVersionInfo.hpp b/include/aidge/utils/sys_info/CpuVersionInfo.hpp
index 887ce839e079349d9d64505f7184831ffc4cf1c2..0a34ad25777ec5388e19f59aa8b9d009d855d47c 100644
--- a/include/aidge/utils/sys_info/CpuVersionInfo.hpp
+++ b/include/aidge/utils/sys_info/CpuVersionInfo.hpp
@@ -12,24 +12,33 @@ namespace Aidge {
 #define GIT_COMMIT_HASH ""
 #endif
 void showCpuVersion() {
-    Log::info("Aidge backend CPU: {} ({}), {} {}", PROJECT_VERSION, GIT_COMMIT_HASH, __DATE__, __TIME__);
-        // Compiler version
-    #if defined(__clang__)
+    Log::info("Aidge backend CPU: {} ({}), {} {}",
+              PROJECT_VERSION,
+              GIT_COMMIT_HASH,
+              __DATE__,
+              __TIME__);
+    // Compiler version
+#if defined(__clang__)
     /* Clang/LLVM. ---------------------------------------------- */
-        Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__);
-    #elif defined(__ICC) || defined(__INTEL_COMPILER)
+    Log::info("Clang/LLVM compiler version: {}.{}.{}\n",
+              __clang_major__,
+              __clang_minor__,
+              __clang_patchlevel__);
+#elif defined(__ICC) || defined(__INTEL_COMPILER)
     /* Intel ICC/ICPC. ------------------------------------------ */
-        Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
-    #elif defined(__GNUC__) || defined(__GNUG__)
+    Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
+#elif defined(__GNUC__) || defined(__GNUG__)
     /* GNU GCC/G++. --------------------------------------------- */
-        Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
-    #elif defined(_MSC_VER)
+    Log::info("GNU GCC/G++ compiler version: {}.{}.{}",
+              __GNUC__,
+              __GNUC_MINOR__,
+              __GNUC_PATCHLEVEL__);
+#elif defined(_MSC_VER)
     /* Microsoft Visual Studio. --------------------------------- */
-        Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
-    #else
-        Log::info("Unknown compiler\n");
-    #endif
-
+    Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
+#else
+    Log::info("Unknown compiler\n");
+#endif
 }
-}  // namespace Aidge
-#endif  // AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
+} // namespace Aidge
+#endif // AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
diff --git a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp
index d5022e1d469ae4171e796baed6c1aa061dd95765..1fee8571de75f66d7fcbd05f2efbc39880d0081b 100644
--- a/python_binding/pybind_cpu.cpp
+++ b/python_binding/pybind_cpu.cpp
@@ -6,14 +6,13 @@ namespace py = pybind11;
 
 namespace Aidge {
 
-void init_cpu_sys_info(py::module& m);
+void init_cpu_sys_info(py::module &m);
 
-void init_Aidge(py::module& m){
+void init_Aidge(py::module &m) {
     init_cpu_sys_info(m);
 }
 
-
 PYBIND11_MODULE(aidge_backend_cpu, m) {
     init_Aidge(m);
 }
-}
+} // namespace Aidge
diff --git a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp
index 573bee3659c65f90935e03c06eff5a2998bb9f5b..6540c09def9ff3bc763af3d00f5346b9b4b4717a 100644
--- a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp
+++ b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp
@@ -1,9 +1,9 @@
-#include <pybind11/pybind11.h>
 #include "aidge/utils/sys_info/CpuVersionInfo.hpp"
+#include <pybind11/pybind11.h>
 
 namespace py = pybind11;
 namespace Aidge {
-void init_cpu_sys_info(py::module& m){
+void init_cpu_sys_info(py::module &m) {
     m.def("show_cpu_version", &showCpuVersion);
 }
-}
+} // namespace Aidge
diff --git a/src/data/Broadcasting.cpp b/src/data/Broadcasting.cpp
index 22977aa772e3f3f4810a59ff1fc024cc21c66bd1..c198e9c6f29bf06156513e2881106a189f2f1e33 100644
--- a/src/data/Broadcasting.cpp
+++ b/src/data/Broadcasting.cpp
@@ -11,17 +11,20 @@
 
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 
-std::vector<std::size_t> Aidge::getBroadcastedDims(const std::vector<std::size_t>& outputDims, const std::vector<std::size_t>& dimsToBroadcast){
+std::vector<std::size_t>
+Aidge::getBroadcastedDims(const std::vector<std::size_t> &outputDims,
+                          const std::vector<std::size_t> &dimsToBroadcast) {
     std::vector<std::size_t> broadcastedDims(outputDims.size(), 1);
-		for(int j=dimsToBroadcast.size()-1; j>=0; --j)
-		{
-			std::size_t idx = outputDims.size() - (dimsToBroadcast.size()-j);
-			broadcastedDims[idx] = dimsToBroadcast[j];
-		}
+    for (int j = dimsToBroadcast.size() - 1; j >= 0; --j) {
+        std::size_t idx = outputDims.size() - (dimsToBroadcast.size() - j);
+        broadcastedDims[idx] = dimsToBroadcast[j];
+    }
     return broadcastedDims;
 }
 
-std::vector<std::size_t> Aidge::getMultiDimIndices(const std::vector<std::size_t>& dimensions, std::size_t idx){
+std::vector<std::size_t>
+Aidge::getMultiDimIndices(const std::vector<std::size_t> &dimensions,
+                          std::size_t idx) {
     std::vector<std::size_t> indices(dimensions.size(), 0);
 
     for (int i = dimensions.size() - 1; i >= 0; --i) {
@@ -32,15 +35,16 @@ std::vector<std::size_t> Aidge::getMultiDimIndices(const std::vector<std::size_t
     return indices;
 }
 
-std::size_t Aidge::getFlattenedIndex(const std::vector<std::size_t>& dimensions, const std::vector<std::size_t>& indices){
+std::size_t
+Aidge::getFlattenedIndex(const std::vector<std::size_t> &dimensions,
+                         const std::vector<std::size_t> &indices) {
     std::size_t flattenedIdx = 0;
     std::size_t stride = 1;
 
     for (int i = dimensions.size() - 1; i >= 0; --i) {
-        std::size_t idx = dimensions[i]>1 ? indices[i] : 0;
+        std::size_t idx = dimensions[i] > 1 ? indices[i] : 0;
         flattenedIdx += idx * stride;
         stride *= dimensions[i];
     }
     return flattenedIdx;
 }
-
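
getBroadcastedDims right-aligns the shape to broadcast inside a result of
outputDims.size() entries, padding the front with 1s. A standalone sketch of
the same loop with a worked example:

    // Sketch of getBroadcastedDims: right-align the shape and pad with 1s.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    static std::vector<std::size_t>
    broadcastedDims(const std::vector<std::size_t> &outDims,
                    const std::vector<std::size_t> &dims) {
        std::vector<std::size_t> result(outDims.size(), 1);
        for (int j = static_cast<int>(dims.size()) - 1; j >= 0; --j)
            result[outDims.size() - (dims.size() - j)] = dims[j];
        return result;
    }

    int main() {
        for (std::size_t d : broadcastedDims({5, 2, 6, 7}, {2, 6, 7}))
            std::cout << d << ' '; // prints: 1 2 6 7
        std::cout << '\n';
    }
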
diff --git a/src/operator/AbsImpl.cpp b/src/operator/AbsImpl.cpp
index 130d6cf7a64e1e75b8ef128974101a477f802caf..a0671bacff84150e15388ba853d4271164503c55 100644
--- a/src/operator/AbsImpl.cpp
+++ b/src/operator/AbsImpl.cpp
@@ -19,22 +19,21 @@
 #include "aidge/operator/Abs.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::AbsImpl_cpu::forward() {
-    const Abs_Op& op = static_cast<const Abs_Op&>(mOp);
+template <> void Aidge::AbsImpl_cpu::forward() {
+    const Abs_Op &op = static_cast<const Abs_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<AbsImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<AbsImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.forward(
-        op.getInput(0)->size(),
-        op.getInput(0)->getImpl()->rawPtr(),
-        op.getOutput(0)->getImpl()->rawPtr()
-    );
+    impl.forward(op.getInput(0)->size(),
+                 op.getInput(0)->getImpl()->rawPtr(),
+                 op.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::AbsImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Abs_Op on backend cpu");
+template <> void Aidge::AbsImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Abs_Op on backend cpu");
 }
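
Every forward()/backward() below follows the same "find the correct kernel
type" pattern: look the kernel up in a registrar keyed on the required spec,
then call it through a type-erased signature. An illustrative sketch of the
idea — this is not the actual Aidge Registrar API, just a minimal stand-in:

    // Minimal stand-in for a type-keyed kernel registry (illustrative only).
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    using ForwardFn = std::function<void(std::size_t, const void *, void *)>;

    static std::map<std::string, ForwardFn> &registry() {
        static std::map<std::string, ForwardFn> r;
        return r;
    }

    template <class I, class O>
    void absForward(std::size_t n, const void *in_, void *out_) {
        const I *in = static_cast<const I *>(in_);
        O *out = static_cast<O *>(out_);
        for (std::size_t i = 0; i < n; ++i)
            out[i] = in[i] < 0 ? -in[i] : in[i];
    }

    int main() {
        registry()["Float32"] = absForward<float, float>; // registration
        const float in[] = {-1.5f, 2.0f, -3.0f};
        float out[3];
        registry().at("Float32")(3, in, out); // lookup + type-erased call
        std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n'; // 1.5 2 3
    }
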
diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp
index 457a0b17e531fac35ff873f9eedca7bbbe82d459..171e5333b4ff33a4bcf038d33da5c4e402a0430e 100644
--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -19,24 +19,30 @@
 #include "aidge/backend/cpu/operator/AddImpl_kernels.hpp"
 #include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Types.h"
 
-template <>
-void  Aidge::AddImpl_cpu::forward() {
-    const Add_Op& op = static_cast<const Add_Op&>(mOp);
+template <> void Aidge::AddImpl_cpu::forward() {
+    const Add_Op &op = static_cast<const Add_Op &>(mOp);
     // Check inputs
     AIDGE_ASSERT(op.getInput(0), "missing input in Add operator");
-    AIDGE_ASSERT(op.getInput(0)->hasImpl(), "cannot run Add forward because the 0-th input has no implementation.");
+    AIDGE_ASSERT(op.getInput(0)->hasImpl(),
+                 "cannot run Add forward because the 0-th input has no "
+                 "implementation.");
     DataType datatypeFirstInput = op.getInput(0)->dataType();
     for (IOIndex_t i = 1; i < op.nbInputs(); ++i) {
         AIDGE_ASSERT(op.getInput(i), "missing input in Add operator");
-        AIDGE_ASSERT(op.getInput(i)->hasImpl(), "cannot run Add forward because the {}-th input has no implementation.", i);
-        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput, "Cannot add inputs with two differents data type.");
+        AIDGE_ASSERT(op.getInput(i)->hasImpl(),
+                     "cannot run Add forward because the {}-th input has no "
+                     "implementation.",
+                     i);
+        AIDGE_ASSERT(op.getInput(i)->dataType() == datatypeFirstInput,
+                     "Cannot add inputs with two different data types.");
     }
 
     // Find the correct kernel type
-    const auto impl = Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<AddImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
@@ -44,29 +50,30 @@ void  Aidge::AddImpl_cpu::forward() {
     // this class to avoid that.
     const std::size_t nbDims = op.getOutput(0)->nbDims();
     std::vector<std::vector<std::size_t>> inputsDims;
-    std::vector<const void*> opInputs;
+    std::vector<const void *> opInputs;
     std::vector<std::shared_ptr<Tensor>> inputsFallback(op.nbInputs());
     for (IOIndex_t i = 0; i < op.nbInputs(); ++i) {
         std::vector<std::size_t> inputDims(nbDims, 1);
         auto dims = op.getInput(i)->dims();
-		for(std::size_t j=dims.size()-1; j+1>0; --j)
-		{
-			std::size_t idx = nbDims - (dims.size()-j);
-			inputDims[idx] = dims[j];
-		}
+        for (std::size_t j = dims.size() - 1; j + 1 > 0; --j) {
+            std::size_t idx = nbDims - (dims.size() - j);
+            inputDims[idx] = dims[j];
+        }
         inputsDims.push_back(inputDims);
-        const auto& input = op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0));
+        const auto &input =
+            op.getInput(i)->refCastFrom(inputsFallback[i], *op.getOutput(0));
         opInputs.push_back(input.getImpl()->rawPtr());
     }
 
     impl.forward(opInputs,
-               inputsDims,
-               op.getOutput(0)->size(),
-               op.getOutput(0)->dims(),
-               getCPUPtr(op.getRawOutput(0)));
+                 inputsDims,
+                 op.getOutput(0)->size(),
+                 op.getOutput(0)->dims(),
+                 getCPUPtr(op.getRawOutput(0)));
 }
 
-template <>
-void Aidge::AddImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Add_Op on backend cpu");
+template <> void Aidge::AddImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Add_Op on backend cpu");
 }
diff --git a/src/operator/AndImpl.cpp b/src/operator/AndImpl.cpp
index 2e0f59769ad86f6e4143ab59d089706e34792244..40f2c8997958d4253aa2efb215d47d2dde759d12 100644
--- a/src/operator/AndImpl.cpp
+++ b/src/operator/AndImpl.cpp
@@ -15,35 +15,37 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/operator/And.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/operator/And.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/AndImpl.hpp"
 #include "aidge/backend/cpu/operator/AndImpl_kernels.hpp"
 
-template <>
-void Aidge::AndImpl_cpu::forward() {
-    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
-    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
-
+template <> void Aidge::AndImpl_cpu::forward() {
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
     // Find the correct kernel type
-    const auto impl = Registrar<AndImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<AndImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(inputDims0,
-        inputDims1,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawInput(1)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 inputDims1,
+                 std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawInput(1)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::AndImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for And_Op on backend cpu");
+template <> void Aidge::AndImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for And_Op on backend cpu");
 }
diff --git a/src/operator/ArgMaxImpl.cpp b/src/operator/ArgMaxImpl.cpp
index b8fb85a7cd86a788cda69307d5ed8f363619f9f0..29bfc2f1d99b27fd82de1136be846f8920dbd853 100644
--- a/src/operator/ArgMaxImpl.cpp
+++ b/src/operator/ArgMaxImpl.cpp
@@ -14,26 +14,27 @@
 #include <memory>
 #include <vector>
 
-#include "aidge/utils/Types.h"
-#include "aidge/operator/ArgMax.hpp"
 #include "aidge/backend/cpu/operator/ArgMaxImpl_kernels.hpp"
+#include "aidge/operator/ArgMax.hpp"
+#include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ArgMaxImpl_cpu::forward() {
-    const ArgMax_Op& op_ = dynamic_cast<const ArgMax_Op&>(mOp);
+template <> void Aidge::ArgMaxImpl_cpu::forward() {
+    const ArgMax_Op &op_ = dynamic_cast<const ArgMax_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<ArgMaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ArgMaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.axis(),
-                op_.selectLastIndex(),
-                op_.getInput(0)->dims(),
-                op_.getInput(0)->getImpl()->rawPtr(),
-                op_.getOutput(0)->getImpl()->rawPtr());
+                 op_.selectLastIndex(),
+                 op_.getInput(0)->dims(),
+                 op_.getInput(0)->getImpl()->rawPtr(),
+                 op_.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::ArgMaxImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ArgMax_Op on backend cpu");
+template <> void Aidge::ArgMaxImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for ArgMax_Op on backend cpu");
 }
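
The dispatch above forwards op_.selectLastIndex() to the kernel: it decides
whether ties keep the first or the last occurrence of the maximum. A sketch
of that rule over a 1-D span (the kernel body is not part of this diff, so
the comparisons shown assume the usual convention):

    // Sketch: argmax with a first-vs-last tie-breaking flag.
    #include <cstddef>
    #include <iostream>

    int main() {
        const float in[] = {3.f, 7.f, 7.f, 2.f};
        const std::size_t n = 4;
        for (bool selectLast : {false, true}) {
            std::size_t best = 0;
            for (std::size_t i = 1; i < n; ++i)
                if (selectLast ? in[i] >= in[best] : in[i] > in[best])
                    best = i;
            std::cout << best << ' '; // prints: 1 2
        }
        std::cout << '\n';
    }
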
diff --git a/src/operator/AtanImpl.cpp b/src/operator/AtanImpl.cpp
index af3393e7eb13fad4b414172edc7d1ab32ffcc573..2a0210aa6c654596899c471b78c054bd9ba2c154 100644
--- a/src/operator/AtanImpl.cpp
+++ b/src/operator/AtanImpl.cpp
@@ -15,40 +15,45 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Atan.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/AtanImpl.hpp"
 #include "aidge/backend/cpu/operator/AtanImpl_kernels.hpp"
 
-template <>
-void Aidge::AtanImpl_cpu::forward() {
-	const Atan_Op& op_ = dynamic_cast<const Atan_Op&>(mOp);
+template <> void Aidge::AtanImpl_cpu::forward() {
+    const Atan_Op &op_ = dynamic_cast<const Atan_Op &>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::AtanImpl_cpu::backward() {
-    const Atan_Op& op_ = dynamic_cast<const Atan_Op&>(mOp);
-    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
+template <> void Aidge::AtanImpl_cpu::backward() {
+    const Atan_Op &op_ = dynamic_cast<const Atan_Op &>(mOp);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
     std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());
 
     // Find the correct kernel type
-    const auto impl = Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<AtanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(),
+                  getCPUPtr(out0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_int0));
 }
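
The backward entry point above hands the kernel the forward output rather
than the input. That is enough for atan: if y = atan(x), then
dy/dx = 1/(1 + x²) = cos²(y), so the gradient can be recovered from y alone.
The kernel body is outside this diff; the identity itself is checked below:

    // Sketch: gradient of atan recovered from the forward output only.
    #include <cassert>
    #include <cmath>
    #include <iostream>

    int main() {
        const double x = 1.3, eps = 1e-6;
        const double y = std::atan(x);
        const double fromOutput = std::cos(y) * std::cos(y); // uses y only
        const double numeric =
            (std::atan(x + eps) - std::atan(x - eps)) / (2.0 * eps);
        std::cout << fromOutput << " vs " << numeric << '\n';
        assert(std::abs(fromOutput - numeric) < 1e-8);
    }
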
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 01a5e8cf1772161f5cf98d3a8bd52f43ac7a1d0d..4f896340a1a5bb3d5231625848974cee43e0d807 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -21,24 +21,24 @@
 #include "aidge/operator/AvgPooling.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::AvgPoolingImpl2D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const AvgPooling_Op<2>&>(mOp);
+template <> void Aidge::AvgPoolingImpl2D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const AvgPooling_Op<2> &>(mOp);
     assert(op_.getInput(0) && "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<AvgPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<AvgPoolingImpl2D_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.strideDims(),
-               op_.kernelDims(),
-               op_.getInput(0)->template dims<4>(),
-               getCPUPtr(op_.getInput(0)),
-               getCPUPtr(op_.getOutput(0)));
+                 op_.kernelDims(),
+                 op_.getInput(0)->template dims<4>(),
+                 getCPUPtr(op_.getInput(0)),
+                 getCPUPtr(op_.getOutput(0)));
 }
 
-template <>
-void Aidge::AvgPoolingImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for AvgPooling_Op<2> on backend cpu");
+template <> void Aidge::AvgPoolingImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for AvgPooling_Op<2> on backend cpu");
 }
-
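
The forward call above passes strideDims(), kernelDims() and the 4-D input
shape straight to the kernel. A naive sketch of 2-D average pooling under the
usual conventions — NCHW layout, no padding, output extent
(in - kernel) / stride + 1 — which are assumptions, since the kernel body is
not in this diff:

    // Naive 2-D average pooling sketch on a single-channel 4x4 input.
    #include <array>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        const std::array<std::size_t, 4> dims{1, 1, 4, 4}; // N, C, H, W
        const std::array<std::size_t, 2> kernel{2, 2}, stride{2, 2};
        std::vector<float> in(16);
        for (std::size_t i = 0; i < 16; ++i)
            in[i] = static_cast<float>(i);

        const std::size_t outH = (dims[2] - kernel[0]) / stride[0] + 1;
        const std::size_t outW = (dims[3] - kernel[1]) / stride[1] + 1;
        std::vector<float> out(outH * outW, 0.f);
        for (std::size_t oy = 0; oy < outH; ++oy)
            for (std::size_t ox = 0; ox < outW; ++ox) {
                float sum = 0.f;
                for (std::size_t ky = 0; ky < kernel[0]; ++ky)
                    for (std::size_t kx = 0; kx < kernel[1]; ++kx)
                        sum += in[(oy * stride[0] + ky) * dims[3] +
                                  ox * stride[1] + kx];
                out[oy * outW + ox] = sum / (kernel[0] * kernel[1]);
            }
        for (float v : out)
            std::cout << v << ' '; // 2.5 4.5 10.5 12.5
        std::cout << '\n';
    }
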
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index 9f1d986e63f14e6038c80054e5e3bc631ec24224..4781b0d61f63b7651c3076c39356f6f5b2768236 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -11,19 +11,17 @@
 
 #include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
 
-
 #include <numeric> // std::accumulate
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/BatchNorm.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/BatchNormImpl_kernels.hpp"
 
-template <>
-void Aidge::BatchNormImpl2D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const BatchNorm_Op<2>&>(mOp);
+template <> void Aidge::BatchNormImpl2D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const BatchNorm_Op<2> &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 for BatchNorm Operator");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 for BatchNorm Operator");
     AIDGE_ASSERT(op_.getInput(2), "missing input #2 for BatchNorm Operator");
@@ -33,22 +31,24 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
     AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, "");
 
     // Find the correct kernel type
-    const auto impl = Registrar<BatchNormImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<BatchNormImpl2D_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.epsilon(),
-            op_.momentum(),
-            op_.getInput(0)->template dims<4>(),
-            getCPUPtr(op_.getRawInput(0)),
-            getCPUPtr(op_.getRawInput(1)),
-            getCPUPtr(op_.getRawInput(2)),
-            getCPUPtr(op_.getRawInput(3)),
-            getCPUPtr(op_.getRawInput(4)),
-            getCPUPtr(op_.getRawOutput(0)),
-            true);
+                 op_.momentum(),
+                 op_.getInput(0)->template dims<4>(),
+                 getCPUPtr(op_.getRawInput(0)),
+                 getCPUPtr(op_.getRawInput(1)),
+                 getCPUPtr(op_.getRawInput(2)),
+                 getCPUPtr(op_.getRawInput(3)),
+                 getCPUPtr(op_.getRawInput(4)),
+                 getCPUPtr(op_.getRawOutput(0)),
+                 true);
 }
 
-template <>
-void Aidge::BatchNormImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BatchNorm_Op<2> on backend cpu");
+template <> void Aidge::BatchNormImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for BatchNorm_Op<2> on backend cpu");
 }
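
The forward call above wires epsilon(), momentum() and inputs #0-#4 into the
kernel. Per channel, batch-norm inference reduces to
y = (x - mean) / sqrt(var + eps) * scale + shift; which of inputs #1-#4
carries scale/shift/mean/var is an assumption here, as only the dispatch is
shown:

    // Sketch of the per-channel batch-norm inference formula.
    #include <cmath>
    #include <iostream>

    int main() {
        const float eps = 1e-5f, scale = 2.0f, shift = 1.0f;
        const float mean = 4.0f, var = 9.0f;
        const float x[] = {1.0f, 4.0f, 7.0f};
        for (float v : x)
            std::cout << (v - mean) / std::sqrt(var + eps) * scale + shift
                      << ' '; // ~ -1 1 3
        std::cout << '\n';
    }
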
diff --git a/src/operator/BitShiftImpl.cpp b/src/operator/BitShiftImpl.cpp
index 1e0f79fd29fd140f0b41c64d245b9b240da80028..80725e8201407444d4b6b1a26e3809189dd8b4ae 100644
--- a/src/operator/BitShiftImpl.cpp
+++ b/src/operator/BitShiftImpl.cpp
@@ -10,48 +10,46 @@
  ********************************************************************************/
 
 #include <cassert>
-#include <chrono>  // std::chrono::milliseconds
+#include <chrono> // std::chrono::milliseconds
 #include <numeric>
-#include <thread>  // std::this_thread::sleep_for
+#include <thread> // std::this_thread::sleep_for
 #include <vector>
 
-
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/BitShiftImpl.hpp"
 #include "aidge/backend/cpu/operator/BitShiftImpl_kernels.hpp"
 
-template<>
-void Aidge::BitShiftImpl_cpu::forward() {
-
-    const auto& op_ = dynamic_cast<const BitShift_Op&>(mOp);
+template <> void Aidge::BitShiftImpl_cpu::forward() {
 
+    const auto &op_ = dynamic_cast<const BitShift_Op &>(mOp);
 
-    const auto impl = Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<BitShiftImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
-
-    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
-    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
     BitShift_Op::BitShiftDirection direction = op_.direction();
 
     // Call kernel
-    impl.forward(
-        direction,
-        inputDims0,
-        inputDims1,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawInput(1)),
-        getCPUPtr(mOp.getRawOutput(0)));
-        
+    impl.forward(direction,
+                 inputDims0,
+                 inputDims1,
+                 std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawInput(1)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::BitShiftImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for BitShift_Op on backend cpu");
+template <> void Aidge::BitShiftImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for BitShift_Op on backend cpu");
 }
\ No newline at end of file
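
The direction is read once from op_.direction() and handed to the kernel
along with the broadcast shapes. An elementwise sketch of the shift itself
(the enum below is an illustrative stand-in for
BitShift_Op::BitShiftDirection):

    // Sketch: elementwise shift with the direction chosen up front.
    #include <cstdint>
    #include <iostream>

    enum class ShiftDirection { left, right };

    int main() {
        const ShiftDirection dir = ShiftDirection::right;
        const std::int32_t in[] = {8, 16, 32};
        const std::int32_t shift = 2;
        for (std::int32_t v : in)
            std::cout << (dir == ShiftDirection::left ? v << shift : v >> shift)
                      << ' '; // prints: 2 4 8
        std::cout << '\n';
    }
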
diff --git a/src/operator/ClipImpl.cpp b/src/operator/ClipImpl.cpp
index 931d25426a8f6e08363bfc08d23f1714e934634c..cf539768c94443ffda5eae01a0e5c0f4c1b347ee 100644
--- a/src/operator/ClipImpl.cpp
+++ b/src/operator/ClipImpl.cpp
@@ -12,56 +12,54 @@
 #include <memory>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Clip.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/ClipImpl.hpp"
 #include "aidge/backend/cpu/operator/ClipImpl_kernels.hpp"
 
-template<>
-void Aidge::ClipImpl_cpu::forward() {
+template <> void Aidge::ClipImpl_cpu::forward() {
 
-	const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
+    const Clip_Op &op_ = dynamic_cast<const Clip_Op &>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
     /*AIDGE_ASSERT(in1, "missing input #1 -> Min value empty shape Tensor");
     AIDGE_ASSERT(in2, "missing input #2 -> Max value empty shape Tensor");*/
     // Find the correct kernel type
-    const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.forward(
-       op_.min(),
-       op_.max(),
-       getCPUPtr(mOp.getRawInput(0)), 
-       in0->size(), 
-       getCPUPtr(mOp.getRawOutput(0))
-    );
+    impl.forward(op_.min(),
+                 op_.max(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 in0->size(),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template<>
-void Aidge::ClipImpl_cpu::backward() {
+template <> void Aidge::ClipImpl_cpu::backward() {
 
-    const Clip_Op& op_ = dynamic_cast<const Clip_Op&>(mOp);
-    std::shared_ptr<Tensor> in0  = op_.getInput(0);
-    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
+    const Clip_Op &op_ = dynamic_cast<const Clip_Op &>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad();
-    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();    
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
-    
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());
+
     // Find the correct kernel type
-    const auto impl = Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ClipImpl_cpu>::create(getBestMatch(getRequiredSpec()));
     // Call kernel
-    impl.backward(
-        op_.min(),
-        op_.max(),
-        gra_in0->size(), 
-        getCPUPtr(in0), 
-        getCPUPtr(gra_out0), 
-        getCPUPtr(gra_in0)
-    );
+    impl.backward(op_.min(),
+                  op_.max(),
+                  gra_in0->size(),
+                  getCPUPtr(in0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_in0));
 }
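
forward() clamps with op_.min()/op_.max(); backward() receives the input, the
output gradient and the input gradient. A sketch of the usual pair, where the
gradient passes only strictly inside [min, max] — that masking rule is a
common convention assumed here, since the kernel body is not part of this
diff:

    // Sketch of a clip forward/backward pair with gradient masking.
    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    int main() {
        const double lo = 0.0, hi = 1.0;
        const double x[] = {-0.5, 0.25, 0.75, 1.5};
        const double gradOut[] = {1.0, 1.0, 1.0, 1.0};
        double y[4], gradIn[4];
        for (std::size_t i = 0; i < 4; ++i) {
            y[i] = std::min(std::max(x[i], lo), hi);                 // forward
            gradIn[i] = (x[i] > lo && x[i] < hi) ? gradOut[i] : 0.0; // backward
        }
        for (std::size_t i = 0; i < 4; ++i)
            std::cout << y[i] << '/' << gradIn[i] << ' ';
        std::cout << '\n'; // 0/0 0.25/1 0.75/1 1/0
    }
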
diff --git a/src/operator/ConstantOfShapeImpl.cpp b/src/operator/ConstantOfShapeImpl.cpp
index 16e4b762ba04e5f01bfccf965f6de3650fa2e734..03b5a9726a4a132e279cf53b034db496853afc31 100644
--- a/src/operator/ConstantOfShapeImpl.cpp
+++ b/src/operator/ConstantOfShapeImpl.cpp
@@ -23,22 +23,24 @@
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ConstantOfShapeImpl_cpu::forward() {
-  const ConstantOfShape_Op &op_ = static_cast<const ConstantOfShape_Op &>(mOp);
-  // Check if input is provided
-  AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__);
+template <> void Aidge::ConstantOfShapeImpl_cpu::forward() {
+    const ConstantOfShape_Op &op_ =
+        static_cast<const ConstantOfShape_Op &>(mOp);
+    // Check if input is provided
+    AIDGE_ASSERT(op_.getInput(0), "{} : Missing input 0", __func__);
 
     // Find the correct kernel type
-    const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<ConstantOfShapeImpl_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.getOutput(0)->dims(),
-             op_.value(), 
-             op_.getOutput(0)->getImpl()->rawPtr());
+                 op_.value(),
+                 op_.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::ConstantOfShapeImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConstantOfShape_Op on backend cpu");
+template <> void Aidge::ConstantOfShapeImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for ConstantOfShape_Op on backend cpu");
 }
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index d86bba8d1abf348eb25e2d9c69d04b5c33a8a176..35f0b8ad484ab8e165106ee5b4dcb7385d9511ae 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -21,78 +21,98 @@
 #include "aidge/utils/Log.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp);
+template <> void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const ConvDepthWise_Op<1> &>(mOp);
 
-    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
-    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(0),
+                 "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1),
+                 "missing input #1 in ConvDepthWise Operator");
 
-    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 4-dimensions tensors only");
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3),
+                 "support for 3-dimensional tensors only");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ConvDepthWiseImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<ConvDepthWiseImpl1D_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
-    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto &input2 =
+        (op_.getInput(2))
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0))
+            : Tensor();
 
     // Call kernel
-    impl.forward(op_.strideDims(),
-                op_.dilationDims(),
-                op_.kernelDims(), // Conv attributes
-               op_.getInput(0)->template dims<3>(), // input dimensions
-               input0.getImpl()->rawPtr(), // input
-               input1.getImpl()->rawPtr(), // weight
-               (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
-               getCPUPtr(mOp.getRawOutput(0)) // output
-            );
+    impl.forward(
+        op_.strideDims(),
+        op_.dilationDims(),
+        op_.kernelDims(),                    // Conv attributes
+        op_.getInput(0)->template dims<3>(), // input dimensions
+        input0.getImpl()->rawPtr(),          // input
+        input1.getImpl()->rawPtr(),          // weight
+        (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
+        getCPUPtr(mOp.getRawOutput(0))                            // output
+    );
 }
 
-template <>
-void Aidge::ConvDepthWiseImpl1D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<1> on backend cpu");
+template <> void Aidge::ConvDepthWiseImpl1D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for ConvDepthWise_Op<1> on backend cpu");
 }
 
-template <>
-void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<2>&>(mOp);
+template <> void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const ConvDepthWise_Op<2> &>(mOp);
 
-    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
-    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
-    AIDGE_ASSERT(op_.getInput(2), "missing input #2 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(0),
+                 "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1),
+                 "missing input #1 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(2),
+                 "missing input #2 in ConvDepthWise Operator");
 
-    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only");
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4),
+                 "support for 4-dimensional tensors only");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ConvDepthWiseImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<ConvDepthWiseImpl2D_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
-        // Convert input data (no overhead if not needed!)
+    // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
-    const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto &input2 =
+        op_.getInput(2)
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0))
+            : Tensor();
 
     // Call kernel
     impl.forward(op_.strideDims(),
-            op_.dilationDims(),
-            op_.kernelDims(),
-            op_.getInput(0)->template dims<4>(),
-            input0.getImpl()->rawPtr(),
-            input1.getImpl()->rawPtr(),
-            op_.getInput(2) ?  input2.getImpl()->rawPtr() : nullptr,
-            getCPUPtr(op_.getRawOutput(0)));
+                 op_.dilationDims(),
+                 op_.kernelDims(),
+                 op_.getInput(0)->template dims<4>(),
+                 input0.getImpl()->rawPtr(),
+                 input1.getImpl()->rawPtr(),
+                 op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr,
+                 getCPUPtr(op_.getRawOutput(0)));
 }
 
-template <>
-void Aidge::ConvDepthWiseImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ConvDepthWise_Op<2> on backend cpu");
+template <> void Aidge::ConvDepthWiseImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for ConvDepthWise_Op<2> on backend cpu");
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index fdfe19fbf4bf3e71c86aa28b966cfb21a1b5ba40..8382c48e65d265c93a49c43f5edc927b98e877b6 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -22,78 +22,92 @@
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ConvImpl1D_cpu::forward() {
-    const auto& op_ = static_cast<const Conv_Op<1>&>(mOp);
+template <> void Aidge::ConvImpl1D_cpu::forward() {
+    const auto &op_ = static_cast<const Conv_Op<1> &>(mOp);
 
     // FIXME: uncomment the following code once memory handling will work
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ConvImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
-    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto &input2 =
+        (op_.getInput(2))
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0))
+            : Tensor();
 
     // Call kernel
-    impl.forward(op_.strideDims(),
-            op_.dilationDims(),
-            op_.kernelDims(),
-            op_.getInput(0)->template dims<3>(), // input dimensions
-            dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels
-            input0.getImpl()->rawPtr(), // input
-            input1.getImpl()->rawPtr(), // weight
-            op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
-            getCPUPtr(mOp.getRawOutput(0)) // output
-            );
+    impl.forward(
+        op_.strideDims(),
+        op_.dilationDims(),
+        op_.kernelDims(),
+        op_.getInput(0)->template dims<3>(), // input dimensions
+        dynamic_cast<const Conv_Op<1> &>(mOp).outChannels(),    // outChannels
+        input0.getImpl()->rawPtr(),                             // input
+        input1.getImpl()->rawPtr(),                             // weight
+        op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+        getCPUPtr(mOp.getRawOutput(0))                          // output
+    );
 }
 
-template <>
-void Aidge::ConvImpl1D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<1> on backend cpu");
+template <> void Aidge::ConvImpl1D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Conv_Op<1> on backend cpu");
 }
 
-template <>
-void Aidge::ConvImpl2D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp);
+template <> void Aidge::ConvImpl2D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const Conv_Op<2> &>(mOp);
 
     // FIXME: uncomment the following code once memory handling will work
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ConvImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
-    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto &input2 =
+        (op_.getInput(2))
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0))
+            : Tensor();
 
     // Call kernel
-    impl.forward(op_.strideDims(),
-            op_.dilationDims(),
-            op_.kernelDims(),
-            op_.getInput(0)->template dims<4>(), // input dimensions
-            dynamic_cast<const Conv_Op<2>&>(mOp).outChannels(), // outChannels
-            input0.getImpl()->rawPtr(), // input
-            input1.getImpl()->rawPtr(), // weight
-            op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
-            getCPUPtr(mOp.getRawOutput(0)) // output
-            );
+    impl.forward(
+        op_.strideDims(),
+        op_.dilationDims(),
+        op_.kernelDims(),
+        op_.getInput(0)->template dims<4>(), // input dimensions
+        dynamic_cast<const Conv_Op<2> &>(mOp).outChannels(),    // outChannels
+        input0.getImpl()->rawPtr(),                             // input
+        input1.getImpl()->rawPtr(),                             // weight
+        op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+        getCPUPtr(mOp.getRawOutput(0))                          // output
+    );
 }
 
-template <>
-void Aidge::ConvImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Conv_Op<2> on backend cpu");
+template <> void Aidge::ConvImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Conv_Op<2> on backend cpu");
 }
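
Both forward() overloads above pass strideDims(), dilationDims(),
kernelDims(), outChannels() and an optional bias pointer to the kernel. A
naive 1-D convolution sketch under conventional layout assumptions — weights
as [outCh][inCh][kW], output extent (in - dilation*(kW-1) - 1)/stride + 1;
the kernel body itself is outside this diff:

    // Naive 1-D convolution sketch: 1 input channel, 1 output channel.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
        const std::size_t inCh = 1, outCh = 1, inW = 5;
        const std::size_t kW = 3, stride = 1, dilation = 1;
        const std::vector<float> in{1, 2, 3, 4, 5};
        const std::vector<float> weight{1, 0, -1}; // [outCh][inCh][kW]
        const std::vector<float> bias{0.5f};
        const std::size_t outW = (inW - dilation * (kW - 1) - 1) / stride + 1;

        std::vector<float> out(outCh * outW);
        for (std::size_t oc = 0; oc < outCh; ++oc)
            for (std::size_t ox = 0; ox < outW; ++ox) {
                float acc = bias[oc];
                for (std::size_t ic = 0; ic < inCh; ++ic)
                    for (std::size_t k = 0; k < kW; ++k)
                        acc += weight[(oc * inCh + ic) * kW + k] *
                               in[ic * inW + ox * stride + k * dilation];
                out[oc * outW + ox] = acc;
            }
        for (float v : out)
            std::cout << v << ' '; // -1.5 -1.5 -1.5
        std::cout << '\n';
    }
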
diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp
index 135b32b5005a961e55910e758f9b7102ca51b63c..0bf0b1678cfce0112b4b9914f40a9954ff16c966 100644
--- a/src/operator/DivImpl.cpp
+++ b/src/operator/DivImpl.cpp
@@ -19,20 +19,20 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::DivImpl_cpu::forward() {
+template <> void Aidge::DivImpl_cpu::forward() {
     // Find the correct kernel type
     // auto kernelFunc = Registrar<DivImplForward_cpu>::create({
     //     std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
     //     std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
     //     std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
 
-    // const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+    // const std::vector<std::size_t> inputDims0 =
+    // getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
     //                                                                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
-    // const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+    // const std::vector<std::size_t> inputDims1 =
+    // getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
     //                                                                std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
-
     // auto a = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
     // auto b = std::static_pointer_cast<Tensor>(mOp.getRawInput(1));
 
@@ -44,41 +44,54 @@ void Aidge::DivImpl_cpu::forward() {
     //     getCPUPtr(mOp.getRawInput(1)),
     //     getCPUPtr(mOp.getRawOutput(0)));
 
-/////////////////////////////////////////////////////////////////
+    /////////////////////////////////////////////////////////////////
 
     // [5,2,1,7] & [2,6,7]
     // 1. Same number of dimensions -> [5,2,1,7] & [1,2,6,7]
     // 2. Find the highest equal dimension -> 3
-    //    Exception: if the first diverging dimension is the last one, then -> 4 (dims.size())
+    //    Exception: if the first diverging dimension is the last one, then ->
+    //    4 (dims.size())
     // 3. Compute the highest number of contiguous data -> 7
     // 4. Compute stride and offset step for the broadcast mechanism
     // 5. Call a simple kernel
-    const auto& opTensor = static_cast<const Div_Op&>(mOp);
+    const auto &opTensor = static_cast<const Div_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<DivImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Compute compatible input dimensions
-    std::vector<std::size_t>        dims0   = opTensor.getInput(0)->dims();
-    std::vector<std::size_t>        dims1   = opTensor.getInput(1)->dims();
-    const std::vector<std::size_t>& outDims = opTensor.getOutput(0)->dims();
+    std::vector<std::size_t> dims0 = opTensor.getInput(0)->dims();
+    std::vector<std::size_t> dims1 = opTensor.getInput(1)->dims();
+    const std::vector<std::size_t> &outDims = opTensor.getOutput(0)->dims();
 
-    // special case for equal dimensions, the kernel is called with the entire arrays at once
+    // special case for equal dimensions: the kernel is called with the
+    // entire arrays at once
     if (dims0 == dims1) {
-        const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin(), dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
-        impl.forward(input0_contiguous_size, input0_contiguous_size, input0_contiguous_size,
-                    getCPUPtr(mOp.getRawInput(0)),
-                    getCPUPtr(mOp.getRawInput(1)),
-                    getCPUPtr(mOp.getRawOutput(0)));
+        const std::size_t input0_contiguous_size =
+            std::accumulate(dims0.cbegin(),
+                            dims0.cend(),
+                            std::size_t(1),
+                            std::multiplies<std::size_t>());
+        impl.forward(input0_contiguous_size,
+                     input0_contiguous_size,
+                     input0_contiguous_size,
+                     getCPUPtr(mOp.getRawInput(0)),
+                     getCPUPtr(mOp.getRawInput(1)),
+                     getCPUPtr(mOp.getRawOutput(0)));
         return;
     }
 
-    // set dimensions to be of equal size by filling the smallest one with ones.
+    // set dimensions to be of equal size by filling the smallest one with
+    // ones.
     if (dims0.size() > dims1.size()) {
-        dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
-    }
-    else if (dims1.size() > dims0.size()) {
-        dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
+        dims1.insert(dims1.cbegin(),
+                     dims0.size() - dims1.size(),
+                     std::size_t(1));
+    } else if (dims1.size() > dims0.size()) {
+        dims0.insert(dims0.cbegin(),
+                     dims1.size() - dims0.size(),
+                     std::size_t(1));
     }
 
     const std::size_t nbDims = dims0.size();
@@ -87,11 +100,14 @@ void Aidge::DivImpl_cpu::forward() {
     // std::size_t contiguousIdx = nbDims - 1;
     std::size_t contiguousIdx = nbDims;
     while (contiguousIdx-- > 0) {
-    // for (; contiguousIdx+1 > 0; --contiguousIdx) {
+        // for (; contiguousIdx+1 > 0; --contiguousIdx) {
         if (dims0[contiguousIdx] != dims1[contiguousIdx]) {
-            if (contiguousIdx == (nbDims -1)) { // last dimensions of one of the input Tensor are of size 1
-                const std::vector<std::size_t>& dims = (dims0[contiguousIdx] == 1) ? dims0 : dims1;
-                while ((contiguousIdx+1 > 0) && (dims[contiguousIdx] == 1)) {
+            if (contiguousIdx ==
+                (nbDims - 1)) { // last dimensions of one of the input Tensors
+                                // are of size 1
+                const std::vector<std::size_t> &dims =
+                    (dims0[contiguousIdx] == 1) ? dims0 : dims1;
+                while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) {
                     --contiguousIdx;
                 }
             }
@@ -101,21 +117,41 @@ void Aidge::DivImpl_cpu::forward() {
     ++contiguousIdx;
 
     // Compute the highest number of contiguous data for each Tensor
-    const std::size_t input0_contiguous_size = std::accumulate(dims0.cbegin()+contiguousIdx, dims0.cend(), std::size_t(1), std::multiplies<std::size_t>());
-    const std::size_t input1_contiguous_size = std::accumulate(dims1.cbegin()+contiguousIdx, dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
-    const std::size_t output_contiguous_size = std::accumulate(outDims.cbegin()+contiguousIdx, outDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    const std::size_t input0_contiguous_size =
+        std::accumulate(dims0.cbegin() + contiguousIdx,
+                        dims0.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
+    const std::size_t input1_contiguous_size =
+        std::accumulate(dims1.cbegin() + contiguousIdx,
+                        dims1.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
+    const std::size_t output_contiguous_size =
+        std::accumulate(outDims.cbegin() + contiguousIdx,
+                        outDims.cend(),
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
 
     // initialize strides to iterate through data because of broadcasting
-    std::unique_ptr<std::int32_t[]> stride_post0 = std::make_unique<std::int32_t[]>(contiguousIdx);
-    std::unique_ptr<std::int32_t[]> stride_post1 = std::make_unique<std::int32_t[]>(contiguousIdx);
-    std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(contiguousIdx);
-    std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(contiguousIdx);
+    std::unique_ptr<std::int32_t[]> stride_post0 =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    std::unique_ptr<std::int32_t[]> stride_post1 =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    std::unique_ptr<std::int32_t[]> stride_step0 =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    std::unique_ptr<std::int32_t[]> stride_step1 =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
     if (contiguousIdx > 0) {
         stride_post0[contiguousIdx - 1] = 1;
         stride_post1[contiguousIdx - 1] = 1;
-        for (std::size_t i = contiguousIdx - 2; i != static_cast<std::size_t>(-1); --i) {
-            stride_post0[i] = stride_post0[i+1]*static_cast<std::int32_t>(dims0[i+1]);
-            stride_post1[i] = stride_post1[i+1]*static_cast<std::int32_t>(dims1[i+1]);
+        for (std::size_t i = contiguousIdx - 2;
+             i != static_cast<std::size_t>(-1);
+             --i) {
+            stride_post0[i] =
+                stride_post0[i + 1] * static_cast<std::int32_t>(dims0[i + 1]);
+            stride_post1[i] =
+                stride_post1[i + 1] * static_cast<std::int32_t>(dims1[i + 1]);
         }
         for (std::size_t i = 0; i != contiguousIdx; ++i) {
             stride_step0[i] = (dims0[i] == 1) ? 1 - stride_post0[i] : 1;
@@ -128,17 +164,24 @@ void Aidge::DivImpl_cpu::forward() {
     std::size_t offsetIn1 = 0;
     std::size_t offsetOut = 0;
 
-
     std::size_t dim = contiguousIdx - 1;
-    const std::size_t nbStacks = std::accumulate(outDims.cbegin(), outDims.cbegin() + contiguousIdx, std::size_t(1), std::multiplies<std::size_t>());
+    const std::size_t nbStacks =
+        std::accumulate(outDims.cbegin(),
+                        outDims.cbegin() + contiguousIdx,
+                        std::size_t(1),
+                        std::multiplies<std::size_t>());
     for (std::size_t stack = 0; stack < nbStacks;) {
-        impl.forward(input0_contiguous_size, input1_contiguous_size, output_contiguous_size,
-                    getCPUPtr(mOp.getRawInput(0), offsetIn0*input0_contiguous_size),
-                    getCPUPtr(mOp.getRawInput(1), offsetIn1*input1_contiguous_size),
-                    getCPUPtr(mOp.getRawOutput(0), offsetOut*output_contiguous_size));
+        impl.forward(
+            input0_contiguous_size,
+            input1_contiguous_size,
+            output_contiguous_size,
+            getCPUPtr(mOp.getRawInput(0), offsetIn0 * input0_contiguous_size),
+            getCPUPtr(mOp.getRawInput(1), offsetIn1 * input1_contiguous_size),
+            getCPUPtr(mOp.getRawOutput(0),
+                      offsetOut * output_contiguous_size));
         if (++stack < nbStacks) {
             std::size_t tmp_stack = stack;
-            while(tmp_stack % outDims[dim] == 0) {
+            while (tmp_stack % outDims[dim] == 0) {
                 tmp_stack /= outDims[dim];
                 dim--;
             }
@@ -150,7 +193,8 @@ void Aidge::DivImpl_cpu::forward() {
     }
 }
 
-template <>
-void Aidge::DivImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Div_Op on backend cpu");
+template <> void Aidge::DivImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Div_Op on backend cpu");
 }
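
For readers tracing the broadcast bookkeeping above: stride_post holds each input's row-major strides over the non-contiguous leading dimensions, and stride_step encodes how far an offset jumps when a coordinate rolls over, with broadcast (size-1) axes effectively frozen. A naive reference version of the same computation, shown as a sketch under the assumption that both input shapes were already right-padded with 1s to the output rank (as forward() arranges), makes the index mapping explicit:

    #include <cstddef>
    #include <vector>

    // Assumes dims0 and dims1 are right-padded with 1s to the rank of
    // outDims, and that nbDims >= 1.
    void naiveBroadcastDiv(const std::vector<std::size_t> &dims0,
                           const std::vector<std::size_t> &dims1,
                           const std::vector<std::size_t> &outDims,
                           const float *in0, const float *in1, float *out) {
        const std::size_t nbDims = outDims.size();
        // Row-major strides of each input; a broadcast axis (extent 1)
        // contributes nothing, freezing that coordinate for this input.
        std::vector<std::size_t> s0(nbDims, 1), s1(nbDims, 1);
        for (std::size_t i = nbDims - 1; i > 0; --i) {
            s0[i - 1] = s0[i] * dims0[i];
            s1[i - 1] = s1[i] * dims1[i];
        }
        std::size_t total = 1;
        for (const std::size_t d : outDims)
            total *= d;

        for (std::size_t flat = 0; flat < total; ++flat) {
            // Decompose the flat output index into coordinates and fold
            // them back into each input's offset.
            std::size_t rem = flat, off0 = 0, off1 = 0;
            for (std::size_t i = nbDims; i-- > 0;) {
                const std::size_t coord = rem % outDims[i];
                rem /= outDims[i];
                off0 += ((dims0[i] == 1) ? 0 : coord) * s0[i];
                off1 += ((dims1[i] == 1) ? 0 : coord) * s1[i];
            }
            out[flat] = in0[off0] / in1[off1];
        }
    }

The production loop above is equivalent but amortizes this arithmetic: it processes whole contiguous suffixes per kernel call and updates the two offsets incrementally with the precomputed stride steps.
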
diff --git a/src/operator/ErfImpl.cpp b/src/operator/ErfImpl.cpp
index 42c6ce878abe227f74d7df4a9bf31ebc4c63eb88..1be68647af052a08b9a2bae687caa1f91aa402c1 100644
--- a/src/operator/ErfImpl.cpp
+++ b/src/operator/ErfImpl.cpp
@@ -19,22 +19,21 @@
 #include "aidge/operator/Erf.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ErfImpl_cpu::forward() {
-    const Erf_Op& op = static_cast<const Erf_Op&>(mOp);
+template <> void Aidge::ErfImpl_cpu::forward() {
+    const Erf_Op &op = static_cast<const Erf_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<ErfImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ErfImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.forward(
-        op.getInput(0)->size(),
-        op.getInput(0)->getImpl()->rawPtr(),
-        op.getOutput(0)->getImpl()->rawPtr()
-    );
+    impl.forward(op.getInput(0)->size(),
+                 op.getInput(0)->getImpl()->rawPtr(),
+                 op.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::ErfImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Erf_Op on backend cpu");
+template <> void Aidge::ErfImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Erf_Op on backend cpu");
 }
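
Every implementation in this patch follows the same two-step dispatch visible here: query a registrar for the kernel best matching the current tensor specs, then invoke it on raw pointers. A minimal self-contained sketch of that pattern (names such as KernelRegistry and the string spec are illustrative, not the actual Aidge Registrar API):

    #include <cstddef>
    #include <functional>
    #include <map>
    #include <stdexcept>
    #include <string>
    #include <utility>

    // Kernel signature kept deliberately small: (nb elements, input, output).
    using Kernel = std::function<void(std::size_t, const void *, void *)>;

    class KernelRegistry {
      public:
        // Called by each kernel's translation unit at static-init time.
        static void add(const std::string &spec, Kernel k) {
            table()[spec] = std::move(k);
        }
        // Called by forward()/backward() once the best spec is known.
        static Kernel create(const std::string &spec) {
            const auto it = table().find(spec);
            if (it == table().end())
                throw std::runtime_error("no kernel registered for " + spec);
            return it->second;
        }

      private:
        // A function-local static avoids static-initialization-order issues
        // between self-registering kernels in different translation units.
        static std::map<std::string, Kernel> &table() {
            static std::map<std::string, Kernel> t;
            return t;
        }
    };
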
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 359452712f94be078122266089cc1da89baf50d5..d0e8a5463f949c8d4cff22fb52c1dbff5aab448c 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -11,7 +11,7 @@
 
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 
-#include <cstddef>  // std::size_t
+#include <cstddef> // std::size_t
 #include <functional>
 #include <memory>
 #include <tuple>
@@ -22,59 +22,72 @@
 #include "aidge/utils/ErrorHandling.hpp"
 #include "aidge/utils/Types.h"
 
-
-template <>
-void Aidge::FCImpl_cpu::forward()
-{
-    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
+template <> void Aidge::FCImpl_cpu::forward() {
+    const FC_Op &op_ = dynamic_cast<const FC_Op &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1");
 
-    const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
-    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
-    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))) : Tensor();
+    const auto &input0 =
+        op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
+    const auto &input1 =
+        op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
+    const auto &input2 =
+        (op_.getInput(2))
+            ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0)))
+            : Tensor();
 
     // Call kernel
     const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
     impl.forward(batchSize,
-        input1.dims()[1], // nb input features
-        input1.dims()[0], // nb output features
-        input0.getImpl()->rawPtr(),
-        input1.getImpl()->rawPtr(),
-        (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
-        getCPUPtr(mOp.getRawOutput(0)));
+                 input1.dims()[1], // nb input features
+                 input1.dims()[0], // nb output features
+                 input0.getImpl()->rawPtr(),
+                 input1.getImpl()->rawPtr(),
+                 (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::FCImpl_cpu::backward()
-{
-    const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
-    const auto& fc_grad = op_.getOutput(0)->grad();
+template <> void Aidge::FCImpl_cpu::backward() {
+    const FC_Op &op_ = dynamic_cast<const FC_Op &>(mOp);
+    const auto &fc_grad = op_.getOutput(0)->grad();
     AIDGE_ASSERT(fc_grad, "missing output #0 gradient");
     AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
     AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");
 
-    const auto impl = Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<FCImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to backward(). We might put the following shared_ptr as members of
     // this class to avoid that.
-    std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
-    const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
-    const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
-    const auto& input2grad = (op_.getInput(2)) ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor();
+    std::shared_ptr<Tensor> input0gradFallback, input1gradFallback,
+        input2gradFallback;
+    const auto &input0grad =
+        op_.getInput(0)->grad()->refCastFrom(input0gradFallback,
+                                             *(op_.getOutput(0)));
+    const auto &input1grad =
+        op_.getInput(1)->grad()->refCastFrom(input1gradFallback,
+                                             *(op_.getOutput(0)));
+    const auto &input2grad =
+        (op_.getInput(2))
+            ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback,
+                                                   *(op_.getOutput(0)))
+            : Tensor();
 
     // Call kernel
-    const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
-    impl.backward(batchSize,
+    const auto batchSize =
+        (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
+    impl.backward(
+        batchSize,
         input1grad.dims()[1], // nb input features
         input1grad.dims()[0], // nb output features
         getCPUPtr(fc_grad),
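
As the two dims() arguments above suggest, the weight tensor is laid out with the output-feature count first. A naive reference of the forward kernel being dispatched (a sketch assuming row-major [outFeatures][inFeatures] weights and an optional bias, not the tuned kernel from FCImpl_kernels.hpp):

    #include <cstddef>

    void fcForwardNaive(std::size_t batchSize,
                        std::size_t inFeatures,
                        std::size_t outFeatures,
                        const float *input,   // [batchSize][inFeatures]
                        const float *weights, // [outFeatures][inFeatures]
                        const float *bias,    // [outFeatures] or nullptr
                        float *output) {      // [batchSize][outFeatures]
        for (std::size_t b = 0; b < batchSize; ++b) {
            for (std::size_t o = 0; o < outFeatures; ++o) {
                float acc = bias ? bias[o] : 0.0f;
                for (std::size_t i = 0; i < inFeatures; ++i) {
                    acc += input[b * inFeatures + i] *
                           weights[o * inFeatures + i];
                }
                output[b * outFeatures + o] = acc;
            }
        }
    }
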
diff --git a/src/operator/FoldImpl.cpp b/src/operator/FoldImpl.cpp
index 10f3d7b50bac9a1fbfc403609bdccb67a79cceac..fde5bf744d344af9abf9fb395858da16c94c0e69 100644
--- a/src/operator/FoldImpl.cpp
+++ b/src/operator/FoldImpl.cpp
@@ -15,32 +15,34 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/FoldImpl.hpp"
 #include "aidge/backend/cpu/operator/FoldImpl_kernels.hpp"
 
-template <>
-void Aidge::FoldImpl2D_cpu::forward() {
-    const auto& op_ = static_cast<const Fold_Op<2>&>(mOp);
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+template <> void Aidge::FoldImpl2D_cpu::forward() {
+    const auto &op_ = static_cast<const Fold_Op<2> &>(mOp);
+    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) &&
+           "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<FoldImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<FoldImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.outputDims(),
-                op_.strideDims(),
-                op_.dilationDims(),
-                op_.kernelDims(),
-                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-                getCPUPtr(mOp.getRawInput(0)),
-                getCPUPtr(mOp.getRawOutput(0)));
+                 op_.strideDims(),
+                 op_.dilationDims(),
+                 op_.kernelDims(),
+                 std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::FoldImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Fold_Op<2> on backend cpu");
+template <> void Aidge::FoldImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Fold_Op<2> on backend cpu");
 }
diff --git a/src/operator/GlobalAveragePoolingImpl.cpp b/src/operator/GlobalAveragePoolingImpl.cpp
index c53f92e199aee30d55ddafe39b5ef121979acbf7..bb00cf25bcbfd7c2b263c6a9c5023fae823ec3a1 100644
--- a/src/operator/GlobalAveragePoolingImpl.cpp
+++ b/src/operator/GlobalAveragePoolingImpl.cpp
@@ -23,24 +23,24 @@
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
-
-template <>
-void Aidge::GlobalAveragePoolingImpl_cpu::forward()
-{
-    const GlobalAveragePooling_Op& op_ = static_cast<const GlobalAveragePooling_Op&>(mOp);
+template <> void Aidge::GlobalAveragePoolingImpl_cpu::forward() {
+    const GlobalAveragePooling_Op &op_ =
+        static_cast<const GlobalAveragePooling_Op &>(mOp);
     // Check if input is provided
     AIDGE_ASSERT(op_.getInput(0), "missing input 0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<GlobalAveragePoolingImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<GlobalAveragePoolingImpl_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.getInput(0)->dims(),
-               op_.getInput(0)->getImpl()->rawPtr(),
-               op_.getOutput(0)->getImpl()->rawPtr());
+                 op_.getInput(0)->getImpl()->rawPtr(),
+                 op_.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::GlobalAveragePoolingImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GlobalAveragePooling_Op on backend cpu");
+template <> void Aidge::GlobalAveragePoolingImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error,
+                         "Backward not yet implemented for "
+                         "GlobalAveragePooling_Op on backend cpu");
 }
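
The kernel reached here reduces each (batch, channel) plane of the input to its mean. A reference sketch, assuming an NCHW-style layout with at least two leading dimensions (the dispatched kernel is the authoritative version):

    #include <cstddef>
    #include <vector>

    void globalAvgPoolNCHW(const std::vector<std::size_t> &dims, // {N, C, ...}
                           const float *in, float *out) {
        const std::size_t N = dims[0], C = dims[1];
        std::size_t planeSize = 1;
        for (std::size_t i = 2; i < dims.size(); ++i)
            planeSize *= dims[i];
        for (std::size_t n = 0; n < N; ++n) {
            for (std::size_t c = 0; c < C; ++c) {
                const float *plane = in + (n * C + c) * planeSize;
                float sum = 0.0f;
                for (std::size_t i = 0; i < planeSize; ++i)
                    sum += plane[i];
                out[n * C + c] = sum / static_cast<float>(planeSize);
            }
        }
    }
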
diff --git a/src/operator/GridSampleImpl.cpp b/src/operator/GridSampleImpl.cpp
index 5b87390fc3de21d5d406d893e4827e80cce06c35..859e756bbc70d1a5906c15665c4ede4ead38dbdb 100644
--- a/src/operator/GridSampleImpl.cpp
+++ b/src/operator/GridSampleImpl.cpp
@@ -19,30 +19,33 @@
 #include "aidge/operator/GridSample.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::GridSampleImpl_cpu::forward() {
-    const auto& op_ = static_cast<const GridSample_Op&>(mOp);
+template <> void Aidge::GridSampleImpl_cpu::forward() {
+    const auto &op_ = static_cast<const GridSample_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<GridSampleImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<GridSampleImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Convert input data (no overhead if not needed!)
     // TODO: right now, if needed, memory will be allocated/deallocated at each
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback;
-    const auto& input0 = std::make_shared<Tensor>(op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)));
-    const auto& input1 = std::make_shared<Tensor>(op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)));
+    const auto &input0 = std::make_shared<Tensor>(
+        op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0)));
+    const auto &input1 = std::make_shared<Tensor>(
+        op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0)));
 
     // Call kernel
     impl.forward(op_,
-            input0, // input
-            input1, // grid
-            op_.getOutput(0) // output
-            );
+                 input0,          // input
+                 input1,          // grid
+                 op_.getOutput(0) // output
+    );
 }
 
-template <>
-void Aidge::GridSampleImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for GridSample_Op on backend cpu");
+template <> void Aidge::GridSampleImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for GridSample_Op on backend cpu");
 }
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index 6c0802dd967d2a20b34a2f1ca91fc0640c063c83..0dfeb5520e1034b998186eb9d2fc19d693630502 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -18,41 +18,41 @@
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/LeakyReLU.hpp"
 #include "aidge/utils/Log.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
 
-template <>
-void Aidge::LeakyReLUImpl_cpu::forward() {
-    const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
+template <> void Aidge::LeakyReLUImpl_cpu::forward() {
+    const LeakyReLU_Op &op_ = dynamic_cast<const LeakyReLU_Op &>(mOp);
 
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.negativeSlope(),
-        in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 in0->size(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::LeakyReLUImpl_cpu::backward() {
+template <> void Aidge::LeakyReLUImpl_cpu::backward() {
     // in and out tensors are swapped for backprop: the kernel reads the
     // output gradient and writes the input gradient
-    const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
-    std::shared_ptr<Tensor> in0  = op_.getOutput(0)->grad();
+    const LeakyReLU_Op &op_ = dynamic_cast<const LeakyReLU_Op &>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getOutput(0)->grad();
     std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad();
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<LeakyReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.backward(op_.negativeSlope(),
-        in0->size(),
-        getCPUPtr(in0),
-        getCPUPtr(out0));
+                  in0->size(),
+                  getCPUPtr(in0),
+                  getCPUPtr(out0));
 }
\ No newline at end of file
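
For reference, the textbook leaky-ReLU gradient needs the sign of the forward input x: dL/dx equals dL/dy where x > 0 and negativeSlope * dL/dy elsewhere. A sketch of that rule, shown for orientation only (note the kernel actually dispatched above receives just the output gradient, not x):

    #include <cstddef>

    void leakyReluBackwardRef(float negativeSlope, std::size_t size,
                              const float *x,     // forward input
                              const float *gradY, // dL/dy
                              float *gradX) {     // dL/dx
        for (std::size_t i = 0; i < size; ++i) {
            gradX[i] = (x[i] > 0.0f) ? gradY[i] : negativeSlope * gradY[i];
        }
    }
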
diff --git a/src/operator/LnImpl.cpp b/src/operator/LnImpl.cpp
index 79df733963ea8826439530d3adccde6affc9dfa8..6e019de9b01d9e5e425eea5e2d9ba45592f3ab44 100644
--- a/src/operator/LnImpl.cpp
+++ b/src/operator/LnImpl.cpp
@@ -15,41 +15,46 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Ln.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/LnImpl.hpp"
 #include "aidge/backend/cpu/operator/LnImpl_kernels.hpp"
 
-template <>
-void Aidge::LnImpl_cpu::forward() {
-    const Ln_Op& op_ = static_cast<const Ln_Op&>(mOp);
-	std::shared_ptr<Tensor> in0 = op_.getInput(0);
+template <> void Aidge::LnImpl_cpu::forward() {
+    const Ln_Op &op_ = static_cast<const Ln_Op &>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::LnImpl_cpu::backward() {
-    const Ln_Op& op_ = dynamic_cast<const Ln_Op&>(mOp);
-	std::shared_ptr<Tensor> in0  = op_.getInput(0);
-    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
+template <> void Aidge::LnImpl_cpu::backward() {
+    const Ln_Op &op_ = dynamic_cast<const Ln_Op &>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
     std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());
 
     // Find the correct kernel type
-    const auto impl = Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<LnImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(),
+                  getCPUPtr(in0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_int0));
 }
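
The backward call above passes the forward input alongside the two gradients because d ln(x)/dx = 1/x. A reference sketch of that rule (the actual kernel lives in LnImpl_kernels.hpp):

    #include <cstddef>

    void lnBackwardRef(std::size_t size,
                       const float *x,     // forward input, x > 0 assumed
                       const float *gradY, // dL/dy
                       float *gradX) {     // dL/dx
        for (std::size_t i = 0; i < size; ++i) {
            gradX[i] = gradY[i] / x[i];
        }
    }
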
diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp
index ccd3265ed230e4f9cdc5ad85785a6473d9f131f0..21c01e9a24aa9107a6dd046f7014569c90f36f3a 100644
--- a/src/operator/MatMulImpl.cpp
+++ b/src/operator/MatMulImpl.cpp
@@ -9,9 +9,9 @@
  *
  ********************************************************************************/
 
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::int32_t
-#include <numeric>  // std::accumulate
+#include <cstddef> // std::size_t
+#include <cstdint> // std::int32_t
+#include <numeric> // std::accumulate
 #include <vector>
 
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
@@ -21,18 +21,21 @@
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl_kernels.hpp"
 
-template <>
-void Aidge::MatMulImpl_cpu::forward()
-{
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1");
+template <> void Aidge::MatMulImpl_cpu::forward() {
+    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) &&
+           "missing input #0");
+    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) &&
+           "missing input #1");
 
     // Find the correct kernel type
-    const auto impl = Registrar<MatMulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<MatMulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Compute compatible input dimensions
-    std::vector<std::size_t> dims0 = static_cast<const MatMul_Op&>(mOp).getInput(0)->dims();
-    std::vector<std::size_t> dims1 = static_cast<const MatMul_Op&>(mOp).getInput(1)->dims();
+    std::vector<std::size_t> dims0 =
+        static_cast<const MatMul_Op &>(mOp).getInput(0)->dims();
+    std::vector<std::size_t> dims1 =
+        static_cast<const MatMul_Op &>(mOp).getInput(1)->dims();
 
     // keep second-to-last dimension of dims0
     const std::size_t keepDim0 = (dims0.size() > 1) ? 1 : 0;
@@ -47,10 +50,13 @@ void Aidge::MatMulImpl_cpu::forward()
     }
 
     if (dims0.size() > dims1.size()) {
-        dims1.insert(dims1.cbegin(), dims0.size() - dims1.size(), std::size_t(1));
-    }
-    else if (dims1.size() > dims0.size()) {
-        dims0.insert(dims0.cbegin(), dims1.size() - dims0.size(), std::size_t(1));
+        dims1.insert(dims1.cbegin(),
+                     dims0.size() - dims1.size(),
+                     std::size_t(1));
+    } else if (dims1.size() > dims0.size()) {
+        dims0.insert(dims0.cbegin(),
+                     dims1.size() - dims0.size(),
+                     std::size_t(1));
     }
 
     // const std::size_t dims_size = std::max(dims0.size(), dims1.size());
@@ -58,25 +64,41 @@ void Aidge::MatMulImpl_cpu::forward()
     const std::size_t nbDims = dims0.size();
 
     // initialize strides to iterate through data because of broadcasting
-    std::unique_ptr<std::size_t[]> stride_post0 = std::make_unique<std::size_t[]>(nbDims - 2);
-    std::unique_ptr<std::size_t[]> stride_post1 = std::make_unique<std::size_t[]>(nbDims - 2);
-    std::unique_ptr<std::int32_t[]> stride_step0 = std::make_unique<std::int32_t[]>(nbDims - 2);
-    std::unique_ptr<std::int32_t[]> stride_step1 = std::make_unique<std::int32_t[]>(nbDims - 2);
+    std::unique_ptr<std::size_t[]> stride_post0 =
+        std::make_unique<std::size_t[]>(nbDims - 2);
+    std::unique_ptr<std::size_t[]> stride_post1 =
+        std::make_unique<std::size_t[]>(nbDims - 2);
+    std::unique_ptr<std::int32_t[]> stride_step0 =
+        std::make_unique<std::int32_t[]>(nbDims - 2);
+    std::unique_ptr<std::int32_t[]> stride_step1 =
+        std::make_unique<std::int32_t[]>(nbDims - 2);
     if (nbDims > 2) {
         stride_post0[nbDims - 3] = 1;
         stride_post1[nbDims - 3] = 1;
-        for (std::size_t i = nbDims-4; i != static_cast<std::size_t>(-1); --i) {
-            stride_post0[i] = stride_post0[i+1]*dims0[i+1];
-            stride_post1[i] = stride_post1[i+1]*dims1[i+1];
+        for (std::size_t i = nbDims - 4; i != static_cast<std::size_t>(-1);
+             --i) {
+            stride_post0[i] = stride_post0[i + 1] * dims0[i + 1];
+            stride_post1[i] = stride_post1[i + 1] * dims1[i + 1];
         }
-        for (std::size_t i = 0; i != nbDims-2; ++i) {
-            stride_step0[i] = (dims0[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post0[i]) : 1;
-            stride_step1[i] = (dims1[i] == 1) ? 1 - static_cast<std::int32_t>(stride_post1[i]) : 1;
+        for (std::size_t i = 0; i != nbDims - 2; ++i) {
+            stride_step0[i] =
+                (dims0[i] == 1)
+                    ? 1 - static_cast<std::int32_t>(stride_post0[i])
+                    : 1;
+            stride_step1[i] =
+                (dims1[i] == 1)
+                    ? 1 - static_cast<std::int32_t>(stride_post1[i])
+                    : 1;
         }
     }
 
-    const std::vector<std::size_t>& outDims = static_cast<const MatMul_Op&>(mOp).getOutput(0)->dims();
-    const std::size_t nbMatrices = std::accumulate(outDims.cbegin(), outDims.cend() - keepDim0 - keepDim1, 1, std::multiplies<std::size_t>());
+    const std::vector<std::size_t> &outDims =
+        static_cast<const MatMul_Op &>(mOp).getOutput(0)->dims();
+    const std::size_t nbMatrices =
+        std::accumulate(outDims.cbegin(),
+                        outDims.cend() - keepDim0 - keepDim1,
+                        1,
+                        std::multiplies<std::size_t>());
     std::size_t dim = outDims.size() - 1 - keepDim0 - keepDim1;
 
     // variables for arrays offsets
@@ -86,17 +108,20 @@ void Aidge::MatMulImpl_cpu::forward()
     const std::size_t n = dims0[nbDims - 2];
     const std::size_t k = dims0[nbDims - 1];
     const std::size_t m = dims1[nbDims - 1];
-    const std::size_t matrix0Size = n*k;
-    const std::size_t matrix1Size = k*m;
-    const std::size_t matrixOutSize = n*m;
+    const std::size_t matrix0Size = n * k;
+    const std::size_t matrix1Size = k * m;
+    const std::size_t matrixOutSize = n * m;
     for (std::size_t stack = 0; stack < nbMatrices;) {
-        impl.forward(n, k, m,
-                    getCPUPtr(mOp.getRawInput(0), offsetIn0*matrix0Size),
-                    getCPUPtr(mOp.getRawInput(1), offsetIn1*matrix1Size),
-                    getCPUPtr(mOp.getRawOutput(0), offsetOut*matrixOutSize));
+        impl.forward(
+            n,
+            k,
+            m,
+            getCPUPtr(mOp.getRawInput(0), offsetIn0 * matrix0Size),
+            getCPUPtr(mOp.getRawInput(1), offsetIn1 * matrix1Size),
+            getCPUPtr(mOp.getRawOutput(0), offsetOut * matrixOutSize));
         if (++stack < nbMatrices) {
             std::size_t tmp_stack = stack;
-            while(tmp_stack % outDims[dim] == 0) {
+            while (tmp_stack % outDims[dim] == 0) {
                 tmp_stack /= outDims[dim];
                 dim--;
             }
@@ -110,8 +135,9 @@ void Aidge::MatMulImpl_cpu::forward()
 
 // void Aidge::MatMulImpl_cpu::forward()
 // {
-//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
-//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1");
+//     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing
+//     input #0"); assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1))
+//     && "missing input #1");
 
 //     // Find the correct kernel type
 //     auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
@@ -126,7 +152,8 @@ void Aidge::MatMulImpl_cpu::forward()
 //         getCPUPtr(mOp.getRawOutput(0)));
 // }
 
-template <>
-void Aidge::MatMulImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MatMul_Op on backend cpu");
+template <> void Aidge::MatMulImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for MatMul_Op on backend cpu");
 }
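
Each iteration of the stack loop above multiplies one n x k matrix by one k x m matrix. A naive reference for that per-matrix kernel (a sketch with row-major layout; the optimized version is in MatMulImpl_kernels.hpp):

    #include <cstddef>

    // C[i][j] = sum over p of A[i][p] * B[p][j]
    void matMulNaive(std::size_t n, std::size_t k, std::size_t m,
                     const float *A, const float *B, float *C) {
        for (std::size_t i = 0; i < n; ++i) {
            for (std::size_t j = 0; j < m; ++j) {
                float acc = 0.0f;
                for (std::size_t p = 0; p < k; ++p) {
                    acc += A[i * k + p] * B[p * m + j];
                }
                C[i * m + j] = acc;
            }
        }
    }

The surrounding stack loop then advances offsetIn0/offsetIn1 with the precomputed stride steps, so broadcast batch dimensions reuse the same matrix instead of advancing through memory.
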
diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp
index 90075a397be3f082ef95fd4df074c99d926fd385..2e8616c48b6b49986ddc9317722298a19c7bb554 100644
--- a/src/operator/MaxPoolingImpl.cpp
+++ b/src/operator/MaxPoolingImpl.cpp
@@ -19,24 +19,25 @@
 #include "aidge/utils/Log.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::MaxPoolingImpl2D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp);
+template <> void Aidge::MaxPoolingImpl2D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const MaxPooling_Op<2> &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl = Registrar<MaxPoolingImpl2D_cpu>::create(
+        getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.strideDims(),
-                op_.kernelDims(),
-                op_.ceilMode(),
-                op_.getInput(0)->template dims<4>(),
-                getCPUPtr(mOp.getRawInput(0)),
-                getCPUPtr(mOp.getRawOutput(0)));
+                 op_.kernelDims(),
+                 op_.ceilMode(),
+                 op_.getInput(0)->template dims<4>(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::MaxPoolingImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for MaxPooling_Op<2> on backend cpu");
+template <> void Aidge::MaxPoolingImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for MaxPooling_Op<2> on backend cpu");
 }
diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp
index ea5e3d3ab8ac24934a0cb6f9042858fa094700af..07c3fb27cf0fe7e8dcb24ad355821be5dd8bd15e 100644
--- a/src/operator/MulImpl.cpp
+++ b/src/operator/MulImpl.cpp
@@ -15,37 +15,38 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/operator/Mul.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/operator/Mul.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 #include "aidge/backend/cpu/operator/MulImpl_kernels.hpp"
 
-template <>
-void Aidge::MulImpl_cpu::forward() {
-    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
-    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
+template <> void Aidge::MulImpl_cpu::forward() {
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
     // Find the correct kernel type
-    const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(inputDims0,
-        inputDims1,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawInput(1)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 inputDims1,
+                 std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawInput(1)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::MulImpl_cpu::backward() {
-    const Mul_Op& op_ = dynamic_cast<const Mul_Op&>(mOp);
-    
+template <> void Aidge::MulImpl_cpu::backward() {
+    const Mul_Op &op_ = dynamic_cast<const Mul_Op &>(mOp);
+
     auto in0 = op_.getInput(0);
     auto in1 = op_.getInput(1);
     auto in0grad = op_.getInput(0)->grad();
@@ -53,17 +54,18 @@ void Aidge::MulImpl_cpu::backward() {
     auto out0grad = op_.getOutput(0)->grad();
 
     // Find the correct kernel type
-    const auto impl = Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<MulImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(/* input0Length */ in0grad->size(), 
-               /* input1Length */ in1grad->size(),
-               /* grad0Length  */ out0grad->size(),
-               /* input0Dims   */ in0->dims(),
-               /* input1Dims   */ in1->dims(),
-               getCPUPtr(in0), 
-               getCPUPtr(in1), 
-               getCPUPtr(out0grad), 
-               getCPUPtr(in0grad), 
-               getCPUPtr(in1grad));
+    impl.backward(/* input0Length */ in0grad->size(),
+                  /* input1Length */ in1grad->size(),
+                  /* grad0Length  */ out0grad->size(),
+                  /* input0Dims   */ in0->dims(),
+                  /* input1Dims   */ in1->dims(),
+                  getCPUPtr(in0),
+                  getCPUPtr(in1),
+                  getCPUPtr(out0grad),
+                  getCPUPtr(in0grad),
+                  getCPUPtr(in1grad));
 }
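
The backward kernel receives both input shapes because broadcast axes require the gradients to be summed back down to each input's shape. In the degenerate same-shape case only the product rule remains, as in this sketch: dL/da = dL/dout * b and dL/db = dL/dout * a.

    #include <cstddef>

    void mulBackwardSameShape(std::size_t size,
                              const float *a, const float *b,
                              const float *gradOut,
                              float *gradA, float *gradB) {
        for (std::size_t i = 0; i < size; ++i) {
            gradA[i] = gradOut[i] * b[i];
            gradB[i] = gradOut[i] * a[i];
        }
    }
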
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index cdae21f8ed2757128f6a36b661b0897a4ba65f89..1ac74501de146464c2ef85d34f834d7b9f2a6153 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -11,66 +11,73 @@
 
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl_kernels.hpp"
 
-Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
-    AIDGE_ASSERT(inputIdx == 0, "input index out of range."
-        "{} Operator has only one input", mOp.type());
-    (void) inputIdx;
-
+Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(
+    Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0,
+                 "input index out of range."
+                 "{} Operator has only one input",
+                 mOp.type());
+    (void)inputIdx;
 
     // Padding cannot be in-place!
-    // We must ensure that we do not override data that has not been consummed yet.
-    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
-    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
+    // We must ensure that we do not overwrite data that has not been
+    // consumed yet.
+    const auto inputSize =
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
+    const auto outputSize =
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
     return Elts_t::DataElts(outputSize - inputSize);
 }
 
-template <>
-void Aidge::PadImpl1D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp);
+template <> void Aidge::PadImpl1D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const Pad_Op<1> &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<PadImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<PadImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.beginEndBorders(),
-                op_.borderType(),
-                op_.borderValue(),
-                op_.getInput(0)->template dims<3>(),
-                getCPUPtr(mOp.getRawInput(0)),
-                getCPUPtr(mOp.getRawOutput(0)));
+                 op_.borderType(),
+                 op_.borderValue(),
+                 op_.getInput(0)->template dims<3>(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::PadImpl1D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<1> on backend cpu");
+template <> void Aidge::PadImpl1D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Pad_Op<1> on backend cpu");
 }
 
-template <>
-void Aidge::PadImpl2D_cpu::forward() {
-    const auto& op_ = dynamic_cast<const Pad_Op<2>&>(mOp);
+template <> void Aidge::PadImpl2D_cpu::forward() {
+    const auto &op_ = dynamic_cast<const Pad_Op<2> &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<PadImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<PadImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.beginEndBorders(),
-                op_.borderType(),
-                op_.borderValue(),
-                op_.getInput(0)->template dims<4>(),
-                getCPUPtr(mOp.getRawInput(0)),
-                getCPUPtr(mOp.getRawOutput(0)));
+                 op_.borderType(),
+                 op_.borderValue(),
+                 op_.getInput(0)->template dims<4>(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::PadImpl2D_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<2> on backend cpu");
+template <> void Aidge::PadImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Pad_Op<2> on backend cpu");
 }
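
A quick worked example of the protection rule computed in getNbRequiredProtected() above: padding a 3x3 input by 1 on every border produces a 5x5 output, so 25 - 9 = 16 output elements must not be written before the input has been fully consumed.

    #include <cstddef>

    // Mirrors the arithmetic of Pad_ProdConso_cpu::getNbRequiredProtected()
    constexpr std::size_t protectedElts(std::size_t inputSize,
                                        std::size_t outputSize) {
        return outputSize - inputSize;
    }
    static_assert(protectedElts(3 * 3, 5 * 5) == 16,
                  "3x3 padded by 1 on each border -> 5x5");
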
diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp
index 74a7be71e176ba8e1cb8851050e575d6aa7465df..3a8d27c28fcd5b372c6a3d396fe2bd3e92da60d0 100644
--- a/src/operator/PowImpl.cpp
+++ b/src/operator/PowImpl.cpp
@@ -15,36 +15,37 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/operator/Pow.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/operator/Pow.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/PowImpl.hpp"
 #include "aidge/backend/cpu/operator/PowImpl_kernels.hpp"
 
-template <>
-void Aidge::PowImpl_cpu::forward() {
-    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
-    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
+template <> void Aidge::PowImpl_cpu::forward() {
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
     // Find the correct kernel type
-    const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(inputDims0,
-        inputDims1,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawInput(1)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 inputDims1,
+                 std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawInput(1)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::PowImpl_cpu::backward() {
-    const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp);
+template <> void Aidge::PowImpl_cpu::backward() {
+    const Pow_Op &op_ = dynamic_cast<const Pow_Op &>(mOp);
 
     auto in0 = op_.getInput(0);
     auto in1 = op_.getInput(1);
@@ -52,21 +53,24 @@ void Aidge::PowImpl_cpu::backward() {
     auto in1grad = op_.getInput(1)->grad();
     auto out0grad = op_.getOutput(0)->grad();
 
-    const std::vector<std::size_t> input0gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
-                                                                       std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims());
-    const std::vector<std::size_t> input1gradDims = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
-                                                                       std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims());
+    const std::vector<std::size_t> input0gradDims = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->grad()->dims());
+    const std::vector<std::size_t> input1gradDims = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->grad()->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->grad()->dims());
 
     // Find the correct kernel type
-    const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.backward(input0gradDims,
-                input1gradDims,
-                out0grad->dims(),
-                getCPUPtr(in0),
-                getCPUPtr(in1),
-                getCPUPtr(out0grad),
-                getCPUPtr(in0grad),
-                getCPUPtr(in1grad));
+                  input1gradDims,
+                  out0grad->dims(),
+                  getCPUPtr(in0),
+                  getCPUPtr(in1),
+                  getCPUPtr(out0grad),
+                  getCPUPtr(in0grad),
+                  getCPUPtr(in1grad));
 }
\ No newline at end of file
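
Behind the backward call above are the two power-rule gradients: dL/da = dL/dy * b * a^(b-1) and dL/db = dL/dy * a^b * ln(a). A same-shape reference sketch (the dispatched kernel additionally performs the broadcast reductions implied by input0gradDims/input1gradDims):

    #include <cmath>
    #include <cstddef>

    void powBackwardSameShape(std::size_t size,
                              const float *a, const float *b,
                              const float *gradY,
                              float *gradA, float *gradB) {
        for (std::size_t i = 0; i < size; ++i) {
            gradA[i] = gradY[i] * b[i] * std::pow(a[i], b[i] - 1.0f);
            // std::log requires a > 0; the real kernel must define the
            // behavior for other inputs.
            gradB[i] = gradY[i] * std::pow(a[i], b[i]) * std::log(a[i]);
        }
    }
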
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index 832f91aad347fc081439ec487d06b14b0e2fe8da..e81c373b560c834bf7cb7af815751d7b9a93719e 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -12,43 +12,48 @@
 #include <memory>
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/ReLU.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/ReLUImpl_kernels.hpp"
 
-template <>
-void Aidge::ReLUImpl_cpu::forward() {
-	const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
+template <> void Aidge::ReLUImpl_cpu::forward() {
+    const ReLU_Op &op_ = dynamic_cast<const ReLU_Op &>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::ReLUImpl_cpu::backward() {
-    const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
-    std::shared_ptr<Tensor> in0  = op_.getInput(0);
-    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
+template <> void Aidge::ReLUImpl_cpu::backward() {
+    const ReLU_Op &op_ = dynamic_cast<const ReLU_Op &>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
-    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();    
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());
 
     // Find the correct kernel type
-    const auto impl = Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ReLUImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(),
+                  getCPUPtr(in0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_int0));
 }
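
The ReLU backward kernel invoked above implements the usual gate on the forward input: gradients pass through where x > 0 and are zeroed elsewhere. As a reference sketch:

    #include <cstddef>

    void reluBackwardRef(std::size_t size,
                         const float *x,     // forward input
                         const float *gradY, // dL/dy
                         float *gradX) {     // dL/dx
        for (std::size_t i = 0; i < size; ++i) {
            gradX[i] = (x[i] > 0.0f) ? gradY[i] : 0.0f;
        }
    }
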
diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp
index 622672569372ff4e9f135e36255095f4246d5920..d6ae3fe10322f3caafb7219027bc1e335435c031 100644
--- a/src/operator/ReduceMeanImpl.cpp
+++ b/src/operator/ReduceMeanImpl.cpp
@@ -14,31 +14,31 @@
 #include <memory>
 #include <vector>
 
-#include "aidge/utils/Types.h"
-#include "aidge/operator/ReduceMean.hpp"
 #include "aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp"
+#include "aidge/operator/ReduceMean.hpp"
+#include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ReduceMeanImpl_cpu::forward() {
-    const ReduceMean_Op& op_ = dynamic_cast<const ReduceMean_Op&>(mOp);
+template <> void Aidge::ReduceMeanImpl_cpu::forward() {
+    const ReduceMean_Op &op_ = dynamic_cast<const ReduceMean_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<ReduceMeanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ReduceMeanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.axes(),
-                op_.keepDims(),
-                op_.getInput(0)->dims(),
-                op_.getInput(0)->getImpl()->rawPtr(),
-                op_.getOutput(0)->getImpl()->rawPtr());
+                 op_.keepDims(),
+                 op_.getInput(0)->dims(),
+                 op_.getInput(0)->getImpl()->rawPtr(),
+                 op_.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::ReduceMeanImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceMean_Op on backend cpu");
+template <> void Aidge::ReduceMeanImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for ReduceMean_Op on backend cpu");
 }
 
-
 // void Aidge::ReduceMeanImpl1D_cpu::forward() {
 
 //     // Find the correct kernel type
@@ -48,7 +48,8 @@ void Aidge::ReduceMeanImpl_cpu::backward() {
 //         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
 
 //     // Call kernel
-//     kernelFunc(dynamic_cast<const ReduceMean_Op<1>&>(mOp).getStaticAttributes(),
+//     kernelFunc(dynamic_cast<const
+//     ReduceMean_Op<1>&>(mOp).getStaticAttributes(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
@@ -63,7 +64,8 @@ void Aidge::ReduceMeanImpl_cpu::backward() {
 //         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
 
 //     // Call kernel
-//     kernelFunc(dynamic_cast<const ReduceMean_Op<2>&>(mOp).getStaticAttributes(),
+//     kernelFunc(dynamic_cast<const
+//     ReduceMean_Op<2>&>(mOp).getStaticAttributes(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
@@ -78,7 +80,8 @@ void Aidge::ReduceMeanImpl_cpu::backward() {
 //         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
 
 //     // Call kernel
-//     kernelFunc(dynamic_cast<const ReduceMean_Op<3>&>(mOp).getStaticAttributes(),
+//     kernelFunc(dynamic_cast<const
+//     ReduceMean_Op<3>&>(mOp).getStaticAttributes(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
 //                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
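
The axes/keepDims pair passed to the live forward kernel above determines the output shape: reduced axes collapse to extent 1 when keepDims is true and are dropped otherwise. A sketch of that shape logic (assuming non-negative axis indices):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::size_t> reducedDims(std::vector<std::size_t> dims,
                                         const std::vector<std::int32_t> &axes,
                                         bool keepDims) {
        if (keepDims) {
            // Reduced axes are kept with extent 1.
            for (const std::int32_t a : axes)
                dims[static_cast<std::size_t>(a)] = 1;
            return dims;
        }
        // Otherwise every axis listed in `axes` is dropped from the shape.
        std::vector<std::size_t> out;
        for (std::size_t i = 0; i < dims.size(); ++i) {
            if (std::find(axes.cbegin(), axes.cend(),
                          static_cast<std::int32_t>(i)) == axes.cend())
                out.push_back(dims[i]);
        }
        return out;
    }
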
diff --git a/src/operator/ReduceSumImpl.cpp b/src/operator/ReduceSumImpl.cpp
index aad0801835a74ecefb046f3dc64729ae1f8bd8bb..b0f7c575df00a88d3f53d5feb80ee240f444b8ea 100644
--- a/src/operator/ReduceSumImpl.cpp
+++ b/src/operator/ReduceSumImpl.cpp
@@ -14,26 +14,27 @@
 #include <memory>
 #include <vector>
 
-#include "aidge/utils/Types.h"
-#include "aidge/operator/ReduceSum.hpp"
 #include "aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp"
+#include "aidge/operator/ReduceSum.hpp"
+#include "aidge/utils/Types.h"
 
-template <>
-void Aidge::ReduceSumImpl_cpu::forward() {
-    const ReduceSum_Op& op_ = dynamic_cast<const ReduceSum_Op&>(mOp);
+template <> void Aidge::ReduceSumImpl_cpu::forward() {
+    const ReduceSum_Op &op_ = dynamic_cast<const ReduceSum_Op &>(mOp);
 
     // Find the correct kernel type
-    const auto impl = Registrar<ReduceSumImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ReduceSumImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.axes(),
-                op_.keepDims(),
-                op_.getInput(0)->dims(),
-                op_.getInput(0)->getImpl()->rawPtr(),
-                op_.getOutput(0)->getImpl()->rawPtr());
+                 op_.keepDims(),
+                 op_.getInput(0)->dims(),
+                 op_.getInput(0)->getImpl()->rawPtr(),
+                 op_.getOutput(0)->getImpl()->rawPtr());
 }
 
-template <>
-void Aidge::ReduceSumImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceSum_Op on backend cpu");
+template <> void Aidge::ReduceSumImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for ReduceSum_Op on backend cpu");
 }
diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp
index 1e7a408f267c5eb2d60d188f0ed2ba0394222561..11ddb8ffbfceb109c0b03e5d4b3378fe6d60dc31 100644
--- a/src/operator/ScalingImpl.cpp
+++ b/src/operator/ScalingImpl.cpp
@@ -10,35 +10,36 @@
  ********************************************************************************/
 
 #include <cassert>
-#include <numeric>    // std::accumulate
 #include <functional> // std::multiplies
+#include <numeric>    // std::accumulate
 #include <vector>
 
 #include "aidge/operator/Scaling.hpp"
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
-template <>
-void Aidge::ScalingImpl_cpu::forward() {
-    const auto& op_ = dynamic_cast<const Scaling_Op&>(mOp);
+template <> void Aidge::ScalingImpl_cpu::forward() {
+    const auto &op_ = dynamic_cast<const Scaling_Op &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Scaling Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<ScalingImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<ScalingImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.scalingFactor(),
-            op_.quantizedNbBits(),
-            op_.isOutputUnsigned(),
-            op_.getInput(0)->size(),
-            getCPUPtr(mOp.getRawInput(0)),
-            getCPUPtr(mOp.getRawOutput(0)));
+                 op_.quantizedNbBits(),
+                 op_.isOutputUnsigned(),
+                 op_.getInput(0)->size(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::ScalingImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Scaling_Op on backend cpu");
+template <> void Aidge::ScalingImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Scaling_Op on backend cpu");
 }
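
A plausible reading of the three attributes forwarded above, offered as an assumption rather than the actual kernel from ScalingImpl_kernels.hpp: multiply by scalingFactor, then, when quantizedNbBits is non-zero, round and saturate to the signed or unsigned range of that bit width.

    #include <algorithm>
    #include <cmath>
    #include <cstddef>

    void scalingForwardSketch(float scalingFactor,
                              std::size_t quantizedNbBits,
                              bool isOutputUnsigned,
                              std::size_t size,
                              const float *input,
                              float *output) {
        for (std::size_t i = 0; i < size; ++i) {
            float v = input[i] * scalingFactor;
            if (quantizedNbBits > 0) {
                const int n = static_cast<int>(quantizedNbBits);
                // Representable range of an n-bit integer, e.g. [-128, 127]
                // signed or [0, 255] unsigned for n == 8.
                const float lo =
                    isOutputUnsigned ? 0.0f : -std::ldexp(1.0f, n - 1);
                const float hi = isOutputUnsigned
                                     ? std::ldexp(1.0f, n) - 1.0f
                                     : std::ldexp(1.0f, n - 1) - 1.0f;
                v = std::min(std::max(std::round(v), lo), hi);
            }
            output[i] = v;
        }
    }
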
diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp
index cdcbac85df3a38fea9b7100324e0618949262fc9..7242ef15444df135ce9fa661980b9cbd3f2a906f 100644
--- a/src/operator/SigmoidImpl.cpp
+++ b/src/operator/SigmoidImpl.cpp
@@ -15,40 +15,45 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Sigmoid.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
 #include "aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp"
 
-template <>
-void Aidge::SigmoidImpl_cpu::forward() {
-	const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp);
+template <> void Aidge::SigmoidImpl_cpu::forward() {
+    const Sigmoid_Op &op_ = dynamic_cast<const Sigmoid_Op &>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::SigmoidImpl_cpu::backward() {
-    const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp);
-    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
+template <> void Aidge::SigmoidImpl_cpu::backward() {
+    const Sigmoid_Op &op_ = dynamic_cast<const Sigmoid_Op &>(mOp);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
-    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();    
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());
 
     // Find the correct kernel type
-    const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(),
+                  getCPUPtr(out0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_int0));
 }
diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp
index 945c1bc752feb8e6a194b1aff99b26f01a6a0e69..eebf77b3c868a08c668d102722786c27f8ea3e2a 100644
--- a/src/operator/SliceImpl.cpp
+++ b/src/operator/SliceImpl.cpp
@@ -19,25 +19,26 @@
 #include "aidge/utils/Log.hpp"
 #include "aidge/utils/Types.h"
 
-template <>
-void Aidge::SliceImpl_cpu::forward() {
-    const auto& op_ = dynamic_cast<const Slice_Op&>(mOp);
+template <> void Aidge::SliceImpl_cpu::forward() {
+    const auto &op_ = dynamic_cast<const Slice_Op &>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator.");
 
     // Find the correct kernel type
-    const auto impl = Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(op_.starts(),
-            op_.ends(),
-            op_.axes(),
-            op_.steps(),
-            op_.getInput(0)->dims(),
-            getCPUPtr(mOp.getRawInput(0)),
-            getCPUPtr(mOp.getRawOutput(0)));
+                 op_.ends(),
+                 op_.axes(),
+                 op_.steps(),
+                 op_.getInput(0)->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::SliceImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Slice_Op on backend cpu");
+template <> void Aidge::SliceImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Slice_Op on backend cpu");
 }
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index 8b6933f22f3673476f4a9f1e261fbcdc09857300..a517a64f0e790151c4c6a1e077a34f26b6cb2c59 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -15,30 +15,37 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Softmax.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp"
 
-template <>
-void Aidge::SoftmaxImpl_cpu::forward() {
-    const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp);
+template <> void Aidge::SoftmaxImpl_cpu::forward() {
+    const auto &op_ = dynamic_cast<const Softmax_Op &>(mOp);
     AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty");
-    std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
+    std::int32_t axis = (op_.axis() >= 0)
+                            ? op_.axis()
+                            : op_.getInput(0)->nbDims() + op_.axis();
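+    // A negative axis counts back from the last dimension (e.g. -1 selects
+    // the innermost axis).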
 
     // Find the correct kernel type
-    const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(static_cast<std::size_t>(axis), // axisIdx
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+                 std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+                 std::static_pointer_cast<Tensor>(mOp.getRawInput(0))
+                     ->getImpl()
+                     ->rawPtr(),
+                 std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))
+                     ->getImpl()
+                     ->rawPtr());
 }
 
-template <>
-void Aidge::SoftmaxImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu");
+template <> void Aidge::SoftmaxImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Softmax_Op on backend cpu");
 }
diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp
index 25bdb42fd5140ef4f64d704fc3a5ccf237f17f81..e02700f5d51bcab47302fe077a4ea13ce6c35887 100644
--- a/src/operator/SqrtImpl.cpp
+++ b/src/operator/SqrtImpl.cpp
@@ -21,34 +21,34 @@
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SqrtImpl_kernels.hpp"
 
-template <>
-void Aidge::SqrtImpl_cpu::forward() {
-    std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
-    std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
+template <> void Aidge::SqrtImpl_cpu::forward() {
+    std::shared_ptr<Tensor> in0 =
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
+    std::shared_ptr<Tensor> out0 =
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::SqrtImpl_cpu::backward() {
+template <> void Aidge::SqrtImpl_cpu::backward() {
     // reversing in and out Data for backprop
-    const Sqrt_Op& op_ = dynamic_cast<const Sqrt_Op&>(mOp);
-    std::shared_ptr<Tensor> out0grad  = op_.getOutput(0)->grad();
+    const Sqrt_Op &op_ = dynamic_cast<const Sqrt_Op &>(mOp);
+    std::shared_ptr<Tensor> out0grad = op_.getOutput(0)->grad();
     std::shared_ptr<Tensor> in0grad = op_.getInput(0)->grad();
     AIDGE_ASSERT(out0grad, "missing output #0");
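+    // For reference: d(sqrt(x))/dx = 1 / (2 * sqrt(x)).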
 
     // Find the correct kernel type
-    const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(out0grad->size(),
-        getCPUPtr(out0grad),
-        getCPUPtr(in0grad));
+    impl.backward(out0grad->size(), getCPUPtr(out0grad), getCPUPtr(in0grad));
 }
\ No newline at end of file
diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp
index d43771b967889183801cb93418c967ce9d9c8453..719dad900a0b6b7d54c0f62325602f6e1fad7e59 100644
--- a/src/operator/SubImpl.cpp
+++ b/src/operator/SubImpl.cpp
@@ -15,34 +15,37 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/operator/Sub.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/operator/Sub.hpp"
+#include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl_kernels.hpp"
 
-template <>
-void Aidge::SubImpl_cpu::forward() {
-    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
-    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-                                                                   std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
+template <> void Aidge::SubImpl_cpu::forward() {
+    const std::vector<std::size_t> inputDims0 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
+    const std::vector<std::size_t> inputDims1 = getBroadcastedDims(
+        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
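+    // Both input shapes are padded with leading 1s up to the output rank
+    // (NumPy-style broadcasting).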
 
     // Find the correct kernel type
-    const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(inputDims0,
-        inputDims1,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawInput(1)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 inputDims1,
+                 std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawInput(1)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::SubImpl_cpu::backward() {
-    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu");
+template <> void Aidge::SubImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Sub_Op on backend cpu");
 }
diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp
index ed8dce08b9f710c9e5830b2c72ffef71013edb6e..2565402fa10a840d859befba4fbc58906c0d0f52 100644
--- a/src/operator/TanhImpl.cpp
+++ b/src/operator/TanhImpl.cpp
@@ -15,41 +15,45 @@
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/Tanh.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
 #include "aidge/backend/cpu/operator/TanhImpl_kernels.hpp"
 
-template <>
-void Aidge::TanhImpl_cpu::forward() {
-	const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp);
+template <> void Aidge::TanhImpl_cpu::forward() {
+    const Tanh_Op &op_ = dynamic_cast<const Tanh_Op &>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
     impl.forward(in0->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+                 getCPUPtr(mOp.getRawInput(0)),
+                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
-template <>
-void Aidge::TanhImpl_cpu::backward() {
-    const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp);
-    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
-    std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();		
-    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();    
-    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+template <> void Aidge::TanhImpl_cpu::backward() {
+    const Tanh_Op &op_ = dynamic_cast<const Tanh_Op &>(mOp);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0,
+                 "missing output #0 for current {} operator",
+                 op_.type());
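+    // Since tanh'(x) = 1 - y * y with y = tanh(x), the backward kernel
+    // only needs the forward output, not the original input.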
 
     // Find the correct kernel type
-    const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+    const auto impl =
+        Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(),
+                  getCPUPtr(out0),
+                  getCPUPtr(gra_out0),
+                  getCPUPtr(gra_int0));
 }
-
diff --git a/unit_tests/data/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp
index 4bfa10ab4e3d3f522015dbcb3654e105fbb14525..2c651a760fa01e814eb59bf60cd88b462c75d3b5 100644
--- a/unit_tests/data/Test_TensorImpl.cpp
+++ b/unit_tests/data/Test_TensorImpl.cpp
@@ -10,43 +10,46 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
-#include "aidge/data/Tensor.hpp"
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
-#include "aidge/operator/Add.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Add.hpp"
 
 namespace Aidge {
 
-TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
+TEST_CASE("Test addition of Tensors", "[TensorImpl][Add]") {
     constexpr std::uint16_t NBTRIALS = 10;
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
     // Create MatMul Operator
     std::shared_ptr<Node> mySub = Add();
-    auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
 
@@ -64,7 +67,8 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
 
     for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
         // generate 2 random Tensors
-        // handle dimensions, replace some dimensions with '1' to get broadcasting
+        // handle dimensions, replace some dimensions with '1' to get
+        // broadcasting
         constexpr std::size_t nbDims = 4;
         std::vector<std::size_t> dims;
         for (std::size_t i = 0; i < nbDims; ++i) {
@@ -84,37 +88,51 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
         }
 
         // create arrays and fill them with random values
-        float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-        float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-        float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
+        float *array0 = new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+        float *array1 = new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+        float *result =
+            new float[dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]];
 
-        for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
+        for (std::size_t i = 0; i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+             ++i) {
             array0[i] = valueDist(gen);
         }
-        for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
+        for (std::size_t i = 0; i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+             ++i) {
             array1[i] = valueDist(gen);
         }
 
         // compute true result
-        const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-        const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+        const std::size_t strides0[nbDims] = {dims0[1] * dims0[2] * dims0[3],
+                                              dims0[2] * dims0[3],
+                                              dims0[3],
+                                              1};
+        const std::size_t strides1[nbDims] = {dims1[1] * dims1[2] * dims1[3],
+                                              dims1[2] * dims1[3],
+                                              dims1[3],
+                                              1};
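+        // Row-major strides; a size-1 dimension is broadcast below by
+        // clamping its index to 0.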
         for (std::size_t a = 0; a < dimsOut[0]; ++a) {
             for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                            + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                            + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                const std::size_t idx0_0 =
+                    strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                    strides0[1] * ((dims0[1] > 1) ? b : 0);
+                const std::size_t idx1_0 =
+                    strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                    strides1[1] * ((dims1[1] > 1) ? b : 0);
                 for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                    const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                    const std::size_t idx_out =
+                        dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
                     for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                        std::size_t idx0 = idx0_0
-                                            + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                            + ((dims0[3] > 1) ? d : 0);
-                        std::size_t idx1 = idx1_0
-                                            + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                            + ((dims1[3] > 1) ? d : 0);
+                        std::size_t idx0 =
+                            idx0_0 + strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                            ((dims0[3] > 1) ? d : 0);
+                        std::size_t idx1 =
+                            idx1_0 + strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                            ((dims1[3] > 1) ? d : 0);
                         result[idx_out + d] = array0[idx0] + array1[idx1];
-                        // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                        // std::cout << "(" << idx0 << ", " << idx1 << ") -> "
+                        // << array0[idx0] << " - " << array1[idx1] << " -> "
+                        // << idx_out + d << std::endl;
                     }
                 }
             }
@@ -123,34 +141,41 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
         // conversion to Aidge::Tensors
         // input0
         T0->resize(dims0);
-        T0->getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+        T0->getImpl()->setRawPtr(array0,
+                                 dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
         // input1
         T1->resize(dims1);
-        T1->getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+        T1->getImpl()->setRawPtr(array1,
+                                 dims1[0] * dims1[1] * dims1[2] * dims1[3]);
 
         // results
         Tres.resize(dimsOut);
-        Tres.getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+        Tres.getImpl()->setRawPtr(
+            result,
+            dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
         Tensor T2 = *T0 + *T1;
         REQUIRE(T2 == Tres);
 
-    // no implementation
+        // no implementation: T3 has no backend, so the addition must throw
         Tensor T3(T1->dims());
         REQUIRE_THROWS(*T0 + T3);
 
         // // wrong backend
-        // static Registrar<Add_Op> registrarAddImpl_custom("custom", [](const Add_Op& op) { return std::make_unique<AddImpl_cpu>(op); } );
-        // static Registrar<Tensor> registrarTensorImpl_custom_Int32({"custom", DataType::Int32},
+        // static Registrar<Add_Op> registrarAddImpl_custom("custom", [](const
+        // Add_Op& op) { return std::make_unique<AddImpl_cpu>(op); } ); static
+        // Registrar<Tensor> registrarTensorImpl_custom_Int32({"custom",
+        // DataType::Int32},
         //             [] (DeviceIdx_t device, std::vector<DimSize_t> dims) {
-        //                 return std::make_shared<TensorImpl_cpu<int>>(device, dims);
+        //                 return std::make_shared<TensorImpl_cpu<int>>(device,
+        //                 dims);
         //             }
         //         );
         // T1.setBackend("custom");
         // REQUIRE_THROWS(T0 + T1);
 
-    // wrong datatype
+        // wrong datatype: a Float64 operand against Float32 must throw
         Tensor T4(T1->dims());
         T4.setDataType(DataType::Float64);
         REQUIRE_THROWS(*T0 + T4);
@@ -161,34 +186,38 @@ TEST_CASE("Test addition of Tensors","[TensorImpl][Add]") {
     }
 }
 
-TEST_CASE("Test substraction of Tensors","[TensorImpl][Sub]") {
+TEST_CASE("Test substraction of Tensors", "[TensorImpl][Sub]") {
     Tensor T0 = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
     Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 1}, {3, 7}}, {{54, 0}, {7, 12}}}};
     Tensor T2 = T0 - T1;
     T2.print();
-    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{-6,1},{0,-3}},{{-49,6},{0,-4}}}}));
+    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{
+                      {{{-6, 1}, {0, -3}}, {{-49, 6}, {0, -4}}}}));
 
     Tensor T3(T1.dims());
     REQUIRE_THROWS(T0 - T3);
 }
 
-TEST_CASE("Test multiplication of Tensors","[TensorImpl][Mul]") {
+TEST_CASE("Test multiplication of Tensors", "[TensorImpl][Mul]") {
     Tensor T0 = Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}};
     Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 2}, {3, 7}}, {{5, 6}, {7, 8}}}};
     Tensor T2 = T0 * T1;
     T2.print();
-    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{7,4},{9,28}},{{25,36},{49,64}}}}));
+    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{
+                      {{{7, 4}, {9, 28}}, {{25, 36}, {49, 64}}}}));
 
     Tensor T3(T1.dims());
     REQUIRE_THROWS(T0 * T3);
 }
 
-TEST_CASE("Test division of Tensors","[TensorImpl][Div]") {
-    Tensor T0 = Array3D<int, 2, 2, 2>{{{{7,4},{9,28}},{{25,36},{49,64}}}};
+TEST_CASE("Test division of Tensors", "[TensorImpl][Div]") {
+    Tensor T0 =
+        Array3D<int, 2, 2, 2>{{{{7, 4}, {9, 28}}, {{25, 36}, {49, 64}}}};
     Tensor T1 = Array3D<int, 2, 2, 2>{{{{7, 2}, {3, 7}}, {{5, 6}, {7, 8}}}};
     Tensor T2 = T0 / T1;
     T2.print();
-    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}}));
+    REQUIRE(T2 == Tensor(Array3D<int, 2, 2, 2>{
+                      {{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}}));
 
     Tensor T3(T1.dims());
     REQUIRE_THROWS(T0 / T3);
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index bca4025705cb1c851dcf3e9accbf016c4535120a..718f333a6ac278c0b4921b753c6e9d336a9a0089 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -19,49 +19,46 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
-    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-        {                                       //
-            {                                   //
-                {{20, 47},{21, 48},{22, 49}},   //
-                {{23, 50},{24, 51},{25, 52}},   //
-                {{26, 53},{27, 54},{28, 55}}    //
-            },                                  //
-            {                                   //
-                {{29, 56},{30, 57},{31, 58}},   //
-                {{32, 59},{33, 60},{34, 61}},   //
-                {{35, 62},{36, 63},{37, 64}}    //
-            },                                  //
-            {                                   //
-                {{38, 65},{39, 66},{40, 67}},   //
-                {{41, 68},{42, 69},{43, 70}},   //
-                {{44, 71},{45, 72},{46, 73}}    //
-            }                                   //
-        }                                       //
-    });                                         //
-
-    SECTION("Two inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+    std::shared_ptr<Tensor> input1 =
+        std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{
             {
+                //
                 {
-                    {{40,  94},{42,  96},{44,  98}},
-                    {{46, 100},{48, 102},{50, 104}},
-                    {{52, 106},{54, 108},{56, 110}}
-                },
+                    //
+                    {{20, 47}, {21, 48}, {22, 49}}, //
+                    {{23, 50}, {24, 51}, {25, 52}}, //
+                    {{26, 53}, {27, 54}, {28, 55}}  //
+                },                                  //
                 {
-                    {{58, 112},{60, 114},{62, 116}},
-                    {{64, 118},{66, 120},{68, 122}},
-                    {{70, 124},{72, 126},{74, 128}}
-                },
+                    //
+                    {{29, 56}, {30, 57}, {31, 58}}, //
+                    {{32, 59}, {33, 60}, {34, 61}}, //
+                    {{35, 62}, {36, 63}, {37, 64}}  //
+                },                                  //
                 {
-                    {{76, 130},{78, 132},{80, 134}},
-                    {{82, 136},{84, 138},{86, 140}},
-                    {{88, 142},{90, 144},{92, 146}}
-                }
-            }
-        });
+                    //
+                    {{38, 65}, {39, 66}, {40, 67}}, //
+                    {{41, 68}, {42, 69}, {43, 70}}, //
+                    {{44, 71}, {45, 72}, {46, 73}}  //
+                } //
+            } //
+        }); //
+
+    SECTION("Two inputs") {
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 3, 3, 3, 2>{{{{{40, 94}, {42, 96}, {44, 98}},
+                                       {{46, 100}, {48, 102}, {50, 104}},
+                                       {{52, 106}, {54, 108}, {56, 110}}},
+                                      {{{58, 112}, {60, 114}, {62, 116}},
+                                       {{64, 118}, {66, 120}, {68, 122}},
+                                       {{70, 124}, {72, 126}, {74, 128}}},
+                                      {{{76, 130}, {78, 132}, {80, 134}},
+                                       {{82, 136}, {84, 138}, {86, 140}},
+                                       {{88, 142}, {90, 144}, {92, 146}}}}});
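+        // The test feeds input1 to both operands, so every expected entry
+        // is twice the corresponding input value.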
 
         std::shared_ptr<Node> myAdd = Add();
-        auto op = std::static_pointer_cast<OperatorTensor>(myAdd -> getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAdd->getOperator());
         op->associateInput(0, input1);
         op->associateInput(1, input1);
         op->setBackend("cpu");
@@ -72,54 +69,70 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
     }
 
     SECTION("Broadcasting") {
-        std::shared_ptr<Tensor> input_0 = std::make_shared<Tensor>(Array4D<int,3,1,3,2> {
-        {                                       //
-            {                                   //
-                {{0, 1},{2, 3},{4, 5}}          //
-            },                                  //
-            {                                   //
-                {{6, 7},{8, 9},{10, 11}}        //
-            },                                  //
-            {                                   //
-                {{12, 13},{14, 15},{16, 17}}    //
-            }                                   //
-        }                                       //
-        });                                     //
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-        {                                       //
-            {                                   //
-                {{20, 21},{22, 23},{24, 25}},   //
-                {{26, 27},{28, 29},{30, 31}},   //
-                {{32, 33},{34, 35},{36, 37}}    //
-            }                                   //
-        }                                       //
-        });                                     //
+        std::shared_ptr<Tensor> input_0 =
+            std::make_shared<Tensor>(Array4D<int, 3, 1, 3, 2>{
+                {
+                    //
+                    {
+                        //
+                        {{0, 1}, {2, 3}, {4, 5}} //
+                    },                           //
+                    {
+                        //
+                        {{6, 7}, {8, 9}, {10, 11}} //
+                    },                             //
+                    {
+                        //
+                        {{12, 13}, {14, 15}, {16, 17}} //
+                    } //
+                } //
+            }); //
+        std::shared_ptr<Tensor> input_1 =
+            std::make_shared<Tensor>(Array4D<int, 1, 3, 3, 2>{
+                {
+                    //
+                    {
+                        //
+                        {{20, 21}, {22, 23}, {24, 25}}, //
+                        {{26, 27}, {28, 29}, {30, 31}}, //
+                        {{32, 33}, {34, 35}, {36, 37}}  //
+                    } //
+                } //
+            }); //
 
-        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{100,200}});
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {                                               //
-                {                                           //
-                    {{ 120, 222},{ 124, 226},{ 128, 230}},  //
-                    {{ 126, 228},{ 130, 232},{ 134, 236}},  //
-                    {{ 132, 234},{ 136, 238},{ 140, 242}}   //
-                },                                          //
-                {                                           //
-                    {{ 126, 228},{ 130, 232},{ 134, 236}},  //
-                    {{ 132, 234},{ 136, 238},{ 140, 242}},  //
-                    {{ 138, 240},{ 142, 244},{ 146, 248}}   //
-                },                                          //
-                {                                           //
-                    {{ 132, 234},{ 136, 238},{140, 242}},   //
-                    {{ 138, 240},{ 142, 244},{146, 248}},   //
-                    {{ 144, 246},{ 148, 250},{152, 254}}    //
-                }                                           //
-            }                                               //
-        });                                                 //
+        std::shared_ptr<Tensor> input_2 =
+            std::make_shared<Tensor>(Array1D<int, 2>{{100, 200}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{
+                {
+                    //
+                    {
+                        //
+                        {{120, 222}, {124, 226}, {128, 230}}, //
+                        {{126, 228}, {130, 232}, {134, 236}}, //
+                        {{132, 234}, {136, 238}, {140, 242}}  //
+                    },                                        //
+                    {
+                        //
+                        {{126, 228}, {130, 232}, {134, 236}}, //
+                        {{132, 234}, {136, 238}, {140, 242}}, //
+                        {{138, 240}, {142, 244}, {146, 248}}  //
+                    },                                        //
+                    {
+                        //
+                        {{132, 234}, {136, 238}, {140, 242}}, //
+                        {{138, 240}, {142, 244}, {146, 248}}, //
+                        {{144, 246}, {148, 250}, {152, 254}}  //
+                    } //
+                } //
+            }); //
 
         std::shared_ptr<Node> myAdd_0 = Add();
         std::shared_ptr<Node> myAdd_1 = Add();
-        auto op_0 = std::static_pointer_cast<OperatorTensor>(myAdd_0 -> getOperator());
-        auto op_1 = std::static_pointer_cast<OperatorTensor>(myAdd_1 -> getOperator());
+        auto op_0 =
+            std::static_pointer_cast<OperatorTensor>(myAdd_0->getOperator());
+        auto op_1 =
+            std::static_pointer_cast<OperatorTensor>(myAdd_1->getOperator());
         op_0->associateInput(0, input_0);
         op_0->associateInput(1, input_1);
 
diff --git a/unit_tests/operator/Test_AndImpl.cpp b/unit_tests/operator/Test_AndImpl.cpp
index 053bb3ea4ed913bd388f3ae049c4d6402ad58d59..f78f719bbf66f4c2d732b1f7425cf383f58ce536 100644
--- a/unit_tests/operator/Test_AndImpl.cpp
+++ b/unit_tests/operator/Test_AndImpl.cpp
@@ -10,7 +10,7 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/And.hpp"
@@ -20,16 +20,20 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
-        SECTION("ForwardDims")
-    {
+    SECTION("ForwardDims") {
         constexpr std::uint16_t NBTRIALS = 10;
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-        std::uniform_int_distribution<int> boolDist(0,1);
+        std::uniform_real_distribution<float> valueDist(
+            0.1f,
+            1.1f); // Random float distribution between 0.1 and 1.1
+        std::uniform_int_distribution<std::size_t> dimSizeDist(
+            std::size_t(2),
+            std::size_t(10));
+        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                              std::size_t(5));
+        std::uniform_int_distribution<int> boolDist(0, 1);
 
         SECTION("Same dimensions") {
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
@@ -39,18 +43,21 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
                     dims[i] = dimSizeDist(gen);
                 }
 
-                std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput1 =
+                    std::make_shared<Tensor>(dims);
                 myInput1->setBackend("cpu");
                 myInput1->setDataType(DataType::Float32);
                 myInput1->zeros();
-                std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput2 =
+                    std::make_shared<Tensor>(dims);
                 myInput2->setBackend("cpu");
                 myInput2->setDataType(DataType::Float32);
                 myInput2->zeros();
                 std::shared_ptr<Node> myAnd = And();
-                auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
-                op->associateInput(0,myInput1);
-                op->associateInput(1,myInput2);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myAnd->getOperator());
+                op->associateInput(0, myInput1);
+                op->associateInput(1, myInput2);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
                 op->forwardDims();
@@ -73,22 +80,24 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
                     if (boolDist(gen)) {
                         dims2[i] = dim;
                     }
-                    expectedOutDims.push_back(std::max(dims1[i],dims2[i]));
+                    expectedOutDims.push_back(std::max(dims1[i], dims2[i]));
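+                    // The broadcast output dim is the larger of the two
+                    // (the smaller is either equal to it or 1).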
                 }
 
-
-                std::shared_ptr<Tensor> myInput1 = std::make_shared<Tensor>(dims1);
+                std::shared_ptr<Tensor> myInput1 =
+                    std::make_shared<Tensor>(dims1);
                 myInput1->setBackend("cpu");
                 myInput1->setDataType(DataType::Float32);
                 myInput1->zeros();
-                std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(dims2);
+                std::shared_ptr<Tensor> myInput2 =
+                    std::make_shared<Tensor>(dims2);
                 myInput2->setBackend("cpu");
                 myInput2->setDataType(DataType::Float32);
                 myInput2->zeros();
                 std::shared_ptr<Node> myAnd = And();
-                auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
-                op->associateInput(0,myInput1);
-                op->associateInput(1,myInput2);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myAnd->getOperator());
+                op->associateInput(0, myInput1);
+                op->associateInput(1, myInput2);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -100,66 +109,68 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
         }
     }
     SECTION("Same size inputs") {
-        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-        {                                       //
-            {                                   //
-                {{20, 15},{31, 11},{22, 49}},   //
-                {{41, 10},{24, 51},{27, 52}},   //
-                {{26, 53},{27, 54},{28, 55}}    //
-            },                                  //
-            {                                   //
-                {{29, 56},{30, 57},{31, 58}},   //
-                {{32, 59},{33, 60},{34, 61}},   //
-                {{35, 62},{36, 63},{37, 64}}    //
-            },                                  //
-            {                                   //
-                {{38, 65},{39, 66},{40, 67}},   //
-                {{41, 68},{42, 69},{43, 70}},   //
-                {{44, 71},{45, 72},{46, 73}}    //
-            }                                   //
-        }                                       //
-    });                                         //
-        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {                                       //
-                {                                   //
-                    {{20, 47},{21, 48},{22, 49}},   //
-                    {{23, 50},{24, 51},{25, 52}},   //
-                    {{17, 53},{27, 26},{14, 33}}    //
-                },                                  //
-                {                                   //
-                    {{29, 56},{30, 57},{31, 58}},   //
-                    {{72, 44},{33, 20},{27, 55}},   //
-                    {{35, 24},{25, 63},{28, 64}}    //
-                },                                  //
-                {                                   //
-                    {{32, 65},{39, 66},{40, 70}},   //
-                    {{41, 53},{42, 60},{34, 70}},   //
-                    {{44, 71},{30, 12},{46, 73}}    //
-                }                                   //
-            }                                       //
-        });                                         //
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {
-                {
-                    {{1, 0},{0, 0},{1, 1}},
-                    {{0, 0},{1, 1},{0, 1}},
-                    {{0, 1},{1, 0},{0, 0}}
-                },
+        std::shared_ptr<Tensor> input1 =
+            std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{
                 {
-                    {{1, 1},{1, 1},{1, 1}},
-                    {{0, 0},{1, 0},{0, 0}},
-                    {{1, 0},{0, 1},{0, 1}}
-                },
+                    //
+                    {
+                        //
+                        {{20, 15}, {31, 11}, {22, 49}}, //
+                        {{41, 10}, {24, 51}, {27, 52}}, //
+                        {{26, 53}, {27, 54}, {28, 55}}  //
+                    },                                  //
+                    {
+                        //
+                        {{29, 56}, {30, 57}, {31, 58}}, //
+                        {{32, 59}, {33, 60}, {34, 61}}, //
+                        {{35, 62}, {36, 63}, {37, 64}}  //
+                    },                                  //
+                    {
+                        //
+                        {{38, 65}, {39, 66}, {40, 67}}, //
+                        {{41, 68}, {42, 69}, {43, 70}}, //
+                        {{44, 71}, {45, 72}, {46, 73}}  //
+                    } //
+                } //
+            }); //
+        std::shared_ptr<Tensor> input2 =
+            std::make_shared<Tensor>(Array4D<int, 3, 3, 3, 2>{
                 {
-                    {{0, 1},{1, 1},{1, 0}},
-                    {{1, 0},{1, 0},{0, 1}},
-                    {{1, 1},{0, 0},{1, 1}}
-                }
-            }
-        });
+                    //
+                    {
+                        //
+                        {{20, 47}, {21, 48}, {22, 49}}, //
+                        {{23, 50}, {24, 51}, {25, 52}}, //
+                        {{17, 53}, {27, 26}, {14, 33}}  //
+                    },                                  //
+                    {
+                        //
+                        {{29, 56}, {30, 57}, {31, 58}}, //
+                        {{72, 44}, {33, 20}, {27, 55}}, //
+                        {{35, 24}, {25, 63}, {28, 64}}  //
+                    },                                  //
+                    {
+                        //
+                        {{32, 65}, {39, 66}, {40, 70}}, //
+                        {{41, 53}, {42, 60}, {34, 70}}, //
+                        {{44, 71}, {30, 12}, {46, 73}}  //
+                    } //
+                } //
+            }); //
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 3, 3, 3, 2>{{{{{1, 0}, {0, 0}, {1, 1}},
+                                       {{0, 0}, {1, 1}, {0, 1}},
+                                       {{0, 1}, {1, 0}, {0, 0}}},
+                                      {{{1, 1}, {1, 1}, {1, 1}},
+                                       {{0, 0}, {1, 0}, {0, 0}},
+                                       {{1, 0}, {0, 1}, {0, 1}}},
+                                      {{{0, 1}, {1, 1}, {1, 0}},
+                                       {{1, 0}, {1, 0}, {0, 1}},
+                                       {{1, 1}, {0, 0}, {1, 1}}}}});
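+        // The expected values are 1 exactly where the two inputs hold equal
+        // elements.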
 
         std::shared_ptr<Node> myAnd = And();
-        auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());
         op->associateInput(0, input1);
         op->associateInput(1, input2);
         op->setBackend("cpu");
@@ -170,29 +181,37 @@ TEST_CASE("[cpu/operator] And(forward)", "[And][CPU]") {
     }
 
     SECTION("Broadcasting") {
-        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-        {                                       //
-            {                                   //
-                {{10, 20},{22, 23},{20, 20}},   //
-                {{10, 15},{10, 29},{20, 20}},   //
-                {{26, 25},{33, 20},{10, 20}}    //
-            }                                   //
-        }                                       //
-        });                                     //
-
-        std::shared_ptr<Tensor> input_2 = std::make_shared<Tensor>(Array1D<int,2> {{10, 20}});  
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-            {                                   //
-                {                               //
-                    {{ 1, 1},{ 0, 0},{ 0, 1}},  //
-                    {{ 1, 0},{ 1, 0},{ 0, 1}},  //
-                    {{ 0, 0},{ 0, 1},{ 1, 1}}   //
-                }                               //
-            }                                   //
-        });                                     //
+        std::shared_ptr<Tensor> input_1 =
+            std::make_shared<Tensor>(Array4D<int, 1, 3, 3, 2>{
+                {
+                    //
+                    {
+                        //
+                        {{10, 20}, {22, 23}, {20, 20}}, //
+                        {{10, 15}, {10, 29}, {20, 20}}, //
+                        {{26, 25}, {33, 20}, {10, 20}}  //
+                    } //
+                } //
+            }); //
+
+        std::shared_ptr<Tensor> input_2 =
+            std::make_shared<Tensor>(Array1D<int, 2>{{10, 20}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array4D<int, 1, 3, 3, 2>{
+                {
+                    //
+                    {
+                        //
+                        {{1, 1}, {0, 0}, {0, 1}}, //
+                        {{1, 0}, {1, 0}, {0, 1}}, //
+                        {{0, 0}, {0, 1}, {1, 1}}  //
+                    } //
+                } //
+            }); //
 
         std::shared_ptr<Node> myAnd = And();
-        auto op = std::static_pointer_cast<OperatorTensor>(myAnd -> getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAnd->getOperator());
         op->associateInput(0, input_1);
         op->associateInput(1, input_2);
         op->setDataType(DataType::Int32);
diff --git a/unit_tests/operator/Test_ArgMaxImpl.cpp b/unit_tests/operator/Test_ArgMaxImpl.cpp
index 9915d90423e976db1bdd2a694a2cfd7beb380cee..3890e5b1fdfa63d3df05c5e791174cebbdebb211 100644
--- a/unit_tests/operator/Test_ArgMaxImpl.cpp
+++ b/unit_tests/operator/Test_ArgMaxImpl.cpp
@@ -11,8 +11,8 @@
 
 #include <catch2/catch_test_macros.hpp>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/ArgMax.hpp"
@@ -24,41 +24,48 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") {
-    SECTION("ForwardDims")
-    {
+    SECTION("ForwardDims") {
         constexpr std::uint16_t NBTRIALS = 10;
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-        std::uniform_int_distribution<int> boolDist(0,1);
+        std::uniform_real_distribution<float> valueDist(
+            0.1f,
+            1.1f); // Random float distribution between 0.1 and 1.1
+        std::uniform_int_distribution<std::size_t> dimSizeDist(
+            std::size_t(2),
+            std::size_t(10));
+        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                              std::size_t(5));
+        std::uniform_int_distribution<int> boolDist(0, 1);
 
         SECTION("KeepDims") {
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 DimSize_t nbDims = nbDimsDist(gen);
                 std::vector<DimSize_t> dims(nbDims);
                 std::vector<DimSize_t> expectedOutDims(nbDims);
-                std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1));
+                std::uniform_int_distribution<std::int32_t> axisDist(
+                    std::int32_t(0),
+                    std::int32_t(nbDims - 1));
                 std::int32_t axis = axisDist(gen);
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                     if (i == axis) {
                         expectedOutDims[i] = 1;
-                    }
-                    else {
+                    } else {
                         expectedOutDims[i] = dims[i];
                     }
                 }
 
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
                 myInput->zeros();
                 std::shared_ptr<Node> myArgMax = ArgMax(axis);
-                auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-                op->associateInput(0,myInput);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myArgMax->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
                 op->forwardDims();
@@ -72,24 +79,28 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") {
                 DimSize_t nbDims = nbDimsDist(gen);
                 std::vector<DimSize_t> dims(nbDims);
                 std::vector<DimSize_t> expectedOutDims;
-                std::uniform_int_distribution<std::int32_t> axisDist(std::int32_t(0), std::int32_t(nbDims-1));
+                std::uniform_int_distribution<std::int32_t> axisDist(
+                    std::int32_t(0),
+                    std::int32_t(nbDims - 1));
                 std::int32_t axis = axisDist(gen);
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
-                    if(i != axis) {
+                    if (i != axis) {
                         expectedOutDims.push_back(dims[i]);
                     }
                 }
-                if(expectedOutDims.empty()) {
+                if (expectedOutDims.empty()) {
                     expectedOutDims.push_back(1);
                 }
 
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
                 std::shared_ptr<Node> myArgMax = ArgMax(axis, false);
-                auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-                op->associateInput(0,myInput);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myArgMax->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -101,40 +112,22 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") {
         }
     }
     SECTION("3D Tensor") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,2,3,4> {
-                {
-                    {
-                        { 1.0, 2.0, 3.0, 4.0},
-                        { 8.0, 0.0, 17.0, 1.0},
-                        { 5.0, 10.0, 6.0, 0.0}
-                    },
-                    {
-                        { 7.0, 1.0, 9.0, 4.0},
-                        { 0.0, 8.0, 4.0, 2.0},
-                        { 9.0, 2.0, 0.0, 5.0}
-                    }
-                }
-            });
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array3D<float, 2, 3, 4>{{{{1.0, 2.0, 3.0, 4.0},
+                                      {8.0, 0.0, 17.0, 1.0},
+                                      {5.0, 10.0, 6.0, 0.0}},
+                                     {{7.0, 1.0, 9.0, 4.0},
+                                      {0.0, 8.0, 4.0, 2.0},
+                                      {9.0, 2.0, 0.0, 5.0}}}});
         SECTION("Axis 2") {
 
-            Tensor myOutput = Tensor(Array3D<float,2,3, 1> {
-               { 
-                    { 
-                        {3.0},
-                        {2.0},
-                        {1.0}
-                    },
-                    {
-                        {2.0},
-                        {1.0},
-                        {0.0}
-                    }
-               }
-            });
+            Tensor myOutput = Tensor(Array3D<float, 2, 3, 1>{
+                {{{3.0}, {2.0}, {1.0}}, {{2.0}, {1.0}, {0.0}}}});
 
             std::shared_ptr<Node> myArgMax = ArgMax(2);
-            auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myArgMax->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myArgMax->forward();
@@ -143,16 +136,13 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") {
         }
         SECTION("Axis 2 with keep_dims false") {
 
-            Tensor myOutput = Tensor(Array2D<float,2,3> {
-               { 
-                    { 3.0, 2.0, 1.0 },
-                    { 2.0, 1.0, 0.0 }
-               }
-            });
+            Tensor myOutput = Tensor(
+                Array2D<float, 2, 3>{{{3.0, 2.0, 1.0}, {2.0, 1.0, 0.0}}});
 
-            std::shared_ptr<Node> myArgMax = ArgMax(2,0);
-            auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-            op->associateInput(0,myInput);
+            std::shared_ptr<Node> myArgMax = ArgMax(2, 0);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myArgMax->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myArgMax->forward();
@@ -160,20 +150,13 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") {
             REQUIRE(*(op->getOutput(0)) == myOutput);
         }
         SECTION("Axis 1") {
-            Tensor myOutput = Tensor(Array3D<float,2,1,4> {
-                {
-                    {
-                        { 1.0, 2.0, 1.0, 0.0 }
-                    },
-                    {
-                        { 2.0, 1.0, 0.0, 2.0 }
-                    }
-                }
-            });
+            Tensor myOutput = Tensor(Array3D<float, 2, 1, 4>{
+                {{{1.0, 2.0, 1.0, 0.0}}, {{2.0, 1.0, 0.0, 2.0}}}});
 
             std::shared_ptr<Node> myArgMax = ArgMax(1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myArgMax->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myArgMax->forward();
@@ -181,47 +164,42 @@ TEST_CASE("[cpu/operator] ArgMax(forward)", "[ArgMax][CPU]") {
             REQUIRE(*(op->getOutput(0)) == myOutput);
         }
         SECTION("Axis 0") {
-            Tensor myOutput = Tensor(Array3D<float,1,3,4> {
-                {
-                    {
-                        { 1.0, 0.0, 1.0, 0.0 },
-                        { 0.0, 1.0, 0.0, 1.0 },
-                        { 1.0, 0.0, 0.0, 1.0 }
-                    }
-                }
-            });
+            Tensor myOutput =
+                Tensor(Array3D<float, 1, 3, 4>{{{{1.0, 0.0, 1.0, 0.0},
+                                                 {0.0, 1.0, 0.0, 1.0},
+                                                 {1.0, 0.0, 0.0, 1.0}}}});
 
             std::shared_ptr<Node> myArgMax = ArgMax(0);
-            auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myArgMax->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            std::cout << " ...............  "<< std::endl;
+            std::cout << " ...............  " << std::endl;
             myArgMax->forward();
             op->getOutput(0)->print();
-            std::cout <<"------"<<std::endl;
+            std::cout << "------" << std::endl;
             myOutput.print();
 
             REQUIRE(*(op->getOutput(0)) == myOutput);
         }
     }
     SECTION("Select_Last_Index") {
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array1D<float,10> {
-            {
-                1.0, 5.0, 9.0, 0.0, 6.0, 2.0, 9.0, 4.0, 3.0, 9.0
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {{9}});
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array1D<float, 10>{
+                {1.0, 5.0, 9.0, 0.0, 6.0, 2.0, 9.0, 4.0, 3.0, 9.0}});
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array1D<float, 1>{{9}});
 
         std::shared_ptr<Node> myArgMax = ArgMax(0, 1, 1);
-        auto op = std::static_pointer_cast<OperatorTensor>(myArgMax -> getOperator());
-        op->associateInput(0,myInput);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myArgMax->getOperator());
+        op->associateInput(0, myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myArgMax->forward();
         op->getOutput(0)->print();
 
         REQUIRE(*(op->getOutput(0)) == *myOutput);
-
     }
 }
\ No newline at end of file
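
A quick way to validate the expected tensors above: along the reduced axis, ArgMax returns the position of the maximum, keeping the first occurrence by default and the last one when select_last_index is set, which is why ArgMax(0, 1, 1) on {1, 5, 9, 0, 6, 2, 9, 4, 3, 9} yields 9 (the maximum 9.0 occurs at indices 2, 6 and 9). A minimal standalone sketch, assuming nothing about Aidge beyond first-index tie-breaking, that recomputes the Axis-2 expectations:

    // Recompute argmax along the last axis of the 2x3x4 test input.
    #include <cstddef>
    #include <iostream>

    int main() {
        const float in[2][3][4] = {{{1, 2, 3, 4}, {8, 0, 17, 1}, {5, 10, 6, 0}},
                                   {{7, 1, 9, 4}, {0, 8, 4, 2}, {9, 2, 0, 5}}};
        for (std::size_t n = 0; n < 2; ++n) {
            for (std::size_t r = 0; r < 3; ++r) {
                std::size_t best = 0;
                for (std::size_t c = 1; c < 4; ++c) {
                    if (in[n][r][c] > in[n][r][best]) { // strict '>' keeps the first maximum
                        best = c;
                    }
                }
                std::cout << best << ' '; // prints: 3 2 1 2 1 0
            }
        }
        std::cout << '\n';
        return 0;
    }
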
diff --git a/unit_tests/operator/Test_Atan.cpp b/unit_tests/operator/Test_Atan.cpp
index 9548e35d81b0423125424a4198d82558c4e57df4..6adfb71c68512c6ec1aac4dcb10a42ca8b054823 100644
--- a/unit_tests/operator/Test_Atan.cpp
+++ b/unit_tests/operator/Test_Atan.cpp
@@ -21,57 +21,75 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Atan(forward)") {
-  SECTION("1D Tensor") {
-    std::shared_ptr<Tensor> input0 =
-        std::make_shared<Tensor>(Array1D<float, 10>{
-            {0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199,
-             0.09514862, 0.16145366, 0.42776686, 0.43487436, 0.41170865}});
-    std::shared_ptr<Tensor> expectedOutput =
-        std::make_shared<Tensor>(Array1D<float, 10>{
-            {0.39238522, 0.40711672, 0.75322037, 0.30106049, 0.65960488,
-             0.09486303, 0.16007232, 0.40421187, 0.4102045, 0.39055911}});
+    SECTION("1D Tensor") {
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array1D<float, 10>{{0.41384590,
+                                                         0.43120754,
+                                                         0.93762982,
+                                                         0.31049860,
+                                                         0.77547199,
+                                                         0.09514862,
+                                                         0.16145366,
+                                                         0.42776686,
+                                                         0.43487436,
+                                                         0.41170865}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array1D<float, 10>{{0.39238522,
+                                                         0.40711672,
+                                                         0.75322037,
+                                                         0.30106049,
+                                                         0.65960488,
+                                                         0.09486303,
+                                                         0.16007232,
+                                                         0.40421187,
+                                                         0.4102045,
+                                                         0.39055911}});
 
-    std::shared_ptr<Node> myAtan = Atan();
-    auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
-    op->associateInput(0, input0);
-    op->setDataType(DataType::Float32);
-    op->setBackend("cpu");
-    myAtan->forward();
+        std::shared_ptr<Node> myAtan = Atan();
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
+        op->associateInput(0, input0);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+        myAtan->forward();
 
-    float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-    float* expectedPtr =
-        static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
-      REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
     }
-  }
 
-  SECTION("3D Tensor") {
-    std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
-        Array3D<float, 2, 2, 3>{{{
-                                     {0.97037154, 0.86208081, 0.77767169},
-                                     {0.38160080, 0.11422747, 0.77284443},
-                                 },
-                                 {{0.51592529, 0.72543722, 0.54641193},
-                                  {0.93866944, 0.97767913, 0.34172094}}}});
-    std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
-        Array3D<float, 2, 2, 3>{{{{0.77036231, 0.71146592, 0.66097706},
-                                  {0.36454508, 0.11373451, 0.65796196}},
-                                 {{0.47630652, 0.62759472, 0.50008428},
-                                  {0.75377332, 0.77411225, 0.32928031}}}});
+    SECTION("3D Tensor") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>{{{
+                                         {0.97037154, 0.86208081, 0.77767169},
+                                         {0.38160080, 0.11422747, 0.77284443},
+                                     },
+                                     {{0.51592529, 0.72543722, 0.54641193},
+                                      {0.93866944, 0.97767913, 0.34172094}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>{{{{0.77036231, 0.71146592, 0.66097706},
+                                      {0.36454508, 0.11373451, 0.65796196}},
+                                     {{0.47630652, 0.62759472, 0.50008428},
+                                      {0.75377332, 0.77411225, 0.32928031}}}});
 
-    std::shared_ptr<Node> myAtan = Atan();
-    auto op = std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
-    op->associateInput(0, input0);
-    op->setDataType(DataType::Float32);
-    op->setBackend("cpu");
-    myAtan->forward();
+        std::shared_ptr<Node> myAtan = Atan();
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAtan->getOperator());
+        op->associateInput(0, input0);
+        op->setDataType(DataType::Float32);
+        op->setBackend("cpu");
+        myAtan->forward();
 
-    float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-    float* expectedPtr =
-        static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
-      REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
     }
-  }
 }
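
The expected values in both sections are std::atan applied element-wise, and the comparison goes through the raw Float32 buffers with an absolute tolerance of 1e-5 rather than exact equality. A self-contained check of the first few 1D values (plain C++, numbers copied from the test):

    #include <cassert>
    #include <cmath>

    int main() {
        const float in[4]       = {0.41384590f, 0.43120754f, 0.93762982f, 0.31049860f};
        const float expected[4] = {0.39238522f, 0.40711672f, 0.75322037f, 0.30106049f};
        for (int i = 0; i < 4; ++i) {
            // same absolute tolerance the test uses
            assert(std::abs(std::atan(in[i]) - expected[i]) < 0.00001f);
        }
        return 0;
    }
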
diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp
index aaa2757830c245275d02792a7a5a2eb1db32d7b8..b6f166d6d84096d95c3d7c6e729d310bf6a39b47 100644
--- a/unit_tests/operator/Test_AvgPoolingImpl.cpp
+++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp
@@ -10,8 +10,8 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <memory>
 #include <cstdlib>
+#include <memory>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/AvgPooling.hpp"
@@ -21,57 +21,40 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
-        {
-            {
-                {{  0,   1,   2,   3,   4},
-                 {  5,   6,   7,   8,   9},
-                 { 10,  11,  12,  13,  14},
-                 { 15,  16,  17,  18,  19},
-                 { 20,  21,  22,  23,  24}},
+    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+        Array4D<float, 2, 2, 5, 5>{// NCHW
+                                   {{{{0, 1, 2, 3, 4},
+                                      {5, 6, 7, 8, 9},
+                                      {10, 11, 12, 13, 14},
+                                      {15, 16, 17, 18, 19},
+                                      {20, 21, 22, 23, 24}},
 
-                {{ 25,  26,  27,  28,  29},
-                 { 30,  31,  32,  33,  34},
-                 { 35,  36,  37,  38,  39},
-                 { 40,  41,  42,  43,  44},
-                 { 45,  46,  47,  48,  49}}
-            },
-            {
-                {{100, 101, 102, 103, 104},
-                 {105, 106, 107, 108, 109},
-                 {110, 111, 112, 113, 114},
-                 {115, 116, 117, 118, 119},
-                 {120, 121, 122, 123, 124}},
+                                     {{25, 26, 27, 28, 29},
+                                      {30, 31, 32, 33, 34},
+                                      {35, 36, 37, 38, 39},
+                                      {40, 41, 42, 43, 44},
+                                      {45, 46, 47, 48, 49}}},
+                                    {{{100, 101, 102, 103, 104},
+                                      {105, 106, 107, 108, 109},
+                                      {110, 111, 112, 113, 114},
+                                      {115, 116, 117, 118, 119},
+                                      {120, 121, 122, 123, 124}},
 
-                {{125, 126, 127, 128, 129},
-                 {130, 131, 132, 133, 134},
-                 {135, 136, 137, 138, 139},
-                 {140, 141, 142, 143, 144},
-                 {145, 146, 147, 148, 149}}
-            }
-        }
-    });
+                                     {{125, 126, 127, 128, 129},
+                                      {130, 131, 132, 133, 134},
+                                      {135, 136, 137, 138, 139},
+                                      {140, 141, 142, 143, 144},
+                                      {145, 146, 147, 148, 149}}}}});
     SECTION("Stride") {
-        std::shared_ptr<Node> myAvgPool = AvgPooling({2,2}, "mycdw", {2,2});
-        auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator());
+        std::shared_ptr<Node> myAvgPool = AvgPooling({2, 2}, "mycdw", {2, 2});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAvgPool->getOperator());
 
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
-            {
-                {
-                    {{  3,   5},
-                     { 13,  15}},
-                    {{ 28,  30},
-                     { 38,  40}}
-                },
-                {
-                    {{103, 105},
-                     {113, 115}},
-                    {{128, 130},
-                     {138, 140}}
-                }
-            }
-        });
-        op->associateInput(0,myInput);
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+                {{{{3, 5}, {13, 15}}, {{28, 30}, {38, 40}}},
+                 {{{103, 105}, {113, 115}}, {{128, 130}, {138, 140}}}}});
+        op->associateInput(0, myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myAvgPool->forward();
@@ -80,31 +63,32 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
     }
 
     SECTION("Stride >= feature dim") {
-        std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(Array4D<float,1,1,3,3> { //NCHW
-        {
-            {
-                {{0.3745, 0.9507, 0.7320},
-                 {0.5987, 0.1560, 0.1560},
-                 {0.0581, 0.8662, 0.6011}}
-            }
-        }
-        });
-        std::shared_ptr<Node> myAvgPool = AvgPooling({3,3}, "mycdw", {3,3});
-        auto op = std::static_pointer_cast<OperatorTensor>(myAvgPool -> getOperator());
+        std::shared_ptr<Tensor> myInput2 = std::make_shared<Tensor>(
+            Array4D<float, 1, 1, 3, 3>{// NCHW
+                                       {{{{0.3745, 0.9507, 0.7320},
+                                          {0.5987, 0.1560, 0.1560},
+                                          {0.0581, 0.8662, 0.6011}}}}});
+        std::shared_ptr<Node> myAvgPool = AvgPooling({3, 3}, "mycdw", {3, 3});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myAvgPool->getOperator());
 
-        Tensor myOutput = Array4D<float,1,1,1,1> {
-            {{{{(0.3745 + 0.9507 + 0.7320 + 0.5987 + 0.1560 + 0.1560 + 0.0581 + 0.8662 + 0.6011)/9.0}}}}
-        };
-        op->associateInput(0,myInput2);
+        Tensor myOutput = Array4D<float, 1, 1, 1, 1>{
+            {{{{(0.3745 + 0.9507 + 0.7320 + 0.5987 + 0.1560 + 0.1560 + 0.0581 +
+                 0.8662 + 0.6011) /
+                9.0}}}}};
+        op->associateInput(0, myInput2);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myAvgPool->forward();
         op->getOutput(0)->print();
-        float* outPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedOutPtr = static_cast<float*>(myOutput.getImpl()->rawPtr());
+        float *outPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedOutPtr =
+            static_cast<float *>(myOutput.getImpl()->rawPtr());
         for (std::size_t i = 0; i < 1; ++i) {
             REQUIRE(std::abs(outPtr[i] - expectedOutPtr[i]) < 0.00001);
         }
     }
-    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl;
+    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] <<
+    // std::endl;
 }
\ No newline at end of file
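
For the "Stride" section above: with a 2x2 kernel and a stride of 2, every output element is the mean of a non-overlapping 2x2 window, and the fifth row and column of each 5x5 input are dropped because no window fits there. A standalone sketch reproducing the first channel of the expected output ({3, 5, 13, 15}):

    #include <iostream>

    int main() {
        float in[5][5]; // the 0..24 ramp used as channel 0 of the test input
        for (int r = 0; r < 5; ++r)
            for (int c = 0; c < 5; ++c)
                in[r][c] = static_cast<float>(5 * r + c);
        for (int r = 0; r + 2 <= 5; r += 2) {
            for (int c = 0; c + 2 <= 5; c += 2) {
                const float mean =
                    (in[r][c] + in[r][c + 1] + in[r + 1][c] + in[r + 1][c + 1]) / 4.0f;
                std::cout << mean << ' '; // prints: 3 5 13 15
            }
        }
        std::cout << '\n';
        return 0;
    }
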
diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp
index 1b42c90dd09d63cd319f19bd29751da816db06c0..2969faadc2391b48cbc93d01d552be612bbfbe66 100644
--- a/unit_tests/operator/Test_BatchNormImpl.cpp
+++ b/unit_tests/operator/Test_BatchNormImpl.cpp
@@ -21,78 +21,75 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] BatchNorm(forward)", "[BatchNorm][CPU]") {
-    std::shared_ptr<Node> myBatchNorm = BatchNorm<2>(3, 0.00001F, 0.1F, "mybatchnorm");
-    auto op = std::static_pointer_cast<OperatorTensor>(myBatchNorm -> getOperator());
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array1D<float,3> {{0.9044, 0.3028, 0.0218}});
-    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,3> {{0.1332, 0.7503, 0.0878}});
-    std::shared_ptr<Tensor> myMean = std::make_shared<Tensor>(Array1D<float,3> {{0.9931, 0.8421, 0.9936}});
-    std::shared_ptr<Tensor> myVar = std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}});
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> { //NCHW
-        {
-                {
-                    {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
-                     {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
-                     {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
-                    {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
-                     {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
-                     {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
-                    {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
-                     {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
-                     {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
-                },
-                {
-                    {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
-                     {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
-                     {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
-                    {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
-                     {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
-                     {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
-                    {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
-                     {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
-                     {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
-                }
-            }
-    });
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-        {
-            {
-                {{-0.08978321, -0.12890550, -0.21362889},
-                 {-0.88994324, -0.04425725,  0.13315639},
-                 {-0.98898154, -1.08899629, -0.80904692}},
-                {{ 0.41042271,  0.61188596,  0.40120730},
-                 { 0.36147383,  0.67813843,  0.28971246},
-                 { 0.68446606,  0.52936459,  0.56799078}},
-                {{ 0.07320327,  0.06596386,  0.07178652},
-                 { 0.08298140,  0.08225026,  0.07502592},
-                 { 0.08618324,  0.08781575,  0.06206840}}
-            },
-            {
-                {{-0.36870885, -1.17875028, -0.37389761},
-                 { 0.08613246, -1.18157220, -0.53974909},
-                 {-0.87087554, -0.60028774, -0.69565099}},
-                {{ 0.34390146,  0.56648612,  0.55713004},
-                 { 0.35095227,  0.70767546,  0.70558763},
-                 { 0.30519596,  0.52465916,  0.35959685}},
-                {{ 0.08685592,  0.08336888,  0.06698728},
-                 { 0.08673952,  0.07850984,  0.06349554},
-                 { 0.06723238,  0.07242157,  0.08574481}}
-            }
-        }
-    });
-    op->associateInput(0,myInput);
-    op->associateInput(1,myWeights);
-    op->associateInput(2,myBias);
-    op->associateInput(3,myMean);
-    op->associateInput(4,myVar);
+    std::shared_ptr<Node> myBatchNorm =
+        BatchNorm<2>(3, 0.00001F, 0.1F, "mybatchnorm");
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(myBatchNorm->getOperator());
+    std::shared_ptr<Tensor> myWeights =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.9044, 0.3028, 0.0218}});
+    std::shared_ptr<Tensor> myBias =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.1332, 0.7503, 0.0878}});
+    std::shared_ptr<Tensor> myMean =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.9931, 0.8421, 0.9936}});
+    std::shared_ptr<Tensor> myVar =
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.4470, 0.3064, 0.7061}});
+    std::shared_ptr<Tensor> myInput =
+        std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+            // NCHW
+            {{{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
+               {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
+               {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
+              {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
+               {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
+               {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
+              {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
+               {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
+               {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}},
+             {{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
+               {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
+               {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
+              {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
+               {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
+               {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
+              {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
+               {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
+               {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}}}});
+    std::shared_ptr<Tensor> myOutput =
+        std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+            {{{{-0.08978321, -0.12890550, -0.21362889},
+               {-0.88994324, -0.04425725, 0.13315639},
+               {-0.98898154, -1.08899629, -0.80904692}},
+              {{0.41042271, 0.61188596, 0.40120730},
+               {0.36147383, 0.67813843, 0.28971246},
+               {0.68446606, 0.52936459, 0.56799078}},
+              {{0.07320327, 0.06596386, 0.07178652},
+               {0.08298140, 0.08225026, 0.07502592},
+               {0.08618324, 0.08781575, 0.06206840}}},
+             {{{-0.36870885, -1.17875028, -0.37389761},
+               {0.08613246, -1.18157220, -0.53974909},
+               {-0.87087554, -0.60028774, -0.69565099}},
+              {{0.34390146, 0.56648612, 0.55713004},
+               {0.35095227, 0.70767546, 0.70558763},
+               {0.30519596, 0.52465916, 0.35959685}},
+              {{0.08685592, 0.08336888, 0.06698728},
+               {0.08673952, 0.07850984, 0.06349554},
+               {0.06723238, 0.07242157, 0.08574481}}}}});
+    op->associateInput(0, myInput);
+    op->associateInput(1, myWeights);
+    op->associateInput(2, myBias);
+    op->associateInput(3, myMean);
+    op->associateInput(4, myVar);
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
     myBatchNorm->forward();
 
-    float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-    float* expectedPtr = static_cast<float*>(myOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i< 54; ++i) {
-        REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+    float *resPtr =
+        static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+    float *expectedPtr = static_cast<float *>(myOutput->getImpl()->rawPtr());
+    for (std::size_t i = 0; i < 54; ++i) {
+        REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
     }
 
-    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] << std::endl;
+    // std::cout << static_cast<Tensor>((*op)["weight"])[0][0][0][0] <<
+    // std::endl;
 }
\ No newline at end of file
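
The expected tensor follows the standard BatchNorm inference formula, y = scale * (x - mean) / sqrt(var + eps) + bias, applied per channel with the epsilon of 1e-5 passed to the constructor. Checking the first output element by hand, using the channel-0 parameters from the test:

    #include <cassert>
    #include <cmath>

    int main() {
        const float x = 8.28257084e-01f;             // myInput[0][0][0][0]
        const float scale = 0.9044f, bias = 0.1332f; // channel 0 of myWeights / myBias
        const float mean = 0.9931f, var = 0.4470f;   // channel 0 of myMean / myVar
        const float eps = 0.00001f;
        const float y = scale * (x - mean) / std::sqrt(var + eps) + bias;
        assert(std::abs(y - (-0.08978321f)) < 0.00001f); // myOutput[0][0][0][0]
        return 0;
    }
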
diff --git a/unit_tests/operator/Test_BitShift.cpp b/unit_tests/operator/Test_BitShift.cpp
index a52990bc7991a325ce151cf6634b0d5a831992c8..cf6d1b4e593b423ecbba1e55189b6a7e1232a392 100644
--- a/unit_tests/operator/Test_BitShift.cpp
+++ b/unit_tests/operator/Test_BitShift.cpp
@@ -9,18 +9,18 @@
  *
  ********************************************************************************/
 
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/BitShift.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
+#include <iomanip>
 #include <iostream>
 #include <memory>
-#include <numeric>   
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
-#include <iomanip>
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/BitShift.hpp"
-#include "aidge/utils/TensorUtils.hpp"
+#include <numeric>
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 namespace Aidge {
 
@@ -29,31 +29,34 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_int_distribution<int> valueDist(-15, 15); 
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_int_distribution<int> valueDist(-15, 15);
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(5));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(3));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
-    BitShift_Op::BitShiftDirection direction = BitShift_Op::BitShiftDirection::left;
+    BitShift_Op::BitShiftDirection direction =
+        BitShift_Op::BitShiftDirection::left;
 
-    if(valueDist(gen) % 2 == 0)
-    {
+    if (valueDist(gen) % 2 == 0) {
         direction = BitShift_Op::BitShiftDirection::right;
     }
 
     // Create BitShift Operator
     std::shared_ptr<Node> myBitShift = BitShift(direction);
-    auto op = std::static_pointer_cast<OperatorTensor>(myBitShift-> getOperator());
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(myBitShift->getOperator());
     op->setDataType(DataType::Int32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Int32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Int32);
     T1->setBackend("cpu");
 
@@ -62,7 +65,8 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
     Tres->setDataType(DataType::Int32);
     Tres->setBackend("cpu");
 
-    // To measure execution time of 'BitShift_Op::forward()' member function call
+    // To measure execution time of 'BitShift_Op::forward()' member function
+    // call
     std::chrono::time_point<std::chrono::system_clock> start;
 
     std::chrono::time_point<std::chrono::system_clock> end;
@@ -79,44 +83,48 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
                 for (std::size_t i = 0; i < nbDims; ++i) {
                     dims.push_back(dimSizeDist(gen));
                 }
-                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dims.cbegin(),
+                                    dims.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
 
                 // without broadcasting
-                int* array0 = new int[nb_elements];
-                int* array1 = new int[nb_elements];
-                int* result = new int[nb_elements];
+                int *array0 = new int[nb_elements];
+                int *array1 = new int[nb_elements];
+                int *result = new int[nb_elements];
 
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
-                    array1[i] = std::abs(valueDist(gen)); // bitshift is impossible with negative value
-                    if(direction == BitShift_Op::BitShiftDirection::left)
-                    {
+                    array1[i] = std::abs(valueDist(
+                        gen)); // shifting by a negative amount is undefined
+                    if (direction == BitShift_Op::BitShiftDirection::left) {
                         result[i] = array0[i] << array1[i];
-                    }
-                    else
-                    {
+                    } else {
                         result[i] = array0[i] >> array1[i];
                     }
                 }
 
                 // input0
                 T0->resize(dims);
-                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+                T0->getImpl()->setRawPtr(array0, nb_elements);
 
                 // input1
                 T1->resize(dims);
-                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+                T1->getImpl()->setRawPtr(array1, nb_elements);
 
                 // results
                 Tres->resize(dims);
-                Tres -> getImpl() -> setRawPtr(result, nb_elements);
+                Tres->getImpl()->setRawPtr(result, nb_elements);
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myBitShift->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 bool is_eq = approxEq<int>(*(op->getOutput(0)), *Tres);
 
@@ -128,18 +136,19 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
                 delete[] array0;
                 delete[] array1;
                 delete[] result;
-
-
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
         SECTION("Test BitShift kernels with Broadcasting") {
             std::size_t number_of_operation = 0;
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
-                // handle dimensions, replace some dimensions with '1' to get broadcasting
+                // handle dimensions, replace some dimensions with '1' to get
+                // broadcasting
                 constexpr std::size_t nbDims = 4;
                 std::vector<std::size_t> dims;
                 for (std::size_t i = 0; i < nbDims; ++i) {
@@ -159,42 +168,63 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
                 }
 
                 // create arrays and fill them with random values
-                int* array0 = new int[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                int* array1 = new int[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-                int* result = new int[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
+                int *array0 =
+                    new int[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                int *array1 =
+                    new int[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+                int *result =
+                    new int[dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
-                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
+                for (std::size_t i = 0;
+                     i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+                     ++i) {
                     array1[i] = std::abs(valueDist(gen));
                 }
 
-                //True result with broadcast
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+                // True result with broadcast
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1[1] * dims1[2] * dims1[3],
+                    dims1[2] * dims1[3],
+                    dims1[3],
+                    1};
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                                    + ((dims1[3] > 1) ? d : 0);
-                                if(direction == BitShift_Op::BitShiftDirection::left)
-                                {
-                                    result[idx_out + d] = array0[idx0] << array1[idx1];
-                                }
-                                else
-                                {
-                                    result[idx_out + d] = array0[idx0] >> array1[idx1];                               
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                                    ((dims1[3] > 1) ? d : 0);
+                                if (direction ==
+                                    BitShift_Op::BitShiftDirection::left) {
+                                    result[idx_out + d] = array0[idx0]
+                                                          << array1[idx1];
+                                } else {
+                                    result[idx_out + d] =
+                                        array0[idx0] >> array1[idx1];
                                 }
                             }
                         }
@@ -204,27 +234,34 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+                T1->getImpl()->setRawPtr(
+                    array1,
+                    dims1[0] * dims1[1] * dims1[2] * dims1[3]);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myBitShift->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 bool equiv = (approxEq<int>(*(op->getOutput(0)), *Tres));
-                if(equiv == false)
-                {
+                if (equiv == false) {
                     std::cout << "Problem\n";
                 }
                 REQUIRE(equiv);
@@ -233,13 +270,18 @@ TEST_CASE("[cpu/operator] BitShift_TEST", "[BitShift][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
-
-}
+    }
-} // namespace Aidge
+}
-}
\ No newline at end of file
+} // namespace Aidge
\ No newline at end of file
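
The broadcasting reference above maps each output coordinate (a, b, c, d) back into the two inputs by using each input's own strides and forcing the index to 0 on any size-1 dimension, which is the usual NumPy-style broadcast rule. A condensed sketch of that index computation (illustrative only, not the Aidge kernel):

    #include <cstddef>
    #include <vector>

    // Flattened index of output coordinate `coord` inside an input of shape
    // `dims`, where dims[i] == 1 means "broadcast along axis i".
    std::size_t broadcastIndex(const std::vector<std::size_t> &dims,
                               const std::vector<std::size_t> &coord) {
        std::size_t idx = 0;
        std::size_t stride = 1;
        for (std::size_t i = dims.size(); i-- > 0;) {
            idx += stride * ((dims[i] > 1) ? coord[i] : 0);
            stride *= dims[i];
        }
        return idx;
    }

    int main() {
        // an input of shape {1, 3} broadcast over a {2, 3} output:
        // the row coordinate is ignored, only the column survives
        const std::vector<std::size_t> dims{1, 3};
        return broadcastIndex(dims, {1, 2}) == 2 ? 0 : 1;
    }
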
diff --git a/unit_tests/operator/Test_ClipImpl.cpp b/unit_tests/operator/Test_ClipImpl.cpp
index 45c8da5bf7ecc84fad6b3e694fe204540f579af3..ef8f10da06459a324c1b557d41f6062c0528f80a 100644
--- a/unit_tests/operator/Test_ClipImpl.cpp
+++ b/unit_tests/operator/Test_ClipImpl.cpp
@@ -9,24 +9,27 @@
  *
  ********************************************************************************/
 
+#include <algorithm>
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
 #include <chrono>
-#include <iostream>
-#include <vector>
-#include <algorithm>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iomanip>
+#include <iostream>
 #include <memory>
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <vector>
 
+#include "aidge/backend/cpu.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Clip.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
-#include "aidge/backend/cpu.hpp"
 
-void ComputeClipBackward(const std::vector<float>& vec1, std::vector<float>& vec2, float min, float max) {
+void ComputeClipBackward(const std::vector<float> &vec1,
+                         std::vector<float> &vec2,
+                         float min,
+                         float max) {
     if (vec1.size() != vec2.size()) {
         std::cerr << "Vectors should have the same sizes." << std::endl;
         return;
@@ -38,23 +41,21 @@ void ComputeClipBackward(const std::vector<float>& vec1, std::vector<float>& vec
         }
     }
 }
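
ComputeClipBackward encodes the usual clip gradient: the upstream gradient passes through wherever the input fell inside the clipping range and is zeroed wherever it was clipped. A self-contained restatement of that rule (a sketch; the helper's exact behaviour at the min/max boundaries is not visible in this hunk):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
        const std::vector<float> x{-1.0f, 0.5f, 2.0f}; // inputs
        std::vector<float> g{0.3f, 0.3f, 0.3f};        // upstream gradient
        const float min = 0.0f, max = 1.0f;
        for (std::size_t i = 0; i < x.size(); ++i) {
            if (x[i] < min || x[i] > max) {
                g[i] = 0.0f; // clipped element: no gradient flows back
            }
        }
        assert(g[0] == 0.0f && g[1] == 0.3f && g[2] == 0.0f);
        return 0;
    }
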
-namespace Aidge 
-{
-TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
- {
+namespace Aidge {
+TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]") {
     const std::uint16_t NBTRIALS = 10;
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
     std::uniform_real_distribution<float> dis(0.0, 10.0);
-    std::uniform_real_distribution<float> dismin(0.0, 4.5); 
-    std::uniform_real_distribution<float> dismax(5.5, 10.0); 
-    std::uniform_int_distribution<std::size_t> distDims(5,15);
+    std::uniform_real_distribution<float> dismin(0.0, 4.5);
+    std::uniform_real_distribution<float> dismax(5.5, 10.0);
+    std::uniform_int_distribution<std::size_t> distDims(5, 15);
     std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);
 
     // Create Clip Operator
     std::shared_ptr<Node> myClip = Aidge::Clip("nop");
-    auto op = std::static_pointer_cast<OperatorTensor>(myClip -> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myClip->getOperator());
 
     // To measure execution time of 'Clip_Op::forward()' member function call
     std::chrono::time_point<std::chrono::system_clock> start;
@@ -67,41 +68,44 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             // generate Tensors dimensions
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
-            totalComputation += dim0*dim1;
+            totalComputation += dim0 * dim1;
 
             // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
+            float *Array = new float[dim0 * dim1];
+            for (int i = 0; i < dim0 * dim1; ++i) {
                 Array[i] = dis(gen); // Generate random float value
             }
 
             // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
-            
+            std::shared_ptr<Tensor> TInput =
+                std::make_shared<Tensor>(DataType::Float32);
+            TInput->resize({dim0, dim1});
+            TInput->setBackend("cpu");
+            TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
+
             float min = dismin(gen);
-            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
-            Tmin -> resize({});
-            Tmin -> setBackend("cpu");
-            Tmin -> getImpl() -> setRawPtr(&min,1);
+            std::shared_ptr<Tensor> Tmin =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tmin->resize({});
+            Tmin->setBackend("cpu");
+            Tmin->getImpl()->setRawPtr(&min, 1);
 
             float max = dismax(gen);
-            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
-            Tmax -> resize({});
-            Tmax -> setBackend("cpu");
-            Tmax -> getImpl() -> setRawPtr(&max,1);
+            std::shared_ptr<Tensor> Tmax =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tmax->resize({});
+            Tmax->setBackend("cpu");
+            Tmax->getImpl()->setRawPtr(&max, 1);
             // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
+            std::vector<float> GT(Array, Array + (dim0 * dim1));
+            for (float &val : GT) {
                 val = std::max(min, std::min(val, max));
             }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dim0, dim1});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
 
             op->associateInput(0, TInput);
             op->associateInput(1, Tmin);
@@ -109,59 +113,65 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             op->forwardDims(true);
-            
+
             start = std::chrono::system_clock::now();
             myClip->forward();
             end = std::chrono::system_clock::now();
 
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "elements over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
-    } 
+    }
     SECTION("Clip test with min >= max [Forward]") {
         std::size_t totalComputation = 0;
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             // generate Tensors dimensions
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
-            totalComputation += dim0*dim1;
+            totalComputation += dim0 * dim1;
 
             // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
+            float *Array = new float[dim0 * dim1];
+            for (int i = 0; i < dim0 * dim1; ++i) {
                 Array[i] = dis(gen); // Generate random float value
             }
 
             // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
-            
+            std::shared_ptr<Tensor> TInput =
+                std::make_shared<Tensor>(DataType::Float32);
+            TInput->resize({dim0, dim1});
+            TInput->setBackend("cpu");
+            TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
+
             float min = dismax(gen);
-            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
-            Tmin -> resize({});
-            Tmin -> setBackend("cpu");
-            Tmin -> getImpl() -> setRawPtr(&min,1);
-
-            float max = dismin(gen); //We generate max and min so that max is always <= min
-            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
-            Tmax -> resize({});
-            Tmax -> setBackend("cpu");
-            Tmax -> getImpl() -> setRawPtr(&max,1);
+            std::shared_ptr<Tensor> Tmin =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tmin->resize({});
+            Tmin->setBackend("cpu");
+            Tmin->getImpl()->setRawPtr(&min, 1);
+
+            float max = dismin(
+                gen); // We generate max and min so that max is always <= min
+            std::shared_ptr<Tensor> Tmax =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tmax->resize({});
+            Tmax->setBackend("cpu");
+            Tmax->getImpl()->setRawPtr(&max, 1);
             // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
+            std::vector<float> GT(Array, Array + (dim0 * dim1));
+            for (float &val : GT) {
                 val = max;
             }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dim0, dim1});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
 
             op->associateInput(0, TInput);
             op->associateInput(1, Tmin);
@@ -169,56 +179,57 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             op->forwardDims(true);
-            
+
             start = std::chrono::system_clock::now();
             myClip->forward();
             end = std::chrono::system_clock::now();
 
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "elements over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
-    } 
-    SECTION("Clip with Clip Attr [Forward]")
-    {
+    }
+    SECTION("Clip with Clip Attr [Forward]") {
         std::size_t totalComputation = 0;
-        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) 
-        {
+        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
 
             float min = dismin(gen);
             float max = dismax(gen);
-            std::shared_ptr<Node> myCl = Aidge::Clip("",min,max);
-            auto op = std::static_pointer_cast<OperatorTensor>(myCl -> getOperator());
-
+            std::shared_ptr<Node> myCl = Aidge::Clip("", min, max);
+            auto op =
+                std::static_pointer_cast<OperatorTensor>(myCl->getOperator());
 
             // generate Tensors dimensions
             const std::size_t dim0 = 3;
             const std::size_t dim1 = 3;
-            totalComputation += dim0*dim1;
+            totalComputation += dim0 * dim1;
 
             // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
+            float *Array = new float[dim0 * dim1];
+            for (int i = 0; i < dim0 * dim1; ++i) {
                 Array[i] = dis(gen); // Generate random float value
             }
             // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
+            std::shared_ptr<Tensor> TInput =
+                std::make_shared<Tensor>(DataType::Float32);
+            TInput->resize({dim0, dim1});
+            TInput->setBackend("cpu");
+            TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
 
             // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
+            std::vector<float> GT(Array, Array + (dim0 * dim1));
+            for (float &val : GT) {
                 val = std::max(min, std::min(val, max));
             }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dim0, dim1});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
             op->associateInput(0, TInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
@@ -227,11 +238,13 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
             myCl->forward();
             end = std::chrono::system_clock::now();
 
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "elements over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
     SECTION("Simple clip test [Backward]") {
@@ -239,80 +252,90 @@ TEST_CASE("[cpu/operator] Clip", "[Clip][CPU]")
         duration = std::chrono::duration<double, std::micro>::zero();
         for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
             std::size_t totalComputation = 0;
-        for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
-            // generate Tensors dimensions
-            const std::size_t dim0 = distDims(gen);
-            const std::size_t dim1 = distDims(gen);
-  
-            totalComputation += dim0*dim1;
-
-            // Create and populate the array with random float values
-            float* Array = new float[dim0*dim1];
-            float* gradArray = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
-                Array[i] = dis(gen); // Generate random float value
-                gradArray[i] = dis(gen);
+            for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
+                // generate Tensors dimensions
+                const std::size_t dim0 = distDims(gen);
+                const std::size_t dim1 = distDims(gen);
+
+                totalComputation += dim0 * dim1;
+
+                // Create and populate the array with random float values
+                float *Array = new float[dim0 * dim1];
+                float *gradArray = new float[dim0 * dim1];
+                for (int i = 0; i < dim0 * dim1; ++i) {
+                    Array[i] = dis(gen); // Generate random float value
+                    gradArray[i] = dis(gen);
+                }
+
+                std::shared_ptr<Tensor> TGrad =
+                    std::make_shared<Tensor>(DataType::Float32);
+                TGrad->resize({dim0, dim1});
+                TGrad->setBackend("cpu");
+                TGrad->getImpl()->setRawPtr(gradArray, dim0 * dim1);
+
+                // Convert Input to Tensor
+                std::shared_ptr<Tensor> TInput =
+                    std::make_shared<Tensor>(DataType::Float32);
+                TInput->resize({dim0, dim1});
+                TInput->setBackend("cpu");
+                TInput->getImpl()->setRawPtr(Array, dim0 * dim1);
+
+                float min = dismin(gen);
+                std::shared_ptr<Tensor> Tmin =
+                    std::make_shared<Tensor>(DataType::Float32);
+                Tmin->resize({});
+                Tmin->setBackend("cpu");
+                Tmin->getImpl()->setRawPtr(&min, 1);
+
+                float max = dismax(gen);
+                std::shared_ptr<Tensor> Tmax =
+                    std::make_shared<Tensor>(DataType::Float32);
+                Tmax->resize({});
+                Tmax->setBackend("cpu");
+                Tmax->getImpl()->setRawPtr(&max, 1);
+                // convert res to Tensor
+                std::vector<float> GT(Array, Array + (dim0 * dim1));
+                for (float &val : GT) {
+                    val = std::max(min, std::min(val, max)); // Clip operation
+                }
+                std::shared_ptr<Tensor> Tres =
+                    std::make_shared<Tensor>(DataType::Float32);
+                Tres->resize({dim0, dim1});
+                Tres->setBackend("cpu");
+                Tres->getImpl()->setRawPtr(GT.data(), dim0 * dim1);
+
+                op->associateInput(0, TInput);
+                op->associateInput(1, Tmin);
+                op->associateInput(2, Tmax);
+                op->setDataType(DataType::Float32);
+                op->setBackend("cpu");
+                op->forwardDims(true);
+                myClip->forward();
+
+                op->getOutput(0)->setGrad(TGrad);
+
+                start = std::chrono::system_clock::now();
+                REQUIRE_NOTHROW(myClip->backward());
+                end = std::chrono::system_clock::now();
+
+                auto GradTensor = op->getInput(0)->grad();
+                float *BackwardTensor =
+                    static_cast<float *>(GradTensor->getImpl()->rawPtr());
+                std::vector<float> GT0(Array, Array + (dim0 * dim1));
+                std::vector<float> GT1(gradArray, gradArray + (dim0 * dim1));
+                std::vector<float> BackwardTensorVec(
+                    BackwardTensor,
+                    BackwardTensor + (dim0 * dim1));
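+                // Reference gradient, computed in place on GT1: the incoming
+                // gradient passes through where min <= x <= max and is
+                // zeroed elsewhere (the usual clip derivative)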
+                ComputeClipBackward(GT0, GT1, min, max);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
+                REQUIRE(GT1 == BackwardTensorVec);
             }
-
-            std::shared_ptr<Tensor> TGrad = std::make_shared<Tensor>(DataType::Float32);
-            TGrad -> resize({dim0,dim1});
-            TGrad -> setBackend("cpu");
-            TGrad -> getImpl() -> setRawPtr(gradArray, dim0*dim1);
-
-            // Convert Input to Tensor
-            std::shared_ptr<Tensor> TInput = std::make_shared<Tensor>(DataType::Float32);
-            TInput -> resize({dim0,dim1});
-            TInput -> setBackend("cpu");
-            TInput -> getImpl() -> setRawPtr(Array, dim0*dim1);
-            
-            float min = dismin(gen);
-            std::shared_ptr<Tensor> Tmin = std::make_shared<Tensor>(DataType::Float32);
-            Tmin -> resize({});
-            Tmin -> setBackend("cpu");
-            Tmin -> getImpl() -> setRawPtr(&min,1);
-
-            float max = dismax(gen);
-            std::shared_ptr<Tensor> Tmax = std::make_shared<Tensor>(DataType::Float32);
-            Tmax -> resize({});
-            Tmax -> setBackend("cpu");
-            Tmax -> getImpl() -> setRawPtr(&max,1);
-            // convert res to Tensor
-            std::vector<float> GT(Array, Array + (dim0*dim1));
-            for (float& val : GT)
-            {
-                val = std::max(min, std::min(val, max));//Clip operation
-            }
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim1});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(GT.data(), dim0*dim1);
-
-            op->associateInput(0, TInput);
-            op->associateInput(1, Tmin);
-            op->associateInput(2, Tmax);
-            op->setDataType(DataType::Float32);
-            op->setBackend("cpu");
-            op->forwardDims(true);
-            myClip->forward();
-
-            op->getOutput(0)->setGrad(TGrad);
-            
-            start = std::chrono::system_clock::now();
-            REQUIRE_NOTHROW(myClip->backward());
-            end = std::chrono::system_clock::now();
-
-            auto GradTensor = op->getInput(0)->grad();
-            float* BackwardTensor = (float*)GradTensor->getImpl()->rawPtr();
-            std::vector<float> GT0(Array,Array+(dim0*dim1));
-            std::vector<float> GT1(gradArray,gradArray+(dim0*dim1));
-            std::vector<float> BackwardTensorVec(BackwardTensor,BackwardTensor+(dim0*dim1));
-            ComputeClipBackward(GT0,GT1,min,max);
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-            REQUIRE(GT1 == BackwardTensorVec);
+            std::cout << "multiplications over time spent: "
+                      << totalComputation / duration.count() << std::endl;
+            std::cout << "total time: " << duration.count() << std::endl;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
-        std::cout << "total time: " << duration.count() << std::endl;
     }
- }
-} // namespace Aidge 
-}
\ No newline at end of file
+}
+} // namespace Aidge
\ No newline at end of file
diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
index 42505d385fde7e72e09531f1607287ffc6978f75..5c3a6a0d7e054dd4e6d694e6f2554c05475986a5 100644
--- a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
+++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
@@ -38,83 +38,90 @@
 
 namespace Aidge {
 TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
-  constexpr std::uint16_t NBTRIALS = 10;
-  // Create a random number generator
-  auto random_seed = Catch::Generators::Detail::getSeed;
-  std::mt19937 gen(random_seed());
-  std::uniform_real_distribution<float> valueDist(
-      0.1f, 1.1f); // Random float distribution between 0 and 1
-  std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
-      std::size_t(1), std::size_t(10));
-  std::uniform_int_distribution<int64_t> input_tensor_values_dist(
-      std::size_t(1), std::size_t(7));
-  std::uniform_real_distribution<double> operator_attr_value_dist(-100., 100.);
+    constexpr std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    auto random_seed = Catch::Generators::Detail::getSeed;
+    std::mt19937 gen(random_seed());
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<DimSize_t> input_tensor_size_dist(
+        std::size_t(1),
+        std::size_t(10));
+    std::uniform_int_distribution<int64_t> input_tensor_values_dist(
+        std::size_t(1),
+        std::size_t(7));
+    std::uniform_real_distribution<double> operator_attr_value_dist(-100.,
+                                                                    100.);
 
-  ///////////////////////////////////////////////
-  // SETUP FUNCTIONS
-  auto generate_input_tensor =
-      [&gen, &input_tensor_size_dist,
-       &input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
-    std::vector<DimSize_t> input_dims;
-    input_dims.push_back(input_tensor_size_dist(gen));
+    ///////////////////////////////////////////////
+    // SETUP FUNCTIONS
+    auto generate_input_tensor =
+        [&gen,
+         &input_tensor_size_dist,
+         &input_tensor_values_dist]() -> std::shared_ptr<Tensor> {
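+        // ConstantOfShape reads its input as a 1-D Int64 tensor whose
+        // values give the dims of the tensor to create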
+        std::vector<DimSize_t> input_dims;
+        input_dims.push_back(input_tensor_size_dist(gen));
 
-    auto result = std::make_shared<Tensor>(input_dims);
-    result->setDataType(DataType::Int64);
-    result->setBackend("cpu");
-    for (DimSize_t i = 0; i < result->size(); ++i) {
-      result->set<int64_t>(i, input_tensor_values_dist(gen));
-    }
-    return result;
-  };
+        auto result = std::make_shared<Tensor>(input_dims);
+        result->setDataType(DataType::Int64);
+        result->setBackend("cpu");
+        for (DimSize_t i = 0; i < result->size(); ++i) {
+            result->set<int64_t>(i, input_tensor_values_dist(gen));
+        }
+        return result;
+    };
 
-  auto generate_random_operator =
-      [&gen,
-       &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
-    auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
-    auto op = std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
-    op->setDataType(DataType::Float64);
-    op->setBackend("cpu");
-    return op;
-  };
+    auto generate_random_operator =
+        [&gen,
+         &operator_attr_value_dist]() -> std::shared_ptr<ConstantOfShape_Op> {
+        auto node = ConstantOfShape(Tensor(operator_attr_value_dist(gen)));
+        auto op =
+            std::static_pointer_cast<ConstantOfShape_Op>(node->getOperator());
+        op->setDataType(DataType::Float64);
+        op->setBackend("cpu");
+        return op;
+    };
 
-  auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
-                                   std::shared_ptr<ConstantOfShape_Op> op) {
-    std::vector<DimSize_t> output_dims;
-    output_dims.reserve(input_tensor->size());
-    for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
-      output_dims.push_back(input_tensor->get<int64_t>(i));
-    }
-    auto result = std::make_shared<Tensor>(output_dims);
-    result->setDataType(op->value().dataType());
-    result->setBackend("cpu");
-    constantFiller(result, op->value().get<double>(0));
-    return result;
-  };
+    auto generate_output_tensor = [](std::shared_ptr<Tensor> input_tensor,
+                                     std::shared_ptr<ConstantOfShape_Op> op) {
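+        // Expected result: a tensor whose dims are the input's values,
+        // filled with the operator's constant value attribute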
+        std::vector<DimSize_t> output_dims;
+        output_dims.reserve(input_tensor->size());
+        for (DimSize_t i = 0; i < input_tensor->size(); ++i) {
+            output_dims.push_back(input_tensor->get<int64_t>(i));
+        }
+        auto result = std::make_shared<Tensor>(output_dims);
+        result->setDataType(op->value().dataType());
+        result->setBackend("cpu");
+        constantFiller(result, op->value().get<double>(0));
+        return result;
+    };
 
-  /////////////////////////////////////
-  // BENCHMARKING
-  std::chrono::time_point<std::chrono::system_clock> start;
-  std::chrono::time_point<std::chrono::system_clock> end;
-  std::chrono::duration<double, std::micro> duration{};
-  int number_of_operation{0};
+    /////////////////////////////////////
+    // BENCHMARKING
+    std::chrono::time_point<std::chrono::system_clock> start;
+    std::chrono::time_point<std::chrono::system_clock> end;
+    std::chrono::duration<double, std::micro> duration{};
+    int number_of_operation{0};
 
-  SECTION("ConstantOfShapeImpl_cpu::forward()") {
-    for (int i = 0; i < NBTRIALS; ++i) {
-      auto input_T = generate_input_tensor();
-      std::shared_ptr<ConstantOfShape_Op> op = generate_random_operator();
-      auto output_T = generate_output_tensor(input_T, op);
-      op->associateInput(0, input_T);
+    SECTION("ConstantOfShapeImpl_cpu::forward()") {
+        for (int i = 0; i < NBTRIALS; ++i) {
+            auto input_T = generate_input_tensor();
+            std::shared_ptr<ConstantOfShape_Op> op =
+                generate_random_operator();
+            auto output_T = generate_output_tensor(input_T, op);
+            op->associateInput(0, input_T);
 
-      REQUIRE(op->forwardDims(true));
-      REQUIRE_NOTHROW(op->forward());
+            REQUIRE(op->forwardDims(true));
+            REQUIRE_NOTHROW(op->forward());
 
-      CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
-      for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
-        CHECK(output_T->dims().at(i) == op->getOutput(0)->dims().at(i));
-      }
-      CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
+            CHECK(output_T->nbDims() == op->getOutput(0)->nbDims());
+            for (DimIdx_t i = 0; i < output_T->nbDims(); ++i) {
+                CHECK(output_T->dims().at(i) ==
+                      op->getOutput(0)->dims().at(i));
+            }
+            CHECK(approxEq<double>(*output_T, *op->getOutput(0)));
+        }
     }
-  }
 }
 } // namespace Aidge
-
diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
index f1594ef5a21070803a7b86861eac513708ec03a2..8750555bc2b64cae2740191195e3442b9ffe43e7 100644
--- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
+++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
@@ -23,194 +23,174 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][CPU]") {
     SECTION("k[3,3]") {
-        std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3,3}, "mycdw");
-        auto op = std::static_pointer_cast<OperatorTensor>(myCDW -> getOperator());
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,1,3,3> {
-            {
-                {{
-                    {  0,  1,  2},
-                    {  3,  4,  5},
-                    {  6,  7,  8}
-
-                }},
-                {{
-                    { 27, 28, 29},
-                    { 30, 31, 32},
-                    { 33, 34, 35}
-
-                }},
-                {{
-                    { 54, 55, 56},
-                    { 57, 58, 59},
-                    { 60, 61, 62}
-                }},
-                {{
-                    { 81, 82, 83},
-                    { 84, 85, 86},
-                    { 87, 88, 89}
-                }}
-            }
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}},
-
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}}
-                },
-                {
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}},
-
-                    {{150, 151, 152, 153, 154},
-                    {155, 156, 157, 158, 159},
-                    {160, 161, 162, 163, 164},
-                    {165, 166, 167, 168, 169},
-                    {170, 171, 172, 173, 174}},
-
-                    {{175, 176, 177, 178, 179},
-                    {180, 181, 182, 183, 184},
-                    {185, 186, 187, 188, 189},
-                    {190, 191, 192, 193, 194},
-                    {195, 196, 197, 198, 199}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
-            {
-                {
-                    {{   319,    355,    391},
-                    {   499,    535,    571},
-                    {   679,    715,    751}},
-
-                    {{  8745,   9024,   9303},
-                    { 10140,  10419,  10698},
-                    { 11535,  11814,  12093}},
-
-                    {{ 29337,  29859,  30381},
-                    { 31947,  32469,  32991},
-                    { 34557,  35079,  35601}},
-
-                    {{ 62061,  62826,  63591},
-                    { 65886,  66651,  67416},
-                    { 69711,  70476,  71241}}
-                },
-                {
-                    {{  3919,   3955,   3991},
-                    {  4099,   4135,   4171},
-                    {  4279,   4315,   4351}},
-
-                    {{ 36645,  36924,  37203},
-                    { 38040,  38319,  38598},
-                    { 39435,  39714,  39993}},
-
-                    {{ 81537,  82059,  82581},
-                    { 84147,  84669,  85191},
-                    { 86757,  87279,  87801}},
-
-                    {{138561, 139326, 140091},
-                    {142386, 143151, 143916},
-                    {146211, 146976, 147741}}
-                }
-            }
-        });
-        op -> associateInput(0, myInput);
-        op -> associateInput(1, myWeights);
-        op -> associateInput(2, myBias);
+        std::shared_ptr<Node> myCDW = ConvDepthWise(4, {3, 3}, "mycdw");
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myCDW->getOperator());
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<int, 4, 1, 3, 3>{
+                {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}},
+                 {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}}},
+                 {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}}},
+                 {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}},
+
+                                       {{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}}},
+                                      {{{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}},
+
+                                       {{150, 151, 152, 153, 154},
+                                        {155, 156, 157, 158, 159},
+                                        {160, 161, 162, 163, 164},
+                                        {165, 166, 167, 168, 169},
+                                        {170, 171, 172, 173, 174}},
+
+                                       {{175, 176, 177, 178, 179},
+                                        {180, 181, 182, 183, 184},
+                                        {185, 186, 187, 188, 189},
+                                        {190, 191, 192, 193, 194},
+                                        {195, 196, 197, 198, 199}}}}});
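+        // Depthwise: each of the 4 channels is convolved with its own
+        // 3x3 kernel; 5x5 input, stride 1, no padding -> 3x3 per channel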
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
+                {{{{319, 355, 391}, {499, 535, 571}, {679, 715, 751}},
+
+                  {{8745, 9024, 9303},
+                   {10140, 10419, 10698},
+                   {11535, 11814, 12093}},
+
+                  {{29337, 29859, 30381},
+                   {31947, 32469, 32991},
+                   {34557, 35079, 35601}},
+
+                  {{62061, 62826, 63591},
+                   {65886, 66651, 67416},
+                   {69711, 70476, 71241}}},
+                 {{{3919, 3955, 3991}, {4099, 4135, 4171}, {4279, 4315, 4351}},
+
+                  {{36645, 36924, 37203},
+                   {38040, 38319, 38598},
+                   {39435, 39714, 39993}},
+
+                  {{81537, 82059, 82581},
+                   {84147, 84669, 85191},
+                   {86757, 87279, 87801}},
+
+                  {{138561, 139326, 140091},
+                   {142386, 143151, 143916},
+                   {146211, 146976, 147741}}}}});
+        op->associateInput(0, myInput);
+        op->associateInput(1, myWeights);
+        op->associateInput(2, myBias);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        myCDW -> forward();
-        op -> getOutput(0) -> print();
-        REQUIRE(*(op -> getOutput(0)) == *myOutput);
+        myCDW->forward();
+        op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
     SECTION("point-wise") {
-        ConvDepthWise_Op<2> conv_op = ConvDepthWise_Op<2>({1,1});
-        std::shared_ptr<Tensor> weights = std::make_shared<Tensor>(std::vector<std::size_t>({3,1,1,1}));
-        weights -> setBackend("cpu");
-        std::shared_ptr<Tensor> biases = std::make_shared<Tensor>(std::vector<std::size_t>({3}));
-        biases -> setBackend("cpu");
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(std::vector<std::size_t>({2,3,5,5}));
-        input -> setBackend("cpu");
-        std::shared_ptr<Tensor> expected_output = std::make_shared<Tensor>(std::vector<std::size_t>({2,3,5,5}));
-        expected_output -> setBackend("cpu");
-
-        float weighst_array[3] {-0.0045, -0.4223, -0.9452};
+        ConvDepthWise_Op<2> conv_op = ConvDepthWise_Op<2>({1, 1});
+        std::shared_ptr<Tensor> weights =
+            std::make_shared<Tensor>(std::vector<std::size_t>({3, 1, 1, 1}));
+        weights->setBackend("cpu");
+        std::shared_ptr<Tensor> biases =
+            std::make_shared<Tensor>(std::vector<std::size_t>({3}));
+        biases->setBackend("cpu");
+        std::shared_ptr<Tensor> input =
+            std::make_shared<Tensor>(std::vector<std::size_t>({2, 3, 5, 5}));
+        input->setBackend("cpu");
+        std::shared_ptr<Tensor> expected_output =
+            std::make_shared<Tensor>(std::vector<std::size_t>({2, 3, 5, 5}));
+        expected_output->setBackend("cpu");
+
+        float weights_array[3]{-0.0045, -0.4223, -0.9452};
-        weights->getImpl()->setRawPtr(weighst_array, 3);
+        weights->getImpl()->setRawPtr(weights_array, 3);
 
-        float biases_array[3] {-0.8595,  0.7062, -0.0062};
+        float biases_array[3]{-0.8595, 0.7062, -0.0062};
         biases->getImpl()->setRawPtr(biases_array, 3);
 
-        float input_array[2*3*5*5] {
-            0.6581, 0.2509, 0.2660, 0.8270, 0.8040, 0.3147, 0.5028, 0.2591, 0.8585,
-            0.7762, 0.9972, 0.0305, 0.1202, 0.2682, 0.9306, 0.7927, 0.1494, 0.0678,
-            0.5550, 0.4132, 0.4742, 0.6199, 0.1802, 0.6350, 0.2539, 0.5594, 0.0143,
-            0.8656, 0.7105, 0.1420, 0.2464, 0.7883, 0.5715, 0.7642, 0.5492, 0.6628,
-            0.4922, 0.7941, 0.8421, 0.7914, 0.0237, 0.8081, 0.0174, 0.6018, 0.7402,
-            0.3770, 0.8786, 0.3651, 0.5355, 0.4267, 0.4457, 0.6756, 0.9631, 0.0145,
-            0.4470, 0.5202, 0.2675, 0.5815, 0.3487, 0.3457, 0.7179, 0.0518, 0.1520,
-            0.0573, 0.9219, 0.3615, 0.0866, 0.5237, 0.4725, 0.2565, 0.8726, 0.6434,
-            0.6875, 0.2919, 0.3355, 0.1886, 0.1749, 0.0785, 0.4091, 0.1907, 0.4664,
-            0.2738, 0.4784, 0.7807, 0.0687, 0.3091, 0.4557, 0.2277, 0.2424, 0.8691,
-            0.1893, 0.2918, 0.5691, 0.1926, 0.2866, 0.0097, 0.5445, 0.5085, 0.1110,
-            0.7099, 0.8927, 0.6182, 0.2538, 0.8694, 0.7872, 0.3196, 0.0710, 0.2888,
-            0.0403, 0.1670, 0.6840, 0.7323, 0.4861, 0.3390, 0.1096, 0.5070, 0.3872,
-            0.7473, 0.6224, 0.6910, 0.7530, 0.0149, 0.0866, 0.9022, 0.5027, 0.3849,
-            0.5255, 0.1977, 0.0570, 0.9581, 0.5461, 0.4623, 0.0101, 0.2362, 0.5922,
-            0.8398, 0.1497, 0.5160, 0.2862, 0.5931, 0.9728, 0.1353, 0.7790, 0.9137,
+        float input_array[2 * 3 * 5 * 5]{
+            0.6581, 0.2509, 0.2660, 0.8270, 0.8040, 0.3147, 0.5028, 0.2591,
+            0.8585, 0.7762, 0.9972, 0.0305, 0.1202, 0.2682, 0.9306, 0.7927,
+            0.1494, 0.0678, 0.5550, 0.4132, 0.4742, 0.6199, 0.1802, 0.6350,
+            0.2539, 0.5594, 0.0143, 0.8656, 0.7105, 0.1420, 0.2464, 0.7883,
+            0.5715, 0.7642, 0.5492, 0.6628, 0.4922, 0.7941, 0.8421, 0.7914,
+            0.0237, 0.8081, 0.0174, 0.6018, 0.7402, 0.3770, 0.8786, 0.3651,
+            0.5355, 0.4267, 0.4457, 0.6756, 0.9631, 0.0145, 0.4470, 0.5202,
+            0.2675, 0.5815, 0.3487, 0.3457, 0.7179, 0.0518, 0.1520, 0.0573,
+            0.9219, 0.3615, 0.0866, 0.5237, 0.4725, 0.2565, 0.8726, 0.6434,
+            0.6875, 0.2919, 0.3355, 0.1886, 0.1749, 0.0785, 0.4091, 0.1907,
+            0.4664, 0.2738, 0.4784, 0.7807, 0.0687, 0.3091, 0.4557, 0.2277,
+            0.2424, 0.8691, 0.1893, 0.2918, 0.5691, 0.1926, 0.2866, 0.0097,
+            0.5445, 0.5085, 0.1110, 0.7099, 0.8927, 0.6182, 0.2538, 0.8694,
+            0.7872, 0.3196, 0.0710, 0.2888, 0.0403, 0.1670, 0.6840, 0.7323,
+            0.4861, 0.3390, 0.1096, 0.5070, 0.3872, 0.7473, 0.6224, 0.6910,
+            0.7530, 0.0149, 0.0866, 0.9022, 0.5027, 0.3849, 0.5255, 0.1977,
+            0.0570, 0.9581, 0.5461, 0.4623, 0.0101, 0.2362, 0.5922, 0.8398,
+            0.1497, 0.5160, 0.2862, 0.5931, 0.9728, 0.1353, 0.7790, 0.9137,
             0.9351, 0.4036, 0.7638, 0.3873, 0.0494, 0.7450};
-        input->getImpl()->setRawPtr(input_array, 2*3*5*5);
-
-        float expected_output_array[2*3*5*5] {
-            -0.8624, -0.8606, -0.8607, -0.8632, -0.8631, -0.8609, -0.8617, -0.8606,
-            -0.8633, -0.8629, -0.8639, -0.8596, -0.8600, -0.8607, -0.8636, -0.8630,
-            -0.8601, -0.8598, -0.8620, -0.8613, -0.8616, -0.8622, -0.8603, -0.8623,
-            -0.8606,  0.4700,  0.7002,  0.3407,  0.4062,  0.6463,  0.6022,  0.3733,
-            0.4649,  0.3835,  0.4743,  0.4263,  0.4984,  0.3709,  0.3506,  0.3720,
-            0.6962,  0.3650,  0.6989,  0.4521,  0.3936,  0.5470,  0.3352,  0.5520,
-            0.4801,  0.5260, -0.4274, -0.6447, -0.9165, -0.0199, -0.4287, -0.4979,
-            -0.2590, -0.5559, -0.3358, -0.3329, -0.6847, -0.0552, -0.1499, -0.0603,
-            -0.8776, -0.3479, -0.0881, -0.5011, -0.4528, -0.2486, -0.8309, -0.6143,
-            -0.6561, -0.2821, -0.3233, -0.8603, -0.8603, -0.8598, -0.8613, -0.8603,
-            -0.8616, -0.8607, -0.8616, -0.8630, -0.8598, -0.8609, -0.8615, -0.8605,
-            -0.8606, -0.8634, -0.8603, -0.8608, -0.8620, -0.8603, -0.8608, -0.8595,
-            -0.8619, -0.8617, -0.8600, -0.8626,  0.3292,  0.4451,  0.5991,  0.3390,
-            0.3738,  0.5712,  0.6762,  0.5843,  0.6892,  0.6357,  0.4174,  0.3969,
-            0.5009,  0.5631,  0.6599,  0.4921,  0.5427,  0.3906,  0.4434,  0.4144,
-            0.3882,  0.6999,  0.6697,  0.3252,  0.4939, -0.3700, -0.5029, -0.1931,
-            -0.0601, -0.9118, -0.5224, -0.4432, -0.0157, -0.2294, -0.5660, -0.7999,
-            -0.1477, -0.4939, -0.2767, -0.5668, -0.9257, -0.1341, -0.7425, -0.8698,
-            -0.8900, -0.3877, -0.7282, -0.3722, -0.0529, -0.7103};
-        expected_output->getImpl()->setRawPtr(expected_output_array, 2*3*5*5);
+        input->getImpl()->setRawPtr(input_array, 2 * 3 * 5 * 5);
+
+        float expected_output_array[2 * 3 * 5 * 5]{
+            -0.8624, -0.8606, -0.8607, -0.8632, -0.8631, -0.8609, -0.8617,
+            -0.8606, -0.8633, -0.8629, -0.8639, -0.8596, -0.8600, -0.8607,
+            -0.8636, -0.8630, -0.8601, -0.8598, -0.8620, -0.8613, -0.8616,
+            -0.8622, -0.8603, -0.8623, -0.8606, 0.4700,  0.7002,  0.3407,
+            0.4062,  0.6463,  0.6022,  0.3733,  0.4649,  0.3835,  0.4743,
+            0.4263,  0.4984,  0.3709,  0.3506,  0.3720,  0.6962,  0.3650,
+            0.6989,  0.4521,  0.3936,  0.5470,  0.3352,  0.5520,  0.4801,
+            0.5260,  -0.4274, -0.6447, -0.9165, -0.0199, -0.4287, -0.4979,
+            -0.2590, -0.5559, -0.3358, -0.3329, -0.6847, -0.0552, -0.1499,
+            -0.0603, -0.8776, -0.3479, -0.0881, -0.5011, -0.4528, -0.2486,
+            -0.8309, -0.6143, -0.6561, -0.2821, -0.3233, -0.8603, -0.8603,
+            -0.8598, -0.8613, -0.8603, -0.8616, -0.8607, -0.8616, -0.8630,
+            -0.8598, -0.8609, -0.8615, -0.8605, -0.8606, -0.8634, -0.8603,
+            -0.8608, -0.8620, -0.8603, -0.8608, -0.8595, -0.8619, -0.8617,
+            -0.8600, -0.8626, 0.3292,  0.4451,  0.5991,  0.3390,  0.3738,
+            0.5712,  0.6762,  0.5843,  0.6892,  0.6357,  0.4174,  0.3969,
+            0.5009,  0.5631,  0.6599,  0.4921,  0.5427,  0.3906,  0.4434,
+            0.4144,  0.3882,  0.6999,  0.6697,  0.3252,  0.4939,  -0.3700,
+            -0.5029, -0.1931, -0.0601, -0.9118, -0.5224, -0.4432, -0.0157,
+            -0.2294, -0.5660, -0.7999, -0.1477, -0.4939, -0.2767, -0.5668,
+            -0.9257, -0.1341, -0.7425, -0.8698, -0.8900, -0.3877, -0.7282,
+            -0.3722, -0.0529, -0.7103};
+        expected_output->getImpl()->setRawPtr(expected_output_array,
+                                              2 * 3 * 5 * 5);
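+        // A 1x1 depthwise convolution is a per-channel affine map:
+        // out[n][c][h][w] = w[c] * in[n][c][h][w] + b[c]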
 
         conv_op.associateInput(0, input);
         conv_op.associateInput(1, weights);
@@ -224,6 +204,9 @@ TEST_CASE("[cpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][CPU]") {
 
         conv_op.getOutput(0)->print();
 
-        REQUIRE(approxEq<float>(*(conv_op.getOutput(0)),*expected_output, 1e-3f, 1e-4f));
+        REQUIRE(approxEq<float>(*(conv_op.getOutput(0)),
+                                *expected_output,
+                                1e-3f,
+                                1e-4f));
     }
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp
index e48d69c89eb0d6d52a834b3f32a41d8621fdd42b..477b3edc771b9fa038797d92a4452abb0317f8ef 100644
--- a/unit_tests/operator/Test_ConvImpl.cpp
+++ b/unit_tests/operator/Test_ConvImpl.cpp
@@ -23,134 +23,89 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") {
     SECTION("Classic Conv") {
-        std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv");
-        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
-            {
-                {
-                    {{  0,   1,   2},
-                    {  3,   4,   5},
-                    {  6,   7,   8}},
-                    {{  9,  10,  11},
-                    { 12,  13,  14},
-                    { 15,  16,  17}},
-                    {{ 18,  19,  20},
-                    { 21,  22,  23},
-                    { 24,  25,  26}}
-                },
-                {
-                    {{ 27,  28,  29},
-                    { 30,  31,  32},
-                    { 33,  34,  35}},
-                    {{ 36,  37,  38},
-                    { 39,  40,  41},
-                    { 42,  43,  44}},
-                    {{ 45,  46,  47},
-                    { 48,  49,  50},
-                    { 51,  52,  53}}
-                },
-                {
-                    {{ 54,  55,  56},
-                    { 57,  58,  59},
-                    { 60,  61,  62}},
-                    {{ 63,  64,  65},
-                    { 66,  67,  68},
-                    { 69,  70,  71}},
-                    {{ 72,  73,  74},
-                    { 75,  76,  77},
-                    { 78,  79,  80}}
-                },
-                {
-                    {{ 81,  82,  83},
-                    { 84,  85,  86},
-                    { 87,  88,  89}},
-                    {{ 90,  91,  92},
-                    { 93,  94,  95},
-                    { 96,  97,  98}},
-                    {{ 99, 100, 101},
-                    {102, 103, 104},
-                    {105, 106, 107}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
+        std::shared_ptr<Node> myConv = Conv(3, 4, {3, 3}, "myconv");
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{
+                {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}},
+                  {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}},
+                  {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}},
+                 {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}},
+                  {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}},
+                  {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}},
+                 {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}},
+                  {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}},
+                  {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}},
+                 {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}},
+                  {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}},
+                  {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
 
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
 
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
 
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
 
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
-            {
-                {
-                    {{ 15226,  15577,  15928},
-                    { 16981,  17332,  17683},
-                    { 18736,  19087,  19438}},
-                    {{ 37818,  38898,  39978},
-                    { 43218,  44298,  45378},
-                    { 48618,  49698,  50778}},
-                    {{ 60426,  62235,  64044},
-                    { 69471,  71280,  73089},
-                    { 78516,  80325,  82134}},
-                    {{ 83016,  85554,  88092},
-                    { 95706,  98244, 100782},
-                    {108396, 110934, 113472}}
-                },
-                {
-                    {{ 41551,  41902,  42253},
-                    { 43306,  43657,  44008},
-                    { 45061,  45412,  45763}},
-                    {{118818, 119898, 120978},
-                    {124218, 125298, 126378},
-                    {129618, 130698, 131778}},
-                    {{196101, 197910, 199719},
-                    {205146, 206955, 208764},
-                    {214191, 216000, 217809}},
-                    {{273366, 275904, 278442},
-                    {286056, 288594, 291132},
-                    {298746, 301284, 303822}}
-                }
-            }
-        });
-        op->associateInput(0,myInput);
-        op->associateInput(1,myWeights);
-        op->associateInput(2,myBias);
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
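+        // 5x5 input, 3x3 kernel, stride 1, no padding
+        // -> spatial output of (5 - 3) / 1 + 1 = 3x3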
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 3, 3>{{{{{15226, 15577, 15928},
+                                        {16981, 17332, 17683},
+                                        {18736, 19087, 19438}},
+                                       {{37818, 38898, 39978},
+                                        {43218, 44298, 45378},
+                                        {48618, 49698, 50778}},
+                                       {{60426, 62235, 64044},
+                                        {69471, 71280, 73089},
+                                        {78516, 80325, 82134}},
+                                       {{83016, 85554, 88092},
+                                        {95706, 98244, 100782},
+                                        {108396, 110934, 113472}}},
+                                      {{{41551, 41902, 42253},
+                                        {43306, 43657, 44008},
+                                        {45061, 45412, 45763}},
+                                       {{118818, 119898, 120978},
+                                        {124218, 125298, 126378},
+                                        {129618, 130698, 131778}},
+                                       {{196101, 197910, 199719},
+                                        {205146, 206955, 208764},
+                                        {214191, 216000, 217809}},
+                                       {{273366, 275904, 278442},
+                                        {286056, 288594, 291132},
+                                        {298746, 301284, 303822}}}}});
+        op->associateInput(0, myInput);
+        op->associateInput(1, myWeights);
+        op->associateInput(2, myBias);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myConv->forward();
@@ -158,241 +113,529 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") {
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
     SECTION("Point-wise") {
-        std::shared_ptr<Node> myConv = Conv(3,4,{1,1}, "myconv", {1,1});
-        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
-        op->setInput(0, std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-            {
-                {
-                    {{-1.38467371F, -0.87123615F, -0.22336592F},
-                     { 1.71736145F,  0.31888032F, -0.42451897F},
-                     { 0.30572093F, -0.77459252F, -1.55757248F}},
-                    {{ 0.99563611F, -0.87978584F, -0.60114205F},
-                     {-1.27415121F,  2.12278509F, -1.23465312F},
-                     {-0.48791388F, -0.91382301F, -0.65813726F}},
-                    {{ 0.07802387F,  0.52580875F, -0.48799172F},
-                     { 1.19136906F, -0.81400764F, -0.73599279F},
-                     {-1.40324783F,  0.03600367F, -0.06347727F}}
-                },
-                {
-                    {{ 0.67561489F, -0.09780689F,  1.84459400F},
-                     {-1.18453741F,  1.38354933F,  1.44513381F},
-                     { 0.85641253F,  2.21807575F,  0.52316552F}},
-                    {{ 0.34664667F, -0.19733144F,  1.14120162F},
-                     { 0.05164360F,  0.72810954F, -0.71064192F},
-                     {-0.60206831F,  0.96044880F,  0.40481427F}},
-                    {{-1.35434294F,  1.33470297F,  0.48353928F},
-                     {-0.19756168F,  1.26831138F,  1.22426283F},
-                     { 0.09811721F,  1.74225271F, -1.35267365F}}
-                }
-            }
-        }));
-        op->setInput(1, std::make_shared<Tensor>(Array4D<float,4,3,1,1> {
-            {
-                {
-                    {{ 0.33669037F}},
-                    {{ 0.12880941F}},
-                    {{ 0.23446237F}}
-                },
-                {
-                    {{ 0.23033303F}},
-                    {{-1.12285638F}},
-                    {{-0.18632829F}}
-                },
-                {
-                    {{ 2.20820141F}},
-                    {{-0.63799703F}},
-                    {{ 0.46165723F}}},
-                {
-                    {{ 0.26735088F}},
-                    {{ 0.53490466F}},
-                    {{ 0.80935723F}}
-                }
-            }
-        }));
-        op->setInput(2, std::make_shared<Tensor>(Array1D<float,4> {{ 1.11029029F, -1.68979895F, -0.98895991F,  0.95797181F}}));
-        Tensor expectedOutput = Array4D<float,2,4,3,3> {
-            {
-                {
-                    {{ 0.79062498F,  0.82691115F,  0.84323663F},
-                     { 1.80371785F,  1.30023468F,  0.63576132F},
-                     { 0.82136691F,  0.74022496F,  0.48621333F}},
-                    {{-3.14122939F, -1.00057328F, -0.97532475F},
-                     {-0.08553087F, -3.84826040F, -0.26410526F},
-                     {-0.81005937F, -0.84882969F, -1.29773819F}},
-                    {{-4.64579105F, -2.10878062F, -1.32395494F},
-                     { 4.16622877F, -2.01493120F, -1.47845459F},
-                     {-0.65039843F, -2.09977841F, -4.03780890F}},
-                    {{ 1.18349767F,  0.68001163F,  0.18174142F},
-                     { 1.69980371F,  1.51988935F, -0.41162649F},
-                     {-0.35700959F,  0.29121545F,  0.13813695F}}
-                },
-                {
-                    {{ 1.06487226F,  1.36487913F,  1.99171650F},
-                     { 0.67179936F,  1.96727657F,  1.79235911F},
-                     { 1.34408879F,  2.38930249F,  1.02142799F}},
-                    {{-1.67106462F, -1.73944509F, -2.63643050F},
-                     {-1.98381400F, -2.42500663F, -0.78710288F},
-                     {-0.83478457F, -2.58197999F, -1.77180362F}},
-                    {{-0.34346789F, -0.46286502F,  2.57942152F},
-                     {-3.72881150F,  2.18718910F,  3.22076392F},
-                     { 1.33158576F,  4.10055828F, -0.71644694F}},
-                    {{ 0.22787374F,  1.90652108F,  2.45291567F},
-                     { 0.50901115F,  2.74385118F,  1.95506990F},
-                     { 0.94429719F,  3.47482967F,  0.21958135F}}
-                }
-            }
-        };
+        std::shared_ptr<Node> myConv = Conv(3, 4, {1, 1}, "myconv", {1, 1});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
+        op->setInput(0,
+                     std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+                         {{{{-1.38467371F, -0.87123615F, -0.22336592F},
+                            {1.71736145F, 0.31888032F, -0.42451897F},
+                            {0.30572093F, -0.77459252F, -1.55757248F}},
+                           {{0.99563611F, -0.87978584F, -0.60114205F},
+                            {-1.27415121F, 2.12278509F, -1.23465312F},
+                            {-0.48791388F, -0.91382301F, -0.65813726F}},
+                           {{0.07802387F, 0.52580875F, -0.48799172F},
+                            {1.19136906F, -0.81400764F, -0.73599279F},
+                            {-1.40324783F, 0.03600367F, -0.06347727F}}},
+                          {{{0.67561489F, -0.09780689F, 1.84459400F},
+                            {-1.18453741F, 1.38354933F, 1.44513381F},
+                            {0.85641253F, 2.21807575F, 0.52316552F}},
+                           {{0.34664667F, -0.19733144F, 1.14120162F},
+                            {0.05164360F, 0.72810954F, -0.71064192F},
+                            {-0.60206831F, 0.96044880F, 0.40481427F}},
+                           {{-1.35434294F, 1.33470297F, 0.48353928F},
+                            {-0.19756168F, 1.26831138F, 1.22426283F},
+                            {0.09811721F, 1.74225271F, -1.35267365F}}}}}));
+        op->setInput(
+            1,
+            std::make_shared<Tensor>(Array4D<float, 4, 3, 1, 1>{
+                {{{{0.33669037F}}, {{0.12880941F}}, {{0.23446237F}}},
+                 {{{0.23033303F}}, {{-1.12285638F}}, {{-0.18632829F}}},
+                 {{{2.20820141F}}, {{-0.63799703F}}, {{0.46165723F}}},
+                 {{{0.26735088F}}, {{0.53490466F}}, {{0.80935723F}}}}}));
+        op->setInput(
+            2,
+            std::make_shared<Tensor>(Array1D<float, 4>{
+                {1.11029029F, -1.68979895F, -0.98895991F, 0.95797181F}}));
+        Tensor expectedOutput = Array4D<float, 2, 4, 3, 3>{
+            {{{{0.79062498F, 0.82691115F, 0.84323663F},
+               {1.80371785F, 1.30023468F, 0.63576132F},
+               {0.82136691F, 0.74022496F, 0.48621333F}},
+              {{-3.14122939F, -1.00057328F, -0.97532475F},
+               {-0.08553087F, -3.84826040F, -0.26410526F},
+               {-0.81005937F, -0.84882969F, -1.29773819F}},
+              {{-4.64579105F, -2.10878062F, -1.32395494F},
+               {4.16622877F, -2.01493120F, -1.47845459F},
+               {-0.65039843F, -2.09977841F, -4.03780890F}},
+              {{1.18349767F, 0.68001163F, 0.18174142F},
+               {1.69980371F, 1.51988935F, -0.41162649F},
+               {-0.35700959F, 0.29121545F, 0.13813695F}}},
+             {{{1.06487226F, 1.36487913F, 1.99171650F},
+               {0.67179936F, 1.96727657F, 1.79235911F},
+               {1.34408879F, 2.38930249F, 1.02142799F}},
+              {{-1.67106462F, -1.73944509F, -2.63643050F},
+               {-1.98381400F, -2.42500663F, -0.78710288F},
+               {-0.83478457F, -2.58197999F, -1.77180362F}},
+              {{-0.34346789F, -0.46286502F, 2.57942152F},
+               {-3.72881150F, 2.18718910F, 3.22076392F},
+               {1.33158576F, 4.10055828F, -0.71644694F}},
+              {{0.22787374F, 1.90652108F, 2.45291567F},
+               {0.50901115F, 2.74385118F, 1.95506990F},
+               {0.94429719F, 3.47482967F, 0.21958135F}}}}};
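+        // 1x1 kernels keep the 3x3 spatial dims: each output pixel is a
+        // linear combination of the 3 input channels plus the channel bias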
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myConv->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput.getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput.size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput.getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput.size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
     }
     SECTION("Strided and dilated Conv") {
-        std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv", {3,3},{2,2});
-        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,3,8,8> {
-            {{{
-                {0.0107F, 0.5076F, 0.2293F, 0.0486F, 0.7375F, 0.2637F, 0.9615F, 0.9138F},
-                {0.0678F, 0.5604F, 0.1940F, 0.0287F, 0.1029F, 0.2059F, 0.5058F, 0.9885F},
-                {0.9904F, 0.2890F, 0.4606F, 0.1055F, 0.9028F, 0.1654F, 0.6499F, 0.4775F},
-                {0.9499F, 0.4695F, 0.1713F, 0.0731F, 0.4913F, 0.8921F, 0.1782F, 0.1111F},
-                {0.2479F, 0.4669F, 0.1078F, 0.6153F, 0.0299F, 0.6484F, 0.2397F, 0.1814F},
-                {0.3779F, 0.9032F, 0.5651F, 0.3896F, 0.8439F, 0.6404F, 0.3813F, 0.0841F},
-                {0.5566F, 0.8950F, 0.1226F, 0.8881F, 0.9870F, 0.6256F, 0.6387F, 0.0628F},
-                {0.2857F, 0.0579F, 0.6247F, 0.1286F, 0.0951F, 0.1268F, 0.9510F, 0.3789F}},
+        std::shared_ptr<Node> myConv =
+            Conv(3, 4, {3, 3}, "myconv", {3, 3}, {2, 2});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
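+        // stride {3, 3} and dilation {2, 2}: effective kernel
+        // 2 * (3 - 1) + 1 = 5, so the 8x8 input gives (8 - 5) / 3 + 1 = 2
+        // -> 2x2 spatial output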
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<float, 2, 3, 8, 8>{
+                {{{{0.0107F, 0.5076F, 0.2293F, 0.0486F, 0.7375F, 0.2637F, 0.9615F, 0.9138F},
+                   {0.0678F, 0.5604F, 0.1940F, 0.0287F, 0.1029F, 0.2059F, 0.5058F, 0.9885F},
+                   {0.9904F, 0.2890F, 0.4606F, 0.1055F, 0.9028F, 0.1654F, 0.6499F, 0.4775F},
+                   {0.9499F, 0.4695F, 0.1713F, 0.0731F, 0.4913F, 0.8921F, 0.1782F, 0.1111F},
+                   {0.2479F, 0.4669F, 0.1078F, 0.6153F, 0.0299F, 0.6484F, 0.2397F, 0.1814F},
+                   {0.3779F, 0.9032F, 0.5651F, 0.3896F, 0.8439F, 0.6404F, 0.3813F, 0.0841F},
+                   {0.5566F, 0.8950F, 0.1226F, 0.8881F, 0.9870F, 0.6256F, 0.6387F, 0.0628F},
+                   {0.2857F, 0.0579F, 0.6247F, 0.1286F, 0.0951F, 0.1268F, 0.9510F, 0.3789F}},
 
-               {{0.7648F, 0.5340F, 0.1024F, 0.4098F, 0.9958F, 0.7941F, 0.1190F, 0.7328F},
-                {0.4532F, 0.6598F, 0.9146F, 0.1690F, 0.6041F, 0.7230F, 0.5719F, 0.9282F},
-                {0.2862F, 0.2329F, 0.7302F, 0.6717F, 0.1983F, 0.1876F, 0.4561F, 0.2126F},
-                {0.7849F, 0.0239F, 0.7977F, 0.5935F, 0.9958F, 0.4703F, 0.4612F, 0.1627F},
-                {0.6393F, 0.3544F, 0.8643F, 0.5039F, 0.8087F, 0.6521F, 0.5086F, 0.9331F},
-                {0.7749F, 0.9798F, 0.6820F, 0.7869F, 0.5144F, 0.2941F, 0.8137F, 0.4561F},
-                {0.6505F, 0.3974F, 0.6909F, 0.7019F, 0.2729F, 0.4240F, 0.0162F, 0.1536F},
-                {0.3529F, 0.8821F, 0.1812F, 0.3426F, 0.3472F, 0.0300F, 0.8841F, 0.8088F}},
+                  {{0.7648F, 0.5340F, 0.1024F, 0.4098F, 0.9958F, 0.7941F, 0.1190F, 0.7328F},
+                   {0.4532F, 0.6598F, 0.9146F, 0.1690F, 0.6041F, 0.7230F, 0.5719F, 0.9282F},
+                   {0.2862F, 0.2329F, 0.7302F, 0.6717F, 0.1983F, 0.1876F, 0.4561F, 0.2126F},
+                   {0.7849F, 0.0239F, 0.7977F, 0.5935F, 0.9958F, 0.4703F, 0.4612F, 0.1627F},
+                   {0.6393F, 0.3544F, 0.8643F, 0.5039F, 0.8087F, 0.6521F, 0.5086F, 0.9331F},
+                   {0.7749F, 0.9798F, 0.6820F, 0.7869F, 0.5144F, 0.2941F, 0.8137F, 0.4561F},
+                   {0.6505F, 0.3974F, 0.6909F, 0.7019F, 0.2729F, 0.4240F, 0.0162F, 0.1536F},
+                   {0.3529F, 0.8821F, 0.1812F, 0.3426F, 0.3472F, 0.0300F, 0.8841F, 0.8088F}},
 
-               {{0.5099F, 0.3323F, 0.1488F, 0.3424F, 0.1494F, 0.6225F, 0.8103F, 0.5995F},
-                {0.9198F, 0.5635F, 0.8908F, 0.9378F, 0.6689F, 0.3176F, 0.3755F, 0.3883F},
-                {0.0626F, 0.5309F, 0.0307F, 0.3955F, 0.2794F, 0.1420F, 0.4758F, 0.7558F},
-                {0.6154F, 0.5280F, 0.2318F, 0.3832F, 0.4435F, 0.3490F, 0.4043F, 0.5872F},
-                {0.3705F, 0.3848F, 0.2182F, 0.8332F, 0.4559F, 0.5310F, 0.4611F, 0.4236F},
-                {0.6141F, 0.8103F, 0.2260F, 0.9907F, 0.5615F, 0.4520F, 0.6949F, 0.0175F},
-                {0.3969F, 0.5021F, 0.0970F, 0.9937F, 0.9270F, 0.4302F, 0.2868F, 0.3891F},
-                {0.8693F, 0.5170F, 0.5348F, 0.2676F, 0.9769F, 0.3356F, 0.9427F, 0.3908F}}
-            },
-            {
-               {{0.4803F, 0.5223F, 0.6395F, 0.8402F, 0.4442F, 0.6377F, 0.7852F, 0.9063F},
-                {0.0361F, 0.0470F, 0.3104F, 0.6921F, 0.0543F, 0.4490F, 0.9541F, 0.7395F},
-                {0.3832F, 0.3828F, 0.2236F, 0.2068F, 0.4369F, 0.7443F, 0.6952F, 0.6394F},
-                {0.5309F, 0.8483F, 0.1991F, 0.9756F, 0.8969F, 0.7284F, 0.4657F, 0.5486F},
-                {0.8839F, 0.3260F, 0.6892F, 0.4074F, 0.9473F, 0.5526F, 0.4147F, 0.4786F},
-                {0.9674F, 0.0952F, 0.8379F, 0.2163F, 0.9420F, 0.4046F, 0.1339F, 0.5234F},
-                {0.4213F, 0.8392F, 0.3184F, 0.4576F, 0.9349F, 0.8267F, 0.0931F, 0.8009F},
-                {0.5570F, 0.5871F, 0.4175F, 0.5465F, 0.6679F, 0.9224F, 0.0049F, 0.9421F}},
+                                         {{0.5099F, 0.3323F, 0.1488F, 0.3424F, 0.1494F, 0.6225F, 0.8103F, 0.5995F},
+                                          {0.9198F, 0.5635F, 0.8908F, 0.9378F, 0.6689F, 0.3176F, 0.3755F, 0.3883F},
+                                          {0.0626F, 0.5309F, 0.0307F, 0.3955F, 0.2794F, 0.1420F, 0.4758F, 0.7558F},
+                                          {0.6154F, 0.5280F, 0.2318F, 0.3832F, 0.4435F, 0.3490F, 0.4043F, 0.5872F},
+                                          {0.3705F, 0.3848F, 0.2182F, 0.8332F, 0.4559F, 0.5310F, 0.4611F, 0.4236F},
+                                          {0.6141F, 0.8103F, 0.2260F, 0.9907F, 0.5615F, 0.4520F, 0.6949F, 0.0175F},
+                                          {0.3969F, 0.5021F, 0.0970F, 0.9937F, 0.9270F, 0.4302F, 0.2868F, 0.3891F},
+                                          {0.8693F, 0.5170F, 0.5348F, 0.2676F, 0.9769F, 0.3356F, 0.9427F, 0.3908F}}},
+                                        {{{0.4803F, 0.5223F, 0.6395F, 0.8402F, 0.4442F, 0.6377F, 0.7852F, 0.9063F},
+                                          {0.0361F, 0.0470F, 0.3104F, 0.6921F, 0.0543F, 0.4490F, 0.9541F, 0.7395F},
+                                          {0.3832F, 0.3828F, 0.2236F, 0.2068F, 0.4369F, 0.7443F, 0.6952F, 0.6394F},
+                                          {0.5309F, 0.8483F, 0.1991F, 0.9756F, 0.8969F, 0.7284F, 0.4657F, 0.5486F},
+                                          {0.8839F, 0.3260F, 0.6892F, 0.4074F, 0.9473F, 0.5526F, 0.4147F, 0.4786F},
+                                          {0.9674F, 0.0952F, 0.8379F, 0.2163F, 0.9420F, 0.4046F, 0.1339F, 0.5234F},
+                                          {0.4213F, 0.8392F, 0.3184F, 0.4576F, 0.9349F, 0.8267F, 0.0931F, 0.8009F},
+                                          {0.5570F, 0.5871F, 0.4175F, 0.5465F, 0.6679F, 0.9224F, 0.0049F, 0.9421F}},
 
-               {{0.3739F, 0.6230F, 0.7613F, 0.1337F, 0.8527F, 0.0557F, 0.6424F, 0.8463F},
-                {0.7179F, 0.5638F, 0.2457F, 0.4579F, 0.0487F, 0.8693F, 0.8216F, 0.0415F},
-                {0.1724F, 0.5108F, 0.9103F, 0.0850F, 0.0080F, 0.8927F, 0.7706F, 0.3600F},
-                {0.7751F, 0.8828F, 0.7872F, 0.4541F, 0.3181F, 0.1855F, 0.2486F, 0.0033F},
-                {0.5558F, 0.3500F, 0.6034F, 0.1763F, 0.7418F, 0.5190F, 0.5147F, 0.4090F},
-                {0.4476F, 0.1249F, 0.8116F, 0.9091F, 0.1738F, 0.6150F, 0.3285F, 0.3133F},
-                {0.5657F, 0.4447F, 0.5049F, 0.3425F, 0.7443F, 0.2718F, 0.2466F, 0.5586F},
-                {0.3684F, 0.7616F, 0.5165F, 0.9621F, 0.2864F, 0.7747F, 0.8110F, 0.7045F}},
+                                         {{0.3739F, 0.6230F, 0.7613F, 0.1337F, 0.8527F, 0.0557F, 0.6424F, 0.8463F},
+                                          {0.7179F, 0.5638F, 0.2457F, 0.4579F, 0.0487F, 0.8693F, 0.8216F, 0.0415F},
+                                          {0.1724F, 0.5108F, 0.9103F, 0.0850F, 0.0080F, 0.8927F, 0.7706F, 0.3600F},
+                                          {0.7751F, 0.8828F, 0.7872F, 0.4541F, 0.3181F, 0.1855F, 0.2486F, 0.0033F},
+                                          {0.5558F, 0.3500F, 0.6034F, 0.1763F, 0.7418F, 0.5190F, 0.5147F, 0.4090F},
+                                          {0.4476F, 0.1249F, 0.8116F, 0.9091F, 0.1738F, 0.6150F, 0.3285F, 0.3133F},
+                                          {0.5657F, 0.4447F, 0.5049F, 0.3425F, 0.7443F, 0.2718F, 0.2466F, 0.5586F},
+                                          {0.3684F, 0.7616F, 0.5165F, 0.9621F, 0.2864F, 0.7747F, 0.8110F, 0.7045F}},
 
-               {{0.4570F, 0.4577F, 0.0373F, 0.6084F, 0.4632F, 0.3472F, 0.9917F, 0.2011F},
-                {0.7921F, 0.2202F, 0.9525F, 0.7274F, 0.3357F, 0.0076F, 0.5786F, 0.3034F},
-                {0.6510F, 0.0798F, 0.2757F, 0.1738F, 0.3046F, 0.2197F, 0.3872F, 0.5650F},
-                {0.1532F, 0.3204F, 0.6094F, 0.3287F, 0.8903F, 0.9773F, 0.7950F, 0.2845F},
-                {0.2482F, 0.3395F, 0.8795F, 0.4325F, 0.1395F, 0.2457F, 0.2968F, 0.5424F},
-                {0.8636F, 0.7426F, 0.2151F, 0.6900F, 0.3938F, 0.0062F, 0.4980F, 0.4098F},
-                {0.8026F, 0.0464F, 0.2662F, 0.7835F, 0.8444F, 0.0688F, 0.8796F, 0.7625F},
-                {0.2764F, 0.5341F, 0.1773F, 0.6671F, 0.7555F, 0.5235F, 0.7142F, 0.9423F}}}}
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<float,4> {{ 0.1902F, -0.1789F, -0.0314F, -0.0589F}});
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<float,4,3,3,3> { //NCHW
-            {
-                {
-                    {{ 0.0039F,  0.1098F, -0.0834F},
-                     {-0.0890F,  0.0725F, -0.1178F},
-                     { 0.1056F, -0.0924F, -0.0574F}},
-                    {{ 0.0070F, -0.0730F, -0.0674F},
-                     {-0.0380F, -0.1025F, -0.0085F},
-                     {-0.1451F, -0.0656F,  0.1137F}},
-                    {{ 0.1020F,  0.1025F, -0.0678F},
-                     { 0.0028F,  0.1512F, -0.0871F},
-                     { 0.1563F, -0.1446F, -0.1636F}}
-                },
-                {
-                    {{ 0.1472F,  0.0025F, -0.0281F},
-                     { 0.0350F,  0.0296F, -0.1711F},
-                     {-0.1197F, -0.1198F, -0.1130F}},
-                    {{-0.1492F,  0.1554F, -0.1044F},
-                     { 0.1203F, -0.1596F,  0.0589F},
-                     {-0.0436F, -0.1876F, -0.0816F}},
-                    {{ 0.1572F, -0.0982F,  0.1293F},
-                     { 0.1358F,  0.1559F,  0.1322F},
-                     { 0.0296F, -0.0354F, -0.0632F}}
-                },
-                {
-                    {{-0.0941F, -0.0479F,  0.0908F},
-                     {-0.1319F, -0.1333F,  0.1223F},
-                     {-0.1098F,  0.1924F,  0.1075F}},
-                    {{ 0.1796F,  0.0213F,  0.0626F},
-                     { 0.0275F,  0.1883F, -0.0818F},
-                     { 0.0363F,  0.0684F,  0.1094F}},
-                    {{ 0.1131F,  0.1258F, -0.0558F},
-                     { 0.1498F,  0.0322F, -0.0186F},
-                     {-0.1801F, -0.0358F,  0.1727F}}
-                },
-                {
-                    {{-0.1500F, -0.0554F, -0.0994F},
-                     {-0.0818F, -0.1223F,  0.1365F},
-                     { 0.1281F,  0.1507F, -0.0890F}},
-                    {{-0.0444F, -0.1071F, -0.1632F},
-                     { 0.0757F, -0.1235F,  0.0408F},
-                     { 0.0401F, -0.1914F,  0.1772F}},
-                    {{-0.0714F,  0.1582F, -0.0065F},
-                     {-0.0119F,  0.1375F, -0.0727F},
-                     {-0.1532F, -0.1826F, -0.0417F}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,4,2,2> {
-            {
-                {
-                    {{-0.2174F, -0.0778F},
-                     {-0.2584F,  0.2303F}},
-                    {{-0.7686F, -0.3879F},
-                     {-0.1775F,  0.0119F}},
-                    {{ 0.5180F,  0.5087F},
-                     { 0.5398F,  0.3476F}},
-                    {{-0.5258F, -0.3128F},
-                     {-0.6673F, -0.1827F}}
-                },
-                {
-                    {{-0.1902F, -0.0467F},
-                     {-0.3327F, -0.1701F}},
-                    {{-0.5505F, -0.4875F},
-                     {-0.4119F, -0.5726F}},
-                    {{ 0.5777F,  0.4428F},
-                     { 0.6121F,  0.7221F}},
-                    {{-0.6009F, -0.6335F},
-                     {-0.5159F, -0.3353F}}
-                }
-            }
-        });
-        op->associateInput(0,myInput);
-        op->associateInput(1,myWeights);
-        op->associateInput(2,myBias);
+                                         {{0.4570F, 0.4577F, 0.0373F, 0.6084F, 0.4632F, 0.3472F, 0.9917F, 0.2011F},
+                                          {0.7921F, 0.2202F, 0.9525F, 0.7274F, 0.3357F, 0.0076F, 0.5786F, 0.3034F},
+                                          {0.6510F, 0.0798F, 0.2757F, 0.1738F, 0.3046F, 0.2197F, 0.3872F, 0.5650F},
+                                          {0.1532F, 0.3204F, 0.6094F, 0.3287F, 0.8903F, 0.9773F, 0.7950F, 0.2845F},
+                                          {0.2482F, 0.3395F, 0.8795F, 0.4325F, 0.1395F, 0.2457F, 0.2968F, 0.5424F},
+                                          {0.8636F, 0.7426F, 0.2151F, 0.6900F, 0.3938F, 0.0062F, 0.4980F, 0.4098F},
+                                          {0.8026F, 0.0464F, 0.2662F, 0.7835F, 0.8444F, 0.0688F, 0.8796F, 0.7625F},
+                                          {0.2764F, 0.5341F, 0.1773F, 0.6671F, 0.7555F, 0.5235F, 0.7142F, 0.9423F}}}}});
+        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(
+            Array1D<float, 4>{{0.1902F, -0.1789F, -0.0314F, -0.0589F}});
+        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(
+            Array4D<float, 4, 3, 3, 3>{// NCHW
+                                       {{{{0.0039F, 0.1098F, -0.0834F},
+                                          {-0.0890F, 0.0725F, -0.1178F},
+                                          {0.1056F, -0.0924F, -0.0574F}},
+                                         {{0.0070F, -0.0730F, -0.0674F},
+                                          {-0.0380F, -0.1025F, -0.0085F},
+                                          {-0.1451F, -0.0656F, 0.1137F}},
+                                         {{0.1020F, 0.1025F, -0.0678F},
+                                          {0.0028F, 0.1512F, -0.0871F},
+                                          {0.1563F, -0.1446F, -0.1636F}}},
+                                        {{{0.1472F, 0.0025F, -0.0281F},
+                                          {0.0350F, 0.0296F, -0.1711F},
+                                          {-0.1197F, -0.1198F, -0.1130F}},
+                                         {{-0.1492F, 0.1554F, -0.1044F},
+                                          {0.1203F, -0.1596F, 0.0589F},
+                                          {-0.0436F, -0.1876F, -0.0816F}},
+                                         {{0.1572F, -0.0982F, 0.1293F},
+                                          {0.1358F, 0.1559F, 0.1322F},
+                                          {0.0296F, -0.0354F, -0.0632F}}},
+                                        {{{-0.0941F, -0.0479F, 0.0908F},
+                                          {-0.1319F, -0.1333F, 0.1223F},
+                                          {-0.1098F, 0.1924F, 0.1075F}},
+                                         {{0.1796F, 0.0213F, 0.0626F},
+                                          {0.0275F, 0.1883F, -0.0818F},
+                                          {0.0363F, 0.0684F, 0.1094F}},
+                                         {{0.1131F, 0.1258F, -0.0558F},
+                                          {0.1498F, 0.0322F, -0.0186F},
+                                          {-0.1801F, -0.0358F, 0.1727F}}},
+                                        {{{-0.1500F, -0.0554F, -0.0994F},
+                                          {-0.0818F, -0.1223F, 0.1365F},
+                                          {0.1281F, 0.1507F, -0.0890F}},
+                                         {{-0.0444F, -0.1071F, -0.1632F},
+                                          {0.0757F, -0.1235F, 0.0408F},
+                                          {0.0401F, -0.1914F, 0.1772F}},
+                                         {{-0.0714F, 0.1582F, -0.0065F},
+                                          {-0.0119F, 0.1375F, -0.0727F},
+                                          {-0.1532F, -0.1826F, -0.0417F}}}}});
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 4, 2, 2>{
+                {{{{-0.2174F, -0.0778F}, {-0.2584F, 0.2303F}},
+                  {{-0.7686F, -0.3879F}, {-0.1775F, 0.0119F}},
+                  {{0.5180F, 0.5087F}, {0.5398F, 0.3476F}},
+                  {{-0.5258F, -0.3128F}, {-0.6673F, -0.1827F}}},
+                 {{{-0.1902F, -0.0467F}, {-0.3327F, -0.1701F}},
+                  {{-0.5505F, -0.4875F}, {-0.4119F, -0.5726F}},
+                  {{0.5777F, 0.4428F}, {0.6121F, 0.7221F}},
+                  {{-0.6009F, -0.6335F}, {-0.5159F, -0.3353F}}}}});
+        op->associateInput(0, myInput);
+        op->associateInput(1, myWeights);
+        op->associateInput(2, myBias);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         op->forwardDims();
         myConv->forward();
         op->getOutput(0)->print();
-        REQUIRE(approxEq<float>(*(op->getOutput(0)),*myOutput, 1e-3f, 1e-4f));
+        REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput, 1e-3f, 1e-4f));
     }
 }
\ No newline at end of file
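
The convolution check above accepts the computed output when it matches the reference within two tolerances (the 1e-3f and 1e-4f arguments to approxEq). As a minimal sketch of what such a mixed relative/absolute comparison does element-wise, with a hypothetical helper tensorsApproxEq, and assuming the first tolerance is relative and the second absolute (the exact semantics of Aidge's approxEq are not restated from its source):

    #include <cmath>
    #include <cstddef>

    // Hypothetical sketch: accept out[i] when it is within absTol of ref[i],
    // plus a margin that scales with the magnitude of the reference value.
    bool tensorsApproxEq(const float *out, const float *ref, std::size_t n,
                         float relTol = 1e-3f, float absTol = 1e-4f) {
        for (std::size_t i = 0; i < n; ++i) {
            if (std::fabs(out[i] - ref[i]) > absTol + relTol * std::fabs(ref[i])) {
                return false;
            }
        }
        return true;
    }

This is why the test never asserts exact float equality: the CPU kernel's accumulation order may legitimately differ from that of the reference computation.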
diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp
index 5d7dfdf12032d4c444e38cda6d2a4298fc552b14..1f3505ee14e78d58f4937104d3556c2ad5cb7692 100644
--- a/unit_tests/operator/Test_DivImpl.cpp
+++ b/unit_tests/operator/Test_DivImpl.cpp
@@ -10,13 +10,13 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Div.hpp"
@@ -29,24 +29,28 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(5));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
     // Create Div Operator
     std::shared_ptr<Node> myDiv = Div();
-    auto op = std::static_pointer_cast<OperatorTensor>(myDiv-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myDiv->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
 
@@ -61,12 +65,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
     std::chrono::duration<double, std::micro> duration{};
 
     SECTION("DivImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
             std::size_t number_of_operation = 0;
 
@@ -77,13 +77,17 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 for (std::size_t i = 0; i < nbDims; ++i) {
                     dims.push_back(dimSizeDist(gen));
                 }
-                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dims.cbegin(),
+                                    dims.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
 
                 // without broadcasting
-                float* array0 = new float[nb_elements];
-                float* array1 = new float[nb_elements];
-                float* result = new float[nb_elements];
+                float *array0 = new float[nb_elements];
+                float *array1 = new float[nb_elements];
+                float *result = new float[nb_elements];
 
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
@@ -93,21 +97,23 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
                 // input0
                 T0->resize(dims);
-                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+                T0->getImpl()->setRawPtr(array0, nb_elements);
 
                 // input1
                 T1->resize(dims);
-                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+                T1->getImpl()->setRawPtr(array1, nb_elements);
 
                 // results
                 Tres->resize(dims);
-                Tres -> getImpl() -> setRawPtr(result, nb_elements);
+                Tres->getImpl()->setRawPtr(result, nb_elements);
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -117,8 +123,10 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements per microsecond: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -126,7 +134,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
-                // handle dimensions, replace some dimensions with '1' to get broadcasting
+                // handle dimensions, replace some dimensions with '1' to get
+                // broadcasting
                 constexpr std::size_t nbDims = 4;
                 std::vector<std::size_t> dims;
                 for (std::size_t i = 0; i < nbDims; ++i) {
@@ -146,37 +155,62 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 }
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                float *array1 =
+                    new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
-                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
+                for (std::size_t i = 0;
+                     i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+                     ++i) {
                     array1[i] = valueDist(gen);
                 }
 
                 // compute true result
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1[1] * dims1[2] * dims1[3],
+                    dims1[2] * dims1[3],
+                    dims1[3],
+                    1};
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                                    + ((dims1[3] > 1) ? d : 0);
-                                result[idx_out + d] = array0[idx0] / array1[idx1];
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " / " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                                    ((dims1[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    array0[idx0] / array1[idx1];
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " / " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -185,22 +219,30 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+                T1->getImpl()->setRawPtr(
+                    array1,
+                    dims1[0] * dims1[1] * dims1[2] * dims1[3]);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -209,15 +251,23 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements per microsecond: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
-            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
+            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(
+                std::size_t(1),
+                std::size_t(3));
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
@@ -234,15 +284,24 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                         dims1[i] = 1;
                     }
                 }
-                dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
+                dims1.erase(dims1.cbegin(),
+                            dims1.cbegin() + nbRemovedDimsDist(gen));
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
-                float* array1 = new float[array1_size];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                std::size_t array1_size =
+                    std::accumulate(dims1.cbegin(),
+                                    dims1.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
+                float *array1 = new float[array1_size];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]);
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
                 for (std::size_t i = 0; i < array1_size; ++i) {
@@ -251,27 +310,48 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
                 // compute true result
                 auto dims1_tmp = dims1;
-                dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
-
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
+                dims1_tmp.insert(dims1_tmp.cbegin(),
+                                 4 - dims1_tmp.size(),
+                                 std::size_t(1));
+
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[3],
+                    1};
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
-                                                    + ((dims1_tmp[3] > 1) ? d : 0);
-                                result[idx_out + d] = array0[idx0] / array1[idx1];
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " / " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] *
+                                        ((dims1_tmp[2] > 1) ? c : 0) +
+                                    ((dims1_tmp[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    array0[idx0] / array1[idx1];
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " / " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -280,22 +360,28 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, array1_size);
+                T1->getImpl()->setRawPtr(array1, array1_size);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -304,12 +390,18 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements per microsecond: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
     }
 }
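
In the broadcasting sections above, the reference result is computed by hand with per-dimension strides, where a size-1 dimension contributes a zero offset ((dims[k] > 1) ? index : 0) so that its single element is reused along that axis. A self-contained sketch of the same indexing scheme (broadcastDiv is a hypothetical helper; shapes are padded to 4-D exactly as in the test):

    #include <cstddef>
    #include <vector>

    // Element-wise a / b with broadcasting over fixed 4-D shapes. dimsOut[k]
    // is assumed to be max(dims0[k], dims1[k]), with mismatched dimensions
    // allowed only when one of them is 1.
    void broadcastDiv(const std::vector<float> &a, const std::size_t dims0[4],
                      const std::vector<float> &b, const std::size_t dims1[4],
                      std::vector<float> &out, const std::size_t dimsOut[4]) {
        // Row-major strides; the innermost stride is always 1.
        const std::size_t s0[4] = {dims0[1] * dims0[2] * dims0[3],
                                   dims0[2] * dims0[3], dims0[3], 1};
        const std::size_t s1[4] = {dims1[1] * dims1[2] * dims1[3],
                                   dims1[2] * dims1[3], dims1[3], 1};
        std::size_t o = 0;
        for (std::size_t i = 0; i < dimsOut[0]; ++i)
            for (std::size_t j = 0; j < dimsOut[1]; ++j)
                for (std::size_t k = 0; k < dimsOut[2]; ++k)
                    for (std::size_t l = 0; l < dimsOut[3]; ++l) {
                        // A size-1 dimension keeps its index at 0, so its
                        // single element is broadcast along that axis.
                        const std::size_t ia = s0[0] * (dims0[0] > 1 ? i : 0) +
                                               s0[1] * (dims0[1] > 1 ? j : 0) +
                                               s0[2] * (dims0[2] > 1 ? k : 0) +
                                               (dims0[3] > 1 ? l : 0);
                        const std::size_t ib = s1[0] * (dims1[0] > 1 ? i : 0) +
                                               s1[1] * (dims1[1] > 1 ? j : 0) +
                                               s1[2] * (dims1[2] > 1 ? k : 0) +
                                               (dims1[3] > 1 ? l : 0);
                        out[o++] = a[ia] / b[ib];
                    }
    }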
diff --git a/unit_tests/operator/Test_ErfImpl.cpp b/unit_tests/operator/Test_ErfImpl.cpp
index 2826b5b57d431cf8296a9869f88f7d642c59c963..8203e79326ad321eb997eeae2f735a253f06c469 100644
--- a/unit_tests/operator/Test_ErfImpl.cpp
+++ b/unit_tests/operator/Test_ErfImpl.cpp
@@ -18,71 +18,68 @@
 
 #include <memory>
 
-
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Erf(forward)") {
     SECTION("1D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
-            {0.41384590, 0.43120754, 0.93762982, 0.31049860, 0.77547199, 0.09514862,
-              0.16145366, 0.42776686, 0.43487436, 0.41170865}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> {
-                {0.44163144, 0.45801866, 0.81516320, 0.33941913, 0.72722000, 0.10704061,
-              0.18061027, 0.45479023, 0.46144873, 0.43959764}
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array1D<float, 10>{{0.41384590, 0.43120754, 0.93762982,
+                                0.31049860, 0.77547199, 0.09514862,
+                                0.16145366, 0.42776686, 0.43487436,
+                                0.41170865}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array1D<float, 10>{{0.44163144, 0.45801866, 0.81516320,
+                                0.33941913, 0.72722000, 0.10704061,
+                                0.18061027, 0.45479023, 0.46144873,
+                                0.43959764}});
 
         std::shared_ptr<Node> myErf = Erf();
-        auto op = std::static_pointer_cast<OperatorTensor>(myErf -> getOperator());
-        op->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myErf->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myErf->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
     }
 
     SECTION("3D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
-            {
-                {
-                    {0.97037154, 0.86208081, 0.77767169},
-                    {0.38160080, 0.11422747, 0.77284443},
-                },
-                {
-                    {0.51592529, 0.72543722, 0.54641193},
-                    {0.93866944, 0.97767913, 0.34172094}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> {
-            {
-                {
-                    {0.83003384, 0.77721894, 0.72857803},
-                    {0.41057193, 0.12833349, 0.72559172},
-                },
-                {
-                    {0.53438270, 0.69507217, 0.56032562},
-                    {0.81564975, 0.83322692, 0.37109339}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>{{{
+                                         {0.97037154, 0.86208081, 0.77767169},
+                                         {0.38160080, 0.11422747, 0.77284443},
+                                     },
+                                     {{0.51592529, 0.72543722, 0.54641193},
+                                      {0.93866944, 0.97767913, 0.34172094}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>{{{
+                                         {0.83003384, 0.77721894, 0.72857803},
+                                         {0.41057193, 0.12833349, 0.72559172},
+                                     },
+                                     {{0.53438270, 0.69507217, 0.56032562},
+                                      {0.81564975, 0.83322692, 0.37109339}}}});
 
         std::shared_ptr<Node> myErf = Erf();
-        auto op = std::static_pointer_cast<OperatorTensor>(myErf -> getOperator());
-        op->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myErf->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myErf->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
     }
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp
index b2566f26d984fb1d89052745ec35870c6b935d48..9d7ca9113baec1855eb8ceac2c0565f64eb35824 100644
--- a/unit_tests/operator/Test_FCImpl.cpp
+++ b/unit_tests/operator/Test_FCImpl.cpp
@@ -20,91 +20,105 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") {
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{
-            {{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
-              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
-              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
-              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
-             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
-              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
-              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
-              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
-             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
-              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
-              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
-              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
-             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
-              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
-              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
-              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
-             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
-              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
-              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
-              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}}});
-    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{
-            {{23601, 23602, 23603, 23604, 23605}, {68601, 68602, 68603, 68604, 68605}}});
+    std::shared_ptr<Tensor> myWeights =
+        std::make_shared<Tensor>(Array2D<int, 5, 75>{
+            {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+              1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
+    std::shared_ptr<Tensor> myBias =
+        std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+        Array2D<int, 2, 5>{{{23601, 23602, 23603, 23604, 23605},
+                            {68601, 68602, 68603, 68604, 68605}}});
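+    // Sanity check for output[0][0]: the dot product of input row 0 (0..74)
+    // with the repeating weight pattern 1..15 equals 23600, and bias[0] adds
+    // 1, giving 23601.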
 
     std::shared_ptr<Node> myFC = FC(75, 5, false, "myfc");
-    auto op = std::static_pointer_cast<OperatorTensor>(myFC -> getOperator());
-    op -> associateInput(1, myWeights);
-    op -> associateInput(2, myBias);
+    auto op = std::static_pointer_cast<OperatorTensor>(myFC->getOperator());
+    op->associateInput(1, myWeights);
+    op->associateInput(2, myBias);
 
     SECTION("2D input") {
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
-                {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
-                  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
-                  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
-                  57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
-                 {75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
-                  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
-                  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
-                  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
-                  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array2D<int, 2, 75>{
+                {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
+                  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+                  30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+                  45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                  60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
+                 {75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,
+                  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
+                  97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107,
+                  108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
+                  119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
+                  130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
+                  141, 142, 143, 144, 145, 146, 147, 148, 149}}});
         op->associateInput(0, myInput);
-        op -> setDataType(DataType::Int32);
-        op -> setBackend("cpu");
+        op->setDataType(DataType::Int32);
+        op->setBackend("cpu");
         myFC->forward();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
     SECTION("4D input") {
-        std::shared_ptr<Tensor> myInput =
-                std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
-                                                                     {5, 6, 7, 8, 9},
-                                                                     {10, 11, 12, 13, 14},
-                                                                     {15, 16, 17, 18, 19},
-                                                                     {20, 21, 22, 23, 24}},
-                                                                    {{25, 26, 27, 28, 29},
-                                                                     {30, 31, 32, 33, 34},
-                                                                     {35, 36, 37, 38, 39},
-                                                                     {40, 41, 42, 43, 44},
-                                                                     {45, 46, 47, 48, 49}},
-                                                                    {{50, 51, 52, 53, 54},
-                                                                     {55, 56, 57, 58, 59},
-                                                                     {60, 61, 62, 63, 64},
-                                                                     {65, 66, 67, 68, 69},
-                                                                     {70, 71, 72, 73, 74}}},
-                                                                   {{{75, 76, 77, 78, 79},
-                                                                     {80, 81, 82, 83, 84},
-                                                                     {85, 86, 87, 88, 89},
-                                                                     {90, 91, 92, 93, 94},
-                                                                     {95, 96, 97, 98, 99}},
-                                                                    {{100, 101, 102, 103, 104},
-                                                                     {105, 106, 107, 108, 109},
-                                                                     {110, 111, 112, 113, 114},
-                                                                     {115, 116, 117, 118, 119},
-                                                                     {120, 121, 122, 123, 124}},
-                                                                    {{125, 126, 127, 128, 129},
-                                                                     {130, 131, 132, 133, 134},
-                                                                     {135, 136, 137, 138, 139},
-                                                                     {140, 141, 142, 143, 144},
-                                                                     {145, 146, 147, 148, 149}}}}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
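+        // FC flattens all non-batch dimensions, so this 2x3x5x5 input is
+        // processed as 2x75 and must yield the same output as the 2D case.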
         op->associateInput(0, myInput);
-        op -> setDataType(DataType::Int32);
-        op -> setBackend("cpu");
+        op->setDataType(DataType::Int32);
+        op->setBackend("cpu");
         myFC->forward();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
 
-    // std::cout << static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] << std::endl;
+    // std::cout <<
+    // static_cast<Tensor>((*myFC->getOperator())["weight"])[0][0][0][0] <<
+    // std::endl;
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_FoldImpl.cpp b/unit_tests/operator/Test_FoldImpl.cpp
index 6832f5a42d796d9261495794e0758ce1b6df0346..bdd8b88e82f6abe90fad14699f023f6c7a94cd51 100644
--- a/unit_tests/operator/Test_FoldImpl.cpp
+++ b/unit_tests/operator/Test_FoldImpl.cpp
@@ -15,154 +15,111 @@
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/graph/GraphView.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/operator/Fold.hpp"
-#include "aidge/operator/Unfold.hpp"
 #include "aidge/operator/MatMul.hpp"
 #include "aidge/operator/Reshape.hpp"
+#include "aidge/operator/Unfold.hpp"
+#include "aidge/scheduler/SequentialScheduler.hpp"
 
 #include "aidge/backend/cpu.hpp"
 
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") {
-    std::shared_ptr<Node> myUnfold = Unfold({3,3}, "myunfold");
+    std::shared_ptr<Node> myUnfold = Unfold({3, 3}, "myunfold");
     std::shared_ptr<Node> myReshape = Reshape({4, 27}, "myreshape");
     std::shared_ptr<Node> myMatMul = MatMul("mymatmul");
-    std::shared_ptr<Node> myFold = Fold({3,3}, {1,1}, "myfold");
+    std::shared_ptr<Node> myFold = Fold({3, 3}, {1, 1}, "myfold");
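+    // Chaining Unfold -> MatMul (on reshaped kernels) -> Fold emulates a
+    // stride-1 3x3 convolution using the classic im2col approach.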
     myUnfold->addChild(myMatMul, 0, 1);
     myReshape->addChild(myMatMul, 0, 0);
     myMatMul->addChild(myFold, 0, 0);
 
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
-        {
-            {
-                {{  0,   1,   2},
-                {  3,   4,   5},
-                {  6,   7,   8}},
-                {{  9,  10,  11},
-                { 12,  13,  14},
-                { 15,  16,  17}},
-                {{ 18,  19,  20},
-                { 21,  22,  23},
-                { 24,  25,  26}}
-            },
-            {
-                {{ 27,  28,  29},
-                { 30,  31,  32},
-                { 33,  34,  35}},
-                {{ 36,  37,  38},
-                { 39,  40,  41},
-                { 42,  43,  44}},
-                {{ 45,  46,  47},
-                { 48,  49,  50},
-                { 51,  52,  53}}
-            },
-            {
-                {{ 54,  55,  56},
-                { 57,  58,  59},
-                { 60,  61,  62}},
-                {{ 63,  64,  65},
-                { 66,  67,  68},
-                { 69,  70,  71}},
-                {{ 72,  73,  74},
-                { 75,  76,  77},
-                { 78,  79,  80}}
-            },
-            {
-                {{ 81,  82,  83},
-                { 84,  85,  86},
-                { 87,  88,  89}},
-                {{ 90,  91,  92},
-                { 93,  94,  95},
-                { 96,  97,  98}},
-                {{ 99, 100, 101},
-                {102, 103, 104},
-                {105, 106, 107}}
-            }
-        }
-    });
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-        {
-            {
-                {{  0,   1,   2,   3,   4},
-                {  5,   6,   7,   8,   9},
-                { 10,  11,  12,  13,  14},
-                { 15,  16,  17,  18,  19},
-                { 20,  21,  22,  23,  24}},
+    std::shared_ptr<Tensor> myWeights =
+        std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{
+            {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}},
+              {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}},
+              {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}},
+             {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}},
+              {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}},
+              {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}},
+             {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}},
+              {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}},
+              {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}},
+             {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}},
+              {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}},
+              {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}});
+    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+        Array4D<int, 2, 3, 5, 5>{// NCHW
+                                 {{{{0, 1, 2, 3, 4},
+                                    {5, 6, 7, 8, 9},
+                                    {10, 11, 12, 13, 14},
+                                    {15, 16, 17, 18, 19},
+                                    {20, 21, 22, 23, 24}},
 
-                {{ 25,  26,  27,  28,  29},
-                { 30,  31,  32,  33,  34},
-                { 35,  36,  37,  38,  39},
-                { 40,  41,  42,  43,  44},
-                { 45,  46,  47,  48,  49}},
+                                   {{25, 26, 27, 28, 29},
+                                    {30, 31, 32, 33, 34},
+                                    {35, 36, 37, 38, 39},
+                                    {40, 41, 42, 43, 44},
+                                    {45, 46, 47, 48, 49}},
 
-                {{ 50,  51,  52,  53,  54},
-                { 55,  56,  57,  58,  59},
-                { 60,  61,  62,  63,  64},
-                { 65,  66,  67,  68,  69},
-                { 70,  71,  72,  73,  74}}
-            },
-            {
-                {{ 75,  76,  77,  78,  79},
-                { 80,  81,  82,  83,  84},
-                { 85,  86,  87,  88,  89},
-                { 90,  91,  92,  93,  94},
-                { 95,  96,  97,  98,  99}},
+                                   {{50, 51, 52, 53, 54},
+                                    {55, 56, 57, 58, 59},
+                                    {60, 61, 62, 63, 64},
+                                    {65, 66, 67, 68, 69},
+                                    {70, 71, 72, 73, 74}}},
+                                  {{{75, 76, 77, 78, 79},
+                                    {80, 81, 82, 83, 84},
+                                    {85, 86, 87, 88, 89},
+                                    {90, 91, 92, 93, 94},
+                                    {95, 96, 97, 98, 99}},
 
-                {{100, 101, 102, 103, 104},
-                {105, 106, 107, 108, 109},
-                {110, 111, 112, 113, 114},
-                {115, 116, 117, 118, 119},
-                {120, 121, 122, 123, 124}},
+                                   {{100, 101, 102, 103, 104},
+                                    {105, 106, 107, 108, 109},
+                                    {110, 111, 112, 113, 114},
+                                    {115, 116, 117, 118, 119},
+                                    {120, 121, 122, 123, 124}},
 
-                {{125, 126, 127, 128, 129},
-                {130, 131, 132, 133, 134},
-                {135, 136, 137, 138, 139},
-                {140, 141, 142, 143, 144},
-                {145, 146, 147, 148, 149}}
-            }
-        }
-    });
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
-        {
-            {
-                {{ 15219, 15570, 15921},
-                { 16974, 17325, 17676},
-                { 18729, 19080, 19431}},
-                {{ 37818, 38898, 39978},
-                { 43218, 44298, 45378},
-                { 48618, 49698, 50778}},
-                {{ 60417, 62226, 64035},
-                { 69462, 71271, 73080},
-                { 78507, 80316, 82125}},
-                {{ 83016, 85554, 88092},
-                { 95706, 98244, 100782},
-                { 108396, 110934, 113472}}
-            },
-            {
-                {{ 41544, 41895, 42246},
-                { 43299, 43650, 44001},
-                { 45054, 45405, 45756}},
-                {{ 118818, 119898, 120978},
-                { 124218, 125298, 126378},
-                { 129618, 130698, 131778}},
-                {{ 196092, 197901, 199710},
-                { 205137, 206946, 208755},
-                { 214182, 215991, 217800}},
-                {{ 273366, 275904, 278442},
-                { 286056, 288594, 291132},
-                { 298746, 301284, 303822}}
-            }
-        }
-    });
+                                   {{125, 126, 127, 128, 129},
+                                    {130, 131, 132, 133, 134},
+                                    {135, 136, 137, 138, 139},
+                                    {140, 141, 142, 143, 144},
+                                    {145, 146, 147, 148, 149}}}}});
+    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+        Array4D<int, 2, 4, 3, 3>{{{{{15219, 15570, 15921},
+                                    {16974, 17325, 17676},
+                                    {18729, 19080, 19431}},
+                                   {{37818, 38898, 39978},
+                                    {43218, 44298, 45378},
+                                    {48618, 49698, 50778}},
+                                   {{60417, 62226, 64035},
+                                    {69462, 71271, 73080},
+                                    {78507, 80316, 82125}},
+                                   {{83016, 85554, 88092},
+                                    {95706, 98244, 100782},
+                                    {108396, 110934, 113472}}},
+                                  {{{41544, 41895, 42246},
+                                    {43299, 43650, 44001},
+                                    {45054, 45405, 45756}},
+                                   {{118818, 119898, 120978},
+                                    {124218, 125298, 126378},
+                                    {129618, 130698, 131778}},
+                                   {{196092, 197901, 199710},
+                                    {205137, 206946, 208755},
+                                    {214182, 215991, 217800}},
+                                   {{273366, 275904, 278442},
+                                    {286056, 288594, 291132},
+                                    {298746, 301284, 303822}}}}});
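+    // Sanity check: myOutput[0][0][0][0] is the first 3x3x3 input patch
+    // weighted by kernel 0 (weights 0..26) = 312 + 3723 + 11184 = 15219.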
 
-    auto opUnfold = std::static_pointer_cast<OperatorTensor>(myUnfold -> getOperator());
-    auto opReshape = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator());
-    auto opMatMul = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
-    auto opFold = std::static_pointer_cast<OperatorTensor>(myFold -> getOperator());
-    opUnfold->associateInput(0,myInput);
-    opReshape->associateInput(0,myWeights);
+    auto opUnfold =
+        std::static_pointer_cast<OperatorTensor>(myUnfold->getOperator());
+    auto opReshape =
+        std::static_pointer_cast<OperatorTensor>(myReshape->getOperator());
+    auto opMatMul =
+        std::static_pointer_cast<OperatorTensor>(myMatMul->getOperator());
+    auto opFold =
+        std::static_pointer_cast<OperatorTensor>(myFold->getOperator());
+    opUnfold->associateInput(0, myInput);
+    opReshape->associateInput(0, myWeights);
 
     auto g = getConnectedGraphView(myMatMul);
     g->setDataType(DataType::Int32);
@@ -173,6 +130,6 @@ TEST_CASE("[cpu/operator] Fold(forward)", "[Fold][CPU]") {
 
     SequentialScheduler scheduler(g);
     scheduler.forward();
-    //opFold->getOutput(0)->print();
+    // opFold->getOutput(0)->print();
     REQUIRE(*(opFold->getOutput(0)) == *myOutput);
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
index d5f2065b624de431b43edef9a83bf079905129dd..9e6d93bf7d4129dc5055f7e901c1fde77da90987 100644
--- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
@@ -27,539 +27,584 @@
 
 // debug print function
 void print_tensor(Aidge::Tensor &T) {
-  // Print tensors
-  std::cout << "Tensor : size =  [";
-  for (auto &dim : T.dims()) {
-    std::cout << dim << " , ";
-  }
-  std::cout << "]" << std::endl;
-  T.print();
+    // Print tensors
+    std::cout << "Tensor : size =  [";
+    for (auto &dim : T.dims()) {
+        std::cout << dim << " , ";
+    }
+    std::cout << "]" << std::endl;
+    T.print();
 }
 
 namespace Aidge {
 TEST_CASE("[cpu/operator] GlobalAveragePooling",
           "[GlobalAveragePooling][CPU]") {
-  constexpr std::uint16_t NBTRIALS = 10;
-  // Create a random number generator
-  std::random_device rd;
-  std::mt19937 gen(rd());
-  std::uniform_real_distribution<float> valueDist(
-      0.1f, 1.1f); // Random float distribution between 0 and 1
-  std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
-                                                         std::size_t(10));
-
-  std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1),
-                                                           std::size_t(2));
-  std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3),
-                                                            std::size_t(7));
-
-  // Create MatGlobalAveragePooling Operator
-  std::shared_ptr<Node> globAvgPool = GlobalAveragePooling();
-  auto op =
-      std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator());
-  op->setDataType(DataType::Float32);
-  op->setBackend("cpu");
-
-  // Create the input Tensor
-  std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-  op->associateInput(0, T0);
-  T0->setDataType(DataType::Float32);
-  T0->setBackend("cpu");
-
-  // Create results Tensor
-  std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
-  Tres->setDataType(DataType::Float32);
-  Tres->setBackend("cpu");
-
-  // To measure execution time of 'MatGlobalAveragePooling_Op::forward()' member
-  // function call
-  std::chrono::time_point<std::chrono::system_clock> start;
-  std::chrono::time_point<std::chrono::system_clock> end;
-  std::chrono::duration<double, std::micro> duration{};
-  int number_of_operation{0};
-
-  SECTION("GlobalAveragePoolingImpl_cpu::forward()") {
-    SECTION(
-        "1-2Dim > not enough dimensions leads to function throwing an error") {
-      // generate a random tensors
-      const std::size_t nbDims = nbLowDimsDist(gen);
-      std::vector<std::size_t> dims;
-      for (std::size_t i = 0; i < nbDims; ++i) {
-        dims.push_back(dimSizeDist(gen));
-      }
-      const std::size_t nb_elements =
-          std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1),
-                          std::multiplies<std::size_t>());
-
-      float *array0 = new float[nb_elements];
-      for (std::size_t i = 0; i < nb_elements; ++i) {
-        array0[i] = valueDist(gen);
-      }
-      // input0
-      T0->resize(dims);
-      T0->getImpl()->setRawPtr(array0, nb_elements);
-
-      REQUIRE_THROWS(globAvgPool->forward());
-      delete[] array0;
-    }
-
-    SECTION("3+Dim") {
-      SECTION("Fill a tensor with all values set as N will result with every "
-              "output being N") {
-        // generate the tensor
-        const std::size_t nbDims = nbHighDimsDist(gen);
-        std::vector<std::size_t> dims_in;
-        for (std::size_t i = 0; i < nbDims; ++i) {
-          dims_in.push_back(dimSizeDist(gen));
-        }
-        // create in nb_elems
-        const std::size_t in_nb_elems =
-            std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
-                            std::multiplies<std::size_t>());
-        const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
-        const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
-
-        number_of_operation +=
-            in_nb_elems +
-            dims_in[1]; //  averaging per channel : 1 addition per element in
-                        //  the channel + 1 division this for every batch
-        // create out nb_elems
-        std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
-        const std::size_t out_nb_elems =
-            std::accumulate(dims_out.cbegin(), dims_out.cend(), std::size_t(1),
-                            std::multiplies<std::size_t>());
-        const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
-
-        // iterate over each batch/channel
-        float *array0 = new float[in_nb_elems];
-        float *result = new float[out_nb_elems];
-        float val = valueDist(gen);
-        for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
-          for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
-            for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
-
-            {
-              array0[batch * in_batch_nb_elems + channel * in_channel_nb_elems +
-                     i] = val;
+    constexpr std::uint16_t NBTRIALS = 10;
+    // Create a random number generator
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+
+    std::uniform_int_distribution<std::size_t> nbLowDimsDist(std::size_t(1),
+                                                             std::size_t(2));
+    std::uniform_int_distribution<std::size_t> nbHighDimsDist(std::size_t(3),
+                                                              std::size_t(7));
+
+    // Create GlobalAveragePooling Operator
+    std::shared_ptr<Node> globAvgPool = GlobalAveragePooling();
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(globAvgPool->getOperator());
+    op->setDataType(DataType::Float32);
+    op->setBackend("cpu");
+
+    // Create the input Tensor
+    std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
+    op->associateInput(0, T0);
+    T0->setDataType(DataType::Float32);
+    T0->setBackend("cpu");
+
+    // Create results Tensor
+    std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>();
+    Tres->setDataType(DataType::Float32);
+    Tres->setBackend("cpu");
+
+    // To measure execution time of 'GlobalAveragePooling_Op::forward()'
+    // member function call
+    std::chrono::time_point<std::chrono::system_clock> start;
+    std::chrono::time_point<std::chrono::system_clock> end;
+    std::chrono::duration<double, std::micro> duration{};
+    int number_of_operation{0};
+
+    SECTION("GlobalAveragePoolingImpl_cpu::forward()") {
+        SECTION("1-2Dim > not enough dimensions leads to function throwing an "
+                "error") {
+            // generate a random tensor
+            const std::size_t nbDims = nbLowDimsDist(gen);
+            std::vector<std::size_t> dims;
+            for (std::size_t i = 0; i < nbDims; ++i) {
+                dims.push_back(dimSizeDist(gen));
             }
-            result[batch * out_batch_nb_elems + channel] = val;
-          }
-        }
-
-        // input0
-        T0->resize(dims_in);
-        T0->getImpl()->setRawPtr(array0, in_nb_elems);
-
-        // results
-        Tres->resize(dims_out);
-        Tres->getImpl()->setRawPtr(result, out_nb_elems);
-
-        op->forwardDims();
-        start = std::chrono::system_clock::now();
-        REQUIRE_NOTHROW(globAvgPool->forward());
-        end = std::chrono::system_clock::now();
-        duration +=
-            std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            const std::size_t nb_elements =
+                std::accumulate(dims.cbegin(),
+                                dims.cend(),
+                                std::size_t(1),
+                                std::multiplies<std::size_t>());
+
+            float *array0 = new float[nb_elements];
+            for (std::size_t i = 0; i < nb_elements; ++i) {
+                array0[i] = valueDist(gen);
+            }
+            // input0
+            T0->resize(dims);
+            T0->getImpl()->setRawPtr(array0, nb_elements);
 
-        REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
-        for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
-          REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
+            REQUIRE_THROWS(globAvgPool->forward());
+            delete[] array0;
         }
 
-        REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
-
-        delete[] array0;
-        delete[] result;
-      }
-
-      SECTION("random testing") {
-        for (int trial = 0; trial < NBTRIALS; ++trial) {
-          // generate the tensor
-          const std::size_t nbDims = nbHighDimsDist(gen);
-          std::vector<std::size_t> dims_in;
-          for (std::size_t i = 0; i < nbDims; ++i) {
-            dims_in.push_back(dimSizeDist(gen));
-          }
-          // create in nb_elems
-          const std::size_t in_nb_elems =
-              std::accumulate(dims_in.cbegin(), dims_in.cend(), std::size_t(1),
-                              std::multiplies<std::size_t>());
-          const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
-          const DimSize_t in_channel_nb_elems = in_batch_nb_elems / dims_in[1];
-          number_of_operation +=
-              in_nb_elems +
-              dims_in[1]; //  averaging per channel : 1 addition per element in
-                          //  the channel + 1 division this for every batch
-
-          // create out nb_elems
-          std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
-          const std::size_t out_nb_elems =
-              std::accumulate(dims_out.cbegin(), dims_out.cend(),
-                              std::size_t(1), std::multiplies<std::size_t>());
-          const DimSize_t out_batch_nb_elems = out_nb_elems / dims_out[0];
-
-          // iterate over each batch/channel
-          float *array0 = new float[in_nb_elems];
-          float *result = new float[out_nb_elems];
-          for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
-            for (std::size_t channel = 0; channel < dims_in[1]; ++channel) {
-              float channel_sum = 0;
-              for (std::size_t i = 0; i < in_channel_nb_elems; ++i)
-
-              {
+        SECTION("3+Dim") {
+            SECTION("Fill a tensor with all values set as N will result with "
+                    "every "
+                    "output being N") {
+                // generate the tensor
+                const std::size_t nbDims = nbHighDimsDist(gen);
+                std::vector<std::size_t> dims_in;
+                for (std::size_t i = 0; i < nbDims; ++i) {
+                    dims_in.push_back(dimSizeDist(gen));
+                }
+                // create in nb_elems
+                const std::size_t in_nb_elems =
+                    std::accumulate(dims_in.cbegin(),
+                                    dims_in.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
+                const DimSize_t in_batch_nb_elems = in_nb_elems / dims_in[0];
+                const DimSize_t in_channel_nb_elems =
+                    in_batch_nb_elems / dims_in[1];
+
+                number_of_operation +=
+                    in_nb_elems +
+                    dims_in[1]; // averaging per channel: 1 addition per
+                                // element in the channel + 1 division,
+                                // repeated for every batch
+                // create out nb_elems
+                std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+                const std::size_t out_nb_elems =
+                    std::accumulate(dims_out.cbegin(),
+                                    dims_out.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
+                const DimSize_t out_batch_nb_elems =
+                    out_nb_elems / dims_out[0];
+
+                // iterate over each batch/channel
+                float *array0 = new float[in_nb_elems];
+                float *result = new float[out_nb_elems];
                 float val = valueDist(gen);
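+                // the average of a constant-filled channel is the constant
+                // itself, so every expected output entry is simply `val`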
-                array0[batch * in_batch_nb_elems +
-                       channel * in_channel_nb_elems + i] = val;
-                channel_sum += val;
-              }
-              result[batch * out_batch_nb_elems + channel] =
-                  channel_sum / in_channel_nb_elems;
+                for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
+                    for (std::size_t channel = 0; channel < dims_in[1];
+                         ++channel) {
+                        for (std::size_t i = 0; i < in_channel_nb_elems;
+                             ++i) {
+                            array0[batch * in_batch_nb_elems +
+                                   channel * in_channel_nb_elems + i] = val;
+                        }
+                        result[batch * out_batch_nb_elems + channel] = val;
+                    }
+                }
+
+                // input0
+                T0->resize(dims_in);
+                T0->getImpl()->setRawPtr(array0, in_nb_elems);
+
+                // results
+                Tres->resize(dims_out);
+                Tres->getImpl()->setRawPtr(result, out_nb_elems);
+
+                op->forwardDims();
+                start = std::chrono::system_clock::now();
+                REQUIRE_NOTHROW(globAvgPool->forward());
+                end = std::chrono::system_clock::now();
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
+
+                REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+                for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
+                    REQUIRE(Tres->dims().at(i) ==
+                            op->getOutput(0)->dims().at(i));
+                }
+
+                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+
+                delete[] array0;
+                delete[] result;
             }
-          }
 
-          // input0
-          T0->resize(dims_in);
-          T0->getImpl()->setRawPtr(array0, in_nb_elems);
-
-          // results
-          Tres->resize(dims_out);
-          Tres->getImpl()->setRawPtr(result, out_nb_elems);
-
-          op->forwardDims();
-          start = std::chrono::system_clock::now();
-          REQUIRE_NOTHROW(globAvgPool->forward());
-          end = std::chrono::system_clock::now();
-          duration += std::chrono::duration_cast<std::chrono::microseconds>(
-              end - start);
-
-          REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
-          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
-            REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
-          }
-
-          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f));
-
-          delete[] array0;
-          delete[] result;
-        }
-      }
-      SECTION("Using result from a pytorch function as groundtruth") {
-        DimSize_t batch_size = 2;
-        DimSize_t channels = 3;
-        DimSize_t height = 4;
-        DimSize_t width = 3;
-        DimSize_t depth = 2;
-
-        SECTION("2D_img") {
-          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
-                                               width};
-          const std::vector<DimSize_t> out_dims{batch_size, channels};
-          DimSize_t in_nb_elems = batch_size * channels * height * width;
-          DimSize_t out_nb_elems = batch_size * channels;
-          number_of_operation +=
-              in_nb_elems +
-              channels; //  averaging per channel : 1 addition per element in
-                        //  the channel + 1 division this for every batch
-          auto input = new float[in_nb_elems];
-          auto result = new float[out_nb_elems];
-          input[0] = 0.1807716;
-          input[1] = -0.0699881;
-          input[2] = -0.3596235;
-          input[3] = -0.9152045;
-          input[4] = 0.6257653;
-          input[5] = 0.0255099;
-          input[6] = 0.9545137;
-          input[7] = 0.0643485;
-          input[8] = 0.3611506;
-          input[9] = 1.1678782;
-          input[10] = -1.3498932;
-          input[11] = -0.5101767;
-          input[12] = 0.2359577;
-          input[13] = -0.2397784;
-          input[14] = -0.9211147;
-          input[15] = 1.5432971;
-          input[16] = 1.3488258;
-          input[17] = -0.1396417;
-          input[18] = 0.2857972;
-          input[19] = 0.9651205;
-          input[20] = -2.0371499;
-          input[21] = 0.4931363;
-          input[22] = 1.4869986;
-          input[23] = 0.5910330;
-          input[24] = 0.1260297;
-          input[25] = -1.5626874;
-          input[26] = -1.1601028;
-          input[27] = -0.3348408;
-          input[28] = 0.4477722;
-          input[29] = -0.8016447;
-          input[30] = 1.5236114;
-          input[31] = 2.5085869;
-          input[32] = -0.6630959;
-          input[33] = -0.2512752;
-          input[34] = 1.0101448;
-          input[35] = 0.1215468;
-          input[36] = 0.1583993;
-          input[37] = 1.1340188;
-          input[38] = -1.1538976;
-          input[39] = -0.2983968;
-          input[40] = -0.5075365;
-          input[41] = -0.9239212;
-          input[42] = 0.5467061;
-          input[43] = -1.4947776;
-          input[44] = -1.2057148;
-          input[45] = 0.5718198;
-          input[46] = -0.5973545;
-          input[47] = -0.6936757;
-          input[48] = 1.6455388;
-          input[49] = -0.8029931;
-          input[50] = 1.3514109;
-          input[51] = -0.2759193;
-          input[52] = -1.5108346;
-          input[53] = 2.1047730;
-          input[54] = 2.7629590;
-          input[55] = -1.7465292;
-          input[56] = 0.8353187;
-          input[57] = -1.9560477;
-          input[58] = -0.8002653;
-          input[59] = -0.5044988;
-          input[60] = -0.0711742;
-          input[61] = -0.5130699;
-          input[62] = -1.0307810;
-          input[63] = 0.9154347;
-          input[64] = -0.2282317;
-          input[65] = -0.6884708;
-          input[66] = 0.1832259;
-          input[67] = 0.6003584;
-          input[68] = -1.5429375;
-          input[69] = -0.3465560;
-          input[70] = -0.1476223;
-          input[71] = 0.6469797;
-
-          result[0] = 0.0145876;
-          result[1] = 0.3010401;
-          result[2] = 0.0803371;
-
-          result[3] = -0.3720275;
-          result[4] = 0.0919094;
-          result[5] = -0.1852371;
-
-          // input0
-          T0->resize(in_dims);
-          T0->getImpl()->setRawPtr(input, in_nb_elems);
-
-          // results
-          Tres->resize(out_dims);
-          Tres->getImpl()->setRawPtr(result, out_nb_elems);
-          op->forwardDims();
-          start = std::chrono::system_clock::now();
-          REQUIRE_NOTHROW(globAvgPool->forward());
-          end = std::chrono::system_clock::now();
-          duration += std::chrono::duration_cast<std::chrono::microseconds>(
-              end - start);
-
-          REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
-          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
-            REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
-          }
-          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
-          delete[] input;
-          delete[] result;
-        }
-        SECTION("3D_img") {
-          const std::vector<DimSize_t> in_dims{batch_size, channels, height,
-                                               width, depth};
-          const std::vector<DimSize_t> out_dims{batch_size, channels};
-          DimSize_t in_nb_elems =
-              batch_size * channels * height * width * depth;
-          number_of_operation +=
-              in_nb_elems +
-              channels; //  averaging per channel : 1 addition per element in
-                        //  the channel + 1 division this for every batch
-          DimSize_t out_nb_elems = batch_size * channels;
-          auto input = new float[in_nb_elems];
-          auto result = new float[out_nb_elems];
-          input[0] = 0.0061403;
-          input[1] = -0.9665052;
-          input[2] = 0.3582928;
-          input[3] = 0.1072854;
-          input[4] = 1.2463317;
-          input[5] = 1.2460036;
-          input[6] = 0.3534451;
-          input[7] = 0.9425349;
-          input[8] = -0.2103887;
-          input[9] = -0.7959853;
-          input[10] = 0.1297970;
-          input[11] = -1.9445597;
-          input[12] = 0.0609514;
-          input[13] = -0.2379328;
-          input[14] = 1.9020044;
-          input[15] = -1.1762751;
-          input[16] = 0.3404147;
-          input[17] = 1.1685153;
-          input[18] = -0.6526139;
-          input[19] = 0.3767620;
-          input[20] = 0.1887376;
-          input[21] = 0.5154487;
-          input[22] = 0.6371427;
-          input[23] = -0.3948864;
-          input[24] = -1.1571540;
-          input[25] = 0.2896117;
-          input[26] = 0.6163548;
-          input[27] = -0.4370409;
-          input[28] = 0.6589766;
-          input[29] = 0.6587803;
-          input[30] = -1.3702172;
-          input[31] = -1.6210355;
-          input[32] = 0.5872851;
-          input[33] = 0.2860694;
-          input[34] = 0.0082870;
-          input[35] = -0.2523253;
-          input[36] = -1.3247224;
-          input[37] = 0.1891782;
-          input[38] = 0.0211001;
-          input[39] = 0.9404197;
-          input[40] = -0.5576900;
-          input[41] = -0.6939272;
-          input[42] = -0.3252473;
-          input[43] = 1.2439330;
-          input[44] = -1.1671864;
-          input[45] = -0.4091243;
-          input[46] = 1.2600617;
-          input[47] = -1.5630058;
-          input[48] = 1.1346143;
-          input[49] = -0.0823837;
-          input[50] = 0.2893163;
-          input[51] = 0.8357732;
-          input[52] = -0.2449911;
-          input[53] = 0.2712233;
-          input[54] = 0.0936364;
-          input[55] = -0.8834321;
-          input[56] = -0.3274170;
-          input[57] = 0.0783938;
-          input[58] = -0.3807656;
-          input[59] = 0.3775077;
-          input[60] = 0.1119123;
-          input[61] = 2.3142793;
-          input[62] = -0.7989057;
-          input[63] = -0.5643027;
-          input[64] = -1.1346605;
-          input[65] = 0.1705271;
-          input[66] = 0.9946650;
-          input[67] = 1.2625724;
-          input[68] = 1.6218156;
-          input[69] = 1.0774711;
-          input[70] = 0.5947813;
-          input[71] = -1.5290873;
-          input[72] = 2.0437069;
-          input[73] = -0.1656267;
-          input[74] = 0.0870704;
-          input[75] = -0.5276564;
-          input[76] = -0.1002882;
-          input[77] = 1.0539219;
-          input[78] = -0.6230739;
-          input[79] = -1.5905718;
-          input[80] = -0.9741858;
-          input[81] = -0.1869211;
-          input[82] = 0.5816050;
-          input[83] = -2.6339815;
-          input[84] = -1.0764544;
-          input[85] = 2.5903966;
-          input[86] = 0.4940658;
-          input[87] = 0.4671729;
-          input[88] = 0.6588292;
-          input[89] = -0.7257792;
-          input[90] = 1.4280071;
-          input[91] = -1.2187740;
-          input[92] = 0.7380729;
-          input[93] = -1.1599953;
-          input[94] = -1.4355115;
-          input[95] = -1.5304037;
-          input[96] = 0.8474578;
-          input[97] = 0.0774260;
-          input[98] = 0.5433396;
-          input[99] = -0.8438400;
-          input[100] = -0.1089903;
-          input[101] = -0.6354192;
-          input[102] = 0.8772392;
-          input[103] = 0.2844733;
-          input[104] = 0.0975270;
-          input[105] = -0.9785872;
-          input[106] = -0.4320499;
-          input[107] = -1.4937501;
-          input[108] = -2.0644901;
-          input[109] = 0.0851217;
-          input[110] = 0.6644159;
-          input[111] = 0.4168026;
-          input[112] = 0.0958830;
-          input[113] = -1.5699565;
-          input[114] = 0.3739572;
-          input[115] = -0.1420672;
-          input[116] = -0.7864021;
-          input[117] = 0.2443752;
-          input[118] = -0.9811850;
-          input[119] = -0.0698569;
-          input[120] = 0.1463890;
-          input[121] = 0.2536245;
-          input[122] = 0.2136150;
-          input[123] = 0.3113698;
-          input[124] = 1.8353856;
-          input[125] = 1.4473228;
-          input[126] = -0.7373698;
-          input[127] = 0.2485314;
-          input[128] = -0.4789796;
-          input[129] = -0.3396149;
-          input[130] = 0.6438198;
-          input[131] = 0.7287521;
-          input[132] = -1.5119252;
-          input[133] = -0.1006494;
-          input[134] = 1.8955028;
-          input[135] = 1.0871323;
-          input[136] = 0.3620502;
-          input[137] = -0.8826663;
-          input[138] = 1.2220223;
-          input[139] = -1.2817260;
-          input[140] = 1.4153577;
-          input[141] = 0.4148015;
-          input[142] = 1.3458617;
-          input[143] = 1.9718349;
-
-          result[0] = 0.1333608;
-          result[1] = -0.1716091;
-          result[2] = 0.2201060;
-          result[3] = -0.1585989;
-          result[4] = -0.2291074;
-          result[5] = 0.4254351;
-
-          // input0
-          T0->resize(in_dims);
-          T0->getImpl()->setRawPtr(input, in_nb_elems);
-
-          // results
-          Tres->resize(out_dims);
-          Tres->getImpl()->setRawPtr(result, out_nb_elems);
-          op->forwardDims();
-          start = std::chrono::system_clock::now();
-          REQUIRE_NOTHROW(globAvgPool->forward());
-          end = std::chrono::system_clock::now();
-          duration += std::chrono::duration_cast<std::chrono::microseconds>(
-              end - start);
-
-          REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
-          for (DimSize_t i = 0; i < op->getOutput(0)->nbDims(); ++i) {
-            REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
-          }
-          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
-          delete[] input;
-          delete[] result;
+            SECTION("random testing") {
+                for (int trial = 0; trial < NBTRIALS; ++trial) {
+                    // generate the tensor
+                    const std::size_t nbDims = nbHighDimsDist(gen);
+                    std::vector<std::size_t> dims_in;
+                    for (std::size_t i = 0; i < nbDims; ++i) {
+                        dims_in.push_back(dimSizeDist(gen));
+                    }
+                    // create in nb_elems
+                    const std::size_t in_nb_elems =
+                        std::accumulate(dims_in.cbegin(),
+                                        dims_in.cend(),
+                                        std::size_t(1),
+                                        std::multiplies<std::size_t>());
+                    const DimSize_t in_batch_nb_elems =
+                        in_nb_elems / dims_in[0];
+                    const DimSize_t in_channel_nb_elems =
+                        in_batch_nb_elems / dims_in[1];
+                    number_of_operation +=
+                        in_nb_elems +
+                        dims_in[1]; // averaging per channel: 1 addition per
+                                    // element in the channel + 1 division,
+                                    // repeated for every batch
+
+                    // create out nb_elems
+                    std::vector<std::size_t> dims_out{dims_in[0], dims_in[1]};
+                    const std::size_t out_nb_elems =
+                        std::accumulate(dims_out.cbegin(),
+                                        dims_out.cend(),
+                                        std::size_t(1),
+                                        std::multiplies<std::size_t>());
+                    const DimSize_t out_batch_nb_elems =
+                        out_nb_elems / dims_out[0];
+
+                    // iterate over each batch/channel
+                    float *array0 = new float[in_nb_elems];
+                    float *result = new float[out_nb_elems];
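+                    // build the reference output on the fly: accumulate each
+                    // channel's sum, then divide by the channel element count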
+                    for (std::size_t batch = 0; batch < dims_in[0]; ++batch) {
+                        for (std::size_t channel = 0; channel < dims_in[1];
+                             ++channel) {
+                            float channel_sum = 0;
+                            for (std::size_t i = 0; i < in_channel_nb_elems;
+                                 ++i) {
+                                float val = valueDist(gen);
+                                array0[batch * in_batch_nb_elems +
+                                       channel * in_channel_nb_elems + i] =
+                                    val;
+                                channel_sum += val;
+                            }
+                            result[batch * out_batch_nb_elems + channel] =
+                                channel_sum / in_channel_nb_elems;
+                        }
+                    }
+
+                    // input0
+                    T0->resize(dims_in);
+                    T0->getImpl()->setRawPtr(array0, in_nb_elems);
+
+                    // results
+                    Tres->resize(dims_out);
+                    Tres->getImpl()->setRawPtr(result, out_nb_elems);
+
+                    op->forwardDims();
+                    start = std::chrono::system_clock::now();
+                    REQUIRE_NOTHROW(globAvgPool->forward());
+                    end = std::chrono::system_clock::now();
+                    duration +=
+                        std::chrono::duration_cast<std::chrono::microseconds>(
+                            end - start);
+
+                    REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+                    for (DimSize_t i = 0; i < op->getOutput(0)->nbDims();
+                         ++i) {
+                        REQUIRE(Tres->dims().at(i) ==
+                                op->getOutput(0)->dims().at(i));
+                    }
+
+                    REQUIRE(
+                        approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f));
+
+                    delete[] array0;
+                    delete[] result;
+                }
+            }
+            SECTION("Using result from a pytorch function as groundtruth") {
+                DimSize_t batch_size = 2;
+                DimSize_t channels = 3;
+                DimSize_t height = 4;
+                DimSize_t width = 3;
+                DimSize_t depth = 2;
+
+                SECTION("2D_img") {
+                    const std::vector<DimSize_t> in_dims{batch_size,
+                                                         channels,
+                                                         height,
+                                                         width};
+                    const std::vector<DimSize_t> out_dims{batch_size,
+                                                          channels};
+                    DimSize_t in_nb_elems =
+                        batch_size * channels * height * width;
+                    DimSize_t out_nb_elems = batch_size * channels;
+                    number_of_operation +=
+                        in_nb_elems +
+                        channels; // averaging per channel: 1 addition per
+                                  // element in the channel + 1 division,
+                                  // repeated for every batch
+                    auto input = new float[in_nb_elems];
+                    auto result = new float[out_nb_elems];
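+                    // each result[b * channels + c] below is the mean of the
+                    // 12 (= 4 x 3) values of channel c, e.g. result[0] =
+                    // mean(input[0..11]) ~= 0.0145876; presumably computed
+                    // with torch's AdaptiveAvgPool2d(1) or equivalent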
+                    input[0] = 0.1807716;
+                    input[1] = -0.0699881;
+                    input[2] = -0.3596235;
+                    input[3] = -0.9152045;
+                    input[4] = 0.6257653;
+                    input[5] = 0.0255099;
+                    input[6] = 0.9545137;
+                    input[7] = 0.0643485;
+                    input[8] = 0.3611506;
+                    input[9] = 1.1678782;
+                    input[10] = -1.3498932;
+                    input[11] = -0.5101767;
+                    input[12] = 0.2359577;
+                    input[13] = -0.2397784;
+                    input[14] = -0.9211147;
+                    input[15] = 1.5432971;
+                    input[16] = 1.3488258;
+                    input[17] = -0.1396417;
+                    input[18] = 0.2857972;
+                    input[19] = 0.9651205;
+                    input[20] = -2.0371499;
+                    input[21] = 0.4931363;
+                    input[22] = 1.4869986;
+                    input[23] = 0.5910330;
+                    input[24] = 0.1260297;
+                    input[25] = -1.5626874;
+                    input[26] = -1.1601028;
+                    input[27] = -0.3348408;
+                    input[28] = 0.4477722;
+                    input[29] = -0.8016447;
+                    input[30] = 1.5236114;
+                    input[31] = 2.5085869;
+                    input[32] = -0.6630959;
+                    input[33] = -0.2512752;
+                    input[34] = 1.0101448;
+                    input[35] = 0.1215468;
+                    input[36] = 0.1583993;
+                    input[37] = 1.1340188;
+                    input[38] = -1.1538976;
+                    input[39] = -0.2983968;
+                    input[40] = -0.5075365;
+                    input[41] = -0.9239212;
+                    input[42] = 0.5467061;
+                    input[43] = -1.4947776;
+                    input[44] = -1.2057148;
+                    input[45] = 0.5718198;
+                    input[46] = -0.5973545;
+                    input[47] = -0.6936757;
+                    input[48] = 1.6455388;
+                    input[49] = -0.8029931;
+                    input[50] = 1.3514109;
+                    input[51] = -0.2759193;
+                    input[52] = -1.5108346;
+                    input[53] = 2.1047730;
+                    input[54] = 2.7629590;
+                    input[55] = -1.7465292;
+                    input[56] = 0.8353187;
+                    input[57] = -1.9560477;
+                    input[58] = -0.8002653;
+                    input[59] = -0.5044988;
+                    input[60] = -0.0711742;
+                    input[61] = -0.5130699;
+                    input[62] = -1.0307810;
+                    input[63] = 0.9154347;
+                    input[64] = -0.2282317;
+                    input[65] = -0.6884708;
+                    input[66] = 0.1832259;
+                    input[67] = 0.6003584;
+                    input[68] = -1.5429375;
+                    input[69] = -0.3465560;
+                    input[70] = -0.1476223;
+                    input[71] = 0.6469797;
+
+                    result[0] = 0.0145876;
+                    result[1] = 0.3010401;
+                    result[2] = 0.0803371;
+
+                    result[3] = -0.3720275;
+                    result[4] = 0.0919094;
+                    result[5] = -0.1852371;
+
+                    // input0
+                    T0->resize(in_dims);
+                    T0->getImpl()->setRawPtr(input, in_nb_elems);
+
+                    // results
+                    Tres->resize(out_dims);
+                    Tres->getImpl()->setRawPtr(result, out_nb_elems);
+                    op->forwardDims();
+                    start = std::chrono::system_clock::now();
+                    REQUIRE_NOTHROW(globAvgPool->forward());
+                    end = std::chrono::system_clock::now();
+                    duration +=
+                        std::chrono::duration_cast<std::chrono::microseconds>(
+                            end - start);
+
+                    REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+                    for (DimSize_t i = 0; i < op->getOutput(0)->nbDims();
+                         ++i) {
+                        REQUIRE(Tres->dims().at(i) ==
+                                op->getOutput(0)->dims().at(i));
+                    }
+                    REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+                    delete[] input;
+                    delete[] result;
+                }
+                SECTION("3D_img") {
+                    const std::vector<DimSize_t> in_dims{batch_size,
+                                                         channels,
+                                                         height,
+                                                         width,
+                                                         depth};
+                    const std::vector<DimSize_t> out_dims{batch_size,
+                                                          channels};
+                    DimSize_t in_nb_elems =
+                        batch_size * channels * height * width * depth;
+                    number_of_operation +=
+                        in_nb_elems +
+                        channels; // averaging per channel: 1 addition per
+                                  // input element, plus 1 division per
+                                  // channel for the final mean
+                    DimSize_t out_nb_elems = batch_size * channels;
+                    auto input = new float[in_nb_elems];
+                    auto result = new float[out_nb_elems];
+                    input[0] = 0.0061403;
+                    input[1] = -0.9665052;
+                    input[2] = 0.3582928;
+                    input[3] = 0.1072854;
+                    input[4] = 1.2463317;
+                    input[5] = 1.2460036;
+                    input[6] = 0.3534451;
+                    input[7] = 0.9425349;
+                    input[8] = -0.2103887;
+                    input[9] = -0.7959853;
+                    input[10] = 0.1297970;
+                    input[11] = -1.9445597;
+                    input[12] = 0.0609514;
+                    input[13] = -0.2379328;
+                    input[14] = 1.9020044;
+                    input[15] = -1.1762751;
+                    input[16] = 0.3404147;
+                    input[17] = 1.1685153;
+                    input[18] = -0.6526139;
+                    input[19] = 0.3767620;
+                    input[20] = 0.1887376;
+                    input[21] = 0.5154487;
+                    input[22] = 0.6371427;
+                    input[23] = -0.3948864;
+                    input[24] = -1.1571540;
+                    input[25] = 0.2896117;
+                    input[26] = 0.6163548;
+                    input[27] = -0.4370409;
+                    input[28] = 0.6589766;
+                    input[29] = 0.6587803;
+                    input[30] = -1.3702172;
+                    input[31] = -1.6210355;
+                    input[32] = 0.5872851;
+                    input[33] = 0.2860694;
+                    input[34] = 0.0082870;
+                    input[35] = -0.2523253;
+                    input[36] = -1.3247224;
+                    input[37] = 0.1891782;
+                    input[38] = 0.0211001;
+                    input[39] = 0.9404197;
+                    input[40] = -0.5576900;
+                    input[41] = -0.6939272;
+                    input[42] = -0.3252473;
+                    input[43] = 1.2439330;
+                    input[44] = -1.1671864;
+                    input[45] = -0.4091243;
+                    input[46] = 1.2600617;
+                    input[47] = -1.5630058;
+                    input[48] = 1.1346143;
+                    input[49] = -0.0823837;
+                    input[50] = 0.2893163;
+                    input[51] = 0.8357732;
+                    input[52] = -0.2449911;
+                    input[53] = 0.2712233;
+                    input[54] = 0.0936364;
+                    input[55] = -0.8834321;
+                    input[56] = -0.3274170;
+                    input[57] = 0.0783938;
+                    input[58] = -0.3807656;
+                    input[59] = 0.3775077;
+                    input[60] = 0.1119123;
+                    input[61] = 2.3142793;
+                    input[62] = -0.7989057;
+                    input[63] = -0.5643027;
+                    input[64] = -1.1346605;
+                    input[65] = 0.1705271;
+                    input[66] = 0.9946650;
+                    input[67] = 1.2625724;
+                    input[68] = 1.6218156;
+                    input[69] = 1.0774711;
+                    input[70] = 0.5947813;
+                    input[71] = -1.5290873;
+                    input[72] = 2.0437069;
+                    input[73] = -0.1656267;
+                    input[74] = 0.0870704;
+                    input[75] = -0.5276564;
+                    input[76] = -0.1002882;
+                    input[77] = 1.0539219;
+                    input[78] = -0.6230739;
+                    input[79] = -1.5905718;
+                    input[80] = -0.9741858;
+                    input[81] = -0.1869211;
+                    input[82] = 0.5816050;
+                    input[83] = -2.6339815;
+                    input[84] = -1.0764544;
+                    input[85] = 2.5903966;
+                    input[86] = 0.4940658;
+                    input[87] = 0.4671729;
+                    input[88] = 0.6588292;
+                    input[89] = -0.7257792;
+                    input[90] = 1.4280071;
+                    input[91] = -1.2187740;
+                    input[92] = 0.7380729;
+                    input[93] = -1.1599953;
+                    input[94] = -1.4355115;
+                    input[95] = -1.5304037;
+                    input[96] = 0.8474578;
+                    input[97] = 0.0774260;
+                    input[98] = 0.5433396;
+                    input[99] = -0.8438400;
+                    input[100] = -0.1089903;
+                    input[101] = -0.6354192;
+                    input[102] = 0.8772392;
+                    input[103] = 0.2844733;
+                    input[104] = 0.0975270;
+                    input[105] = -0.9785872;
+                    input[106] = -0.4320499;
+                    input[107] = -1.4937501;
+                    input[108] = -2.0644901;
+                    input[109] = 0.0851217;
+                    input[110] = 0.6644159;
+                    input[111] = 0.4168026;
+                    input[112] = 0.0958830;
+                    input[113] = -1.5699565;
+                    input[114] = 0.3739572;
+                    input[115] = -0.1420672;
+                    input[116] = -0.7864021;
+                    input[117] = 0.2443752;
+                    input[118] = -0.9811850;
+                    input[119] = -0.0698569;
+                    input[120] = 0.1463890;
+                    input[121] = 0.2536245;
+                    input[122] = 0.2136150;
+                    input[123] = 0.3113698;
+                    input[124] = 1.8353856;
+                    input[125] = 1.4473228;
+                    input[126] = -0.7373698;
+                    input[127] = 0.2485314;
+                    input[128] = -0.4789796;
+                    input[129] = -0.3396149;
+                    input[130] = 0.6438198;
+                    input[131] = 0.7287521;
+                    input[132] = -1.5119252;
+                    input[133] = -0.1006494;
+                    input[134] = 1.8955028;
+                    input[135] = 1.0871323;
+                    input[136] = 0.3620502;
+                    input[137] = -0.8826663;
+                    input[138] = 1.2220223;
+                    input[139] = -1.2817260;
+                    input[140] = 1.4153577;
+                    input[141] = 0.4148015;
+                    input[142] = 1.3458617;
+                    input[143] = 1.9718349;
+
+                    result[0] = 0.1333608;
+                    result[1] = -0.1716091;
+                    result[2] = 0.2201060;
+                    result[3] = -0.1585989;
+                    result[4] = -0.2291074;
+                    result[5] = 0.4254351;
+
+                    // input0
+                    T0->resize(in_dims);
+                    T0->getImpl()->setRawPtr(input, in_nb_elems);
+
+                    // results
+                    Tres->resize(out_dims);
+                    Tres->getImpl()->setRawPtr(result, out_nb_elems);
+                    op->forwardDims();
+                    start = std::chrono::system_clock::now();
+                    REQUIRE_NOTHROW(globAvgPool->forward());
+                    end = std::chrono::system_clock::now();
+                    duration +=
+                        std::chrono::duration_cast<std::chrono::microseconds>(
+                            end - start);
+
+                    REQUIRE(Tres->nbDims() == op->getOutput(0)->nbDims());
+                    for (DimSize_t i = 0; i < op->getOutput(0)->nbDims();
+                         ++i) {
+                        REQUIRE(Tres->dims().at(i) ==
+                                op->getOutput(0)->dims().at(i));
+                    }
+                    REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+                    delete[] input;
+                    delete[] result;
+                }
+            }
+            std::cout << "GlobalAveragePooling total execution time : "
+                      << duration.count() << "µs" << std::endl;
+            std::cout << "Number of operations : " << number_of_operation
+                      << std::endl;
+            std::cout << "Operation / µs = "
+                      << number_of_operation / duration.count() << std::endl;
         }
-      }
-      std::cout << "GlobalAveragePooling total execution time : "
-                << duration.count() << "µs" << std::endl;
-      std::cout << "Number of operations : " << number_of_operation
-                << std::endl;
-      std::cout << "Operation / µs = " << number_of_operation / duration.count()
-                << std::endl;
     }
-  }
 }
 } // namespace Aidge
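
For reference, every ground-truth tensor in the GlobalAveragePooling tests above is produced by the same reduction: each (batch, channel) slice of the NCHW input collapses to its mean. A minimal standalone sketch of that reference, with illustrative names rather than the Aidge API:

#include <cstddef>
#include <vector>

// Reference global average pooling over a contiguous NCHW buffer:
// one addition per input element, one division per (batch, channel) pair.
std::vector<float> globalAvgPoolRef(const std::vector<float> &in,
                                    std::size_t batch,
                                    std::size_t channels,
                                    std::size_t spatialSize) {
    std::vector<float> out(batch * channels, 0.0f);
    for (std::size_t b = 0; b < batch; ++b) {
        for (std::size_t c = 0; c < channels; ++c) {
            float sum = 0.0f;
            const std::size_t base = (b * channels + c) * spatialSize;
            for (std::size_t i = 0; i < spatialSize; ++i) {
                sum += in[base + i];
            }
            out[b * channels + c] = sum / static_cast<float>(spatialSize);
        }
    }
    return out;
}

For the "2D_img" case above, spatialSize is height * width = 12, which reproduces the six expected means from the 72 PyTorch-generated inputs.
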
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index 85dd9f99ee425216f8495e7813b35ce69be9c806..18901d3b73cb32421fd9156d5b02d2e3625f9e7f 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -20,16 +20,15 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     SECTION("1D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
-            {0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,10> {
-            {0, 1, 2, 0, 4, 0, 0, 7, 8, 9}
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array1D<int, 10>{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array1D<int, 10>{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
@@ -37,22 +36,17 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     }
 
     SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
-            {
-                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,10> {
-            {
-                { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-            }
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array2D<int, 2, 10>{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                 {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array2D<int, 2, 10>{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                 {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
@@ -60,34 +54,21 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     }
 
     SECTION("3D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
-            {
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                },
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,2,10> {
-            {
-                {
-                    { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                    { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                },
-                {
-                    { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                    { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array3D<int, 2, 2, 10>{{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                     {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                                    {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                     {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<int, 2, 2, 10>{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                     {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                    {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                     {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
@@ -95,58 +76,30 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     }
 
     SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    },
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    },
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    }
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{
+                {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}},
+                 {{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 2, 2, 10>{{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                        {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}},
+                                       {{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                        {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
@@ -154,16 +107,17 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
     }
 
     SECTION("Test construction attribute: negative_slop") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
-            {0.0f, 1.0f, 2.0f,-3.0f, 4.0f,-5.0f,-6.0f, 7.0f, 8.0f, 9.0f}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<float,10> {
-            {0.0f, 1.0f, 2.0f,-1.5f, 4.0f,-2.5f,-3.0f, 7.0f, 8.0f, 9.0f}
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<
+            Tensor>(Array1D<float, 10>{
+            {0.0f, 1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, 8.0f, 9.0f}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<
+            Tensor>(Array1D<float, 10>{
+            {0.0f, 1.0f, 2.0f, -1.5f, 4.0f, -2.5f, -3.0f, 7.0f, 8.0f, 9.0f}});
 
         std::shared_ptr<Node> myLeakyReLU = LeakyReLU(0.5f);
-        auto op = std::static_pointer_cast<OperatorTensor>(myLeakyReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myLeakyReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myLeakyReLU->forward();
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
index d6e934b4dc8d84e8a595eb74d1af9d2c68c892d1..141126fd43144121f6e8a1c905806c9518cc1a7d 100644
--- a/unit_tests/operator/Test_MatMulImpl.cpp
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -10,12 +10,12 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MatMul.hpp"
@@ -31,13 +31,16 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> dis(0.0, 1.0); // Random float distribution between 0 and 1
+    std::uniform_real_distribution<float> dis(
+        0.0,
+        1.0); // Random float distribution between 0 and 1
     std::uniform_int_distribution<std::size_t> distDims(10, 100);
     std::uniform_int_distribution<std::size_t> distNbMatrix(1, 5);
 
     // Create MatMul Operator
     std::shared_ptr<Node> myMatMul = MatMul();
-    auto op = std::static_pointer_cast<OperatorTensor>(myMatMul -> getOperator());
+    auto op =
+        std::static_pointer_cast<OperatorTensor>(myMatMul->getOperator());
 
     // To measure execution time of 'MatMul_Op::forward()' member function call
     std::chrono::time_point<std::chrono::system_clock> start;
@@ -51,44 +54,47 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
             const std::size_t dim2 = distDims(gen);
-            totalComputation += dim0*dim1*dim2;
+            totalComputation += dim0 * dim1 * dim2;
 
             // Create and populate the array with random float values
-            float* bigArray1 = new float[dim0*dim1];
-            for (int i = 0; i < dim0*dim1; ++i) {
+            float *bigArray1 = new float[dim0 * dim1];
+            for (int i = 0; i < dim0 * dim1; ++i) {
                 bigArray1[i] = dis(gen); // Generate random float value
             }
-            float* bigArray2 = new float[dim1*dim2];
-            for (int i = 0; i < dim1*dim2; ++i) {
+            float *bigArray2 = new float[dim1 * dim2];
+            for (int i = 0; i < dim1 * dim2; ++i) {
                 bigArray2[i] = dis(gen); // Generate random float value
             }
-            float* res = new float[dim0*dim2];
+            float *res = new float[dim0 * dim2];
             for (int i = 0; i < dim0; ++i) {
                 for (int j = 0; j < dim2; ++j) {
                     float sum = 0.0;
                     for (int k = 0; k < dim1; ++k) {
-                        sum += bigArray1[i*dim1+k] * bigArray2[k*dim2+j];
+                        sum +=
+                            bigArray1[i * dim1 + k] * bigArray2[k * dim2 + j];
                     }
-                    res[i*dim2+j] = sum;
+                    res[i * dim2 + j] = sum;
                 }
             }
 
-
             // Convert bigArray1 to Tensor
-            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
-            T1 -> resize({dim0,dim1});
-            T1 -> setBackend("cpu");
-            T1 -> getImpl() -> setRawPtr(bigArray1, dim0*dim1);
+            std::shared_ptr<Tensor> T1 =
+                std::make_shared<Tensor>(DataType::Float32);
+            T1->resize({dim0, dim1});
+            T1->setBackend("cpu");
+            T1->getImpl()->setRawPtr(bigArray1, dim0 * dim1);
             // Convert bigArray2 to Tensor
-            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
-            T2 -> resize({dim1,dim2});
-            T2 -> setBackend("cpu");
-            T2 -> getImpl() -> setRawPtr(bigArray2, dim1*dim2);
+            std::shared_ptr<Tensor> T2 =
+                std::make_shared<Tensor>(DataType::Float32);
+            T2->resize({dim1, dim2});
+            T2->setBackend("cpu");
+            T2->getImpl()->setRawPtr(bigArray2, dim1 * dim2);
             // convert res to Tensor
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dim0,dim2});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(res, dim0*dim2);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dim0, dim2});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(res, dim0 * dim2);
 
             op->associateInput(0, T1);
             op->associateInput(1, T2);
@@ -98,7 +104,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             start = std::chrono::system_clock::now();
             myMatMul->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -106,7 +113,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             delete[] bigArray2;
             delete[] res;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "multiplications over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
 
@@ -119,44 +127,48 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
             const std::size_t dim2 = distDims(gen);
-            totalComputation += dim0*dim1*dim2*dimNb;
+            totalComputation += dim0 * dim1 * dim2 * dimNb;
 
             // Create and populate the array with random float values
-            float* bigArray1 = new float[dimNb*dim0*dim1];
-            for (std::size_t i = 0; i < dimNb*dim0*dim1; ++i) {
+            float *bigArray1 = new float[dimNb * dim0 * dim1];
+            for (std::size_t i = 0; i < dimNb * dim0 * dim1; ++i) {
                 bigArray1[i] = dis(gen); // Generate random float value
             }
-            float* bigArray2 = new float[dimNb*dim1*dim2];
-            for (int i = 0; i < dimNb*dim1*dim2; ++i) {
+            float *bigArray2 = new float[dimNb * dim1 * dim2];
+            for (int i = 0; i < dimNb * dim1 * dim2; ++i) {
                 bigArray2[i] = dis(gen); // Generate random float value
             }
-            float* res = new float[dimNb*dim0*dim2];
+            float *res = new float[dimNb * dim0 * dim2];
             for (std::size_t n = 0; n < dimNb; ++n) {
                 for (int i = 0; i < dim0; ++i) {
                     for (int j = 0; j < dim2; ++j) {
                         float sum = 0.0;
                         for (int k = 0; k < dim1; ++k) {
-                            sum += bigArray1[n*dim0*dim1 + i*dim1 + k] * bigArray2[n*dim2*dim1+k*dim2+j];
+                            sum += bigArray1[n * dim0 * dim1 + i * dim1 + k] *
+                                   bigArray2[n * dim2 * dim1 + k * dim2 + j];
                         }
-                        res[n*dim0*dim2+i*dim2+j] = sum;
+                        res[n * dim0 * dim2 + i * dim2 + j] = sum;
                     }
                 }
             }
             // Convert bigArray1 to Tensor
-            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
-            T1 -> resize({dimNb,dim0,dim1});
-            T1 -> setBackend("cpu");
-            T1 -> getImpl() -> setRawPtr(bigArray1, dimNb*dim0*dim1);
+            std::shared_ptr<Tensor> T1 =
+                std::make_shared<Tensor>(DataType::Float32);
+            T1->resize({dimNb, dim0, dim1});
+            T1->setBackend("cpu");
+            T1->getImpl()->setRawPtr(bigArray1, dimNb * dim0 * dim1);
             // Convert bigArray2 to Tensor
-            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
-            T2 -> resize({dimNb,dim1,dim2});
-            T2 -> setBackend("cpu");
-            T2 -> getImpl() -> setRawPtr(bigArray2, dimNb*dim1*dim2);
+            std::shared_ptr<Tensor> T2 =
+                std::make_shared<Tensor>(DataType::Float32);
+            T2->resize({dimNb, dim1, dim2});
+            T2->setBackend("cpu");
+            T2->getImpl()->setRawPtr(bigArray2, dimNb * dim1 * dim2);
             // convert res to Tensor
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dimNb,dim0,dim2});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(res, dimNb*dim0*dim2);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dimNb, dim0, dim2});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(res, dimNb * dim0 * dim2);
 
             op->associateInput(0, T1);
             op->associateInput(1, T2);
@@ -166,7 +178,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             start = std::chrono::system_clock::now();
             myMatMul->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
 
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -174,7 +187,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             delete[] bigArray2;
             delete[] res;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "multiplications over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
 
@@ -188,46 +202,55 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             const std::size_t dim0 = distDims(gen);
             const std::size_t dim1 = distDims(gen);
             const std::size_t dim2 = distDims(gen);
-            totalComputation += dim0*dim1*dim2*dimNb1*dimNb2;
+            totalComputation += dim0 * dim1 * dim2 * dimNb1 * dimNb2;
 
             // Create and populate the array with random float values
-            float* bigArray1 = new float[dimNb1*dimNb2*dim0*dim1];
-            for (std::size_t i = 0; i < dimNb1*dimNb2*dim0*dim1; ++i) {
+            float *bigArray1 = new float[dimNb1 * dimNb2 * dim0 * dim1];
+            for (std::size_t i = 0; i < dimNb1 * dimNb2 * dim0 * dim1; ++i) {
                 bigArray1[i] = dis(gen); // Generate random float value
             }
-            float* bigArray2 = new float[dimNb1*dimNb2*dim1*dim2];
-            for (std::size_t i = 0; i < dimNb1*dimNb2*dim1*dim2; ++i) {
+            float *bigArray2 = new float[dimNb1 * dimNb2 * dim1 * dim2];
+            for (std::size_t i = 0; i < dimNb1 * dimNb2 * dim1 * dim2; ++i) {
                 bigArray2[i] = dis(gen); // Generate random float value
             }
-            float* res = new float[dimNb1*dimNb2*dim0*dim2];
+            float *res = new float[dimNb1 * dimNb2 * dim0 * dim2];
             for (std::size_t n1 = 0; n1 < dimNb1; ++n1) {
                 for (std::size_t n2 = 0; n2 < dimNb2; ++n2) {
                     for (int i = 0; i < dim0; ++i) {
                         for (int j = 0; j < dim2; ++j) {
                             float sum = 0.0;
                             for (int k = 0; k < dim1; ++k) {
-                                sum += bigArray1[n1*dimNb2*dim0*dim1+n2*dim0*dim1+i*dim1+k] * bigArray2[n1*dimNb2*dim1*dim2+n2*dim1*dim2+k*dim2+j];
+                                sum +=
+                                    bigArray1[n1 * dimNb2 * dim0 * dim1 +
+                                              n2 * dim0 * dim1 + i * dim1 +
+                                              k] *
+                                    bigArray2[n1 * dimNb2 * dim1 * dim2 +
+                                              n2 * dim1 * dim2 + k * dim2 + j];
                             }
-                            res[n1*dimNb2*dim0*dim2+n2*dim0*dim2+i*dim2+j] = sum;
+                            res[n1 * dimNb2 * dim0 * dim2 + n2 * dim0 * dim2 +
+                                i * dim2 + j] = sum;
                         }
                     }
                 }
             }
             // Convert bigArray1 to Tensor
-            std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>(DataType::Float32);
-            T1 -> resize({dimNb1,dimNb2,dim0,dim1});
-            T1 -> setBackend("cpu");
-            T1 -> getImpl() -> setRawPtr(bigArray1, dimNb1*dimNb2*dim0*dim1);
+            std::shared_ptr<Tensor> T1 =
+                std::make_shared<Tensor>(DataType::Float32);
+            T1->resize({dimNb1, dimNb2, dim0, dim1});
+            T1->setBackend("cpu");
+            T1->getImpl()->setRawPtr(bigArray1, dimNb1 * dimNb2 * dim0 * dim1);
             // Convert bigArray2 to Tensor
-            std::shared_ptr<Tensor> T2 = std::make_shared<Tensor>(DataType::Float32);
-            T2 -> resize({dimNb1,dimNb2,dim1,dim2});
-            T2 -> setBackend("cpu");
-            T2 -> getImpl() -> setRawPtr(bigArray2, dimNb1*dimNb2*dim1*dim2);
+            std::shared_ptr<Tensor> T2 =
+                std::make_shared<Tensor>(DataType::Float32);
+            T2->resize({dimNb1, dimNb2, dim1, dim2});
+            T2->setBackend("cpu");
+            T2->getImpl()->setRawPtr(bigArray2, dimNb1 * dimNb2 * dim1 * dim2);
             // convert res to Tensor
-            std::shared_ptr<Tensor> Tres = std::make_shared<Tensor>(DataType::Float32);
-            Tres -> resize({dimNb1,dimNb2,dim0,dim2});
-            Tres -> setBackend("cpu");
-            Tres -> getImpl() -> setRawPtr(res, dimNb1*dimNb2*dim0*dim2);
+            std::shared_ptr<Tensor> Tres =
+                std::make_shared<Tensor>(DataType::Float32);
+            Tres->resize({dimNb1, dimNb2, dim0, dim2});
+            Tres->setBackend("cpu");
+            Tres->getImpl()->setRawPtr(res, dimNb1 * dimNb2 * dim0 * dim2);
 
             op->associateInput(0, T1);
             op->associateInput(1, T2);
@@ -237,14 +260,16 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             start = std::chrono::system_clock::now();
             myMatMul->forward();
             end = std::chrono::system_clock::now();
-            duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            duration += std::chrono::duration_cast<std::chrono::microseconds>(
+                end - start);
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
             delete[] bigArray1;
             delete[] bigArray2;
             delete[] res;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
+        std::cout << "multiplications over time spent: "
+                  << totalComputation / duration.count() << std::endl;
         std::cout << "total time: " << duration.count() << std::endl;
     }
 
@@ -252,18 +277,18 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
         // allows testing both computation with a 1-D Tensor and broadcasting
         // input_0
         std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-        op->associateInput(0,T0);
+        op->associateInput(0, T0);
         const std::size_t dim0 = distNbMatrix(gen);
         const std::size_t dim1 = distNbMatrix(gen) + 1;
         const std::size_t dim2 = distNbMatrix(gen);
         const std::size_t dim3 = distNbMatrix(gen);
-        T0->resize({dim0,dim1,dim2,dim3});
+        T0->resize({dim0, dim1, dim2, dim3});
         T0->setDataType(DataType::Float32);
         T0->setBackend("cpu");
 
         // input_1
         std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-        op -> associateInput(1,T1);
+        op->associateInput(1, T1);
         T1->resize({dim3});
         T1->setDataType(DataType::Float32);
         T1->setBackend("cpu");
@@ -272,7 +297,6 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
         op->setBackend("cpu");
         op->forwardDims();
         myMatMul->forward();
-
     }
 }
 } // namespace Aidge
\ No newline at end of file
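
Each random-input section above builds its expected tensor with the same row-major index arithmetic; the 3-D and 4-D cases simply treat the leading dimensions as a batch of independent 2-D products. A condensed sketch of that reference (illustrative names, not the Aidge implementation):

#include <cstddef>

// C[n] = A[n] * B[n] for nbBatch independent (dim0 x dim1) * (dim1 x dim2)
// row-major matrix products stored contiguously.
void matmulRef(const float *a, const float *b, float *c, std::size_t nbBatch,
               std::size_t dim0, std::size_t dim1, std::size_t dim2) {
    for (std::size_t n = 0; n < nbBatch; ++n) {
        const float *an = a + n * dim0 * dim1;
        const float *bn = b + n * dim1 * dim2;
        float *cn = c + n * dim0 * dim2;
        for (std::size_t i = 0; i < dim0; ++i) {
            for (std::size_t j = 0; j < dim2; ++j) {
                float sum = 0.0f;
                for (std::size_t k = 0; k < dim1; ++k) {
                    sum += an[i * dim1 + k] * bn[k * dim2 + j];
                }
                cn[i * dim2 + j] = sum;
            }
        }
    }
}

The 4-D section corresponds to nbBatch = dimNb1 * dimNb2, since the two leading dimensions are contiguous in row-major order.
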
diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp
index af04ede4e33c32ce785804e2484b6ba9ac5edc36..c026d2dc9dfca2f0faca77dd28601e4959ccca2c 100644
--- a/unit_tests/operator/Test_MaxPoolingImpl.cpp
+++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp
@@ -10,8 +10,8 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <memory>
 #include <cstdlib>
+#include <memory>
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MaxPooling.hpp"
@@ -20,59 +20,44 @@
 
 using namespace Aidge;
 
-
 TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
-        {
-            {
-                {{-0.3848,  0.2166, -0.4373,  0.6142,  0.5277},
-                 {0.7995,  0.3638, -1.4589, -1.0843,  1.0918},
-            	 {0.7147,  0.0936, -1.2902,  1.2037,  0.4874},
-                 {-0.5981,  2.1184, -0.9175,  1.3859,  0.3305},
-                 {-1.7700,  0.0563, -0.3914,  0.0538, -0.3955}},
+    std::shared_ptr<Tensor> myInput =
+        std::make_shared<Tensor>(Array4D<float, 2, 2, 5, 5>{
+            // NCHW
+            {{{{-0.3848, 0.2166, -0.4373, 0.6142, 0.5277},
+               {0.7995, 0.3638, -1.4589, -1.0843, 1.0918},
+               {0.7147, 0.0936, -1.2902, 1.2037, 0.4874},
+               {-0.5981, 2.1184, -0.9175, 1.3859, 0.3305},
+               {-1.7700, 0.0563, -0.3914, 0.0538, -0.3955}},
 
-                {{-3.1409, -0.4554,  0.0524,  2.2291,  0.4859},
-                 {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
-                 { 2.2329, -0.5850,  0.0700,  1.2838, -1.7363},
-                 { 0.2139,  0.0624, -1.0689, -0.8221, -0.8038},
-                 { 0.1886, -0.7840, -0.2313,  0.2651, -1.6244}}
-            },
-            {
-                {{ 0.4371,  1.6417,  0.9129,  0.6325,  0.5438},
-                 {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
-                 {1.7653, -1.6668, -1.0814,  0.6182,  1.2071},
-                 {0.9541, -0.5133,  0.8664, -0.8892,  1.4585},
-                 {1.0220, -0.5107,  0.1829, -0.2301, -0.4268}},
+              {{-3.1409, -0.4554, 0.0524, 2.2291, 0.4859},
+               {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
+               {2.2329, -0.5850, 0.0700, 1.2838, -1.7363},
+               {0.2139, 0.0624, -1.0689, -0.8221, -0.8038},
+               {0.1886, -0.7840, -0.2313, 0.2651, -1.6244}}},
+             {{{0.4371, 1.6417, 0.9129, 0.6325, 0.5438},
+               {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
+               {1.7653, -1.6668, -1.0814, 0.6182, 1.2071},
+               {0.9541, -0.5133, 0.8664, -0.8892, 1.4585},
+               {1.0220, -0.5107, 0.1829, -0.2301, -0.4268}},
 
-                {{ 1.0429,  0.6279, -0.2875,  0.7187, -0.1500},
-                 {1.6041,  2.9635,  1.4172, -0.7517,  0.5441},
-                 {-0.2276,  0.0857,  0.6776, -0.1389, -0.0614},
-                 {-0.1547, -0.3435,  0.0650, -0.5095, -1.8073},
-                 {1.7217,  0.3999, -0.5953,  1.0604, -0.4126}}
-            }
-        }
-    });
+              {{1.0429, 0.6279, -0.2875, 0.7187, -0.1500},
+               {1.6041, 2.9635, 1.4172, -0.7517, 0.5441},
+               {-0.2276, 0.0857, 0.6776, -0.1389, -0.0614},
+               {-0.1547, -0.3435, 0.0650, -0.5095, -1.8073},
+               {1.7217, 0.3999, -0.5953, 1.0604, -0.4126}}}}});
     SECTION("Stride") {
-        std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2});
-        auto op = std::static_pointer_cast<OperatorTensor>(myMaxPool -> getOperator());
+        std::shared_ptr<Node> myMaxPool = MaxPooling({2, 2}, "mycdw", {2, 2});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myMaxPool->getOperator());
 
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {
-            {
-                {
-                    {{  0.7995,  0.6142},
-                     { 2.1184,  1.3859}},
-                    {{ -0.4554,  2.2291},
-                     {  2.2329,  1.2838}}
-                },
-                {
-                    {{1.6417,  0.9129},
-                     {1.7653,  0.8664}},
-                    {{2.9635,  1.4172},
-                     {0.0857,  0.6776}}
-                }
-            }
-        });
-        myMaxPool->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 2, 2, 2>{
+                {{{{0.7995, 0.6142}, {2.1184, 1.3859}},
+                  {{-0.4554, 2.2291}, {2.2329, 1.2838}}},
+                 {{{1.6417, 0.9129}, {1.7653, 0.8664}},
+                  {{2.9635, 1.4172}, {0.0857, 0.6776}}}}});
+        myMaxPool->getOperator()->associateInput(0, myInput);
         myMaxPool->getOperator()->setDataType(DataType::Float32);
         myMaxPool->getOperator()->setBackend("cpu");
         myMaxPool->forward();
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index 271a1e2f9860d92f840916f6b2e396993b0bea39..1ea4fe8c3195c89c51def462809107caac7da373 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -14,7 +14,6 @@
 #include <cstdlib>
 #include <memory>
 
-#include "aidge/utils/TensorUtils.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/data/Tensor.hpp"
@@ -23,56 +22,60 @@
 #include "aidge/operator/MetaOperatorDefs.hpp"
 #include "aidge/operator/Pad.hpp"
 #include "aidge/operator/Pop.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/scheduler/ParallelScheduler.hpp"
+#include "aidge/scheduler/SequentialScheduler.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
-  SECTION("PaddedConv(forward)") {
-    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(
-            Array4D<double, 4, 3, 3, 3>{{{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
-                                          {1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
-                                          {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}},
-                                         {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01},
-                                          {6.12586558e-01, 8.09918671e-02, 8.40649383e-01},
-                                          {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}},
-                                         {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01},
-                                          {1.36960611e-01, 4.64806728e-01, 4.85150000e-01},
-                                          {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}},
-
-                                        {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01},
-                                          {1.56806559e-01, 6.22280998e-01, 3.15827594e-01},
-                                          {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}},
-                                         {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01},
-                                          {1.67925807e-01, 2.68356150e-01, 6.28875602e-01},
-                                          {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}},
-                                         {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01},
-                                          {7.63047502e-01, 5.12539506e-02, 9.77400493e-01},
-                                          {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}},
-
-                                        {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01},
-                                          {7.10897067e-02, 5.02579011e-01, 3.35236224e-01},
-                                          {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}},
-                                         {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01},
-                                          {1.59875539e-01, 9.13163381e-01, 3.59806060e-01},
-                                          {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}},
-                                         {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01},
-                                          {5.46298282e-01, 2.89698587e-01, 2.62612651e-01},
-                                          {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}},
-
-                                        {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01},
-                                          {8.67878485e-01, 2.93263422e-01, 8.03912714e-01},
-                                          {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}},
-                                         {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01},
-                                          {5.80538620e-01, 6.63031275e-01, 2.07247191e-01},
-                                          {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}},
-                                         {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01},
-                                          {7.34639028e-01, 2.84957200e-02, 9.70225217e-01},
-                                          {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}});
-    std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(
-            Array1D<double, 4>{{0.16884905, 0.27994487, 0.57227465, 0.06435205}});
-    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<double, 2, 3, 5, 5>{
+    SECTION("PaddedConv(forward)") {
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<double, 4, 3, 3, 3>{
+                {{{{6.20986394e-01, 1.19775136e-03, 7.22876095e-02},
+                   {1.16492919e-01, 8.21634093e-02, 1.17413265e-01},
+                   {2.23743494e-01, 3.99495413e-01, 5.55552411e-01}},
+                  {{6.64970077e-01, 9.62199940e-01, 4.87531967e-01},
+                   {6.12586558e-01, 8.09918671e-02, 8.40649383e-01},
+                   {4.15264406e-01, 8.28247138e-01, 1.52301135e-01}},
+                  {{1.76992844e-02, 7.78697112e-01, 8.14531592e-01},
+                   {1.36960611e-01, 4.64806728e-01, 4.85150000e-01},
+                   {4.34776520e-01, 9.51740977e-01, 9.05793799e-01}}},
+
+                 {{{1.71925246e-02, 1.91082720e-01, 3.67982644e-01},
+                   {1.56806559e-01, 6.22280998e-01, 3.15827594e-01},
+                   {6.04359038e-01, 2.83095947e-01, 6.11168892e-01}},
+                  {{2.76942832e-01, 1.89768419e-01, 8.07988176e-01},
+                   {1.67925807e-01, 2.68356150e-01, 6.28875602e-01},
+                   {1.69093357e-04, 9.64788636e-01, 7.29254981e-01}},
+                  {{6.34030122e-01, 1.32087038e-01, 3.33857107e-01},
+                   {7.63047502e-01, 5.12539506e-02, 9.77400493e-01},
+                   {8.06151288e-01, 2.60237147e-01, 3.93729313e-01}}},
+
+                 {{{5.84605240e-01, 4.74648725e-01, 8.54111741e-01},
+                   {7.10897067e-02, 5.02579011e-01, 3.35236224e-01},
+                   {9.08637408e-01, 8.02903830e-01, 2.83929907e-01}},
+                  {{3.68206999e-01, 9.18579021e-02, 7.33168098e-01},
+                   {1.59875539e-01, 9.13163381e-01, 3.59806060e-01},
+                   {1.41295882e-01, 7.00312185e-01, 5.63728289e-01}},
+                  {{9.39513546e-01, 1.91704891e-01, 1.11454944e-01},
+                   {5.46298282e-01, 2.89698587e-01, 2.62612651e-01},
+                   {1.18554992e-01, 4.32147376e-02, 7.53016994e-01}}},
+
+                 {{{9.53179175e-01, 2.05041054e-02, 1.11318451e-01},
+                   {8.67878485e-01, 2.93263422e-01, 8.03912714e-01},
+                   {8.93620255e-01, 1.37831128e-01, 3.83640583e-01}},
+                  {{3.96020188e-01, 6.24959320e-01, 1.90709175e-01},
+                   {5.80538620e-01, 6.63031275e-01, 2.07247191e-01},
+                   {5.65672171e-01, 5.57014317e-01, 9.26909496e-01}},
+                  {{3.43901418e-01, 4.47741636e-01, 6.59249367e-01},
+                   {7.34639028e-01, 2.84957200e-02, 9.70225217e-01},
+                   {1.33578790e-02, 6.12054702e-01, 9.36685235e-02}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<double, 4>{
+                {0.16884905, 0.27994487, 0.57227465, 0.06435205}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<
+            Tensor>(Array4D<double, 2, 3, 5, 5>{
             // NCHW
             {{{{0.43224481, 0.9047832, 0.18402257, 0.06162838, 0.52490127},
                {0.27773404, 0.55402353, 0.9485062, 0.31197083, 0.80328607},
@@ -108,93 +111,106 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
                {0.95873236, 0.6742374, 0.55679676, 0.6323497, 0.34072958},
                {0.49694061, 0.79173045, 0.19738225, 0.14755281, 0.80818177},
                {0.02332061, 0.74270703, 0.59415632, 0.08195934, 0.46295434},
-               {0.71426058, 0.85032931, 0.90750818, 0.28768431, 0.4401146}}}}});
-
-    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
-            Array4D<double, 2, 4, 5, 5>{{{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273},
-                {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567},
-                {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523},
-                {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136},
-                {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}},
-
-                {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890},
-                {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475},
-                {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442},
-                {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438},
-                {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}},
-
-                {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092},
-                {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575},
-                {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146},
-                {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581},
-                {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}},
-
-                {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740},
-                {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107},
-                {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523},
-                {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123},
-                {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}},
-
-
-                {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229},
-                {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444},
-                {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241},
-                {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706},
-                {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}},
-
-                {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648},
-                {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705},
-                {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404},
-                {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069},
-                {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}},
-
-                {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888},
-                {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179},
-                {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316},
-                {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387},
-                {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}},
-
-                {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038},
-                {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408},
-                {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357},
-                {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303},
-                {3.16612267, 4.38248920, 5.23248482, 4.21292210, 2.86031270}}}}});
-
-    std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv");
-    auto convOp = std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
-
-    std::shared_ptr<Node> myPad =
+               {0.71426058,
+                0.85032931,
+                0.90750818,
+                0.28768431,
+                0.4401146}}}}});
+
+        std::shared_ptr<Tensor> myOutput = std::make_shared<
+            Tensor>(Array4D<double, 2, 4, 5, 5>{
+            {{{{3.40294218, 3.74021220, 4.02050114, 4.07054710, 2.46286273},
+               {4.61770582, 6.70517588, 6.50356627, 6.29688787, 3.53332567},
+               {5.47480106, 5.92094421, 6.64605665, 7.95090199, 4.28721523},
+               {4.01485729, 6.06748962, 7.52447891, 7.37980652, 5.28401136},
+               {2.83065438, 3.62033439, 3.56222963, 5.56103945, 3.23335814}},
+
+              {{3.30230498, 4.92814112, 4.34710836, 3.96262765, 2.97987890},
+               {4.49693012, 6.68929291, 5.53603029, 5.68874264, 4.28756475},
+               {4.20528078, 6.82776880, 6.70569849, 7.12809610, 4.40845442},
+               {4.31169367, 6.73352146, 6.30962515, 7.45826864, 4.99164438},
+               {2.18136287, 4.28968000, 4.20080042, 4.89814138, 2.87394023}},
+
+              {{3.54787683, 4.35851812, 4.63881302, 4.23359537, 3.16992092},
+               {5.25099468, 7.54282856, 6.69849157, 5.64309788, 4.56919575},
+               {4.71914101, 7.52830601, 6.71450949, 7.81113863, 5.84658146},
+               {4.97893143, 7.39293909, 6.89905310, 8.14430809, 5.62998581},
+               {2.79735112, 4.80967140, 5.57630205, 5.38828325, 4.57078695}},
+
+              {{3.03048635, 5.04540300, 4.21824932, 4.87323284, 2.35113740},
+               {4.45167351, 6.47721338, 7.40922976, 6.70445728, 3.60700107},
+               {3.77927423, 6.82826376, 7.41777134, 7.57402420, 5.13131523},
+               {4.08747244, 7.07994175, 7.57206821, 8.51897335, 5.26987123},
+               {2.34426999, 4.60127831, 4.86486769, 6.01579571, 3.97803569}}},
+
+             {{{3.84700942, 4.25972605, 3.05269003, 3.78043652, 2.08771229},
+               {6.00459957, 6.05633259, 4.45951605, 4.54089880, 4.03066444},
+               {5.41579390, 7.29543972, 6.18680000, 5.58812714, 3.45964241},
+               {6.04531050, 7.70924091, 5.52207708, 5.02131319, 4.09403706},
+               {3.18092418, 4.45422697, 4.04294252, 3.86577177, 2.18776536}},
+
+              {{4.02600670, 4.27603531, 3.81011319, 4.03631020, 2.57254648},
+               {5.33471155, 5.72588634, 5.12079763, 5.11733150, 3.76836705},
+               {5.62947607, 5.92492962, 6.24170446, 6.44130468, 3.44276404},
+               {5.38414621, 6.02679539, 5.88985586, 5.90263271, 3.15044069},
+               {3.31261086, 4.44371319, 3.47660780, 4.15411520, 1.48961508}},
+
+              {{3.95879412, 4.17324543, 3.70114422, 3.27447152, 3.09713888},
+               {5.78258181, 6.57920837, 4.99913597, 6.20961237, 4.98552179},
+               {5.84685421, 7.19971228, 6.66386652, 6.68013430, 4.90963316},
+               {5.24417877, 7.06430531, 6.58512402, 6.02492285, 4.48986387},
+               {3.64294529, 5.00678444, 5.04760027, 4.72895622, 2.67990756}},
+
+              {{3.48610687, 4.12853813, 4.07563591, 3.51327014, 2.44217038},
+               {4.80529881, 7.33211374, 5.14774036, 4.77281189, 4.44612408},
+               {5.11703110, 7.55168772, 7.14374542, 6.43696356, 4.10621357},
+               {5.41270018, 6.85949135, 6.73503923, 5.74601364, 4.46150303},
+               {3.16612267,
+                4.38248920,
+                5.23248482,
+                4.21292210,
+                2.86031270}}}}});
+
+        std::shared_ptr<Node> myConv = Conv<2>(3, 4, {3, 3}, "myconv");
+        auto convOp =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
+
+        std::shared_ptr<Node> myPad =
             Pad<2>({1, 1, 1, 1}, "myPad", PadBorderType::Constant, 0.0);
-    auto padOp = std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
-
-    convOp->setInput(1, myWeights);
-    convOp->setInput(2, myBias);
-
-    myPad->addChild(myConv, 0, 0);
-    padOp->setInput(0, myInput);
-
-    padOp->setDataType(DataType::Float64);
-    padOp->setBackend("cpu");
-    convOp->setDataType(DataType::Float64);
-    convOp->setBackend("cpu");
-
-    myPad->forward();
-    myConv->forward();
-    convOp -> getOutput(0) -> print();
-
-    double* computedOutput = static_cast<double*>(convOp->getOutput(0)->getImpl()->rawPtr());
-    double* expectedOutput = static_cast<double*>(myOutput->getImpl()->rawPtr());
-    for (std::size_t i = 0; i < myOutput->size(); ++i) {
-        REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5);
-    }
+        auto padOp =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+
+        convOp->setInput(1, myWeights);
+        convOp->setInput(2, myBias);
+
+        myPad->addChild(myConv, 0, 0);
+        padOp->setInput(0, myInput);
+
+        padOp->setDataType(DataType::Float64);
+        padOp->setBackend("cpu");
+        convOp->setDataType(DataType::Float64);
+        convOp->setBackend("cpu");
+
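+        // Forward each node manually, in topological order (Pad feeds Conv).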
+        myPad->forward();
+        myConv->forward();
+        convOp->getOutput(0)->print();
+
+        double *computedOutput =
+            static_cast<double *>(convOp->getOutput(0)->getImpl()->rawPtr());
+        double *expectedOutput =
+            static_cast<double *>(myOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < myOutput->size(); ++i) {
+            REQUIRE(std::abs(computedOutput[i] - expectedOutput[i]) < 1e-5);
+        }
 
-    std::shared_ptr<Node> myPaddedConv =
+        std::shared_ptr<Node> myPaddedConv =
             PaddedConv(3, 4, {3, 3}, "myPaddedConv", {1, 1}, {1, 1, 1, 1});
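+        // PaddedConv is the meta-operator equivalent of the Pad -> Conv
+        // pipeline exercised above; it is only constructed here.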
-  }
+    }
     SECTION("LSTM(forward)") {
         auto pop = Pop();
         auto myLSTM = LSTM(32, 64, 0, true, "ltsm");
-        auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
+        auto op =
+            std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
         auto microGraph = op->getMicroGraph();
         microGraph->save("lstm", false, true);
@@ -209,14 +225,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array2D<float, 16, 32>{});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 32, 64>{});
-        std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
-            Array2D<float, 64, 32>{});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 64, 64>{});
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array2D<float, 16, 32>{});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 32, 64>{});
+        std::shared_ptr<Tensor> myInitW =
+            std::make_shared<Tensor>(Array2D<float, 64, 32>{});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 64, 64>{});
 
         pop->addChild(myLSTM, 0, 0);
         pop->getOperator()->associateInput(0, myInput);
@@ -246,7 +262,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         microGraph->save("lstm_dims", true, true);
         REQUIRE(op->dimsForwarded());
 
-        auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
+        auto microGraphScheduler =
+            std::dynamic_pointer_cast<MetaOperator_Op>(op)
+                ->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_scheduling");
 
         REQUIRE(op->getNbConsumedData(0).data == 512);
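+        // 512 = 16 * 32: all elements of the (16, 32) input consumed over the
+        // sequence.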
@@ -259,9 +277,11 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     }
     SECTION("LSTM(forward_values)") {
         auto myLSTM = LSTM(2, 3, 0, true, "ltsm");
-        auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
-        auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
+        auto microGraph =
+            std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
         microGraph->save("lstm", false, false);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
@@ -276,12 +296,14 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         op->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -308,12 +330,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         microGraph->save("lstm_values_dims", false, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
-                                     {0.25606447, 0.25606447, 0.25606447},
-                                     {0.40323776, 0.40323776, 0.40323776}}});
-
+            Array2D<float, 3, 3>{{{0.0952412, 0.0952412, 0.0952412},
+                                  {0.25606447, 0.25606447, 0.25606447},
+                                  {0.40323776, 0.40323776, 0.40323776}}});
 
-        auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
+        auto microGraphScheduler =
+            std::dynamic_pointer_cast<MetaOperator_Op>(op)
+                ->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_values_scheduling");
 
         op->getOutput(0)->print();
@@ -325,7 +348,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
         auto myGraph = Sequential({pop, myLSTM});
-        auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
         REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
@@ -338,13 +362,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
+                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -371,9 +398,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         scheduler.saveSchedulingDiagram("lstm_seq_schedule");
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
-                                     {0.49801484, 0.49801484, 0.49801484},
-                                     {0.67162132, 0.67162132, 0.67162132}}});
+            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                  {0.49801484, 0.49801484, 0.49801484},
+                                  {0.67162132, 0.67162132, 0.67162132}}});
 
         myGraph->save("lstm_seq_mygraph", true, true);
 
@@ -385,7 +412,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     SECTION("LSTM(forward_values_seq_flatten)(sequential)") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
-        auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
         // Here we test the LSTM as if it were flattened in the graph:
         // we simply borrow its micro-graph into our larger myGraph graph.
@@ -405,13 +433,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
+                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -419,16 +450,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         // Weights X
         auto prodX = Producer(myInitW);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first,
+                        0,
+                        1);
         // Weights H
         auto prodH = Producer(myInitR);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first,
+                        0,
+                        1);
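+        // Each weight tensor is wired four times: ordered inputs 1-4 take the
+        // input weights and 5-8 the recurrent weights, one per LSTM gate.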
         myGraph->add({prodX, prodH});
 
         myGraph->setDataType(DataType::Float32);
@@ -436,9 +483,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myGraph->save("lstm_seq_flatten", true, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
-                                     {0.49801484, 0.49801484, 0.49801484},
-                                     {0.67162132, 0.67162132, 0.67162132}}});
+            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                  {0.49801484, 0.49801484, 0.49801484},
+                                  {0.67162132, 0.67162132, 0.67162132}}});
 
         auto scheduler = SequentialScheduler(myGraph);
         scheduler.generateScheduling();
@@ -454,7 +501,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     SECTION("LSTM(forward_values_seq_flatten)(parallel)") {
         auto pop = Pop();
         auto myLSTM = LSTM(2, 3, 2, true, "ltsm");
-        auto op = std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
+        auto op =
+            std::static_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
         // Here we test the LSTM as if it were flattened in the graph:
         // we simply borrow its micro-graph into our larger myGraph graph.
@@ -474,13 +522,16 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
-            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
-        std::shared_ptr<Tensor> myInit = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
+            Array3D<float, 2, 3, 2>{{{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}},
+                                     {{2.0, 3.0}, {4.0, 5.0}, {6.0, 7.0}}}});
+        std::shared_ptr<Tensor> myInit =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}, {0.0, 0.0, 0.0}}});
         std::shared_ptr<Tensor> myInitW = std::make_shared<Tensor>(
             Array2D<float, 3, 2>{{{0.1, 0.1}, {0.1, 0.1}, {0.1, 0.1}}});
-        std::shared_ptr<Tensor> myInitR = std::make_shared<Tensor>(
-            Array2D<float, 3, 3>{{{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
+        std::shared_ptr<Tensor> myInitR =
+            std::make_shared<Tensor>(Array2D<float, 3, 3>{
+                {{0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}, {0.1, 0.1, 0.1}}});
 
         pop->getOperator()->associateInput(0, myInput);
         op->associateInput(17, myInit);
@@ -488,16 +539,32 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
         // Weights X
         auto prodX = Producer(myInitW);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first, 0, 1);
-        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first, 0, 1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[1].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[2].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[3].first,
+                        0,
+                        1);
+        prodX->addChild(op->getMicroGraph()->getOrderedInputs()[4].first,
+                        0,
+                        1);
         // Weights H
         auto prodH = Producer(myInitR);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first, 0, 1);
-        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first, 0, 1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[5].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[6].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[7].first,
+                        0,
+                        1);
+        prodH->addChild(op->getMicroGraph()->getOrderedInputs()[8].first,
+                        0,
+                        1);
         myGraph->add({prodX, prodH});
 
         myGraph->setDataType(DataType::Float32);
@@ -505,9 +572,9 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myGraph->save("lstm_seq_flatten", true, true);
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
-                Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
-                                     {0.49801484, 0.49801484, 0.49801484},
-                                     {0.67162132, 0.67162132, 0.67162132}}});
+            Array2D<float, 3, 3>{{{0.24439372, 0.24439372, 0.24439372},
+                                  {0.49801484, 0.49801484, 0.49801484},
+                                  {0.67162132, 0.67162132, 0.67162132}}});
 
         auto scheduler = ParallelScheduler(myGraph);
         scheduler.generateScheduling();
diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp
index 3378861d0d3d7e74e7867c2765a0b09069fa8caf..f228a42827fc218fae5e3fd1abcafe2c908d0215 100644
--- a/unit_tests/operator/Test_MulImpl.cpp
+++ b/unit_tests/operator/Test_MulImpl.cpp
@@ -10,13 +10,13 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Mul.hpp"
@@ -24,360 +24,210 @@
 
 namespace Aidge {
 
-    TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
-    {
-        std::shared_ptr<Node> myMul = Mul();
-        auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-
-        SECTION("Case 1: 2D and 1D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>(
-                {
-                    {
-                        {1,2,3},{4,5,6}
-                    }
-                }
-            ));
+TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]") {
+    std::shared_ptr<Node> myMul = Mul();
+    auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
+    op->setDataType(DataType::Float32);
+    op->setBackend("cpu");
 
-            const auto T1 = std::make_shared<Tensor>(Array1D<float,3>(
-                {0.1,0.2,0.3}
-            ));
+    SECTION("Case 1: 2D and 1D tensors") {
+        const auto T0 = std::make_shared<Tensor>(
+            Array2D<float, 2, 3>({{{1, 2, 3}, {4, 5, 6}}}));
 
-            T0->setDataType(DataType::Float32);
-            T0->setBackend("cpu");
-            T1->setDataType(DataType::Float32);
-            T1->setBackend("cpu");
+        const auto T1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({0.1, 0.2, 0.3}));
 
-            op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}})));
+        T0->setDataType(DataType::Float32);
+        T0->setBackend("cpu");
+        T1->setDataType(DataType::Float32);
+        T1->setBackend("cpu");
 
-            op->associateInput(0,T0);
-            op->associateInput(1,T1);
-            op->forwardDims();
+        op->getOutput(0)->setGrad(std::make_shared<Tensor>(
+            Array2D<float, 2, 3>({{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}})));
 
-            myMul->forward();
-            myMul->backward();
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->forwardDims();
 
-            auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}}));
-            auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9}));
+        myMul->forward();
+        myMul->backward();
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad));
-        }
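+        // Expected gradients, derived by hand: grad(T0) broadcasts T1 across
+        // the rows, while grad(T1) sums T0 down the columns (1+4, 2+5, 3+6).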
+        auto T0Grad = std::make_shared<Tensor>(
+            Array2D<float, 2, 3>({{{0.1, 0.2, 0.3}, {0.1, 0.2, 0.3}}}));
+        auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5, 7, 9}));
 
-        SECTION("Case 2: 3D and 1D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
-                {
-                    {
-                        {
-                            {1.0, 2.0, 3.0},
-                            {4.0, 5.0, 6.0}
-                        },
-                        {
-                            {7.0, 8.0, 9.0},
-                            {10.0, 11.0, 12.0}
-                        }
-                    }
-                }
-            ));
-
-            const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1}));
-
-            const auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>(
-                    {
-                        {
-                            {
-                                {1, 1, 1},
-                                {1, 1, 1}
-                            },
-                            {
-                                {1, 1, 1},
-                                {1, 1, 1}
-                            }
-                        }
-                    }
-                ));
-
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
-                {
-                    {
-                        {
-                            {0.3, 0.2, 0.1},
-                            {0.3, 0.2, 0.1}
-                        },
-                        {
-                            {0.3, 0.2, 0.1},
-                            {0.3, 0.2, 0.1}
-                        }
-                    }
-                }
-            ));
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad));
+        REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad));
+    }
 
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>(
-                {22.0, 26.0, 30.0}
-            ));
+    SECTION("Case 2: 3D and 1D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+            {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
+              {{7.0, 8.0, 9.0}, {10.0, 11.0, 12.0}}}}));
 
-            for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
-            {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
-            }
+        const auto T1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({0.3, 0.2, 0.1}));
 
-            op->associateInput(0, T0);
-            op->associateInput(1, T1);
-            op->getOutput(0)->setGrad(newGrad);
-            op->forwardDims();
+        const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+            {{{{1, 1, 1}, {1, 1, 1}}, {{1, 1, 1}, {1, 1, 1}}}}));
 
-            myMul->backward();
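+        // With an all-ones output gradient, grad(T0) tiles T1 across the
+        // broadcast axes and grad(T1) sums T0 over them (e.g. 1+4+7+10 = 22).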
+        const auto expectedGrad0 = std::make_shared<Tensor>(
+            Array3D<float, 2, 2, 3>({{{{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}},
+                                      {{0.3, 0.2, 0.1}, {0.3, 0.2, 0.1}}}}));
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
-        }
+        const auto expectedGrad1 =
+            std::make_shared<Tensor>(Array1D<float, 3>({22.0, 26.0, 30.0}));
 
-        SECTION("Case 3: 4D and 2D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
-                {
-                    {
-                        {
-                            {
-                                {1.0, 2.0, 3.0},
-                                {4.0, 5.0, 6.0},
-                                {7.0, 8.0, 9.0}
-                            },
-                            {
-                                {10.0, 11.0, 12.0},
-                                {13.0, 14.0, 15.0},
-                                {16.0, 17.0, 18.0}
-                            }
-                        },
-                        {
-                            {
-                                {19.0, 20.0, 21.0},
-                                {22.0, 23.0, 24.0},
-                                {25.0, 26.0, 27.0}
-                            },
-                            {
-                                {28.0, 29.0, 30.0},
-                                {31.0, 32.0, 33.0},
-                                {34.0, 35.0, 36.0}
-                            }
-                        }
-                    }
-                }
-            ));
-
-            const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>(
-                {
-                    {
-                        {0.5,0.3,0.1},
-                        {0.4,0.2,0.6},
-                        {0.7,0.8,0.9}
-                    }
-                }
-            ));
-
-            const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
-                {
-                    {
-                        {
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            },
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            }
-                        },
-                        {
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            },
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            }
-                        }
-                    }
-                }
-            ));
-
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>(
-                {
-                    {
-                        {
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            },
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            }
-                        },
-                        {
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            },
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            }
-                        }
-                    }
-                }
-            ));
-
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>(
-                {
-                    {
-                        {58.0, 62.0, 66.0},
-                        {70.0, 74.0, 78.0},
-                        {82.0, 86.0, 90.0}
-                    }
-                }
-            ));
+        for (auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
+            T->setBackend("cpu");
+            T->setDataType(DataType::Float32);
+        }
 
-            for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
-            {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
-            }
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
 
-            op->associateInput(0, T0);
-            op->associateInput(1, T1);
-            op->getOutput(0)->setGrad(newGrad);
-            op->forwardDims();
+        myMul->backward();
 
-            myMul->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
+        REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
+    }
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
+    SECTION("Case 3: 4D and 2D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>(
+            {{{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}, {7.0, 8.0, 9.0}},
+               {{10.0, 11.0, 12.0}, {13.0, 14.0, 15.0}, {16.0, 17.0, 18.0}}},
+              {{{19.0, 20.0, 21.0}, {22.0, 23.0, 24.0}, {25.0, 26.0, 27.0}},
+               {{28.0, 29.0, 30.0},
+                {31.0, 32.0, 33.0},
+                {34.0, 35.0, 36.0}}}}}));
+
+        const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 3>(
+            {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}));
+
+        const auto newGrad =
+            std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>(
+                {{{{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
+                   {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}},
+                  {{{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}},
+                   {{1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {1.0, 1.0, 1.0}}}}}));
+
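+        // grad(T1) accumulates T0 over the two leading broadcast axes:
+        // 1 + 10 + 19 + 28 = 58 for the first entry, and so on.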
+        const auto expectedGrad0 =
+            std::make_shared<Tensor>(Array4D<float, 2, 2, 3, 3>(
+                {{{{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}},
+                   {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}},
+                  {{{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}},
+                   {{0.5, 0.3, 0.1}, {0.4, 0.2, 0.6}, {0.7, 0.8, 0.9}}}}}));
+
+        const auto expectedGrad1 = std::make_shared<Tensor>(
+            Array2D<float, 3, 3>({{{58.0, 62.0, 66.0},
+                                   {70.0, 74.0, 78.0},
+                                   {82.0, 86.0, 90.0}}}));
+
+        for (const auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
+            T->setBackend("cpu");
+            T->setDataType(DataType::Float32);
         }
 
-        SECTION("Case 4: 3D and 2D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>(
-                {
-                    {
-                        {
-                            {1.0, 2.0, 3.0, 4.0},
-                            {5.0, 6.0, 7.0, 8.0},
-                            {9.0, 10.0, 11.0, 12.0},
-                        },
-                        {
-                            {13.0, 14.0, 15.0, 16.0},
-                            {17.0, 18.0, 19.0, 20.0},
-                            {21.0, 22.0, 23.0, 24.0},
-                        }
-                    }
-                }
-            ));
-
-            const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>(
-                {
-                    {
-                        {0.1, 0.2, 0.3, 0.4},
-                        {0.5, 0.6, 0.7, 0.8},
-                        {0.9, 1.0, 1.1, 1.2}
-                    }
-                }
-            ));
-
-            const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>(
-                {
-                    {
-                        {
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                        },
-                        {
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                        }
-                    }
-                }
-            ));
-
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>(
-                {
-                    {
-                        {
-                            {0.1, 0.2, 0.3, 0.4},
-                            {0.5, 0.6, 0.7, 0.8},
-                            {0.9, 1.0, 1.1, 1.2}
-                        },
-                        {
-                            {0.1, 0.2, 0.3, 0.4},
-                            {0.5, 0.6, 0.7, 0.8},
-                            {0.9, 1.0, 1.1, 1.2}
-                        }
-                    }
-                }
-            ));
-
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>(
-                {
-                    {
-                        {14.0, 16.0, 18.0, 20.0},
-                        {22.0, 24.0, 26.0, 28.0},
-                        {30.0, 32.0, 34.0, 36.0}
-                    }
-                }
-            ));
-
-            for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
-            {
-                T->setBackend("cpu") ;
-                T->setDataType(DataType::Float32);
-            }
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
 
-            op->associateInput(0, T0);
-            op->associateInput(1, T1);
-            op->getOutput(0)->setGrad(newGrad);
-            op->forwardDims();
+        myMul->backward();
 
-            myMul->backward();
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
+        REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
+    }
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
+    SECTION("Case 4: 3D and 2D tensors") {
+        const auto T0 = std::make_shared<Tensor>(
+            Array3D<float, 2, 3, 4>({{{
+                                          {1.0, 2.0, 3.0, 4.0},
+                                          {5.0, 6.0, 7.0, 8.0},
+                                          {9.0, 10.0, 11.0, 12.0},
+                                      },
+                                      {
+                                          {13.0, 14.0, 15.0, 16.0},
+                                          {17.0, 18.0, 19.0, 20.0},
+                                          {21.0, 22.0, 23.0, 24.0},
+                                      }}}));
+
+        const auto T1 = std::make_shared<Tensor>(
+            Array2D<float, 3, 4>({{{0.1, 0.2, 0.3, 0.4},
+                                   {0.5, 0.6, 0.7, 0.8},
+                                   {0.9, 1.0, 1.1, 1.2}}}));
+
+        const auto newGrad = std::make_shared<Tensor>(
+            Array3D<float, 2, 3, 4>({{{
+                                          {1.0, 1.0, 1.0, 1.0},
+                                          {1.0, 1.0, 1.0, 1.0},
+                                          {1.0, 1.0, 1.0, 1.0},
+                                      },
+                                      {
+                                          {1.0, 1.0, 1.0, 1.0},
+                                          {1.0, 1.0, 1.0, 1.0},
+                                          {1.0, 1.0, 1.0, 1.0},
+                                      }}}));
+
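+        // Same pattern in 3D: grad(T0) tiles T1 along the leading axis and
+        // grad(T1) sums the two slices of T0 (1 + 13 = 14, 2 + 14 = 16, ...).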
+        const auto expectedGrad0 = std::make_shared<Tensor>(
+            Array3D<float, 2, 3, 4>({{{{0.1, 0.2, 0.3, 0.4},
+                                       {0.5, 0.6, 0.7, 0.8},
+                                       {0.9, 1.0, 1.1, 1.2}},
+                                      {{0.1, 0.2, 0.3, 0.4},
+                                       {0.5, 0.6, 0.7, 0.8},
+                                       {0.9, 1.0, 1.1, 1.2}}}}));
+
+        const auto expectedGrad1 = std::make_shared<Tensor>(
+            Array2D<float, 3, 4>({{{14.0, 16.0, 18.0, 20.0},
+                                   {22.0, 24.0, 26.0, 28.0},
+                                   {30.0, 32.0, 34.0, 36.0}}}));
+
+        for (const auto T : {T0, T1, newGrad, expectedGrad0, expectedGrad1}) {
+            T->setBackend("cpu");
+            T->setDataType(DataType::Float32);
         }
+
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
+
+        myMul->backward();
+
+        REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
+        REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
     }
+}
 
 TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
     constexpr std::uint16_t NBTRIALS = 10;
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(3));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
     // Create Mul Operator
     std::shared_ptr<Node> myMul = Mul();
-    auto op = std::static_pointer_cast<OperatorTensor>(myMul-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
 
@@ -391,14 +241,9 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
     std::chrono::time_point<std::chrono::system_clock> end;
     std::chrono::duration<double, std::micro> duration{};
 
-
     SECTION("MulImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
 
             std::size_t number_of_operation = 0;
@@ -413,13 +258,17 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                     dims.push_back(dimSizeDist(gen));
                 }
 
-                const auto nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const auto nb_elements =
+                    std::accumulate(dims.cbegin(),
+                                    dims.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
 
                 // without broadcasting
-                float* array0 = new float[nb_elements];
-                float* array1 = new float[nb_elements];
-                float* result = new float[nb_elements];
+                float *array0 = new float[nb_elements];
+                float *array1 = new float[nb_elements];
+                float *result = new float[nb_elements];
 
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
@@ -429,21 +278,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
 
                 // input0
                 T0->resize(dims);
-                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+                T0->getImpl()->setRawPtr(array0, nb_elements);
 
                 // input1
                 T1->resize(dims);
-                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+                T1->getImpl()->setRawPtr(array1, nb_elements);
 
                 // results
                 Tres->resize(dims);
-                Tres -> getImpl() -> setRawPtr(result, nb_elements);
+                Tres->getImpl()->setRawPtr(result, nb_elements);
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myMul->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -451,24 +302,25 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 delete[] array1;
                 delete[] result;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
 
-
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
             std::size_t number_of_operation = 0;
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
 
                 // generate 2 random Tensors
-                // handle dimensions, replace some dimensions with '1' to get broadcasting
+                // handle dimensions, replace some dimensions with '1' to get
+                // broadcasting
 
                 constexpr std::size_t nbDims = 4;
                 std::vector<std::size_t> dimensions;
 
-                for (std::size_t i = 0; i < nbDims; ++i)
-                {
+                for (std::size_t i = 0; i < nbDims; ++i) {
                     dimensions.push_back(dimSizeDist(gen));
                 }
 
@@ -476,77 +328,90 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 auto dims1 = dimensions;
                 auto dimsOut = dimensions;
 
-                for (std::size_t i = 0; i < nbDims; ++i)
-                {
-                    if (boolDist(gen))
-                    {
+                for (std::size_t i = 0; i < nbDims; ++i) {
+                    if (boolDist(gen)) {
                         dims0[i] = 1;
                     }
 
-                    if (boolDist(gen))
-                    {
+                    if (boolDist(gen)) {
                         dims1[i] = 1;
                     }
 
                     dimsOut[i] = (dims0[i] == 1) ? dims1[i] : dims0[i];
                 }
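+                // dimsOut is the broadcast shape: each axis keeps the non-1
+                // size (or stays 1 when both inputs are 1 there).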
 
-                for(auto dim : dims0)
-                {
+                for (auto dim : dims0) {
                     Log::info("Dimension of input 0 : {}", dim);
                 }
 
-                for(auto dim : dims1)
-                {
+                for (auto dim : dims1) {
                     Log::info("Dimension of input 1 : {}", dim);
                 }
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-
-                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i)
-                {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                float *array1 =
+                    new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
 
-                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i)
-                {
+                for (std::size_t i = 0;
+                     i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+                     ++i) {
                     array1[i] = valueDist(gen);
                 }
 
                 // compute true result
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
-
-                for (std::size_t a = 0; a < dimsOut[0]; ++a)
-                {
-                    for (std::size_t b = 0; b < dimsOut[1]; ++b)
-                    {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-
-                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
-
-                        for (std::size_t c = 0; c < dimsOut[2]; ++c)
-                        {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
-
-                            for (std::size_t d = 0; d < dimsOut[3]; ++d)
-                            {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                                    + ((dims1[3] > 1) ? d : 0);
-
-                                result[idx_out + d] = array0[idx0] * array1[idx1];
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1[1] * dims1[2] * dims1[3],
+                    dims1[2] * dims1[3],
+                    dims1[3],
+                    1};
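+                // Row-major strides; below, a size-1 axis contributes index 0
+                // so its single element is reused across that output axis.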
+
+                for (std::size_t a = 0; a < dimsOut[0]; ++a) {
+                    for (std::size_t b = 0; b < dimsOut[1]; ++b) {
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1[1] > 1) ? b : 0);
+
+                        for (std::size_t c = 0; c < dimsOut[2]; ++c) {
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
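+                            // Row-major offset of output element (a, b, c, 0);
+                            // d is added inside the innermost loop.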
+
+                            for (std::size_t d = 0; d < dimsOut[3]; ++d) {
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                                    ((dims1[3] > 1) ? d : 0);
+
+                                result[idx_out + d] =
+                                    array0[idx0] * array1[idx1];
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " * " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -555,22 +420,30 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+                T1->getImpl()->setRawPtr(
+                    array1,
+                    dims1[0] * dims1[1] * dims1[2] * dims1[3]);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myMul->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -579,15 +452,23 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
-            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
+            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(
+                std::size_t(1),
+                std::size_t(3));
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
@@ -604,15 +485,24 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                         dims1[i] = 1;
                     }
                 }
-                dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
+                dims1.erase(dims1.cbegin(),
+                            dims1.cbegin() + nbRemovedDimsDist(gen));
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
-                float* array1 = new float[array1_size];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                std::size_t array1_size =
+                    std::accumulate(dims1.cbegin(),
+                                    dims1.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
+                float *array1 = new float[array1_size];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]);
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
                 for (std::size_t i = 0; i < array1_size; ++i) {
@@ -621,27 +511,48 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
 
                 // compute true result
                 auto dims1_tmp = dims1;
-                dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
-
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
+                dims1_tmp.insert(dims1_tmp.cbegin(),
+                                 4 - dims1_tmp.size(),
+                                 std::size_t(1));
+
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[3],
+                    1};
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
-                                                    + ((dims1_tmp[3] > 1) ? d : 0);
-                                result[idx_out + d] = array0[idx0] * array1[idx1];
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " * " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] *
+                                        ((dims1_tmp[2] > 1) ? c : 0) +
+                                    ((dims1_tmp[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    array0[idx0] * array1[idx1];
                             }
                         }
                     }
@@ -650,22 +561,28 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, array1_size);
+                T1->getImpl()->setRawPtr(array1, array1_size);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myMul->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -674,12 +591,18 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
     }
 }
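
Note on the reference computation reformatted above: the Mul test builds its ground truth with manual NCHW broadcasting, zeroing the index contribution of any size-1 dimension via the `(dims[k] > 1) ? i : 0` ternaries. Below is a minimal standalone sketch of that technique (illustrative names, not part of the patch or of Aidge's API):

// broadcast_mul_sketch.cpp -- multiply two 4-D arrays with ONNX-style
// broadcasting; any input dimension equal to 1 is repeated along that axis.
#include <cstddef>

void broadcastMul4D(const float *a0, const std::size_t dims0[4],
                    const float *a1, const std::size_t dims1[4],
                    float *out, const std::size_t dimsOut[4]) {
    // row-major strides of each input
    const std::size_t s0[4] = {
        dims0[1] * dims0[2] * dims0[3], dims0[2] * dims0[3], dims0[3], 1};
    const std::size_t s1[4] = {
        dims1[1] * dims1[2] * dims1[3], dims1[2] * dims1[3], dims1[3], 1};
    std::size_t o = 0; // flat output index, advanced in row-major order
    for (std::size_t a = 0; a < dimsOut[0]; ++a)
        for (std::size_t b = 0; b < dimsOut[1]; ++b)
            for (std::size_t c = 0; c < dimsOut[2]; ++c)
                for (std::size_t d = 0; d < dimsOut[3]; ++d, ++o) {
                    // a size-1 dimension is pinned to index 0 (zero offset),
                    // which is exactly what broadcasting means
                    const std::size_t i0 =
                        s0[0] * ((dims0[0] > 1) ? a : 0) +
                        s0[1] * ((dims0[1] > 1) ? b : 0) +
                        s0[2] * ((dims0[2] > 1) ? c : 0) +
                        ((dims0[3] > 1) ? d : 0);
                    const std::size_t i1 =
                        s1[0] * ((dims1[0] > 1) ? a : 0) +
                        s1[1] * ((dims1[1] > 1) ? b : 0) +
                        s1[2] * ((dims1[2] > 1) ? c : 0) +
                        ((dims1[3] > 1) ? d : 0);
                    out[o] = a0[i0] * a1[i1];
                }
}

The test bodies above apply the same pattern, hoisting the a/b terms out of the inner loops (idx0_0, idx1_0) so only the c/d terms are recomputed per element.
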
diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp
index cdd3a5f979085f3782776ce69ddd92c0d53150c4..77f0add738bef378a4e4e72f86a589e2f867d449 100644
--- a/unit_tests/operator/Test_PadImpl.cpp
+++ b/unit_tests/operator/Test_PadImpl.cpp
@@ -24,106 +24,98 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
     SECTION("Symmetric Pad") {
         const int pv = 0; // pad value
 
-        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
-        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
-            {
-                {
-                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
-                    { pv,   0,   1,   2,   3,   4,  pv},
-                    { pv,   5,   6,   7,   8,   9,  pv},
-                    { pv,  10,  11,  12,  13,  14,  pv},
-                    { pv,  15,  16,  17,  18,  19,  pv},
-                    { pv,  20,  21,  22,  23,  24,  pv},
-                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
-
-                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
-                    { pv,  25,  26,  27,  28,  29,  pv},
-                    { pv,  30,  31,  32,  33,  34,  pv},
-                    { pv,  35,  36,  37,  38,  39,  pv},
-                    { pv,  40,  41,  42,  43,  44,  pv},
-                    { pv,  45,  46,  47,  48,  49,  pv},
-                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
-
-                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
-                    { pv,  50,  51,  52,  53,  54,  pv},
-                    { pv,  55,  56,  57,  58,  59,  pv},
-                    { pv,  60,  61,  62,  63,  64,  pv},
-                    { pv,  65,  66,  67,  68,  69,  pv},
-                    { pv,  70,  71,  72,  73,  74,  pv},
-                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}}
-                },
-                {
-                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
-                    { pv,  75,  76,  77,  78,  79,  pv},
-                    { pv,  80,  81,  82,  83,  84,  pv},
-                    { pv,  85,  86,  87,  88,  89,  pv},
-                    { pv,  90,  91,  92,  93,  94,  pv},
-                    { pv,  95,  96,  97,  98,  99,  pv},
-                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
-
-                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
-                    {pv,  100, 101, 102, 103, 104,  pv},
-                    {pv,  105, 106, 107, 108, 109,  pv},
-                    {pv,  110, 111, 112, 113, 114,  pv},
-                    {pv,  115, 116, 117, 118, 119,  pv},
-                    {pv,  120, 121, 122, 123, 124,  pv},
-                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
-
-                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
-                    {pv,  125, 126, 127, 128, 129,  pv},
-                    {pv,  130, 131, 132, 133, 134,  pv},
-                    {pv,  135, 136, 137, 138, 139,  pv},
-                    {pv,  140, 141, 142, 143, 144,  pv},
-                    {pv,  145, 146, 147, 148, 149,  pv},
-                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}}
-                }
-            }
-        });
-
-        myPad->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1},
+                                             "mypad",
+                                             PadBorderType::Constant,
+                                             static_cast<double>(pv));
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 7, 7>{// NCHW
+                                     {{{{pv, pv, pv, pv, pv, pv, pv},
+                                        {pv, 0, 1, 2, 3, 4, pv},
+                                        {pv, 5, 6, 7, 8, 9, pv},
+                                        {pv, 10, 11, 12, 13, 14, pv},
+                                        {pv, 15, 16, 17, 18, 19, pv},
+                                        {pv, 20, 21, 22, 23, 24, pv},
+                                        {pv, pv, pv, pv, pv, pv, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv, pv},
+                                        {pv, 25, 26, 27, 28, 29, pv},
+                                        {pv, 30, 31, 32, 33, 34, pv},
+                                        {pv, 35, 36, 37, 38, 39, pv},
+                                        {pv, 40, 41, 42, 43, 44, pv},
+                                        {pv, 45, 46, 47, 48, 49, pv},
+                                        {pv, pv, pv, pv, pv, pv, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv, pv},
+                                        {pv, 50, 51, 52, 53, 54, pv},
+                                        {pv, 55, 56, 57, 58, 59, pv},
+                                        {pv, 60, 61, 62, 63, 64, pv},
+                                        {pv, 65, 66, 67, 68, 69, pv},
+                                        {pv, 70, 71, 72, 73, 74, pv},
+                                        {pv, pv, pv, pv, pv, pv, pv}}},
+                                      {{{pv, pv, pv, pv, pv, pv, pv},
+                                        {pv, 75, 76, 77, 78, 79, pv},
+                                        {pv, 80, 81, 82, 83, 84, pv},
+                                        {pv, 85, 86, 87, 88, 89, pv},
+                                        {pv, 90, 91, 92, 93, 94, pv},
+                                        {pv, 95, 96, 97, 98, 99, pv},
+                                        {pv, pv, pv, pv, pv, pv, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv, pv},
+                                        {pv, 100, 101, 102, 103, 104, pv},
+                                        {pv, 105, 106, 107, 108, 109, pv},
+                                        {pv, 110, 111, 112, 113, 114, pv},
+                                        {pv, 115, 116, 117, 118, 119, pv},
+                                        {pv, 120, 121, 122, 123, 124, pv},
+                                        {pv, pv, pv, pv, pv, pv, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv, pv},
+                                        {pv, 125, 126, 127, 128, 129, pv},
+                                        {pv, 130, 131, 132, 133, 134, pv},
+                                        {pv, 135, 136, 137, 138, 139, pv},
+                                        {pv, 140, 141, 142, 143, 144, pv},
+                                        {pv, 145, 146, 147, 148, 149, pv},
+                                        {pv, pv, pv, pv, pv, pv, pv}}}}});
+
+        myPad->getOperator()->associateInput(0, myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
         myPad->forward();
@@ -134,100 +126,92 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
     SECTION("Asymmetric Pad") {
         const int pv = 0; // pad value
 
-        std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
-        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,6,6> { //NCHW
-            {
-                {
-                    {{ pv,   pv,   pv,   pv,   pv,  pv},
-                    { 0,   1,   2,   3,   4,  pv},
-                    { 5,   6,   7,   8,   9,  pv},
-                    { 10,  11,  12,  13,  14,  pv},
-                    { 15,  16,  17,  18,  19,  pv},
-                    { 20,  21,  22,  23,  24,  pv}},
-
-                    {{ pv,   pv,   pv,   pv,   pv,  pv},
-                    { 25,  26,  27,  28,  29,  pv},
-                    { 30,  31,  32,  33,  34,  pv},
-                    { 35,  36,  37,  38,  39,  pv},
-                    { 40,  41,  42,  43,  44,  pv},
-                    { 45,  46,  47,  48,  49,  pv}},
-
-                    {{ pv,   pv,   pv,   pv,   pv,  pv},
-                    { 50,  51,  52,  53,  54,  pv},
-                    { 55,  56,  57,  58,  59,  pv},
-                    { 60,  61,  62,  63,  64,  pv},
-                    { 65,  66,  67,  68,  69,  pv},
-                    { 70,  71,  72,  73,  74,  pv}}
-                },
-                {
-                    {{ pv,   pv,   pv,   pv,   pv,  pv},
-                    { 75,  76,  77,  78,  79,  pv},
-                    { 80,  81,  82,  83,  84,  pv},
-                    { 85,  86,  87,  88,  89,  pv},
-                    { 90,  91,  92,  93,  94,  pv},
-                    { 95,  96,  97,  98,  99,  pv}},
-
-                    {{ pv,   pv,   pv,   pv,   pv,  pv},
-                    { 100, 101, 102, 103, 104,  pv},
-                    { 105, 106, 107, 108, 109,  pv},
-                    { 110, 111, 112, 113, 114,  pv},
-                    { 115, 116, 117, 118, 119,  pv},
-                    { 120, 121, 122, 123, 124,  pv}},
-
-                    {{ pv,   pv,   pv,   pv,   pv,  pv},
-                    { 125, 126, 127, 128, 129,  pv},
-                    { 130, 131, 132, 133, 134,  pv},
-                    { 135, 136, 137, 138, 139,  pv},
-                    { 140, 141, 142, 143, 144,  pv},
-                    { 145, 146, 147, 148, 149,  pv}}
-                }
-            }
-        });
-
-        myPad->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1},
+                                             "mypad",
+                                             PadBorderType::Constant,
+                                             static_cast<double>(pv));
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 6, 6>{// NCHW
+                                     {{{{pv, pv, pv, pv, pv, pv},
+                                        {0, 1, 2, 3, 4, pv},
+                                        {5, 6, 7, 8, 9, pv},
+                                        {10, 11, 12, 13, 14, pv},
+                                        {15, 16, 17, 18, 19, pv},
+                                        {20, 21, 22, 23, 24, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv},
+                                        {25, 26, 27, 28, 29, pv},
+                                        {30, 31, 32, 33, 34, pv},
+                                        {35, 36, 37, 38, 39, pv},
+                                        {40, 41, 42, 43, 44, pv},
+                                        {45, 46, 47, 48, 49, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv},
+                                        {50, 51, 52, 53, 54, pv},
+                                        {55, 56, 57, 58, 59, pv},
+                                        {60, 61, 62, 63, 64, pv},
+                                        {65, 66, 67, 68, 69, pv},
+                                        {70, 71, 72, 73, 74, pv}}},
+                                      {{{pv, pv, pv, pv, pv, pv},
+                                        {75, 76, 77, 78, 79, pv},
+                                        {80, 81, 82, 83, 84, pv},
+                                        {85, 86, 87, 88, 89, pv},
+                                        {90, 91, 92, 93, 94, pv},
+                                        {95, 96, 97, 98, 99, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv},
+                                        {100, 101, 102, 103, 104, pv},
+                                        {105, 106, 107, 108, 109, pv},
+                                        {110, 111, 112, 113, 114, pv},
+                                        {115, 116, 117, 118, 119, pv},
+                                        {120, 121, 122, 123, 124, pv}},
+
+                                       {{pv, pv, pv, pv, pv, pv},
+                                        {125, 126, 127, 128, 129, pv},
+                                        {130, 131, 132, 133, 134, pv},
+                                        {135, 136, 137, 138, 139, pv},
+                                        {140, 141, 142, 143, 144, pv},
+                                        {145, 146, 147, 148, 149, pv}}}}});
+
+        myPad->getOperator()->associateInput(0, myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
         myPad->forward();
@@ -236,106 +220,97 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
     }
 
     SECTION("Pad Edge") {
-        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge);
-        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
-            {
-                {
-                    {{ 0,  0,   1,   2,   3,   4,  4},
-                    { 0,   0,   1,   2,   3,   4,  4},
-                    { 5,   5,   6,   7,   8,   9,  9},
-                    { 10,  10,  11,  12,  13,  14,  14},
-                    { 15,  15,  16,  17,  18,  19,  19},
-                    { 20,  20,  21,  22,  23,  24,  24},
-                    { 20,  20,  21,  22,  23,  24,  24}},
-
-                    {{ 25,  25,  26,  27,  28,  29,  29},
-                    { 25,  25,  26,  27,  28,  29,  29},
-                    { 30,  30,  31,  32,  33,  34,  34},
-                    { 35,  35,  36,  37,  38,  39,  39},
-                    { 40,  40,  41,  42,  43,  44,  44},
-                    { 45,  45,  46,  47,  48,  49,  49},
-                    { 45,  45,  46,  47,  48,  49, 49}},
-
-                    {{ 50,  50,  51,  52,  53,  54,  54},
-                    { 50,  50,  51,  52,  53,  54,  54},
-                    { 55,  55,  56,  57,  58,  59,  59},
-                    { 60,  60,  61,  62,  63,  64,  64},
-                    { 65,  65,  66,  67,  68,  69,  69},
-                    { 70,  70,  71,  72,  73,  74,  74},
-                    { 70,  70,  71,  72,  73,  74,  74}}
-                },
-                {
-                    {{ 75,  75,  76,  77,  78,  79,  79},
-                    { 75,  75,  76,  77,  78,  79,  79},
-                    { 80,  80,  81,  82,  83,  84,  84},
-                    { 85,  85,  86,  87,  88,  89,  89},
-                    { 90,  90,  91,  92,  93,  94,  94},
-                    { 95,  95,  96,  97,  98,  99,  99},
-                    { 95,  95,  96,  97,  98,  99,  99}},
-
-                    {{100,  100, 101, 102, 103, 104,  104},
-                    {100,  100, 101, 102, 103, 104,  104},
-                    {105,  105, 106, 107, 108, 109, 109},
-                    {110,  110, 111, 112, 113, 114,  114},
-                    {115,  115, 116, 117, 118, 119,  119},
-                    {120,  120, 121, 122, 123, 124,  124},
-                    {120,  120, 121, 122, 123, 124,  124}},
-
-                    {{125,  125, 126, 127, 128, 129,  129},
-                    {125,  125, 126, 127, 128, 129,  129},
-                    {130,  130, 131, 132, 133, 134,  134},
-                    {135,  135, 136, 137, 138, 139,  139},
-                    {140,  140, 141, 142, 143, 144,  144},
-                    {145,  145, 146, 147, 148, 149,  149},
-                    {145,  145, 146, 147, 148, 149,  149}}
-                }
-            }
-        });
-
-        myPad->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Node> myPad =
+            Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{
+                // NCHW
+                {{{{0, 0, 1, 2, 3, 4, 4},
+                   {0, 0, 1, 2, 3, 4, 4},
+                   {5, 5, 6, 7, 8, 9, 9},
+                   {10, 10, 11, 12, 13, 14, 14},
+                   {15, 15, 16, 17, 18, 19, 19},
+                   {20, 20, 21, 22, 23, 24, 24},
+                   {20, 20, 21, 22, 23, 24, 24}},
+
+                  {{25, 25, 26, 27, 28, 29, 29},
+                   {25, 25, 26, 27, 28, 29, 29},
+                   {30, 30, 31, 32, 33, 34, 34},
+                   {35, 35, 36, 37, 38, 39, 39},
+                   {40, 40, 41, 42, 43, 44, 44},
+                   {45, 45, 46, 47, 48, 49, 49},
+                   {45, 45, 46, 47, 48, 49, 49}},
+
+                  {{50, 50, 51, 52, 53, 54, 54},
+                   {50, 50, 51, 52, 53, 54, 54},
+                   {55, 55, 56, 57, 58, 59, 59},
+                   {60, 60, 61, 62, 63, 64, 64},
+                   {65, 65, 66, 67, 68, 69, 69},
+                   {70, 70, 71, 72, 73, 74, 74},
+                   {70, 70, 71, 72, 73, 74, 74}}},
+                 {{{75, 75, 76, 77, 78, 79, 79},
+                   {75, 75, 76, 77, 78, 79, 79},
+                   {80, 80, 81, 82, 83, 84, 84},
+                   {85, 85, 86, 87, 88, 89, 89},
+                   {90, 90, 91, 92, 93, 94, 94},
+                   {95, 95, 96, 97, 98, 99, 99},
+                   {95, 95, 96, 97, 98, 99, 99}},
+
+                  {{100, 100, 101, 102, 103, 104, 104},
+                   {100, 100, 101, 102, 103, 104, 104},
+                   {105, 105, 106, 107, 108, 109, 109},
+                   {110, 110, 111, 112, 113, 114, 114},
+                   {115, 115, 116, 117, 118, 119, 119},
+                   {120, 120, 121, 122, 123, 124, 124},
+                   {120, 120, 121, 122, 123, 124, 124}},
+
+                  {{125, 125, 126, 127, 128, 129, 129},
+                   {125, 125, 126, 127, 128, 129, 129},
+                   {130, 130, 131, 132, 133, 134, 134},
+                   {135, 135, 136, 137, 138, 139, 139},
+                   {140, 140, 141, 142, 143, 144, 144},
+                   {145, 145, 146, 147, 148, 149, 149},
+                   {145, 145, 146, 147, 148, 149, 149}}}}});
+
+        myPad->getOperator()->associateInput(0, myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
         myPad->forward();
@@ -344,114 +319,93 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
     }
 
     SECTION("Pad Reflect") {
-        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect);
-        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
-            {
-                {
-                    {
-                    { 6, 5, 6, 7, 8, 9, 5},
-                    { 1, 0, 1, 2, 3, 4, 0},
-                    { 6, 5, 6, 7, 8, 9, 5},
-                    { 11, 10, 11, 12, 13, 14, 10},
-                    { 16, 15, 16, 17, 18, 19, 15},
-                    { 21, 20, 21, 22, 23, 24, 20},
-                    { 1, 0, 1, 2, 3, 4, 0}
-                    },
-                    {
-                    { 31, 30, 31, 32, 33, 34, 30},
-                    { 26, 25, 26, 27, 28, 29, 25},
-                    { 31, 30, 31, 32, 33, 34, 30},
-                    { 36, 35, 36, 37, 38, 39, 35},
-                    { 41, 40, 41, 42, 43, 44, 40},
-                    { 46, 45, 46, 47, 48, 49, 45},
-                    { 26, 25, 26, 27, 28, 29, 25}
-                    },
-                    {
-                    { 56, 55, 56, 57, 58, 59, 55},
-                    { 51, 50, 51, 52, 53, 54, 50},
-                    { 56, 55, 56, 57, 58, 59, 55},
-                    { 61, 60, 61, 62, 63, 64, 60},
-                    { 66, 65, 66, 67, 68, 69, 65},
-                    { 71, 70, 71, 72, 73, 74, 70},
-                    { 51, 50, 51, 52, 53, 54, 50}
-                    }
-                },
-                {
-                    {
-                    { 81, 80, 81, 82, 83, 84, 80},
-                    { 76, 75, 76, 77, 78, 79, 75},
-                    { 81, 80, 81, 82, 83, 84, 80},
-                    { 86, 85, 86, 87, 88, 89, 85},
-                    { 91, 90, 91, 92, 93, 94, 90},
-                    { 96, 95, 96, 97, 98, 99, 95},
-                    { 76, 75, 76, 77, 78, 79, 75}
-                    },
-                    {
-                    { 106, 105, 106, 107, 108, 109, 105},
-                    { 101, 100, 101, 102, 103, 104, 100},
-                    { 106, 105, 106, 107, 108, 109, 105},
-                    { 111, 110, 111, 112, 113, 114, 110},
-                    { 116, 115, 116, 117, 118, 119, 115},
-                    { 121, 120, 121, 122, 123, 124, 120},
-                    { 101, 100, 101, 102, 103, 104, 100}
-                    },
-                    {
-                    { 131, 130, 131, 132, 133, 134, 130},
-                    { 126, 125, 126, 127, 128, 129, 125},
-                    { 131, 130, 131, 132, 133, 134, 130},
-                    { 136, 135, 136, 137, 138, 139, 135},
-                    { 141, 140, 141, 142, 143, 144, 140},
-                    { 146, 145, 146, 147, 148, 149, 145},
-                    { 126, 125, 126, 127, 128, 129, 125}
-                    }
-                    }
-                }
-        });
-
-        myPad->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Node> myPad =
+            Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{
+                // NCHW
+                {{{{6, 5, 6, 7, 8, 9, 5},
+                   {1, 0, 1, 2, 3, 4, 0},
+                   {6, 5, 6, 7, 8, 9, 5},
+                   {11, 10, 11, 12, 13, 14, 10},
+                   {16, 15, 16, 17, 18, 19, 15},
+                   {21, 20, 21, 22, 23, 24, 20},
+                   {1, 0, 1, 2, 3, 4, 0}},
+                  {{31, 30, 31, 32, 33, 34, 30},
+                   {26, 25, 26, 27, 28, 29, 25},
+                   {31, 30, 31, 32, 33, 34, 30},
+                   {36, 35, 36, 37, 38, 39, 35},
+                   {41, 40, 41, 42, 43, 44, 40},
+                   {46, 45, 46, 47, 48, 49, 45},
+                   {26, 25, 26, 27, 28, 29, 25}},
+                  {{56, 55, 56, 57, 58, 59, 55},
+                   {51, 50, 51, 52, 53, 54, 50},
+                   {56, 55, 56, 57, 58, 59, 55},
+                   {61, 60, 61, 62, 63, 64, 60},
+                   {66, 65, 66, 67, 68, 69, 65},
+                   {71, 70, 71, 72, 73, 74, 70},
+                   {51, 50, 51, 52, 53, 54, 50}}},
+                 {{{81, 80, 81, 82, 83, 84, 80},
+                   {76, 75, 76, 77, 78, 79, 75},
+                   {81, 80, 81, 82, 83, 84, 80},
+                   {86, 85, 86, 87, 88, 89, 85},
+                   {91, 90, 91, 92, 93, 94, 90},
+                   {96, 95, 96, 97, 98, 99, 95},
+                   {76, 75, 76, 77, 78, 79, 75}},
+                  {{106, 105, 106, 107, 108, 109, 105},
+                   {101, 100, 101, 102, 103, 104, 100},
+                   {106, 105, 106, 107, 108, 109, 105},
+                   {111, 110, 111, 112, 113, 114, 110},
+                   {116, 115, 116, 117, 118, 119, 115},
+                   {121, 120, 121, 122, 123, 124, 120},
+                   {101, 100, 101, 102, 103, 104, 100}},
+                  {{131, 130, 131, 132, 133, 134, 130},
+                   {126, 125, 126, 127, 128, 129, 125},
+                   {131, 130, 131, 132, 133, 134, 130},
+                   {136, 135, 136, 137, 138, 139, 135},
+                   {141, 140, 141, 142, 143, 144, 140},
+                   {146, 145, 146, 147, 148, 149, 145},
+                   {126, 125, 126, 127, 128, 129, 125}}}}});
+
+        myPad->getOperator()->associateInput(0, myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
         myPad->forward();
@@ -460,106 +414,97 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
     }
 
     SECTION("Pad Wrap") {
-        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap);
-        auto op = std::static_pointer_cast<OperatorTensor>(myPad -> getOperator());
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { //NCHW
-            {
-                {
-                    {{ 24,  20,  21,  22,  23,  24,  20},
-                    { 4,   0,   1,   2,   3,   4,  0},
-                    { 9,   5,   6,   7,   8,   9,  5},
-                    { 14,  10,  11,  12,  13,  14,  10},
-                    { 19,  15,  16,  17,  18,  19,  15},
-                    { 24,  20,  21,  22,  23,  24,  20},
-                    { 4,   0,   1,   2,   3,   4,  0}},
-
-                    {{ 49,  45,  46,  47,  48,  49, 45},
-                    { 29,  25,  26,  27,  28,  29,  25},
-                    { 34,  30,  31,  32,  33,  34,  30},
-                    { 39,  35,  36,  37,  38,  39,  35},
-                    { 44,  40,  41,  42,  43,  44,  40},
-                    { 49,  45,  46,  47,  48,  49,  45},
-                    { 29,  25,  26,  27,  28,  29,  25}},
-
-                    {{ 74,  70,  71,  72,  73,  74,  70},
-                    { 54,  50,  51,  52,  53,  54,  50},
-                    { 59,  55,  56,  57,  58,  59,  55},
-                    { 64,  60,  61,  62,  63,  64,  60},
-                    { 69,  65,  66,  67,  68,  69,  65},
-                    { 74,  70,  71,  72,  73,  74,  70},
-                    { 54,  50,  51,  52,  53,  54,  50}}
-                },
-                {
-                    {{ 99,  95,  96,  97,  98,  99,  95},
-                    { 79,  75,  76,  77,  78,  79,  75},
-                    { 84,  80,  81,  82,  83,  84,  80},
-                    { 89,  85,  86,  87,  88,  89,  85},
-                    { 94,  90,  91,  92,  93,  94,  90},
-                    { 99,  95,  96,  97,  98,  99,  95},
-                    { 79,  75,  76,  77,  78,  79,  75}},
-
-                    {{124,  120, 121, 122, 123, 124,  120},
-                    {104,  100, 101, 102, 103, 104,  100},
-                    {109,  105, 106, 107, 108, 109, 105},
-                    {114,  110, 111, 112, 113, 114,  110},
-                    {119,  115, 116, 117, 118, 119,  115},
-                    {124,  120, 121, 122, 123, 124,  120},
-                    {104,  100, 101, 102, 103, 104,  100}},
-
-                    {{149,  145, 146, 147, 148, 149,  145},
-                    {129,  125, 126, 127, 128, 129,  125},
-                    {134,  130, 131, 132, 133, 134,  130},
-                    {139,  135, 136, 137, 138, 139,  135},
-                    {144,  140, 141, 142, 143, 144,  140},
-                    {149,  145, 146, 147, 148, 149,  145},
-                    {129,  125, 126, 127, 128, 129,  125}}
-                }
-            }
-        });
-
-        myPad->getOperator()->associateInput(0,myInput);
+        std::shared_ptr<Node> myPad =
+            Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myPad->getOperator());
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
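+        // Expected result of 1-pixel Wrap padding: indices wrap to the
+        // opposite edge, so out(i, j) = in((i - 1) mod 5, (j - 1) mod 5)
+        // per channel; e.g. the top-left corner of channel 0 is in(4, 4) = 24.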
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 3, 7, 7>{
+                // NCHW
+                {{{{24, 20, 21, 22, 23, 24, 20},
+                   {4, 0, 1, 2, 3, 4, 0},
+                   {9, 5, 6, 7, 8, 9, 5},
+                   {14, 10, 11, 12, 13, 14, 10},
+                   {19, 15, 16, 17, 18, 19, 15},
+                   {24, 20, 21, 22, 23, 24, 20},
+                   {4, 0, 1, 2, 3, 4, 0}},
+
+                  {{49, 45, 46, 47, 48, 49, 45},
+                   {29, 25, 26, 27, 28, 29, 25},
+                   {34, 30, 31, 32, 33, 34, 30},
+                   {39, 35, 36, 37, 38, 39, 35},
+                   {44, 40, 41, 42, 43, 44, 40},
+                   {49, 45, 46, 47, 48, 49, 45},
+                   {29, 25, 26, 27, 28, 29, 25}},
+
+                  {{74, 70, 71, 72, 73, 74, 70},
+                   {54, 50, 51, 52, 53, 54, 50},
+                   {59, 55, 56, 57, 58, 59, 55},
+                   {64, 60, 61, 62, 63, 64, 60},
+                   {69, 65, 66, 67, 68, 69, 65},
+                   {74, 70, 71, 72, 73, 74, 70},
+                   {54, 50, 51, 52, 53, 54, 50}}},
+                 {{{99, 95, 96, 97, 98, 99, 95},
+                   {79, 75, 76, 77, 78, 79, 75},
+                   {84, 80, 81, 82, 83, 84, 80},
+                   {89, 85, 86, 87, 88, 89, 85},
+                   {94, 90, 91, 92, 93, 94, 90},
+                   {99, 95, 96, 97, 98, 99, 95},
+                   {79, 75, 76, 77, 78, 79, 75}},
+
+                  {{124, 120, 121, 122, 123, 124, 120},
+                   {104, 100, 101, 102, 103, 104, 100},
+                   {109, 105, 106, 107, 108, 109, 105},
+                   {114, 110, 111, 112, 113, 114, 110},
+                   {119, 115, 116, 117, 118, 119, 115},
+                   {124, 120, 121, 122, 123, 124, 120},
+                   {104, 100, 101, 102, 103, 104, 100}},
+
+                  {{149, 145, 146, 147, 148, 149, 145},
+                   {129, 125, 126, 127, 128, 129, 125},
+                   {134, 130, 131, 132, 133, 134, 130},
+                   {139, 135, 136, 137, 138, 139, 135},
+                   {144, 140, 141, 142, 143, 144, 140},
+                   {149, 145, 146, 147, 148, 149, 145},
+                   {129, 125, 126, 127, 128, 129, 125}}}}});
+
+        myPad->getOperator()->associateInput(0, myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
         myPad->forward();
diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp
index b7584ad069336a270ed07c32d4c07552888b6587..3e2bad72bb9a874e5ce8af7753b22fd4076640bb 100644
--- a/unit_tests/operator/Test_PaddedConv.cpp
+++ b/unit_tests/operator/Test_PaddedConv.cpp
@@ -24,133 +24,88 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
     SECTION("Classic Conv") {
-        std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv");
-        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
-            {
-                {
-                    {{  0,   1,   2},
-                    {  3,   4,   5},
-                    {  6,   7,   8}},
-                    {{  9,  10,  11},
-                    { 12,  13,  14},
-                    { 15,  16,  17}},
-                    {{ 18,  19,  20},
-                    { 21,  22,  23},
-                    { 24,  25,  26}}
-                },
-                {
-                    {{ 27,  28,  29},
-                    { 30,  31,  32},
-                    { 33,  34,  35}},
-                    {{ 36,  37,  38},
-                    { 39,  40,  41},
-                    { 42,  43,  44}},
-                    {{ 45,  46,  47},
-                    { 48,  49,  50},
-                    { 51,  52,  53}}
-                },
-                {
-                    {{ 54,  55,  56},
-                    { 57,  58,  59},
-                    { 60,  61,  62}},
-                    {{ 63,  64,  65},
-                    { 66,  67,  68},
-                    { 69,  70,  71}},
-                    {{ 72,  73,  74},
-                    { 75,  76,  77},
-                    { 78,  79,  80}}
-                },
-                {
-                    {{ 81,  82,  83},
-                    { 84,  85,  86},
-                    { 87,  88,  89}},
-                    {{ 90,  91,  92},
-                    { 93,  94,  95},
-                    { 96,  97,  98}},
-                    {{ 99, 100, 101},
-                    {102, 103, 104},
-                    {105, 106, 107}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
-            {
-                {
-                    {{ 15226,  15577,  15928},
-                    { 16981,  17332,  17683},
-                    { 18736,  19087,  19438}},
-                    {{ 37818,  38898,  39978},
-                    { 43218,  44298,  45378},
-                    { 48618,  49698,  50778}},
-                    {{ 60426,  62235,  64044},
-                    { 69471,  71280,  73089},
-                    { 78516,  80325,  82134}},
-                    {{ 83016,  85554,  88092},
-                    { 95706,  98244, 100782},
-                    {108396, 110934, 113472}}
-                },
-                {
-                    {{ 41551,  41902,  42253},
-                    { 43306,  43657,  44008},
-                    { 45061,  45412,  45763}},
-                    {{118818, 119898, 120978},
-                    {124218, 125298, 126378},
-                    {129618, 130698, 131778}},
-                    {{196101, 197910, 199719},
-                    {205146, 206955, 208764},
-                    {214191, 216000, 217809}},
-                    {{273366, 275904, 278442},
-                    {286056, 288594, 291132},
-                    {298746, 301284, 303822}}
-                }
-            }
-        });
-
-        myConv->getOperator()->associateInput(0,myInput);
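+        // 3 input channels, 4 output channels, 3x3 kernel; with no padding
+        // arguments this behaves as a plain (unpadded) convolution.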
+        std::shared_ptr<Node> myConv = PaddedConv(3, 4, {3, 3}, "myconv");
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{
+                {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}},
+                  {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}},
+                  {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}},
+                 {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}},
+                  {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}},
+                  {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}},
+                 {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}},
+                  {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}},
+                  {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}},
+                 {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}},
+                  {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}},
+                  {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
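+        // 5x5 input, 3x3 kernel, stride 1, no padding: each of the 4 filters
+        // yields a 3x3 map, hence the 2x4x3x3 expected output.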
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 3, 3>{{{{{15226, 15577, 15928},
+                                        {16981, 17332, 17683},
+                                        {18736, 19087, 19438}},
+                                       {{37818, 38898, 39978},
+                                        {43218, 44298, 45378},
+                                        {48618, 49698, 50778}},
+                                       {{60426, 62235, 64044},
+                                        {69471, 71280, 73089},
+                                        {78516, 80325, 82134}},
+                                       {{83016, 85554, 88092},
+                                        {95706, 98244, 100782},
+                                        {108396, 110934, 113472}}},
+                                      {{{41551, 41902, 42253},
+                                        {43306, 43657, 44008},
+                                        {45061, 45412, 45763}},
+                                       {{118818, 119898, 120978},
+                                        {124218, 125298, 126378},
+                                        {129618, 130698, 131778}},
+                                       {{196101, 197910, 199719},
+                                        {205146, 206955, 208764},
+                                        {214191, 216000, 217809}},
+                                       {{273366, 275904, 278442},
+                                        {286056, 288594, 291132},
+                                        {298746, 301284, 303822}}}}});
+
+        myConv->getOperator()->associateInput(0, myInput);
         myConv->input(1).first->getOperator()->setOutput(0, myWeights);
         myConv->input(2).first->getOperator()->setOutput(0, myBias);
 
@@ -164,155 +119,112 @@ TEST_CASE("[cpu/operator] PaddedConv(forward)", "[PaddedConv][CPU]") {
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
     SECTION("test Padding") {
-        std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1});
-        auto op = std::static_pointer_cast<OperatorTensor>(myConv -> getOperator());
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
-            {
-                {
-                    {{  0,   1,   2},
-                    {  3,   4,   5},
-                    {  6,   7,   8}},
-                    {{  9,  10,  11},
-                    { 12,  13,  14},
-                    { 15,  16,  17}},
-                    {{ 18,  19,  20},
-                    { 21,  22,  23},
-                    { 24,  25,  26}}
-                },
-                {
-                    {{ 27,  28,  29},
-                    { 30,  31,  32},
-                    { 33,  34,  35}},
-                    {{ 36,  37,  38},
-                    { 39,  40,  41},
-                    { 42,  43,  44}},
-                    {{ 45,  46,  47},
-                    { 48,  49,  50},
-                    { 51,  52,  53}}
-                },
-                {
-                    {{ 54,  55,  56},
-                    { 57,  58,  59},
-                    { 60,  61,  62}},
-                    {{ 63,  64,  65},
-                    { 66,  67,  68},
-                    { 69,  70,  71}},
-                    {{ 72,  73,  74},
-                    { 75,  76,  77},
-                    { 78,  79,  80}}
-                },
-                {
-                    {{ 81,  82,  83},
-                    { 84,  85,  86},
-                    { 87,  88,  89}},
-                    {{ 90,  91,  92},
-                    { 93,  94,  95},
-                    { 96,  97,  98}},
-                    {{ 99, 100, 101},
-                    {102, 103, 104},
-                    {105, 106, 107}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> {
-            {
-                {
-                    {{  6895,  10225,  10486,  10747,   7063},
-                     { 10303,  15226,  15577,  15928,  10429},
-                     { 11518,  16981,  17332,  17683,  11554},
-                     { 12733,  18736,  19087,  19438,  12679},
-                     {  8047,  11791,  11998,  12205,   7927}},
-
-                    {{ 15960,  24069,  24816,  25563,  17100},
-                     { 25119,  37818,  38898,  39978,  26703},
-                     { 28764,  43218,  44298,  45378,  30258},
-                     { 32409,  48618,  49698,  50778,  33813},
-                     { 21972,  32925,  33618,  34311,  22824}},
-
-                    {{ 25041,  37929,  39162,  40395,  27153},
-                     { 39951,  60426,  62235,  64044,  42993},
-                     { 46026,  69471,  71280,  73089,  48978},
-                     { 52101,  78516,  80325,  82134,  54963},
-                     { 35913,  54075,  55254,  56433,  37737}},
-
-                    {{ 34104,  51771,  53490,  55209,  37188},
-                     { 54765,  83016,  85554,  88092,  59265},
-                     { 63270,  95706,  98244, 100782,  67680},
-                     { 71775, 108396, 110934, 113472,  76095},
-                     { 49836,  75207,  76872,  78537,  52632}}
-                },
-                {
-                    {{ 20395,  29800,  30061,  30322,  19663},
-                     { 28528,  41551,  41902,  42253,  27304},
-                     { 29743,  43306,  43657,  44008,  28429},
-                     { 30958,  45061,  45412,  45763,  29554},
-                     { 18847,  27316,  27523,  27730,  17827}},
-
-                    {{ 53760,  80094,  80841,  81588,  54000},
-                     { 79794, 118818, 119898, 120978,  80028},
-                     { 83439, 124218, 125298, 126378,  83583},
-                     { 87084, 129618, 130698, 131778,  87138},
-                     { 57072,  84900,  85593,  86286,  57024}},
-
-                    {{ 87141, 130404, 131637, 132870,  88353},
-                     {131076, 196101, 197910, 199719, 132768},
-                     {137151, 205146, 206955, 208764, 138753},
-                     {143226, 214191, 216000, 217809, 144738},
-                     { 95313, 142500, 143679, 144858,  96237}},
-
-                    {{120504, 180696, 182415, 184134, 122688},
-                     {182340, 273366, 275904, 278442, 185490},
-                     {190845, 286056, 288594, 291132, 193905},
-                     {199350, 298746, 301284, 303822, 202320},
-                     {133536, 200082, 201747, 203412, 135432}}
-                }
-            }
-        });
-
-        myConv->getOperator()->associateInput(0,myInput);
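+        // Same layer as above, but with stride {1, 1} and 1-pixel zero
+        // padding on every border, so the 5x5 spatial dims are preserved;
+        // border outputs only sum the in-bounds part of each window.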
+        std::shared_ptr<Node> myConv =
+            PaddedConv(3, 4, {3, 3}, "myconv", {1, 1}, {1, 1, 1, 1});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myConv->getOperator());
+        std::shared_ptr<Tensor> myWeights =
+            std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{
+                {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}},
+                  {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}},
+                  {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}},
+                 {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}},
+                  {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}},
+                  {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}},
+                 {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}},
+                  {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}},
+                  {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}},
+                 {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}},
+                  {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}},
+                  {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}});
+        std::shared_ptr<Tensor> myBias =
+            std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}});
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 5, 5>{// NCHW
+                                     {{{{0, 1, 2, 3, 4},
+                                        {5, 6, 7, 8, 9},
+                                        {10, 11, 12, 13, 14},
+                                        {15, 16, 17, 18, 19},
+                                        {20, 21, 22, 23, 24}},
+
+                                       {{25, 26, 27, 28, 29},
+                                        {30, 31, 32, 33, 34},
+                                        {35, 36, 37, 38, 39},
+                                        {40, 41, 42, 43, 44},
+                                        {45, 46, 47, 48, 49}},
+
+                                       {{50, 51, 52, 53, 54},
+                                        {55, 56, 57, 58, 59},
+                                        {60, 61, 62, 63, 64},
+                                        {65, 66, 67, 68, 69},
+                                        {70, 71, 72, 73, 74}}},
+                                      {{{75, 76, 77, 78, 79},
+                                        {80, 81, 82, 83, 84},
+                                        {85, 86, 87, 88, 89},
+                                        {90, 91, 92, 93, 94},
+                                        {95, 96, 97, 98, 99}},
+
+                                       {{100, 101, 102, 103, 104},
+                                        {105, 106, 107, 108, 109},
+                                        {110, 111, 112, 113, 114},
+                                        {115, 116, 117, 118, 119},
+                                        {120, 121, 122, 123, 124}},
+
+                                       {{125, 126, 127, 128, 129},
+                                        {130, 131, 132, 133, 134},
+                                        {135, 136, 137, 138, 139},
+                                        {140, 141, 142, 143, 144},
+                                        {145, 146, 147, 148, 149}}}}});
+        std::shared_ptr<Tensor> myOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 4, 5, 5>{
+                {{{{6895, 10225, 10486, 10747, 7063},
+                   {10303, 15226, 15577, 15928, 10429},
+                   {11518, 16981, 17332, 17683, 11554},
+                   {12733, 18736, 19087, 19438, 12679},
+                   {8047, 11791, 11998, 12205, 7927}},
+
+                  {{15960, 24069, 24816, 25563, 17100},
+                   {25119, 37818, 38898, 39978, 26703},
+                   {28764, 43218, 44298, 45378, 30258},
+                   {32409, 48618, 49698, 50778, 33813},
+                   {21972, 32925, 33618, 34311, 22824}},
+
+                  {{25041, 37929, 39162, 40395, 27153},
+                   {39951, 60426, 62235, 64044, 42993},
+                   {46026, 69471, 71280, 73089, 48978},
+                   {52101, 78516, 80325, 82134, 54963},
+                   {35913, 54075, 55254, 56433, 37737}},
+
+                  {{34104, 51771, 53490, 55209, 37188},
+                   {54765, 83016, 85554, 88092, 59265},
+                   {63270, 95706, 98244, 100782, 67680},
+                   {71775, 108396, 110934, 113472, 76095},
+                   {49836, 75207, 76872, 78537, 52632}}},
+                 {{{20395, 29800, 30061, 30322, 19663},
+                   {28528, 41551, 41902, 42253, 27304},
+                   {29743, 43306, 43657, 44008, 28429},
+                   {30958, 45061, 45412, 45763, 29554},
+                   {18847, 27316, 27523, 27730, 17827}},
+
+                  {{53760, 80094, 80841, 81588, 54000},
+                   {79794, 118818, 119898, 120978, 80028},
+                   {83439, 124218, 125298, 126378, 83583},
+                   {87084, 129618, 130698, 131778, 87138},
+                   {57072, 84900, 85593, 86286, 57024}},
+
+                  {{87141, 130404, 131637, 132870, 88353},
+                   {131076, 196101, 197910, 199719, 132768},
+                   {137151, 205146, 206955, 208764, 138753},
+                   {143226, 214191, 216000, 217809, 144738},
+                   {95313, 142500, 143679, 144858, 96237}},
+
+                  {{120504, 180696, 182415, 184134, 122688},
+                   {182340, 273366, 275904, 278442, 185490},
+                   {190845, 286056, 288594, 291132, 193905},
+                   {199350, 298746, 301284, 303822, 202320},
+                   {133536, 200082, 201747, 203412, 135432}}}}});
+
+        myConv->getOperator()->associateInput(0, myInput);
         myConv->input(1).first->getOperator()->setOutput(0, myWeights);
         myConv->input(2).first->getOperator()->setOutput(0, myBias);
 
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
index cb5d8872c9c7242bb4aa4efca388d53b578417f9..a833f0273835294bf897ac85805776b606c57df0 100644
--- a/unit_tests/operator/Test_PowImpl.cpp
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -10,14 +10,14 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cmath>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
 #include <chrono>
+#include <cmath>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Pow.hpp"
@@ -30,24 +30,28 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
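+    // (the strictly positive lower bound keeps std::pow and the ln(in0) term
+    // of Pow's backward well-defined; presumably why 0.1f is used)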
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(5));
+    std::uniform_int_distribution<int> boolDist(0, 1);
 
     // Create Pow Operator
     std::shared_ptr<Node> myPow = Pow();
-    auto op = std::static_pointer_cast<OperatorTensor>(myPow-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(myPow->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
 
@@ -62,12 +66,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
     std::chrono::duration<double, std::micro> duration{};
 
     SECTION("PowImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
             std::size_t number_of_operation = 0;
 
@@ -78,13 +78,17 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 for (std::size_t i = 0; i < nbDims; ++i) {
                     dims.push_back(dimSizeDist(gen));
                 }
-                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dims.cbegin(),
+                                    dims.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
 
                 // without broadcasting
-                float* array0 = new float[nb_elements];
-                float* array1 = new float[nb_elements];
-                float* result = new float[nb_elements];
+                float *array0 = new float[nb_elements];
+                float *array1 = new float[nb_elements];
+                float *result = new float[nb_elements];
 
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
@@ -94,21 +98,23 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
 
                 // input0
                 T0->resize(dims);
-                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+                T0->getImpl()->setRawPtr(array0, nb_elements);
 
                 // input1
                 T1->resize(dims);
-                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+                T1->getImpl()->setRawPtr(array1, nb_elements);
 
                 // results
                 Tres->resize(dims);
-                Tres -> getImpl() -> setRawPtr(result, nb_elements);
+                Tres->getImpl()->setRawPtr(result, nb_elements);
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myPow->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -118,8 +124,10 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
 
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -127,7 +135,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
-                // handle dimensions, replace some dimensions with '1' to get broadcasting
+                // handle dimensions, replace some dimensions with '1' to get
+                // broadcasting
                 constexpr std::size_t nbDims = 4;
                 std::vector<std::size_t> dims;
                 for (std::size_t i = 0; i < nbDims; ++i) {
@@ -147,37 +156,62 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 }
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                float *array1 =
+                    new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
-                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
+                for (std::size_t i = 0;
+                     i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+                     ++i) {
                     array1[i] = valueDist(gen);
                 }
 
                 // compute true result
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1[1] * dims1[2] * dims1[3],
+                    dims1[2] * dims1[3],
+                    dims1[3],
+                    1};
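+                // A size-1 dimension is broadcast: its index is clamped to 0
+                // below, so its single element is reused along that axis.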
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
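+                            // idx_out: row-major offset of element
+                            // (a, b, c, 0) in the output buffer.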
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                                    + ((dims1[3] > 1) ? d : 0);
-                                result[idx_out + d] = std::pow(array0[idx0], array1[idx1]);
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " ** " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                                    ((dims1[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    std::pow(array0[idx0], array1[idx1]);
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " ** " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -186,22 +220,30 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+                T1->getImpl()->setRawPtr(
+                    array1,
+                    dims1[0] * dims1[1] * dims1[2] * dims1[3]);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myPow->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -210,15 +252,23 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
-            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
+            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(
+                std::size_t(1),
+                std::size_t(3));
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
@@ -235,15 +285,24 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                         dims1[i] = 1;
                     }
                 }
-                dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
+                dims1.erase(dims1.cbegin(),
+                            dims1.cbegin() + nbRemovedDimsDist(gen));
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
-                float* array1 = new float[array1_size];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                std::size_t array1_size =
+                    std::accumulate(dims1.cbegin(),
+                                    dims1.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
+                float *array1 = new float[array1_size];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]);
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
                 for (std::size_t i = 0; i < array1_size; ++i) {
@@ -252,27 +311,48 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
 
                 // compute true result
                 auto dims1_tmp = dims1;
-                dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
-
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
+                dims1_tmp.insert(dims1_tmp.cbegin(),
+                                 4 - dims1_tmp.size(),
+                                 std::size_t(1));
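+                // dims1_tmp is dims1 left-padded with 1s up to rank 4
+                // (trailing-axis alignment), so the same stride-based
+                // broadcast indexing as above applies.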
+
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[3],
+                    1};
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
-                                                    + ((dims1_tmp[3] > 1) ? d : 0);
-                                result[idx_out + d] = std::pow(array0[idx0], array1[idx1]);
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " ** " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] *
+                                        ((dims1_tmp[2] > 1) ? c : 0) +
+                                    ((dims1_tmp[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    std::pow(array0[idx0], array1[idx1]);
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " ** " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -281,22 +361,28 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, array1_size);
+                T1->getImpl()->setRawPtr(array1, array1_size);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myPow->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -305,95 +391,51 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "number of elements over time spent: "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
     }
 
-
     SECTION("PowImpl_cpu::backward()") {
         SECTION("3D Tensors") {
-            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
-                {
-                    {
-                        {
-                            {2.0, 3.0},
-                            {4.0, 5.0}
-                        },
-                        {
-                            {6.0, 7.0},
-                            {8.0, 9.0}
-                        }
-                    }
-                }
-            ));
-            const auto input1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
-                {
-                    {
-                        {
-                            {1.0, 2.0},
-                            {3.0, 2.0}
-                        },
-                        {
-                            {2.0, 3.0},
-                            {1.0, 0.5}
-                        }
-                    }
-                }
-            ));
-            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
-                {
-                    {
-                        {
-                            {0.5, 1.0},
-                            {1.5, 2.0}
-                        },
-                        {
-                            {2.5, 3.0},
-                            {3.5, 4.0}
-                        }
-                    }
-                }
-            ));
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
-                {
-                    {
-                        {
-                            {0.50000000,   6.00000000},
-                            {72.00000000,  20.00000000}
-                        },
-                        {
-                            {30.00000000, 441.00000000},
-                            {3.50000000,   0.66666669}
-                        }
-                    }
-                }
-            ));
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
-                {
-                    {
-                        {
-                            {  0.693147182, 9.88751030},
-                            {1.33084259e+02, 8.04718933e+01}
-                        },
-                        {
-                            {1.61258362e+02, 2.00234143e+03},
-                            {5.82243652e+01, 2.63666954e+01}
-                        }
-                    }
-                }
-            ));
-            for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
-            {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
+            const auto input0 =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                    {{{{2.0, 3.0}, {4.0, 5.0}}, {{6.0, 7.0}, {8.0, 9.0}}}}));
+            const auto input1 =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                    {{{{1.0, 2.0}, {3.0, 2.0}}, {{2.0, 3.0}, {1.0, 0.5}}}}));
+            const auto gradOut =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                    {{{{0.5, 1.0}, {1.5, 2.0}}, {{2.5, 3.0}, {3.5, 4.0}}}}));
+            const auto expectedGrad0 =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                    {{{{0.50000000, 6.00000000}, {72.00000000, 20.00000000}},
+                      {{30.00000000, 441.00000000},
+                       {3.50000000, 0.66666669}}}}));
+            const auto expectedGrad1 =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 2>(
+                    {{{{0.693147182, 9.88751030},
+                       {1.33084259e+02, 8.04718933e+01}},
+                      {{1.61258362e+02, 2.00234143e+03},
+                       {5.82243652e+01, 2.63666954e+01}}}}));
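+            // The expected gradients above follow the analytic formulas for
+            // out = in0^in1:
+            //   d(in0) = gradOut * in1 * in0^(in1 - 1)
+            //   d(in1) = gradOut * in0^in1 * ln(in0)
+            // e.g. for the first element (in0=2, in1=1, gradOut=0.5):
+            // 0.5 * 1 * 2^0 = 0.5 and 0.5 * 2^1 * ln(2) ≈ 0.693147.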
+            for (const auto T :
+                 {input0, input1, gradOut, expectedGrad0, expectedGrad1}) {
+                T->setBackend("cpu");
+                T->setDataType(DataType::Float32);
             }
             std::shared_ptr<Node> powOp = Pow();
-            auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
+            auto opr =
+                std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
             opr->setDataType(DataType::Float32);
             opr->setBackend("cpu");
             opr->associateInput(0, input0);
@@ -402,71 +444,40 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
             opr->forward();
 
             powOp->backward();
-            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+            REQUIRE(
+                approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
+            REQUIRE(
+                approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
         }
         SECTION("Broadcasting") {
-            const auto input0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
-                {
-                    {
-                        {
-                            {1.0, 2.0, 3.0},
-                            {4.0, 5.0, 6.0}
-                        },
-                        {
-                            {1.5, 2.5, 3.5},
-                            {4.5, 5.5, 6.5}
-                        }
-                    }
-                }
-            ));
-            const auto input1 = std::make_shared<Tensor>(Array1D<float, 3>(
-                {
-                    {0.1, 0.2, 0.3}
-                }
-            ));
-
-            const auto gradOut = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
-                {
-                    {
-                        {
-                            {1.0, 2.0, 3.0},
-                            {4.0, 5.0, 6.0}
-                        },
-                        {
-                            {6.0, 5.0, 4.0},
-                            {3.0, 2.0, 1.0}
-                        }
-                    }
-                }
-            ));
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
-                {
-                    {
-                        {
-                            {0.10000000, 0.22973967, 0.41711676},
-                            {0.11486985, 0.27594593, 0.51353097}
-                        },
-                        {
-                            {0.41655189, 0.48044977, 0.49926791},
-                            {0.07748720, 0.10227509, 0.08092485}
-                        }
-                    }
-                }
-            ));
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>(
-                {
-                    {14.14779854, 22.99299049, 33.56402588}
-                }
-            ));
-
-            for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
-            {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
+            const auto input0 =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                    {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
+                      {{1.5, 2.5, 3.5}, {4.5, 5.5, 6.5}}}}));
+            const auto input1 =
+                std::make_shared<Tensor>(Array1D<float, 3>({{0.1, 0.2, 0.3}}));
+
+            const auto gradOut =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                    {{{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}},
+                      {{6.0, 5.0, 4.0}, {3.0, 2.0, 1.0}}}}));
+            const auto expectedGrad0 =
+                std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+                    {{{{0.10000000, 0.22973967, 0.41711676},
+                       {0.11486985, 0.27594593, 0.51353097}},
+                      {{0.41655189, 0.48044977, 0.49926791},
+                       {0.07748720, 0.10227509, 0.08092485}}}}));
+            const auto expectedGrad1 = std::make_shared<Tensor>(
+                Array1D<float, 3>({{14.14779854, 22.99299049, 33.56402588}}));
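+            // input1 (shape {3}) is broadcast over the two leading axes, so
+            // its gradient is the upstream gradient accumulated back over them.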
+
+            for (const auto T :
+                 {input0, input1, gradOut, expectedGrad0, expectedGrad1}) {
+                T->setBackend("cpu");
+                T->setDataType(DataType::Float32);
             }
             std::shared_ptr<Node> powOp = Pow();
-            auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
+            auto opr =
+                std::static_pointer_cast<OperatorTensor>(powOp->getOperator());
             opr->setDataType(DataType::Float32);
             opr->setBackend("cpu");
             opr->associateInput(0, input0);
@@ -475,8 +486,10 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
             powOp->forward();
 
             powOp->backward();
-            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+            REQUIRE(
+                approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
+            REQUIRE(
+                approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
         }
     }
 }
diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp
index 106d29ecfbf8ba785b4f9e5dba75daa272a86b26..b760929e32c97cfaa262099140641ea6ef8136e8 100644
--- a/unit_tests/operator/Test_ReLUImpl.cpp
+++ b/unit_tests/operator/Test_ReLUImpl.cpp
@@ -18,21 +18,19 @@
 
 #include <memory>
 
-
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
     SECTION("1D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
-            {0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,10> {
-            {0, 1, 2, 0, 4, 0, 0, 7, 8, 9}
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array1D<int, 10>{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array1D<int, 10>{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9}});
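+        // ReLU keeps non-negative values and clamps negatives to zero.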
 
         std::shared_ptr<Node> myReLU = ReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myReLU->forward();
@@ -40,22 +38,17 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
     }
 
     SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
-            {
-                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,10> {
-            {
-                { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-            }
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array2D<int, 2, 10>{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                 {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array2D<int, 2, 10>{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                 {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}});
 
         std::shared_ptr<Node> myReLU = ReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myReLU->forward();
@@ -63,34 +56,21 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
     }
 
     SECTION("3D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
-            {
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                },
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,2,10> {
-            {
-                {
-                    { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                    { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                },
-                {
-                    { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                    { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array3D<int, 2, 2, 10>{{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                     {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                                    {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                     {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<int, 2, 2, 10>{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                     {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                    {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                     {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}});
 
         std::shared_ptr<Node> myReLU = ReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myReLU->forward();
@@ -98,58 +78,30 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
     }
 
     SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    },
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    },
-                    {
-                        { 0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
-                        { 0, 4, 2, 0, 4, 0, 0, 7, 0,10}
-                    }
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{
+                {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}},
+                 {{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 2, 2, 10>{{{{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                        {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}},
+                                       {{{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}},
+                                        {{0, 1, 2, 0, 4, 0, 0, 7, 8, 9},
+                                         {0, 4, 2, 0, 4, 0, 0, 7, 0, 10}}}}});
 
         std::shared_ptr<Node> myReLU = ReLU();
-        auto op = std::static_pointer_cast<OperatorTensor>(myReLU -> getOperator());
-        op->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(myReLU->getOperator());
+        op->associateInput(0, input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         myReLU->forward();
diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp
index dd647c7ba3f90fe7f3554aae7133e97ffa9c99ba..a414a6403625aadce45400654371e10252ac5f7f 100644
--- a/unit_tests/operator/Test_ReduceMeanImpl.cpp
+++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp
@@ -11,12 +11,12 @@
 
 #include <catch2/catch_test_macros.hpp>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/operator/ReduceMean.hpp"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/operator/ReduceMean.hpp"
 
 #include "aidge/backend/cpu.hpp"
 #include "aidge/utils/TensorUtils.hpp"
@@ -24,16 +24,20 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
-    SECTION("ForwardDims")
-    {
+    SECTION("ForwardDims") {
         constexpr std::uint16_t NBTRIALS = 10;
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-        std::uniform_int_distribution<int> boolDist(0,1);
+        std::uniform_real_distribution<float> valueDist(
+            0.1f,
+            1.1f); // Random float distribution between 0.1 and 1.1
+        std::uniform_int_distribution<std::size_t> dimSizeDist(
+            std::size_t(2),
+            std::size_t(10));
+        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                              std::size_t(5));
+        std::uniform_int_distribution<int> boolDist(0, 1);
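+        // boolDist draws, for each dimension, whether it joins the reduced axes.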
 
         SECTION("KeepDims") {
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
@@ -44,22 +48,27 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                     expectedOutDims[i] = dims[i];
-                    if(boolDist(gen)) {
+                    if (boolDist(gen)) {
                         axes.push_back(i);
                         expectedOutDims[i] = 1;
                     }
                 }
-                if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
-                   std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1);
+                if (axes.empty()) { // Default behaviour if no axes are
+                                    // provided is to reduce all dimensions
+                    std::fill(expectedOutDims.begin(),
+                              expectedOutDims.end(),
+                              1);
                 }
 
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
                 myInput->zeros();
                 std::shared_ptr<Node> myReduceMean = ReduceMean(axes, true);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-                op->associateInput(0,myInput);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceMean->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
                 op->forwardDims();
@@ -76,23 +85,27 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
                 std::vector<std::int32_t> axes;
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
-                    if(boolDist(gen)) {
+                    if (boolDist(gen)) {
                         axes.push_back(i);
-                    }
-                    else {
+                    } else {
                         expectedOutDims.push_back(dims[i]);
                     }
                 }
-                if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
-                   expectedOutDims = std::vector<DimSize_t>{1};
+                // Default behaviour if no axes are provided is to reduce
+                // all dimensions
+                if (axes.empty() || expectedOutDims.empty()) {
+                    expectedOutDims = std::vector<DimSize_t>{1};
                 }
 
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
                 std::shared_ptr<Node> myReduceMean = ReduceMean(axes, false);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-                op->associateInput(0,myInput);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceMean->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -109,12 +122,15 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                 }
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
-                std::shared_ptr<Node> myReduceMean = ReduceMean(std::vector<int32_t>{}, false, true);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-                op->associateInput(0,myInput);
+                std::shared_ptr<Node> myReduceMean =
+                    ReduceMean(std::vector<int32_t>{}, false, true);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceMean->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -131,12 +147,15 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                 }
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
-                std::shared_ptr<Node> myReduceMean = ReduceMean({}, false, false);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-                op->associateInput(0,myInput);
+                std::shared_ptr<Node> myReduceMean =
+                    ReduceMean({}, false, false);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceMean->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -149,34 +168,20 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
     }
     SECTION("KeepDims") {
         SECTION("test 1") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-                {
-                    {
-                        { 5.0, 1.0 },
-                        { 20.0, 2.0 }
-                    },
-                    {
-                        { 30.0, 1.0 },
-                        { 40.0, 2.0 }
-                    },
-                    {
-                        { 55.0, 1.0 },
-                        { 60.0, 2.0 }
-                    }
-                }
-            });
-            Tensor myOutput = Tensor(Array3D<float,3,1,2> {
-                {
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                         {{30.0, 1.0}, {40.0, 2.0}},
+                                         {{55.0, 1.0}, {60.0, 2.0}}}});
+            Tensor myOutput = Tensor(Array3D<float, 3, 1, 2>{
-
-                    {{ 12.5, 1.5 }},
-                    {{ 35.0, 1.5 }},
-                    {{ 57.5, 1.5 }}
-                }
-            });
+                {{{12.5, 1.5}}, {{35.0, 1.5}}, {{57.5, 1.5}}}});
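+            // Mean over axis 1 with keepdims: (3, 2, 2) -> (3, 1, 2).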
 
             std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceMean->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceMean->forward();
@@ -185,37 +190,21 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             REQUIRE(*(op->getOutput(0)) == myOutput);
         }
         SECTION("test 2") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> {
-                {
-                    {
-                        { 0.0, 0.0 },
-                        { 1.0, 1.0 },
-                        { 2.0, 2.0 }
-                    },
-                    {
-                        { 3.0, 3.0 },
-                        { 4.0, 4.0 },
-                        { 5.0, 5.0 }
-                    },
-                    {
-                        { 6.0, 6.0 },
-                        { 7.0, 7.0 },
-                        { 8.0, 8.0 }
-                    }
-                }
-            });
-            Tensor myOutput = Tensor(Array3D<float,3,1,1> {
-                {
+            std::shared_ptr<Tensor> myInput =
+                std::make_shared<Tensor>(Array3D<float, 3, 3, 2>{
+                    {{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}},
+                     {{3.0, 3.0}, {4.0, 4.0}, {5.0, 5.0}},
+                     {{6.0, 6.0}, {7.0, 7.0}, {8.0, 8.0}}}});
+            Tensor myOutput = Tensor(Array3D<float, 3, 1, 1>{
-
-                    {{ 1.0 }},
-                    {{ 4.0 }},
-                    {{ 7.0 }}
-                }
-            });
+                {{{1.0}}, {{4.0}}, {{7.0}}}});
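+            // Mean over axes {1, 2} with keepdims: (3, 3, 2) -> (3, 1, 1).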
 
             std::shared_ptr<Node> myReduceMean = ReduceMean({1, 2}, 1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceMean->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceMean->forward();
@@ -225,66 +214,37 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
         }
     }
     SECTION("not_KeepDims") {
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-            {
-                {
-                    { 5.0, 1.0 },
-                    { 20.0, 2.0 }
-                },
-                {
-                    { 30.0, 1.0 },
-                    { 40.0, 2.0 }
-                },
-                {
-                    { 55.0, 1.0 },
-                    { 60.0, 2.0 }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
-            {
-                { 12.5, 1.5 },
-                { 35.0, 1.5 },
-                { 57.5, 1.5 }
-            }
-        });
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                     {{30.0, 1.0}, {40.0, 2.0}},
+                                     {{55.0, 1.0}, {60.0, 2.0}}}});
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+            Array2D<float, 3, 2>{{{12.5, 1.5}, {35.0, 1.5}, {57.5, 1.5}}});
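+        // Mean over axis 1 without keepdims: (3, 2, 2) -> (3, 2).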
 
         std::shared_ptr<Node> myReduceMean = ReduceMean({1}, 0);
-        auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-        op->associateInput(0,myInput);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myReduceMean->getOperator());
+        op->associateInput(0, myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myReduceMean->forward();
         op->getOutput(0)->print();
 
         REQUIRE(*(op->getOutput(0)) == *myOutput);
-
     }
     SECTION("all_axes") {
         SECTION("1") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-                {
-                    {
-                        { 5.0, 1.0 },
-                        { 20.0, 2.0 }
-                    },
-                    {
-                        { 30.0, 1.0 },
-                        { 40.0, 2.0 }
-                    },
-                    {
-                        { 55.0, 1.0 },
-                        { 60.0, 2.0 }
-                    }
-                }
-            });
-            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
-                {18.25}
-            });
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                         {{30.0, 1.0}, {40.0, 2.0}},
+                                         {{55.0, 1.0}, {60.0, 2.0}}}});
+            std::shared_ptr<Tensor> myOutput =
+                std::make_shared<Tensor>(Array1D<float, 1>{{18.25}});
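+            // Mean of all 12 elements: 219 / 12 = 18.25.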
 
             std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceMean->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceMean->forward();
@@ -293,20 +253,20 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             REQUIRE(*(op->getOutput(0)) == *myOutput);
         }
         SECTION("2") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> {
-               {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f},
-                { 0.000766f, 0.272162f, 0.503560f, 0.044163f},
-                { 0.049755f, 0.000305f, 0.143634f, 0.013253f},
-                { 0.096258f, 0.311231f, 0.358143f, 0.000452f},
-                { 0.468617f, 0.015693f, 0.145316f, 0.000105f}}
-            });
-            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
-                {0.1293547f}
-            });
+            std::shared_ptr<Tensor> myInput =
+                std::make_shared<Tensor>(Array2D<float, 5, 4>{
+                    {{0.004232f, 0.105120f, 0.045124f, 0.009205f},
+                     {0.000766f, 0.272162f, 0.503560f, 0.044163f},
+                     {0.049755f, 0.000305f, 0.143634f, 0.013253f},
+                     {0.096258f, 0.311231f, 0.358143f, 0.000452f},
+                     {0.468617f, 0.015693f, 0.145316f, 0.000105f}}});
+            std::shared_ptr<Tensor> myOutput =
+                std::make_shared<Tensor>(Array1D<float, 1>{{0.1293547f}});
 
             std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceMean->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceMean->forward();
@@ -314,26 +274,15 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput));
         }
         SECTION("noop_with_empty_axes") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-                {
-                    {
-                        { 5.0, 1.0 },
-                        { 20.0, 2.0 }
-                    },
-                    {
-                        { 30.0, 1.0 },
-                        { 40.0, 2.0 }
-                    },
-                    {
-                        { 55.0, 1.0 },
-                        { 60.0, 2.0 }
-                    }
-                }
-            });
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                         {{30.0, 1.0}, {40.0, 2.0}},
+                                         {{55.0, 1.0}, {60.0, 2.0}}}});
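+            // ReduceMean({}, 0, 1) sets noop_with_empty_axes, so with no axes
+            // given the forward pass should return the input unchanged.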
 
             std::shared_ptr<Node> myReduceMean = ReduceMean({}, 0, 1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceMean -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceMean->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceMean->forward();
diff --git a/unit_tests/operator/Test_ReduceSumImpl.cpp b/unit_tests/operator/Test_ReduceSumImpl.cpp
index 49569d1f65ff6c51f9681632b16375605ab326e7..654227894cf543b307e7953309d063a4702b6757 100644
--- a/unit_tests/operator/Test_ReduceSumImpl.cpp
+++ b/unit_tests/operator/Test_ReduceSumImpl.cpp
@@ -11,12 +11,12 @@
 
 #include <catch2/catch_test_macros.hpp>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/operator/ReduceSum.hpp"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/operator/ReduceSum.hpp"
 
 #include "aidge/backend/cpu.hpp"
 #include "aidge/utils/TensorUtils.hpp"
@@ -24,16 +24,20 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
-    SECTION("ForwardDims")
-    {
+    SECTION("ForwardDims") {
         constexpr std::uint16_t NBTRIALS = 10;
         // Create a random number generator
         std::random_device rd;
         std::mt19937 gen(rd());
-        std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-        std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-        std::uniform_int_distribution<int> boolDist(0,1);
+        std::uniform_real_distribution<float> valueDist(
+            0.1f,
+            1.1f); // Random float distribution between 0.1 and 1.1
+        std::uniform_int_distribution<std::size_t> dimSizeDist(
+            std::size_t(2),
+            std::size_t(10));
+        std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                              std::size_t(5));
+        std::uniform_int_distribution<int> boolDist(0, 1);
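+        // boolDist draws, for each dimension, whether it joins the reduced axes.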
 
         SECTION("KeepDims") {
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
@@ -44,22 +48,27 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                     expectedOutDims[i] = dims[i];
-                    if(boolDist(gen)) {
+                    if (boolDist(gen)) {
                         axes.push_back(i);
                         expectedOutDims[i] = 1;
                     }
                 }
-                if (axes.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
-                   std::fill(expectedOutDims.begin(), expectedOutDims.end(), 1);
+                if (axes.empty()) { // Default behaviour if no axes are
+                                    // provided is to reduce all dimensions
+                    std::fill(expectedOutDims.begin(),
+                              expectedOutDims.end(),
+                              1);
                 }
 
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
                 myInput->zeros();
                 std::shared_ptr<Node> myReduceSum = ReduceSum(axes, true);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-                op->associateInput(0,myInput);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceSum->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
                 op->forwardDims();
@@ -76,23 +85,27 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
                 std::vector<std::int32_t> axes;
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
-                    if(boolDist(gen)) {
+                    if (boolDist(gen)) {
                         axes.push_back(i);
-                    }
-                    else {
+                    } else {
                         expectedOutDims.push_back(dims[i]);
                     }
                 }
-                if (axes.empty() || expectedOutDims.empty()) { // Default behaviour if no axes are provided is to reduce all dimensions
-                   expectedOutDims = std::vector<DimSize_t>{1};
+                // Default behaviour if no axes are provided is to reduce
+                // all dimensions
+                if (axes.empty() || expectedOutDims.empty()) {
+                    expectedOutDims = std::vector<DimSize_t>{1};
                 }
 
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
                 std::shared_ptr<Node> myReduceSum = ReduceSum(axes, false);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-                op->associateInput(0,myInput);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceSum->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -109,12 +122,15 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                 }
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
-                std::shared_ptr<Node> myReduceSum = ReduceSum(std::vector<int32_t>{}, false, true);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-                op->associateInput(0,myInput);
+                std::shared_ptr<Node> myReduceSum =
+                    ReduceSum(std::vector<int32_t>{}, false, true);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceSum->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -131,12 +147,15 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
                 for (std::size_t i = 0; i < nbDims; i++) {
                     dims[i] = dimSizeDist(gen);
                 }
-                std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(dims);
+                std::shared_ptr<Tensor> myInput =
+                    std::make_shared<Tensor>(dims);
                 myInput->setBackend("cpu");
                 myInput->setDataType(DataType::Float32);
-                std::shared_ptr<Node> myReduceSum = ReduceSum({}, false, false);
-                auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-                op->associateInput(0,myInput);
+                std::shared_ptr<Node> myReduceSum =
+                    ReduceSum({}, false, false);
+                auto op = std::static_pointer_cast<OperatorTensor>(
+                    myReduceSum->getOperator());
+                op->associateInput(0, myInput);
                 op->setDataType(DataType::Float32);
                 op->setBackend("cpu");
 
@@ -149,34 +168,20 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
     }
     SECTION("KeepDims") {
         SECTION("test 1") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-                {
-                    {
-                        { 5.0, 1.0 },
-                        { 20.0, 2.0 }
-                    },
-                    {
-                        { 30.0, 1.0 },
-                        { 40.0, 2.0 }
-                    },
-                    {
-                        { 55.0, 1.0 },
-                        { 60.0, 2.0 }
-                    }
-                }
-            });
-            Tensor myOutput = Tensor(Array3D<float,3,1,2> {
-                {
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                         {{30.0, 1.0}, {40.0, 2.0}},
+                                         {{55.0, 1.0}, {60.0, 2.0}}}});
+            Tensor myOutput = Tensor(Array3D<float, 3, 1, 2>{
-
-                    {{ 25.0, 3.0 }},
-                    {{ 70.0, 3.0 }},
-                    {{ 115.0, 3.0 }}
-                }
-            });
+                {{{25.0, 3.0}}, {{70.0, 3.0}}, {{115.0, 3.0}}}});
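+            // Sum over axis 1 with keepdims: (3, 2, 2) -> (3, 1, 2).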
 
             std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceSum->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceSum->forward();
@@ -185,37 +190,21 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
             REQUIRE(*(op->getOutput(0)) == myOutput);
         }
         SECTION("test 2") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,3,2> {
-                {
-                    {
-                        { 0.0, 0.0 },
-                        { 1.0, 1.0 },
-                        { 2.0, 2.0 }
-                    },
-                    {
-                        { 3.0, 3.0 },
-                        { 4.0, 4.0 },
-                        { 5.0, 5.0 }
-                    },
-                    {
-                        { 6.0, 6.0 },
-                        { 7.0, 7.0 },
-                        { 8.0, 8.0 }
-                    }
-                }
-            });
-            Tensor myOutput = Tensor(Array3D<float,3,1,1> {
-                {
+            std::shared_ptr<Tensor> myInput =
+                std::make_shared<Tensor>(Array3D<float, 3, 3, 2>{
+                    {{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}},
+                     {{3.0, 3.0}, {4.0, 4.0}, {5.0, 5.0}},
+                     {{6.0, 6.0}, {7.0, 7.0}, {8.0, 8.0}}}});
+            Tensor myOutput = Tensor(Array3D<float, 3, 1, 1>{
-
-                    {{ 6.0 }},
-                    {{ 24.0 }},
-                    {{ 42.0 }}
-                }
-            });
+                {{{6.0}}, {{24.0}}, {{42.0}}}});
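+            // Sum over axes {1, 2} with keepdims: (3, 3, 2) -> (3, 1, 1).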
 
             std::shared_ptr<Node> myReduceSum = ReduceSum({1, 2}, 1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceSum->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceSum->forward();
@@ -225,66 +214,37 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
         }
     }
     SECTION("not_KeepDims") {
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-            {
-                {
-                    { 5.0, 1.0 },
-                    { 20.0, 2.0 }
-                },
-                {
-                    { 30.0, 1.0 },
-                    { 40.0, 2.0 }
-                },
-                {
-                    { 55.0, 1.0 },
-                    { 60.0, 2.0 }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
-            {
-                { 25.0, 3.0 },
-                { 70.0, 3.0 },
-                { 115.0, 3.0 }
-            }
-        });
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+            Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                     {{30.0, 1.0}, {40.0, 2.0}},
+                                     {{55.0, 1.0}, {60.0, 2.0}}}});
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+            Array2D<float, 3, 2>{{{25.0, 3.0}, {70.0, 3.0}, {115.0, 3.0}}});
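+        // Sum over axis 1 without keepdims: (3, 2, 2) -> (3, 2).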
 
         std::shared_ptr<Node> myReduceSum = ReduceSum({1}, 0);
-        auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-        op->associateInput(0,myInput);
+        auto op = std::static_pointer_cast<OperatorTensor>(
+            myReduceSum->getOperator());
+        op->associateInput(0, myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         myReduceSum->forward();
         op->getOutput(0)->print();
 
         REQUIRE(*(op->getOutput(0)) == *myOutput);
-
     }
     SECTION("all_axes") {
         SECTION("1") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-                {
-                    {
-                        { 5.0, 1.0 },
-                        { 20.0, 2.0 }
-                    },
-                    {
-                        { 30.0, 1.0 },
-                        { 40.0, 2.0 }
-                    },
-                    {
-                        { 55.0, 1.0 },
-                        { 60.0, 2.0 }
-                    }
-                }
-            });
-            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
-                {219.0}
-            });
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                         {{30.0, 1.0}, {40.0, 2.0}},
+                                         {{55.0, 1.0}, {60.0, 2.0}}}});
+            std::shared_ptr<Tensor> myOutput =
+                std::make_shared<Tensor>(Array1D<float, 1>{{219.0}});
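+            // Sum of all 12 elements: 5+1+20+2 + 30+1+40+2 + 55+1+60+2 = 219.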
 
             std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceSum->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceSum->forward();
@@ -293,20 +253,20 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
             REQUIRE(*(op->getOutput(0)) == *myOutput);
         }
         SECTION("2") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<float,5,4> {
-               {{ 0.004232f, 0.105120f, 0.045124f, 0.009205f},
-                { 0.000766f, 0.272162f, 0.503560f, 0.044163f},
-                { 0.049755f, 0.000305f, 0.143634f, 0.013253f},
-                { 0.096258f, 0.311231f, 0.358143f, 0.000452f},
-                { 0.468617f, 0.015693f, 0.145316f, 0.000105f}}
-            });
-            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array1D<float,1> {
-                {2.587094f}
-            });
+            std::shared_ptr<Tensor> myInput =
+                std::make_shared<Tensor>(Array2D<float, 5, 4>{
+                    {{0.004232f, 0.105120f, 0.045124f, 0.009205f},
+                     {0.000766f, 0.272162f, 0.503560f, 0.044163f},
+                     {0.049755f, 0.000305f, 0.143634f, 0.013253f},
+                     {0.096258f, 0.311231f, 0.358143f, 0.000452f},
+                     {0.468617f, 0.015693f, 0.145316f, 0.000105f}}});
+            std::shared_ptr<Tensor> myOutput =
+                std::make_shared<Tensor>(Array1D<float, 1>{{2.587094f}});
 
             std::shared_ptr<Node> myReduceSum = ReduceSum({0, 1}, 0);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceSum->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceSum->forward();
@@ -314,26 +274,15 @@ TEST_CASE("[cpu/operator] ReduceSum(forward)", "[ReduceSum][CPU]") {
             REQUIRE(approxEq<float>(*(op->getOutput(0)), *myOutput));
         }
         SECTION("noop_with_empty_axes") {
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array3D<float,3,2,2> {
-                {
-                    {
-                        { 5.0, 1.0 },
-                        { 20.0, 2.0 }
-                    },
-                    {
-                        { 30.0, 1.0 },
-                        { 40.0, 2.0 }
-                    },
-                    {
-                        { 55.0, 1.0 },
-                        { 60.0, 2.0 }
-                    }
-                }
-            });
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array3D<float, 3, 2, 2>{{{{5.0, 1.0}, {20.0, 2.0}},
+                                         {{30.0, 1.0}, {40.0, 2.0}},
+                                         {{55.0, 1.0}, {60.0, 2.0}}}});
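+            // ReduceSum({}, 0, 1) sets noop_with_empty_axes, so with no axes
+            // given the forward pass should return the input unchanged.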
 
             std::shared_ptr<Node> myReduceSum = ReduceSum({}, 0, 1);
-            auto op = std::static_pointer_cast<OperatorTensor>(myReduceSum -> getOperator());
-            op->associateInput(0,myInput);
+            auto op = std::static_pointer_cast<OperatorTensor>(
+                myReduceSum->getOperator());
+            op->associateInput(0, myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
             myReduceSum->forward();
diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp
index 2b9f89e62c09c04a7f848c362336418ef62aecce..6bd5920f0c50410a5e699a6c0bbc019b50e79d76 100644
--- a/unit_tests/operator/Test_SliceImpl.cpp
+++ b/unit_tests/operator/Test_SliceImpl.cpp
@@ -18,22 +18,24 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
     SECTION("1D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
-            {0, 1, -2,-3, 4,-5,-6, 7, 8, 9}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,3> {
-            {0, 1, -2}
-        });
-        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,1>{{0}});
-        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,1>{{3}});
-        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,1>{{0}});
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array1D<int, 10>{{0, 1, -2, -3, 4, -5, -6, 7, 8, 9}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array1D<int, 3>{{0, 1, -2}});
+        std::shared_ptr<Tensor> starts =
+            std::make_shared<Tensor>(Array1D<int, 1>{{0}});
+        std::shared_ptr<Tensor> ends =
+            std::make_shared<Tensor>(Array1D<int, 1>{{3}});
+        std::shared_ptr<Tensor> axes =
+            std::make_shared<Tensor>(Array1D<int, 1>{{0}});
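+        // Keep elements [0, 3) along axis 0 -> {0, 1, -2}.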
 
         std::shared_ptr<Node> mySlice = Slice();
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->associateInput(1,starts);
-        mySlice->getOperator()->associateInput(2,ends);
-        mySlice->getOperator()->associateInput(3,axes);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0, input0);
+        mySlice->getOperator()->associateInput(1, starts);
+        mySlice->getOperator()->associateInput(2, ends);
+        mySlice->getOperator()->associateInput(3, axes);
         mySlice->getOperator()->setDataType(DataType::Int32);
         mySlice->getOperator()->setBackend("cpu");
         mySlice->forward();
@@ -44,28 +46,25 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
     }
 
     SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
-            {
-                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> {
-            {
-                {-5,-6, 7},
-                {-5,-6, 7}
-            }
-        });
-        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0,5}});
-        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2,8}});
-        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0,1}});
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array2D<int, 2, 10>{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                 {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array2D<int, 2, 3>{{{-5, -6, 7}, {-5, -6, 7}}});
+        std::shared_ptr<Tensor> starts =
+            std::make_shared<Tensor>(Array1D<int, 2>{{0, 5}});
+        std::shared_ptr<Tensor> ends =
+            std::make_shared<Tensor>(Array1D<int, 2>{{2, 8}});
+        std::shared_ptr<Tensor> axes =
+            std::make_shared<Tensor>(Array1D<int, 2>{{0, 1}});
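+        // Keep rows [0, 2) and columns [5, 8) -> a 2x3 slice.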
 
         std::shared_ptr<Node> mySlice = Slice();
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->associateInput(1,starts);
-        mySlice->getOperator()->associateInput(2,ends);
-        mySlice->getOperator()->associateInput(3,axes);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0, input0);
+        mySlice->getOperator()->associateInput(1, starts);
+        mySlice->getOperator()->associateInput(2, ends);
+        mySlice->getOperator()->associateInput(3, axes);
         mySlice->getOperator()->setDataType(DataType::Int32);
         mySlice->getOperator()->setBackend("cpu");
         mySlice->forward();
@@ -76,35 +75,27 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
     }
 
     SECTION("3D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
-            {
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                },
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> {
-            {
-                {
-                    { 4,-5,-6}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,3>{{0,1,4}});
-        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,3>{{1,2,7}});
-        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,3>{{0,1,2}});
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(
+            Array3D<int, 2, 2, 10>{{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                     {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                                    {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                                     {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array3D<int, 1, 1, 3>{{{{4, -5, -6}}}});
+        std::shared_ptr<Tensor> starts =
+            std::make_shared<Tensor>(Array1D<int, 3>{{0, 1, 4}});
+        std::shared_ptr<Tensor> ends =
+            std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 7}});
+        std::shared_ptr<Tensor> axes =
+            std::make_shared<Tensor>(Array1D<int, 3>{{0, 1, 2}});
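+        // Keep [0,1) x [1,2) x [4,7) -> a 1x1x3 slice {4, -5, -6}.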
 
         std::shared_ptr<Node> mySlice = Slice();
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->associateInput(1,starts);
-        mySlice->getOperator()->associateInput(2,ends);
-        mySlice->getOperator()->associateInput(3,axes);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0, input0);
+        mySlice->getOperator()->associateInput(1, starts);
+        mySlice->getOperator()->associateInput(2, ends);
+        mySlice->getOperator()->associateInput(3, axes);
         mySlice->getOperator()->setDataType(DataType::Int32);
         mySlice->getOperator()->setBackend("cpu");
         mySlice->forward();
@@ -115,64 +106,40 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
     }
 
     SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,4>{{0,0,0,0}});
-        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,4>{{2,2,2,10}});
-        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,4>{{0,1,2,3}});
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{
+                {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}},
+                 {{{0, 1, 2, -3, 6, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 11, -5, -6, 7, -1, 10}}}}});
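+        // starts/ends/axes span every axis entirely, so the output equals the input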
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{
+                {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}},
+                 {{{0, 1, 2, -3, 6, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 11, -5, -6, 7, -1, 10}}}}});
+        std::shared_ptr<Tensor> starts =
+            std::make_shared<Tensor>(Array1D<int, 4>{{0, 0, 0, 0}});
+        std::shared_ptr<Tensor> ends =
+            std::make_shared<Tensor>(Array1D<int, 4>{{2, 2, 2, 10}});
+        std::shared_ptr<Tensor> axes =
+            std::make_shared<Tensor>(Array1D<int, 4>{{0, 1, 2, 3}});
 
         std::shared_ptr<Node> mySlice = Slice();
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->associateInput(1,starts);
-        mySlice->getOperator()->associateInput(2,ends);
-        mySlice->getOperator()->associateInput(3,axes);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0, input0);
+        mySlice->getOperator()->associateInput(1, starts);
+        mySlice->getOperator()->associateInput(2, ends);
+        mySlice->getOperator()->associateInput(3, axes);
         mySlice->getOperator()->setDataType(DataType::Int32);
         mySlice->getOperator()->setBackend("cpu");
         mySlice->forward();
@@ -183,43 +150,24 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
     }
 
     SECTION("Attributes instead of inputs") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,1,1,5> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4}
-                    }
-                }
-            }
-        });
-
-        std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,5}, {0,1,2,3}, {1,1,1,1});
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array4D<int, 2, 2, 2, 10>{
+                {{{{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}}},
+                 {{{0, 1, 2, -3, 6, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 4, -5, -6, 7, -1, 10}},
+                  {{0, 1, 2, -3, 4, -5, -6, 7, 8, 9},
+                   {-5, 4, 2, -3, 11, -5, -6, 7, -1, 10}}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 1, 1, 1, 5>{{{{{0, 1, 2, -3, 4}}}}});
+
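+        // starts, ends, axes and steps are passed as attributes instead of input tensors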
+        std::shared_ptr<Node> mySlice =
+            Slice({0, 0, 0, 0}, {1, 1, 1, 5}, {0, 1, 2, 3}, {1, 1, 1, 1});
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0, input0);
         mySlice->getOperator()->setDataType(DataType::Int32);
         mySlice->getOperator()->setBackend("cpu");
         mySlice->forward();
@@ -230,44 +178,27 @@ TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
     }
 
     SECTION("Different Steps") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,4,2,8> {
-            {
-                {
-                    { 0, 1, 2,-3, 4,-5,-6,7},
-                    {-5, 4, 2,-3, 4,-5,-6,-7}
-                },
-                {
-                    { 10, 11, 12,-13, 14,-15,-16,17},
-                    {-15, 14, 12,-13, 14,-15,-16,-17}
-                },
-                {
-                    { 20, 21, 22,-23, 24,-25,-26,27},
-                    {-25, 24, 22,-23, 24,-25,-26,-27}
-                },
-                {
-                    { 30, 31, 32,-33, 34,-35,-36,37},
-                    {-35, 34, 32,-33, 34,-35,-36,-37}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,1,3> {
-            {
-                {
-                    { 7, 4, 1}
-                },
-                {
-                    { 27, 24, 21}
-                }
-            }
-        });
-
-        std::shared_ptr<Node> mySlice = Slice({0,0,7}, {4,1,0}, {0,1,2}, {2,1,-3});
+        std::shared_ptr<Tensor> input0 =
+            std::make_shared<Tensor>(Array3D<int, 4, 2, 8>{
+                {{{0, 1, 2, -3, 4, -5, -6, 7}, {-5, 4, 2, -3, 4, -5, -6, -7}},
+                 {{10, 11, 12, -13, 14, -15, -16, 17},
+                  {-15, 14, 12, -13, 14, -15, -16, -17}},
+                 {{20, 21, 22, -23, 24, -25, -26, 27},
+                  {-25, 24, 22, -23, 24, -25, -26, -27}},
+                 {{30, 31, 32, -33, 34, -35, -36, 37},
+                  {-35, 34, 32, -33, 34, -35, -36, -37}}}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
+            Array3D<int, 2, 1, 3>{{{{7, 4, 1}}, {{27, 24, 21}}}});
+
+        std::shared_ptr<Node> mySlice =
+            Slice({0, 0, 7}, {4, 1, 0}, {0, 1, 2}, {2, 1, -3});
         // Steps are 2,1,-3 so the slice will be:
         // on Axis 0: from 0 to 4 by step of 2
         // on Axis 1: from 0 to 1 by step of 1
         // on Axis 2: from 7 to 0 by step of -3 (reverse the order of elements)
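+        // selected indices: axis 0 -> {0, 2}, axis 1 -> {0}, axis 2 -> {7, 4, 1}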
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0, input0);
         mySlice->getOperator()->setDataType(DataType::Int32);
         mySlice->getOperator()->setBackend("cpu");
         mySlice->forward();
diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp
index da6c6f0d35a1db9ad9099a40b7e83459e14a20f5..17b384af123a5e55fe39e79f69844e97dfadb1ff 100644
--- a/unit_tests/operator/Test_SoftmaxImpl.cpp
+++ b/unit_tests/operator/Test_SoftmaxImpl.cpp
@@ -22,102 +22,121 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
     SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,10> {
-            {
-                {-0.21908280,  0.62226844, -0.01738115,  0.49075750,  0.42159843,
-                    -0.70403218,  0.95780319,  1.39435363,  0.25255841,  0.20038256},
-                { 0.23626225,  1.84539008,  1.89050162, -0.64871430,  0.37908587,
-                    0.35077620, -0.78156322, -0.98952234,  0.04166317,  1.34357309}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,10> {
-            {
-                {0.04883239, 0.11326669, 0.05974559, 0.09930880, 0.09267281, 0.03006749,
-                    0.15842478, 0.24514021, 0.07825989, 0.07428131},
-                {0.05429055, 0.27136859, 0.28389078, 0.02240700, 0.06262558, 0.06087753,
-                    0.01961952, 0.01593576, 0.04469007, 0.16429459}
-            }
-        });
+        std::shared_ptr<Tensor> input =
+            std::make_shared<Tensor>(Array2D<float, 2, 10>{{{-0.21908280,
+                                                             0.62226844,
+                                                             -0.01738115,
+                                                             0.49075750,
+                                                             0.42159843,
+                                                             -0.70403218,
+                                                             0.95780319,
+                                                             1.39435363,
+                                                             0.25255841,
+                                                             0.20038256},
+                                                            {0.23626225,
+                                                             1.84539008,
+                                                             1.89050162,
+                                                             -0.64871430,
+                                                             0.37908587,
+                                                             0.35077620,
+                                                             -0.78156322,
+                                                             -0.98952234,
+                                                             0.04166317,
+                                                             1.34357309}}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array2D<float, 2, 10>{{{0.04883239,
+                                                             0.11326669,
+                                                             0.05974559,
+                                                             0.09930880,
+                                                             0.09267281,
+                                                             0.03006749,
+                                                             0.15842478,
+                                                             0.24514021,
+                                                             0.07825989,
+                                                             0.07428131},
+                                                            {0.05429055,
+                                                             0.27136859,
+                                                             0.28389078,
+                                                             0.02240700,
+                                                             0.06262558,
+                                                             0.06087753,
+                                                             0.01961952,
+                                                             0.01593576,
+                                                             0.04469007,
+                                                             0.16429459}}});
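+        // softmax is taken over axis 1: each row of expectedOutput sums to 1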
 
         std::shared_ptr<Node> mySoftmax = Softmax(1);
-        auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator());
-        op->associateInput(0,input);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySoftmax->getOperator());
+        op->associateInput(0, input);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         mySoftmax->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
-
     }
     SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-            {
-                {
-                    {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
-                     {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
-                     {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
-                    {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
-                     {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
-                     {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
-                    {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
-                     {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
-                     {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
-                },
-                {
-                    {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
-                     {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
-                     {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
-                    {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
-                     {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
-                     {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
-                    {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
-                     {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
-                     {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-            {
-                {
-                    {{0.45109013, 0.42849392, 0.43775153},
-                     {0.27246451, 0.35967633, 0.50454903},
-                     {0.20397615, 0.20457645, 0.33543545}},
-                    {{0.24571852, 0.34723747, 0.25694931},
-                     {0.24519968, 0.30904123, 0.18692467},
-                     {0.35646603, 0.28991172, 0.41476840}},
-                    {{0.30319133, 0.22426860, 0.30529919},
-                     {0.48233581, 0.33128241, 0.30852637},
-                     {0.43955776, 0.50551182, 0.24979614}}
-                },
-                {
-                    {{0.33434108, 0.20638679, 0.39505392},
-                     {0.41263384, 0.20198789, 0.33922729},
-                     {0.36339980, 0.34127754, 0.28713942}},
-                    {{0.19819947, 0.33448750, 0.34715438},
-                     {0.17702937, 0.42464229, 0.44204772},
-                     {0.29093260, 0.33410171, 0.22306615}},
-                    {{0.46745953, 0.45912567, 0.25779176},
-                     {0.41033682, 0.37336978, 0.21872495},
-                     {0.34566763, 0.32462072, 0.48979440}}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input =
+            std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+                {{{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
+                   {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
+                   {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
+                  {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
+                   {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
+                   {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
+                  {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
+                   {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
+                   {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}},
+                 {{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
+                   {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
+                   {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
+                  {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
+                   {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
+                   {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
+                  {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
+                   {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
+                   {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}}}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+                {{{{0.45109013, 0.42849392, 0.43775153},
+                   {0.27246451, 0.35967633, 0.50454903},
+                   {0.20397615, 0.20457645, 0.33543545}},
+                  {{0.24571852, 0.34723747, 0.25694931},
+                   {0.24519968, 0.30904123, 0.18692467},
+                   {0.35646603, 0.28991172, 0.41476840}},
+                  {{0.30319133, 0.22426860, 0.30529919},
+                   {0.48233581, 0.33128241, 0.30852637},
+                   {0.43955776, 0.50551182, 0.24979614}}},
+                 {{{0.33434108, 0.20638679, 0.39505392},
+                   {0.41263384, 0.20198789, 0.33922729},
+                   {0.36339980, 0.34127754, 0.28713942}},
+                  {{0.19819947, 0.33448750, 0.34715438},
+                   {0.17702937, 0.42464229, 0.44204772},
+                   {0.29093260, 0.33410171, 0.22306615}},
+                  {{0.46745953, 0.45912567, 0.25779176},
+                   {0.41033682, 0.37336978, 0.21872495},
+                   {0.34566763, 0.32462072, 0.48979440}}}}});
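+        // softmax over axis 1 (channels): the 3 channel values at each position sum to 1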
 
         std::shared_ptr<Node> mySoftmax = Softmax(1);
-        auto op = std::static_pointer_cast<OperatorTensor>(mySoftmax -> getOperator());
-        op->associateInput(0,input);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySoftmax->getOperator());
+        op->associateInput(0, input);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
         mySoftmax->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< expectedOutput->size(); ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
     }
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_SqrtImpl.cpp b/unit_tests/operator/Test_SqrtImpl.cpp
index d630c66c8b8085e6d382841da6b7cac2c88b1dd0..025ef6c30e28d96dba004c39a95ac1fb09e53c4e 100644
--- a/unit_tests/operator/Test_SqrtImpl.cpp
+++ b/unit_tests/operator/Test_SqrtImpl.cpp
@@ -22,100 +22,87 @@ using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") {
     SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,2> {
-            {
-                {16.00000000,  0.62226844},
-                { 0.00000000,  1.84539008}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
-            {
-                {4.00000000, 0.78883994},
-                {0.00000000, 1.35845140}
-            }
-        });
+        std::shared_ptr<Tensor> input =
+            std::make_shared<Tensor>(Array2D<float, 2, 2>{
+                {{16.00000000, 0.62226844}, {0.00000000, 1.84539008}}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array2D<float, 2, 2>{
+                {{4.00000000, 0.78883994}, {0.00000000, 1.35845140}}});
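+        // element-wise square root, including the edge cases sqrt(16) = 4 and sqrt(0) = 0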
 
         std::shared_ptr<Node> mySqrt = Sqrt();
-        auto op = std::static_pointer_cast<OperatorTensor>(mySqrt -> getOperator());
-        mySqrt->getOperator()->associateInput(0,input);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySqrt->getOperator());
+        mySqrt->getOperator()->associateInput(0, input);
         mySqrt->getOperator()->setDataType(DataType::Float32);
         mySqrt->getOperator()->setBackend("cpu");
         mySqrt->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< 4; ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 4; ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
-
     }
 
     SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-            {
-                {
-                    {{0.06218481, 0.46850157, 0.60914326},
-                     {0.57470602, 0.09943211, 0.59992820},
-                     {0.99623793, 0.54931718, 0.89343822}},
-                    {{0.75176072, 0.38237786, 0.84824580},
-                     {0.10619396, 0.11959118, 0.93499404},
-                     {0.65563291, 0.02913034, 0.17093092}},
-                    {{0.36303985, 0.92073035, 0.79146117},
-                     {0.88962847, 0.94561219, 0.92033130},
-                     {0.52903181, 0.13397896, 0.76086712}}
-                },
-                {
-                    {{0.31242222, 0.80526417, 0.48411584},
-                     {0.84375203, 0.65408552, 0.55028963},
-                     {0.77546734, 0.06203610, 0.83163154}},
-                    {{0.46342927, 0.53631741, 0.39145601},
-                     {0.14204198, 0.84214240, 0.94185621},
-                     {0.05068624, 0.99889028, 0.38464361}},
-                    {{0.37591159, 0.51769549, 0.30288595},
-                     {0.96883464, 0.35154045, 0.55648762},
-                     {0.13022375, 0.73467660, 0.02705121}}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> input =
+            std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+                {{{{0.06218481, 0.46850157, 0.60914326},
+                   {0.57470602, 0.09943211, 0.59992820},
+                   {0.99623793, 0.54931718, 0.89343822}},
+                  {{0.75176072, 0.38237786, 0.84824580},
+                   {0.10619396, 0.11959118, 0.93499404},
+                   {0.65563291, 0.02913034, 0.17093092}},
+                  {{0.36303985, 0.92073035, 0.79146117},
+                   {0.88962847, 0.94561219, 0.92033130},
+                   {0.52903181, 0.13397896, 0.76086712}}},
+                 {{{0.31242222, 0.80526417, 0.48411584},
+                   {0.84375203, 0.65408552, 0.55028963},
+                   {0.77546734, 0.06203610, 0.83163154}},
+                  {{0.46342927, 0.53631741, 0.39145601},
+                   {0.14204198, 0.84214240, 0.94185621},
+                   {0.05068624, 0.99889028, 0.38464361}},
+                  {{0.37591159, 0.51769549, 0.30288595},
+                   {0.96883464, 0.35154045, 0.55648762},
+                   {0.13022375, 0.73467660, 0.02705121}}}}});
 
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
-            {
-                {
-                    {{0.24936883, 0.6844717,  0.7804763},
-                     {0.75809366, 0.31532857, 0.7745503},
-                     {0.9981172,  0.7411593,  0.9452186}},
-                    {{0.86704135, 0.6183671,  0.9210026},
-                     {0.32587415, 0.34581956, 0.9669509},
-                     {0.80971164, 0.17067613, 0.41343793}},
-                    {{0.60252786, 0.9595469,  0.88964105},
-                     {0.9432012,  0.97242594, 0.95933896},
-                     {0.7273457,  0.36603138, 0.87227696}}
-                },
-                {
-                    {{0.55894744, 0.89736515, 0.69578433},
-                     {0.91855973, 0.8087555,  0.7418151},
-                     {0.88060623, 0.24907047, 0.91193837}},
-                    {{0.6807564,  0.73233694, 0.6256645},
-                     {0.37688458, 0.9176832,  0.9704928},
-                     {0.22513604, 0.99944496, 0.62019646}},
-                    {{0.6131163,  0.7195106,  0.5503507},
-                     {0.984294,   0.59290844, 0.745981},
-                     {0.3608653,  0.8571328,  0.16447252}}
-                }
-            }
-        });
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+                {{{{0.24936883, 0.6844717, 0.7804763},
+                   {0.75809366, 0.31532857, 0.7745503},
+                   {0.9981172, 0.7411593, 0.9452186}},
+                  {{0.86704135, 0.6183671, 0.9210026},
+                   {0.32587415, 0.34581956, 0.9669509},
+                   {0.80971164, 0.17067613, 0.41343793}},
+                  {{0.60252786, 0.9595469, 0.88964105},
+                   {0.9432012, 0.97242594, 0.95933896},
+                   {0.7273457, 0.36603138, 0.87227696}}},
+                 {{{0.55894744, 0.89736515, 0.69578433},
+                   {0.91855973, 0.8087555, 0.7418151},
+                   {0.88060623, 0.24907047, 0.91193837}},
+                  {{0.6807564, 0.73233694, 0.6256645},
+                   {0.37688458, 0.9176832, 0.9704928},
+                   {0.22513604, 0.99944496, 0.62019646}},
+                  {{0.6131163, 0.7195106, 0.5503507},
+                   {0.984294, 0.59290844, 0.745981},
+                   {0.3608653, 0.8571328, 0.16447252}}}}});
 
         std::shared_ptr<Node> mySqrt = Sqrt();
-        auto op = std::static_pointer_cast<OperatorTensor>(mySqrt -> getOperator());
-        mySqrt->getOperator()->associateInput(0,input);
+        auto op =
+            std::static_pointer_cast<OperatorTensor>(mySqrt->getOperator());
+        mySqrt->getOperator()->associateInput(0, input);
         mySqrt->getOperator()->setDataType(DataType::Float32);
         mySqrt->getOperator()->setBackend("cpu");
         mySqrt->forward();
 
-        float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
-        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
-        for (std::size_t i = 0; i< 54; ++i) {
-            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);
+        float *resPtr =
+            static_cast<float *>(op->getOutput(0)->getImpl()->rawPtr());
+        float *expectedPtr =
+            static_cast<float *>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 54; ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < 0.00001);
         }
     }
 }
\ No newline at end of file
diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp
index 44666ae631152c8898e24f7003b0c2ede8c67b84..8bf6937708f57fc76d6939bc1960087cd67148a9 100644
--- a/unit_tests/operator/Test_SubImpl.cpp
+++ b/unit_tests/operator/Test_SubImpl.cpp
@@ -10,13 +10,13 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
 #include <chrono>
+#include <cstddef> // std::size_t
+#include <cstdint> // std::uint16_t
 #include <iostream>
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric> // std::accumulate
+#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
 
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Sub.hpp"
@@ -29,24 +29,28 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(0.1f, 1.1f); // Random float distribution between 0 and 1
-    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(10));
-    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(5));
-    std::uniform_int_distribution<int> boolDist(0,1);
+    std::uniform_real_distribution<float> valueDist(
+        0.1f,
+        1.1f); // Random float distribution between 0.1 and 1.1
+    std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2),
+                                                           std::size_t(10));
+    std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1),
+                                                          std::size_t(5));
+    std::uniform_int_distribution<int> boolDist(0, 1);
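+    // boolDist decides, per axis, whether a dimension is collapsed to 1 (broadcasting)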
 
     // Create Sub Operator
     std::shared_ptr<Node> mySub = Sub();
-    auto op = std::static_pointer_cast<OperatorTensor>(mySub-> getOperator());
+    auto op = std::static_pointer_cast<OperatorTensor>(mySub->getOperator());
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
     // Create 2 input Tensors
     std::shared_ptr<Tensor> T0 = std::make_shared<Tensor>();
-    op->associateInput(0,T0);
+    op->associateInput(0, T0);
     T0->setDataType(DataType::Float32);
     T0->setBackend("cpu");
     std::shared_ptr<Tensor> T1 = std::make_shared<Tensor>();
-    op -> associateInput(1,T1);
+    op->associateInput(1, T1);
     T1->setDataType(DataType::Float32);
     T1->setBackend("cpu");
 
@@ -61,12 +65,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
     std::chrono::duration<double, std::micro> duration{};
 
     SECTION("SubImpl_cpu::forward()") {
-        SECTION("Scalar / Scalar") {
-
-        }
-        SECTION("Scalar / +1-D Tensor") {
-
-        }
+        SECTION("Scalar / Scalar") {}
+        SECTION("Scalar / +1-D Tensor") {}
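+        // (scalar cases are placeholders and currently left empty)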
         SECTION("+1-D Tensor / +1-D Tensor - same dimensions") {
             std::size_t number_of_operation = 0;
 
@@ -77,13 +77,17 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 for (std::size_t i = 0; i < nbDims; ++i) {
                     dims.push_back(dimSizeDist(gen));
                 }
-                const std::size_t nb_elements = std::accumulate(dims.cbegin(), dims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dims.cbegin(),
+                                    dims.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
 
                 // without broadcasting
-                float* array0 = new float[nb_elements];
-                float* array1 = new float[nb_elements];
-                float* result = new float[nb_elements];
+                float *array0 = new float[nb_elements];
+                float *array1 = new float[nb_elements];
+                float *result = new float[nb_elements];
 
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
@@ -93,21 +97,23 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
 
                 // input0
                 T0->resize(dims);
-                T0 -> getImpl() -> setRawPtr(array0, nb_elements);
+                T0->getImpl()->setRawPtr(array0, nb_elements);
 
                 // input1
                 T1->resize(dims);
-                T1 -> getImpl() -> setRawPtr(array1, nb_elements);
+                T1->getImpl()->setRawPtr(array1, nb_elements);
 
                 // results
                 Tres->resize(dims);
-                Tres -> getImpl() -> setRawPtr(result, nb_elements);
+                Tres->getImpl()->setRawPtr(result, nb_elements);
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 mySub->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
@@ -117,8 +123,10 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
 
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "throughput (elements / μs): "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -126,7 +134,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
-                // handle dimensions, replace some dimensions with '1' to get broadcasting
+                // randomly collapse some dimensions to '1' so that
+                // broadcasting is exercised
                 constexpr std::size_t nbDims = 4;
                 std::vector<std::size_t> dims;
                 for (std::size_t i = 0; i < nbDims; ++i) {
@@ -146,37 +155,62 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 }
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                float* array1 = new float[dims1[0]*dims1[1]*dims1[2]*dims1[3]];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < dims0[0]*dims0[1]*dims0[2]*dims0[3]; ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                float *array1 =
+                    new float[dims1[0] * dims1[1] * dims1[2] * dims1[3]];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < dims0[0] * dims0[1] * dims0[2] * dims0[3];
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
-                for (std::size_t i = 0; i < dims1[0]*dims1[1]*dims1[2]*dims1[3]; ++i) {
+                for (std::size_t i = 0;
+                     i < dims1[0] * dims1[1] * dims1[2] * dims1[3];
+                     ++i) {
                     array1[i] = valueDist(gen);
                 }
 
                 // compute true result
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1[1]*dims1[2]*dims1[3], dims1[2]*dims1[3], dims1[3], 1};
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1[1] * dims1[2] * dims1[3],
+                    dims1[2] * dims1[3],
+                    dims1[3],
+                    1};
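+                // a broadcast axis (size 1) keeps index 0 below, reusing the same element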
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1[2] > 1) ? c : 0)
-                                                    + ((dims1[3] > 1) ? d : 0);
-                                result[idx_out + d] = array0[idx0] - array1[idx1];
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] * ((dims1[2] > 1) ? c : 0) +
+                                    ((dims1[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    array0[idx0] - array1[idx1];
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " - " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -185,22 +219,30 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, dims1[0]*dims1[1]*dims1[2]*dims1[3]);
+                T1->getImpl()->setRawPtr(
+                    array1,
+                    dims1[0] * dims1[1] * dims1[2] * dims1[3]);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 mySub->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -209,15 +251,23 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "throughput (elements / μs): "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
-            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(std::size_t(1), std::size_t(3));
+            std::uniform_int_distribution<std::size_t> nbRemovedDimsDist(
+                std::size_t(1),
+                std::size_t(3));
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
                 // generate 2 random Tensors
@@ -234,15 +284,24 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                         dims1[i] = 1;
                     }
                 }
-                dims1.erase(dims1.cbegin(), dims1.cbegin() + nbRemovedDimsDist(gen));
+                dims1.erase(dims1.cbegin(),
+                            dims1.cbegin() + nbRemovedDimsDist(gen));
 
                 // create arrays and fill them with random values
-                float* array0 = new float[dims0[0]*dims0[1]*dims0[2]*dims0[3]];
-                std::size_t array1_size = std::accumulate(dims1.cbegin(), dims1.cend(), std::size_t(1), std::multiplies<std::size_t>());
-                float* array1 = new float[array1_size];
-                float* result = new float[dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]];
-
-                for (std::size_t i = 0; i < (dims0[0]*dims0[1]*dims0[2]*dims0[3]); ++i) {
+                float *array0 =
+                    new float[dims0[0] * dims0[1] * dims0[2] * dims0[3]];
+                std::size_t array1_size =
+                    std::accumulate(dims1.cbegin(),
+                                    dims1.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
+                float *array1 = new float[array1_size];
+                float *result = new float[dimsOut[0] * dimsOut[1] *
+                                          dimsOut[2] * dimsOut[3]];
+
+                for (std::size_t i = 0;
+                     i < (dims0[0] * dims0[1] * dims0[2] * dims0[3]);
+                     ++i) {
                     array0[i] = valueDist(gen);
                 }
                 for (std::size_t i = 0; i < array1_size; ++i) {
@@ -251,27 +310,48 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
 
                 // compute true result
                 auto dims1_tmp = dims1;
-                dims1_tmp.insert(dims1_tmp.cbegin(), 4 - dims1_tmp.size(), std::size_t(1));
-
-                const std::size_t strides0[nbDims] = {dims0[1]*dims0[2]*dims0[3], dims0[2]*dims0[3], dims0[3], 1};
-                const std::size_t strides1[nbDims] = {dims1_tmp[1]*dims1_tmp[2]*dims1_tmp[3], dims1_tmp[2]*dims1_tmp[3], dims1_tmp[3], 1};
+                dims1_tmp.insert(dims1_tmp.cbegin(),
+                                 4 - dims1_tmp.size(),
+                                 std::size_t(1));
+
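+                // dims1_tmp is dims1 left-padded with 1s to rank 4, so the 4-D stride logic applies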
+                const std::size_t strides0[nbDims] = {
+                    dims0[1] * dims0[2] * dims0[3],
+                    dims0[2] * dims0[3],
+                    dims0[3],
+                    1};
+                const std::size_t strides1[nbDims] = {
+                    dims1_tmp[1] * dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[2] * dims1_tmp[3],
+                    dims1_tmp[3],
+                    1};
                 for (std::size_t a = 0; a < dimsOut[0]; ++a) {
                     for (std::size_t b = 0; b < dimsOut[1]; ++b) {
-                        const std::size_t idx0_0 = strides0[0] * ((dims0[0] > 1) ? a : 0)
-                                                    + strides0[1] * ((dims0[1] > 1) ? b : 0);
-                        const std::size_t idx1_0 = strides1[0] * ((dims1_tmp[0] > 1) ? a : 0)
-                                                    + strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
+                        const std::size_t idx0_0 =
+                            strides0[0] * ((dims0[0] > 1) ? a : 0) +
+                            strides0[1] * ((dims0[1] > 1) ? b : 0);
+                        const std::size_t idx1_0 =
+                            strides1[0] * ((dims1_tmp[0] > 1) ? a : 0) +
+                            strides1[1] * ((dims1_tmp[1] > 1) ? b : 0);
                         for (std::size_t c = 0; c < dimsOut[2]; ++c) {
-                            const std::size_t idx_out = dimsOut[3] * (c + dimsOut[2] * (b + dimsOut[1] * a));
+                            const std::size_t idx_out =
+                                dimsOut[3] *
+                                (c + dimsOut[2] * (b + dimsOut[1] * a));
                             for (std::size_t d = 0; d < dimsOut[3]; ++d) {
-                                std::size_t idx0 = idx0_0
-                                                    + strides0[2] * ((dims0[2] > 1) ? c : 0)
-                                                    + ((dims0[3] > 1) ? d : 0);
-                                std::size_t idx1 = idx1_0
-                                                    + strides1[2] * ((dims1_tmp[2] > 1) ? c : 0)
-                                                    + ((dims1_tmp[3] > 1) ? d : 0);
-                                result[idx_out + d] = array0[idx0] - array1[idx1];
-                                // std::cout << "(" << idx0 << ", " << idx1 << ") -> " << array0[idx0] << " - " << array1[idx1] << " -> " << idx_out + d << std::endl;
+                                std::size_t idx0 =
+                                    idx0_0 +
+                                    strides0[2] * ((dims0[2] > 1) ? c : 0) +
+                                    ((dims0[3] > 1) ? d : 0);
+                                std::size_t idx1 =
+                                    idx1_0 +
+                                    strides1[2] *
+                                        ((dims1_tmp[2] > 1) ? c : 0) +
+                                    ((dims1_tmp[3] > 1) ? d : 0);
+                                result[idx_out + d] =
+                                    array0[idx0] - array1[idx1];
+                                // std::cout << "(" << idx0 << ", " << idx1 <<
+                                // ") -> " << array0[idx0] << " - " <<
+                                // array1[idx1] << " -> " << idx_out + d <<
+                                // std::endl;
                             }
                         }
                     }
@@ -280,22 +360,28 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 // conversion to Aidge::Tensors
                 // input0
                 T0->resize(dims0);
-                T0 -> getImpl() -> setRawPtr(array0, dims0[0]*dims0[1]*dims0[2]*dims0[3]);
+                T0->getImpl()->setRawPtr(
+                    array0,
+                    dims0[0] * dims0[1] * dims0[2] * dims0[3]);
 
                 // input1
                 T1->resize(dims1);
-                T1 -> getImpl() -> setRawPtr(array1, array1_size);
+                T1->getImpl()->setRawPtr(array1, array1_size);
 
                 // results
                 Tres->resize(dimsOut);
-                Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
+                Tres->getImpl()->setRawPtr(
+                    result,
+                    dimsOut[0] * dimsOut[1] * dimsOut[2] * dimsOut[3]);
 
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 mySub->forward();
                 end = std::chrono::system_clock::now();
-                duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+                duration +=
+                    std::chrono::duration_cast<std::chrono::microseconds>(
+                        end - start);
 
                 // comparison between truth and computed result
                 REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
@@ -304,12 +390,18 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 delete[] array1;
                 delete[] result;
 
-                const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
+                const std::size_t nb_elements =
+                    std::accumulate(dimsOut.cbegin(),
+                                    dimsOut.cend(),
+                                    std::size_t(1),
+                                    std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            std::cout << "throughput (elements / μs): "
+                      << (number_of_operation / duration.count()) << std::endl;
+            std::cout << "total time: " << duration.count() << "μs"
+                      << std::endl;
         }
     }
 }
diff --git a/unit_tests/recipies/Test_ConstantFolding.cpp b/unit_tests/recipies/Test_ConstantFolding.cpp
index cd035fd5336d3cb66fc70b1c0a4e5c82c9bef0d8..69e41c8d5b5a223201f8123ef0e2980ac3ae447a 100644
--- a/unit_tests/recipies/Test_ConstantFolding.cpp
+++ b/unit_tests/recipies/Test_ConstantFolding.cpp
@@ -11,11 +11,11 @@
 
 #include <catch2/catch_test_macros.hpp>
 
-#include "aidge/recipes/Recipes.hpp"
+#include "aidge/graph/OpArgs.hpp"
 #include "aidge/operator/Add.hpp"
 #include "aidge/operator/MatMul.hpp"
 #include "aidge/operator/Producer.hpp"
-#include "aidge/graph/OpArgs.hpp"
+#include "aidge/recipes/Recipes.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 #include <cstddef>
@@ -29,11 +29,34 @@ TEST_CASE("[ConstantFolding] forward", "[ConstantFolding][forward][CPU]") {
     auto matmul1 = MatMul("matmul1");
     auto add1 = Add("add1");
 
-    auto b0 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B0", true);
-    auto w0 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}}}), "W0", true);
-    auto b1 = Producer(std::make_shared<Tensor>(Array1D<float,5>{{1, 2, 3, 4, 5}}), "B1", true);
-    auto w1 = Producer(std::make_shared<Tensor>(Array2D<float,5,5>{{{6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}, {1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}),"W1", true);
-    auto input = Producer(std::make_shared<Tensor>(Array2D<float,2,5>{{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}), "input", true);
+    auto b0 =
+        Producer(std::make_shared<Tensor>(Array1D<float, 5>{{1, 2, 3, 4, 5}}),
+                 "B0",
+                 true);
+    auto w0 = Producer(
+        std::make_shared<Tensor>(Array2D<float, 5, 5>{{{1, 2, 3, 4, 5},
+                                                       {6, 7, 8, 9, 0},
+                                                       {1, 2, 3, 4, 5},
+                                                       {6, 7, 8, 9, 0},
+                                                       {1, 2, 3, 4, 5}}}),
+        "W0",
+        true);
+    auto b1 =
+        Producer(std::make_shared<Tensor>(Array1D<float, 5>{{1, 2, 3, 4, 5}}),
+                 "B1",
+                 true);
+    auto w1 = Producer(
+        std::make_shared<Tensor>(Array2D<float, 5, 5>{{{6, 7, 8, 9, 0},
+                                                       {1, 2, 3, 4, 5},
+                                                       {6, 7, 8, 9, 0},
+                                                       {1, 2, 3, 4, 5},
+                                                       {6, 7, 8, 9, 0}}}),
+        "W1",
+        true);
+    auto input = Producer(std::make_shared<Tensor>(Array2D<float, 2, 5>{
+                              {{1, 2, 3, 4, 5}, {6, 7, 8, 9, 0}}}),
+                          "input",
+                          true);
 
     input->addChild(matmul0, 0, 0);
     w0->addChild(matmul0, 0, 1);
@@ -54,32 +77,38 @@ TEST_CASE("[ConstantFolding] forward", "[ConstantFolding][forward][CPU]") {
 
     // Check original graph
     REQUIRE(g->getNodes() ==
-            std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}));
-    REQUIRE(((matmul0->getParent(0) == input) && (matmul0->getParent(1) == w0)));
+            std::set<std::shared_ptr<Node>>(
+                {input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}));
+    REQUIRE(
+        ((matmul0->getParent(0) == input) && (matmul0->getParent(1) == w0)));
     REQUIRE(((add0->getParent(0) == matmul0) && (add0->getParent(1) == b0)));
-    REQUIRE(((matmul1->getParent(0) == add0) && (matmul1->getParent(1) == w1)));
+    REQUIRE(
+        ((matmul1->getParent(0) == add0) && (matmul1->getParent(1) == w1)));
     REQUIRE(((add1->getParent(0) == matmul1) && (add1->getParent(1) == b1)));
 
     auto scheduler = SequentialScheduler(g);
     scheduler.forward();
 
-    const std::shared_ptr<Tensor> result = std::make_shared<Tensor>(Array2D<float,2,5>{{
-        { 1201.000000, 1532.000000, 1863.000000, 2194.000000, 785.000000},
-        { 2501.000000, 3207.000000, 3913.000000, 4619.000000, 1735.000000}
-    }});
+    const std::shared_ptr<Tensor> result = std::make_shared<
+        Tensor>(Array2D<float, 2, 5>{
+        {{1201.000000, 1532.000000, 1863.000000, 2194.000000, 785.000000},
+         {2501.000000, 3207.000000, 3913.000000, 4619.000000, 1735.000000}}});
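+    // reference output of add1: (input * W0 + B0) * W1 + B1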
 
     auto add1Op = std::static_pointer_cast<Add_Op>(add1->getOperator());
     REQUIRE(approxEq<float>(*(add1Op->getOutput(0)), *result));
 
-	// Transform GraphView inplace
+    // Transform GraphView inplace
     constantFolding(g);
 
-	// Check new GraphView
-	std::set<std::shared_ptr<Node>> newNodes = g->getNodes();
-	REQUIRE(newNodes != std::set<std::shared_ptr<Node>>({input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}));
-	REQUIRE(newNodes.size() == 1);
-	REQUIRE((*newNodes.cbegin())->type() == "Producer");
+    // Check new GraphView
+    std::set<std::shared_ptr<Node>> newNodes = g->getNodes();
+    REQUIRE(newNodes !=
+            std::set<std::shared_ptr<Node>>(
+                {input, w0, matmul0, b0, add0, w1, matmul1, b1, add1}));
+    REQUIRE(newNodes.size() == 1);
+    REQUIRE((*newNodes.cbegin())->type() == "Producer");
 
-    auto prodOp = std::static_pointer_cast<Producer_Op>((*newNodes.cbegin())->getOperator());
+    auto prodOp = std::static_pointer_cast<Producer_Op>(
+        (*newNodes.cbegin())->getOperator());
     REQUIRE(approxEq<float>(*(prodOp->getOutput(0)), *result));
 }
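For readers verifying the hard-coded expectation above: the result tensor is just (input x W0 + B0) x W1 + B1 over the Producer constants, with each bias row broadcast across the batch dimension. A minimal standalone sketch (plain C++, independent of the Aidge API) that recomputes it:

```cpp
// Standalone check of the expected ConstantFolding output:
// out = (input x W0 + B0) x W1 + B1, shapes as in the test above.
#include <array>
#include <cstdio>

int main() {
    const std::array<std::array<float, 5>, 2> input{{{1, 2, 3, 4, 5},
                                                     {6, 7, 8, 9, 0}}};
    const std::array<std::array<float, 5>, 5> w0{{{1, 2, 3, 4, 5},
                                                  {6, 7, 8, 9, 0},
                                                  {1, 2, 3, 4, 5},
                                                  {6, 7, 8, 9, 0},
                                                  {1, 2, 3, 4, 5}}};
    const std::array<std::array<float, 5>, 5> w1{{{6, 7, 8, 9, 0},
                                                  {1, 2, 3, 4, 5},
                                                  {6, 7, 8, 9, 0},
                                                  {1, 2, 3, 4, 5},
                                                  {6, 7, 8, 9, 0}}};
    const std::array<float, 5> b0{1, 2, 3, 4, 5}, b1{1, 2, 3, 4, 5};

    std::array<std::array<float, 5>, 2> mid{}, out{};
    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 5; ++j) {
            for (int k = 0; k < 5; ++k)
                mid[i][j] += input[i][k] * w0[k][j];
            mid[i][j] += b0[j]; // bias broadcast along the batch dim
        }
    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 5; ++j) {
            for (int k = 0; k < 5; ++k)
                out[i][j] += mid[i][k] * w1[k][j];
            out[i][j] += b1[j];
        }
    // Prints: 1201 1532 1863 2194 785 / 2501 3207 3913 4619 1735
    for (int i = 0; i < 2; ++i)
        std::printf("%g %g %g %g %g\n",
                    out[i][0], out[i][1], out[i][2], out[i][3], out[i][4]);
}
```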
diff --git a/unit_tests/recipies/Test_ConvToMatMul.cpp b/unit_tests/recipies/Test_ConvToMatMul.cpp
index 05c5eef83394ba8c965dfabae2bcd8c2b4502c79..4bcb4c9d7106c2045eccbf76461381c9b6546f21 100644
--- a/unit_tests/recipies/Test_ConvToMatMul.cpp
+++ b/unit_tests/recipies/Test_ConvToMatMul.cpp
@@ -11,12 +11,12 @@
 
 #include <catch2/catch_test_macros.hpp>
 
-#include "aidge/recipes/Recipes.hpp"
+#include "aidge/filler/Filler.hpp"
+#include "aidge/graph/OpArgs.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/operator/Producer.hpp"
+#include "aidge/recipes/Recipes.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
-#include "aidge/filler/Filler.hpp"
-#include "aidge/graph/OpArgs.hpp"
 #include <cstddef>
 
 using namespace Aidge;
@@ -26,23 +26,43 @@ TEST_CASE("[ConvToMatMul] conv") {
     auto conv2 = Conv(4, 7, {3, 3}, "conv2", {1, 1}, {1, 1}, true);
     auto conv3 = Conv(7, 10, {1, 1}, "conv3", {2, 2});
 
-    auto g1 = Sequential({
-        Producer({2, 3, 13, 24}, "dataProvider"),
-        conv1,
-        conv2,
-        conv3
-    });
+    auto g1 = Sequential(
+        {Producer({2, 3, 13, 24}, "dataProvider"), conv1, conv2, conv3});
 
     g1->setBackend("cpu");
     g1->forwardDims();
 
     // Random initialization of input and weights
-    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(0), -10.0, 10.0);
-    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(1), -10.0, 10.0);
-    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv1->getOperator())->getInput(2), -10.0, 10.0);
-    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv2->getOperator())->getInput(1), -10.0, 10.0);
-    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(1), -10.0, 10.0);
-    uniformFiller<float>(std::static_pointer_cast<OperatorTensor>(conv3->getOperator())->getInput(2), -10.0, 10.0);
+    uniformFiller<float>(
+        std::static_pointer_cast<OperatorTensor>(conv1->getOperator())
+            ->getInput(0),
+        -10.0,
+        10.0);
+    uniformFiller<float>(
+        std::static_pointer_cast<OperatorTensor>(conv1->getOperator())
+            ->getInput(1),
+        -10.0,
+        10.0);
+    uniformFiller<float>(
+        std::static_pointer_cast<OperatorTensor>(conv1->getOperator())
+            ->getInput(2),
+        -10.0,
+        10.0);
+    uniformFiller<float>(
+        std::static_pointer_cast<OperatorTensor>(conv2->getOperator())
+            ->getInput(1),
+        -10.0,
+        10.0);
+    uniformFiller<float>(
+        std::static_pointer_cast<OperatorTensor>(conv3->getOperator())
+            ->getInput(1),
+        -10.0,
+        10.0);
+    uniformFiller<float>(
+        std::static_pointer_cast<OperatorTensor>(conv3->getOperator())
+            ->getInput(2),
+        -10.0,
+        10.0);
 
     auto s1 = SequentialScheduler(g1);
     s1.forward();
@@ -52,7 +72,7 @@ TEST_CASE("[ConvToMatMul] conv") {
     auto g2 = g1->clone();
     g2->forwardDims();
     REQUIRE(convToMatMul(g2) == 3);
-    
+
     g2->setBackend("cpu");
 
     auto s2 = SequentialScheduler(g2);
@@ -60,14 +80,19 @@ TEST_CASE("[ConvToMatMul] conv") {
 
     g2->save("convToMatMul_after");
 
-    auto g1OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator());
-    auto g2OutOp = std::static_pointer_cast<OperatorTensor>((*g1->outputNodes().cbegin())->getOperator());
+    auto g1OutOp = std::static_pointer_cast<OperatorTensor>(
+        (*g1->outputNodes().cbegin())->getOperator());
+    auto g2OutOp = std::static_pointer_cast<OperatorTensor>(
+        (*g2->outputNodes().cbegin())->getOperator());
     REQUIRE(*(g1OutOp->getOutput(0)) == *(g2OutOp->getOutput(0)));
 
-    // Simplify the graph: freeze parameters to allow reshaping of the Producers
+    // Simplify the graph: freeze parameters to allow reshaping of the
+    // Producers
     for (auto node : g2->getNodes()) {
-        if (node->type() == Producer_Op::Type && node->name() != "dataProvider") {
-            std::static_pointer_cast<Producer_Op>(node->getOperator())->constant() = true;
+        if (node->type() == Producer_Op::Type &&
+            node->name() != "dataProvider") {
+            std::static_pointer_cast<Producer_Op>(node->getOperator())
+                ->constant() = true;
         }
     }
 
diff --git a/unit_tests/recipies/Test_ExplicitCastMove.cpp b/unit_tests/recipies/Test_ExplicitCastMove.cpp
index 27c788961b787c6f5248254f19ef7ac7a4366206..17af28a197e410d3ff129c9e561727c14160e37f 100644
--- a/unit_tests/recipies/Test_ExplicitCastMove.cpp
+++ b/unit_tests/recipies/Test_ExplicitCastMove.cpp
@@ -11,10 +11,10 @@
 
 #include <catch2/catch_test_macros.hpp>
 
-#include "aidge/recipes/Recipes.hpp"
+#include "aidge/graph/OpArgs.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/operator/Producer.hpp"
-#include "aidge/graph/OpArgs.hpp"
+#include "aidge/recipes/Recipes.hpp"
 #include <cstddef>
 
 using namespace Aidge;
@@ -24,12 +24,8 @@ TEST_CASE("[ExplicitCastMove] conv") {
     auto conv2 = Conv(32, 64, {3, 3}, "conv2");
     auto conv3 = Conv(64, 10, {1, 1}, "conv3", {2, 2});
 
-    auto g1 = Sequential({
-        Producer({16, 3, 224, 224}, "dataProvider"),
-        conv1,
-        conv2,
-        conv3
-    });
+    auto g1 = Sequential(
+        {Producer({16, 3, 224, 224}, "dataProvider"), conv1, conv2, conv3});
 
     g1->setBackend("cpu");
     conv1->getOperator()->setDataType(DataType::Int32);
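For context, the scenario this test sets up is a graph whose nodes disagree on DataType, so the scheduler would otherwise have to convert implicitly. A hedged usage sketch reusing the test's own nodes, assuming the recipe entry point is explicitCastMove(g) as the test name suggests (confirm against aidge/recipes/Recipes.hpp):

```cpp
// Hedged sketch, not a verified API listing: after giving one node a
// different DataType than the rest of the graph, the recipe is expected to
// materialize the implicit conversions as explicit Cast/Move nodes.
auto g1 = Sequential(
    {Producer({16, 3, 224, 224}, "dataProvider"), conv1, conv2, conv3});
g1->setBackend("cpu");
conv1->getOperator()->setDataType(DataType::Int32); // rest stays default
explicitCastMove(g1);          // assumption: inserts Cast/Move at type borders
g1->save("explicitCastMove");  // inspect the inserted nodes in the dump
```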
diff --git a/unit_tests/recipies/Test_FuseBatchNorm.cpp b/unit_tests/recipies/Test_FuseBatchNorm.cpp
index 68a01541894ba25a8841343d2b3943ccc08c7a9d..754c6771f303031ccb635a4690043c333f59af1d 100644
--- a/unit_tests/recipies/Test_FuseBatchNorm.cpp
+++ b/unit_tests/recipies/Test_FuseBatchNorm.cpp
@@ -10,13 +10,13 @@
  ********************************************************************************/
 
 #include <catch2/catch_test_macros.hpp>
-#include <memory>
 #include <cmath>
+#include <memory>
 
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
-#include "aidge/operator/Conv.hpp"
 #include "aidge/operator/BatchNorm.hpp"
+#include "aidge/operator/Conv.hpp"
 #include "aidge/operator/Producer.hpp"
 #include "aidge/recipes/Recipes.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
@@ -30,86 +30,80 @@ TEST_CASE("[core/recipes] FuseBatchNorm", "[recipes][FuseBatchNorm]") {
     auto myConv = Conv(3, 3, {1, 1}, "conv1");
     auto myBN = BatchNorm<2>(32, 1.0e-5F, 0.1F, "batchnorm1");
 
-    auto myProdOp = std::static_pointer_cast<Producer_Op>(myProd->getOperator());
-    auto myConvOp = std::static_pointer_cast<Conv_Op<2>>(myConv->getOperator());
-    auto myBNOp = std::static_pointer_cast<BatchNorm_Op<2>>(myBN->getOperator());
+    auto myProdOp =
+        std::static_pointer_cast<Producer_Op>(myProd->getOperator());
+    auto myConvOp =
+        std::static_pointer_cast<Conv_Op<2>>(myConv->getOperator());
+    auto myBNOp =
+        std::static_pointer_cast<BatchNorm_Op<2>>(myBN->getOperator());
 
-    myProdOp->setOutput(0, std::make_shared<Tensor>(Array4D<float,2,3,3,3> { //NCHW
-        {
-                {
-                    {{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
-                     {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
-                     {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
-                    {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
-                     {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
-                     {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
-                    {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
-                     {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
-                     {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}
-                },
-                {
-                    {{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
-                     {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
-                     {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
-                    {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
-                     {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
-                     {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
-                    {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
-                     {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
-                     {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}
-                }
-            }
-    }));
-    myConvOp -> setInput(1, std::make_shared<Tensor>(Array4D<float,3,3,1,1> { //NCHW
-        {
-            {
-                {{8.28257084e-01}},
-                {{7.99335480e-01}},
-                {{7.36702740e-01}}
-            },
-            {
-                {{2.36729562e-01}},
-                {{8.61912668e-01}},
-                {{9.93067741e-01}}
-            },
-            {
-                {{1.63514376e-01}},
-                {{8.95773172e-02}},
-                {{2.96533108e-01}}
-            }
-        }
-    }));
-    myConvOp -> setInput(2, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}}));
-    myBNOp -> setInput(1, std::make_shared<Tensor>(Array1D<float,3> {{0.9044, 0.3028, 0.0218}}));
-    myBNOp -> setInput(2, std::make_shared<Tensor>(Array1D<float,3> {{0.1332, 0.7503, 0.0878}}));
-    myBNOp -> setInput(3, std::make_shared<Tensor>(Array1D<float,3> {{0.9931, 0.8421, 0.9936}}));
-    myBNOp -> setInput(4, std::make_shared<Tensor>(Array1D<float,3> {{0.4470, 0.3064, 0.7061}}));
+    myProdOp->setOutput(
+        0,
+        std::make_shared<Tensor>(Array4D<float, 2, 3, 3, 3>{
+            // NCHW
+            {{{{8.28257084e-01, 7.99335480e-01, 7.36702740e-01},
+               {2.36729562e-01, 8.61912668e-01, 9.93067741e-01},
+               {1.63514376e-01, 8.95773172e-02, 2.96533108e-01}},
+              {{2.20776618e-01, 5.89067876e-01, 2.03930080e-01},
+               {1.31294072e-01, 7.10182846e-01, 1.08420849e-04},
+               {7.21750259e-01, 4.38212037e-01, 5.08823872e-01}},
+              {{4.30953979e-01, 1.51903450e-01, 3.76343548e-01},
+               {8.07861805e-01, 7.79679358e-01, 5.01209974e-01},
+               {9.31280375e-01, 9.94207084e-01, 1.74868107e-03}}},
+             {{{6.22058094e-01, 2.32256651e-02, 6.18222237e-01},
+               {9.58304763e-01, 2.11395025e-02, 4.95614648e-01},
+               {2.50825584e-01, 4.50860739e-01, 3.80362332e-01}},
+              {{9.91703272e-02, 5.06073236e-01, 4.88969564e-01},
+               {1.12059772e-01, 7.64178872e-01, 7.60362148e-01},
+               {2.84135342e-02, 4.29610193e-01, 1.27862811e-01}},
+              {{9.57209170e-01, 8.22797656e-01, 1.91352129e-01},
+               {9.52722490e-01, 6.35501027e-01, 5.67592978e-02},
+               {2.00799644e-01, 4.00822222e-01, 9.14380193e-01}}}}}));
+    myConvOp->setInput(
+        1,
+        std::make_shared<Tensor>(Array4D<float, 3, 3, 1, 1>{
+            // NCHW
+            {{{{8.28257084e-01}}, {{7.99335480e-01}}, {{7.36702740e-01}}},
+             {{{2.36729562e-01}}, {{8.61912668e-01}}, {{9.93067741e-01}}},
+             {{{1.63514376e-01}}, {{8.95773172e-02}}, {{2.96533108e-01}}}}}));
+    myConvOp->setInput(
+        2,
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.4470, 0.3064, 0.7061}}));
+    myBNOp->setInput(
+        1,
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.9044, 0.3028, 0.0218}}));
+    myBNOp->setInput(
+        2,
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.1332, 0.7503, 0.0878}}));
+    myBNOp->setInput(
+        3,
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.9931, 0.8421, 0.9936}}));
+    myBNOp->setInput(
+        4,
+        std::make_shared<Tensor>(Array1D<float, 3>{{0.4470, 0.3064, 0.7061}}));
 
-    auto g1 = Sequential({
-        myProd,
-        myConv,
-        myBN
-    });
-    g1 -> setName("fuseBNGraph");
-    g1 -> compile("cpu", DataType::Float32);
+    auto g1 = Sequential({myProd, myConv, myBN});
+    g1->setName("fuseBNGraph");
+    g1->compile("cpu", DataType::Float32);
 
     auto s = SequentialScheduler(g1);
     s.forward();
-    std::shared_ptr<Tensor> res1 = std::make_shared<Tensor>(*(myBNOp -> getOutput(0)));
+    std::shared_ptr<Tensor> res1 =
+        std::make_shared<Tensor>(*(myBNOp->getOutput(0)));
 
     fuseBatchNorm(g1);
 
     s.resetScheduling();
     s.forward();
-    std::shared_ptr<Tensor> res2 = std::make_shared<Tensor>(*(myConvOp -> getOutput(0)));
+    std::shared_ptr<Tensor> res2 =
+        std::make_shared<Tensor>(*(myConvOp->getOutput(0)));
 
-    REQUIRE(g1 -> outputNodes().size() == 1);
-    REQUIRE(g1 -> inputNodes().size() == 0);
+    REQUIRE(g1->outputNodes().size() == 1);
+    REQUIRE(g1->inputNodes().size() == 0);
     bool eq = true;
     for (std::size_t i = 0; i < res1->size(); ++i) {
-         eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06;
+        eq &= std::abs(res1->get<float>(i) - res2->get<float>(i)) < 1.0e-06;
     }
     REQUIRE(eq);
-
 }
 } // namespace Aidge
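The fusion verified above folds the BatchNorm affine transform into the preceding convolution: with per-channel scale gamma, shift beta, running mean mu, variance sigma^2 and s = gamma / sqrt(sigma^2 + eps), the fused weights are w * s and the fused bias is (b - mu) * s + beta (the test's setInput(1..4) calls supply these four parameter tensors, assuming the usual BatchNorm input convention). A per-channel sketch of that math, generic rather than Aidge's fuseBatchNorm code:

```cpp
// Fold BatchNorm into the preceding conv, per output channel:
// y = gamma * (conv(x) - mean) / sqrt(var + eps) + beta
//   = conv_with(w * s)(x) + ((b - mean) * s + beta),  s = gamma / sqrt(var + eps)
#include <cmath>
#include <cstddef>
#include <vector>

void fuseBatchNormIntoConv(std::vector<float>& weights, // [outC * kSize]
                           std::vector<float>& bias,    // [outC]
                           const std::vector<float>& gamma,
                           const std::vector<float>& beta,
                           const std::vector<float>& mean,
                           const std::vector<float>& var,
                           float eps = 1.0e-5f) {
    const std::size_t outC = bias.size();
    const std::size_t kSize = weights.size() / outC; // inC * kH * kW
    for (std::size_t c = 0; c < outC; ++c) {
        const float s = gamma[c] / std::sqrt(var[c] + eps);
        for (std::size_t k = 0; k < kSize; ++k)
            weights[c * kSize + k] *= s;
        bias[c] = (bias[c] - mean[c]) * s + beta[c];
    }
}
```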
diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp
index 7c127548417492141c3ea1eeb9374042befe75d2..e2611e8a3a5b4b3d128683ca268b234162125ba4 100644
--- a/unit_tests/recipies/Test_HorizontalTiling.cpp
+++ b/unit_tests/recipies/Test_HorizontalTiling.cpp
@@ -14,12 +14,11 @@
 
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
+#include "aidge/operator/Concat.hpp"
 #include "aidge/operator/Conv.hpp"
 #include "aidge/operator/ReLU.hpp"
 #include "aidge/recipes/Recipes.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
-#include "aidge/operator/Concat.hpp"
-
 
 namespace Aidge {
 
@@ -29,136 +28,90 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") {
 
         SECTION("Simple Node: Conv") {
             std::shared_ptr<Node> myReLU = ReLU("myReLU");
-            std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv");
-            std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
-                {
-                    {
-                        {{  0,   1,   2},
-                         {  3,   4,   5},
-                         {  6,   7,   8}},
-                        {{  9,  10,  11},
-                         { 12,  13,  14},
-                         { 15,  16,  17}},
-                        {{ 18,  19,  20},
-                         { 21,  22,  23},
-                         { 24,  25,  26}}
-                    },
-                    {
-                        {{ 27,  28,  29},
-                         { 30,  31,  32},
-                         { 33,  34,  35}},
-                        {{ 36,  37,  38},
-                         { 39,  40,  41},
-                         { 42,  43,  44}},
-                        {{ 45,  46,  47},
-                         { 48,  49,  50},
-                         { 51,  52,  53}}
-                    },
-                    {
-                        {{ 54,  55,  56},
-                         { 57,  58,  59},
-                         { 60,  61,  62}},
-                        {{ 63,  64,  65},
-                         { 66,  67,  68},
-                         { 69,  70,  71}},
-                        {{ 72,  73,  74},
-                         { 75,  76,  77},
-                         { 78,  79,  80}}
-                    },
-                    {
-                        {{ 81,  82,  83},
-                         { 84,  85,  86},
-                         { 87,  88,  89}},
-                        {{ 90,  91,  92},
-                         { 93,  94,  95},
-                         { 96,  97,  98}},
-                        {{ 99, 100, 101},
-                         {102, 103, 104},
-                         {105, 106, 107}}
-                    }
-                }
-            });
-            std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-                {
-                    {
-                        {{  0,   1,   2,   3,   4},
-                        {  5,   6,   7,   8,   9},
-                        { 10,  11,  12,  13,  14},
-                        { 15,  16,  17,  18,  19},
-                        { 20,  21,  22,  23,  24}},
-
-                        {{ 25,  26,  27,  28,  29},
-                        { 30,  31,  32,  33,  34},
-                        { 35,  36,  37,  38,  39},
-                        { 40,  41,  42,  43,  44},
-                        { 45,  46,  47,  48,  49}},
-
-                        {{ 50,  51,  52,  53,  54},
-                        { 55,  56,  57,  58,  59},
-                        { 60,  61,  62,  63,  64},
-                        { 65,  66,  67,  68,  69},
-                        { 70,  71,  72,  73,  74}}
-                    },
-                    {
-                        {{ 75,  76,  77,  78,  79},
-                        { 80,  81,  82,  83,  84},
-                        { 85,  86,  87,  88,  89},
-                        { 90,  91,  92,  93,  94},
-                        { 95,  96,  97,  98,  99}},
-
-                        {{100, 101, 102, 103, 104},
-                        {105, 106, 107, 108, 109},
-                        {110, 111, 112, 113, 114},
-                        {115, 116, 117, 118, 119},
-                        {120, 121, 122, 123, 124}},
-
-                        {{125, 126, 127, 128, 129},
-                        {130, 131, 132, 133, 134},
-                        {135, 136, 137, 138, 139},
-                        {140, 141, 142, 143, 144},
-                        {145, 146, 147, 148, 149}}
-                    }
-                }
-            });
-            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
-                {
-                    {
-                        {{ 15226,  15577,  15928},
-                         { 16981,  17332,  17683},
-                         { 18736,  19087,  19438}},
-
-                        {{ 37818,  38898,  39978},
-                         { 43218,  44298,  45378},
-                         { 48618,  49698,  50778}},
-
-                        {{ 60426,  62235,  64044},
-                         { 69471,  71280,  73089},
-                         { 78516,  80325,  82134}},
-
-                        {{ 83016,  85554,  88092},
-                         { 95706,  98244, 100782},
-                         {108396, 110934, 113472}}
-                    },
-                    {
-                        {{ 41551,  41902,  42253},
-                         { 43306,  43657,  44008},
-                         { 45061,  45412,  45763}},
-
-                        {{118818, 119898, 120978},
-                         {124218, 125298, 126378},
-                         {129618, 130698, 131778}},
-
-                        {{196101, 197910, 199719},
-                         {205146, 206955, 208764},
-                         {214191, 216000, 217809}},
-
-                        {{273366, 275904, 278442},
-                         {286056, 288594, 291132},
-                         {298746, 301284, 303822}}
-                    }
-                }
-            });
+            std::shared_ptr<Node> myConv = Conv(3, 4, {3, 3}, "myconv");
+            std::shared_ptr<Tensor> myWeights =
+                std::make_shared<Tensor>(Array4D<int, 4, 3, 3, 3>{
+                    {{{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}},
+                      {{9, 10, 11}, {12, 13, 14}, {15, 16, 17}},
+                      {{18, 19, 20}, {21, 22, 23}, {24, 25, 26}}},
+                     {{{27, 28, 29}, {30, 31, 32}, {33, 34, 35}},
+                      {{36, 37, 38}, {39, 40, 41}, {42, 43, 44}},
+                      {{45, 46, 47}, {48, 49, 50}, {51, 52, 53}}},
+                     {{{54, 55, 56}, {57, 58, 59}, {60, 61, 62}},
+                      {{63, 64, 65}, {66, 67, 68}, {69, 70, 71}},
+                      {{72, 73, 74}, {75, 76, 77}, {78, 79, 80}}},
+                     {{{81, 82, 83}, {84, 85, 86}, {87, 88, 89}},
+                      {{90, 91, 92}, {93, 94, 95}, {96, 97, 98}},
+                      {{99, 100, 101}, {102, 103, 104}, {105, 106, 107}}}}});
+            std::shared_ptr<Tensor> myBias =
+                std::make_shared<Tensor>(Array1D<int, 4>{{7, 0, 9, 0}});
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
+                Array4D<int, 2, 3, 5, 5>{// NCHW
+                                         {{{{0, 1, 2, 3, 4},
+                                            {5, 6, 7, 8, 9},
+                                            {10, 11, 12, 13, 14},
+                                            {15, 16, 17, 18, 19},
+                                            {20, 21, 22, 23, 24}},
+
+                                           {{25, 26, 27, 28, 29},
+                                            {30, 31, 32, 33, 34},
+                                            {35, 36, 37, 38, 39},
+                                            {40, 41, 42, 43, 44},
+                                            {45, 46, 47, 48, 49}},
+
+                                           {{50, 51, 52, 53, 54},
+                                            {55, 56, 57, 58, 59},
+                                            {60, 61, 62, 63, 64},
+                                            {65, 66, 67, 68, 69},
+                                            {70, 71, 72, 73, 74}}},
+                                          {{{75, 76, 77, 78, 79},
+                                            {80, 81, 82, 83, 84},
+                                            {85, 86, 87, 88, 89},
+                                            {90, 91, 92, 93, 94},
+                                            {95, 96, 97, 98, 99}},
+
+                                           {{100, 101, 102, 103, 104},
+                                            {105, 106, 107, 108, 109},
+                                            {110, 111, 112, 113, 114},
+                                            {115, 116, 117, 118, 119},
+                                            {120, 121, 122, 123, 124}},
+
+                                           {{125, 126, 127, 128, 129},
+                                            {130, 131, 132, 133, 134},
+                                            {135, 136, 137, 138, 139},
+                                            {140, 141, 142, 143, 144},
+                                            {145, 146, 147, 148, 149}}}}});
+            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(
+                Array4D<int, 2, 4, 3, 3>{{{{{15226, 15577, 15928},
+                                            {16981, 17332, 17683},
+                                            {18736, 19087, 19438}},
+
+                                           {{37818, 38898, 39978},
+                                            {43218, 44298, 45378},
+                                            {48618, 49698, 50778}},
+
+                                           {{60426, 62235, 64044},
+                                            {69471, 71280, 73089},
+                                            {78516, 80325, 82134}},
+
+                                           {{83016, 85554, 88092},
+                                            {95706, 98244, 100782},
+                                            {108396, 110934, 113472}}},
+                                          {{{41551, 41902, 42253},
+                                            {43306, 43657, 44008},
+                                            {45061, 45412, 45763}},
+
+                                           {{118818, 119898, 120978},
+                                            {124218, 125298, 126378},
+                                            {129618, 130698, 131778}},
+
+                                           {{196101, 197910, 199719},
+                                            {205146, 206955, 208764},
+                                            {214191, 216000, 217809}},
+
+                                           {{273366, 275904, 278442},
+                                            {286056, 288594, 291132},
+                                            {298746, 301284, 303822}}}}});
             myReLU->getOperator()->associateInput(0, myInput);
             myReLU->addChild(myConv, 0, 0);
             myConv->getOperator()->setInput(1, myWeights);
@@ -167,18 +120,29 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") {
             std::shared_ptr<GraphView> g = std::make_shared<GraphView>();
             g->add({myReLU, myConv});
             g->compile("cpu", DataType::Int32);
-            std::set<std::shared_ptr<Node>> tiledConv = getConvHorizontalTiling(myConv, 2, 3);
+            std::set<std::shared_ptr<Node>> tiledConv =
+                getConvHorizontalTiling(myConv, 2, 3);
 
             SequentialScheduler s(g);
             s.forward();
-            REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput);
-
-            GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv);
-            g->compile("cpu", DataType::Int32, 0, {{2,3,5,5}});  // changes myInput DataType from Int32 to Float32. Why??????
+            REQUIRE(
+                *(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())
+                      ->getOutput(0)) == *myOutput);
+
+            GraphView::replace(
+                {myConv, myConv->getParent(1), myConv->getParent(2)},
+                tiledConv);
+            g->compile("cpu",
+                       DataType::Int32,
+                       0,
+                       {{2, 3, 5, 5}}); // FIXME: this compile() call changes
+                                        // myInput DataType from Int32 to Float32
             s.resetScheduling();
             s.forward();
 
-            REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>((*g->outputNodes().begin())->getOperator())->getOutput(0)) == *myOutput);
+            REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>(
+                          (*g->outputNodes().begin())->getOperator())
+                          ->getOutput(0)) == *myOutput);
         }
     }
 }
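The equivalence this test relies on: for a stride-1 convolution, output rows [r0, r1) depend only on input rows [r0, r1 + K - 1), so the output can be computed slice by slice and the slices concatenated, which is how getConvHorizontalTiling splits the Conv. A self-contained sketch of that property (generic, not the Aidge recipe):

```cpp
// Horizontal-tiling equivalence for a 1-channel, stride-1, no-padding conv.
#include <cassert>
#include <vector>

std::vector<float> conv2d(const std::vector<float>& x, int H, int W,
                          const std::vector<float>& k, int K) {
    const int oh = H - K + 1, ow = W - K + 1;
    std::vector<float> y(oh * ow, 0.f);
    for (int i = 0; i < oh; ++i)
        for (int j = 0; j < ow; ++j)
            for (int a = 0; a < K; ++a)
                for (int b = 0; b < K; ++b)
                    y[i * ow + j] += k[a * K + b] * x[(i + a) * W + j + b];
    return y;
}

int main() {
    const int H = 5, W = 5, K = 3, oh = H - K + 1;
    std::vector<float> x(H * W), k(K * K, 1.f);
    for (int i = 0; i < H * W; ++i) x[i] = float(i);

    auto full = conv2d(x, H, W, k, K);

    const int bounds[3] = {0, 2, oh}; // output-row slice boundaries
    std::vector<float> tiled;
    for (int s = 0; s < 2; ++s) {
        const int r0 = bounds[s], r1 = bounds[s + 1];
        // Input window covering the slice's receptive field:
        std::vector<float> win(x.begin() + r0 * W,
                               x.begin() + (r1 + K - 1) * W);
        auto part = conv2d(win, r1 - r0 + K - 1, W, k, K);
        tiled.insert(tiled.end(), part.begin(), part.end());
    }
    assert(tiled == full); // concatenated slices match the full output
}
```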
diff --git a/unit_tests/scheduler/Test_CastMove.cpp b/unit_tests/scheduler/Test_CastMove.cpp
index 5ca2cd9de4dcc9dab2c78f7ae1e1bf3090db8f2b..4c1bb18e4f6eb0682ab841a726f9d351971a5aef 100644
--- a/unit_tests/scheduler/Test_CastMove.cpp
+++ b/unit_tests/scheduler/Test_CastMove.cpp
@@ -14,79 +14,83 @@
 #include <string>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/utils/TensorUtils.hpp"
-#include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
+#include "aidge/graph/Node.hpp"
 #include "aidge/graph/OpArgs.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/recipes/Recipes.hpp"
+#include "aidge/scheduler/SequentialScheduler.hpp"
+#include "aidge/utils/TensorUtils.hpp"
 
 #include "aidge/backend/cpu.hpp"
 
 using namespace Aidge;
 
 TEST_CASE("[cpu/castmove] CastMove(forward)") {
-    std::shared_ptr<Tensor> inputTensor =
-            std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
-                                                                 {5, 6, 7, 8, 9},
-                                                                 {10, 11, 12, 13, 14},
-                                                                 {15, 16, 17, 18, 19},
-                                                                 {20, 21, 22, 23, 24}}},
-                                                               {{{25, 26, 27, 28, 29},
-                                                                 {30, 31, 32, 33, 34},
-                                                                 {35, 36, 37, 38, 39},
-                                                                 {40, 41, 42, 43, 44},
-                                                                 {45, 46, 47, 48, 49}}}}});
-
-    std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>(
-            Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
-                                      {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
-                                      {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
-
-    std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+    std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>(
+        Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                    {5, 6, 7, 8, 9},
+                                    {10, 11, 12, 13, 14},
+                                    {15, 16, 17, 18, 19},
+                                    {20, 21, 22, 23, 24}}},
+                                  {{{25, 26, 27, 28, 29},
+                                    {30, 31, 32, 33, 34},
+                                    {35, 36, 37, 38, 39},
+                                    {40, 41, 42, 43, 44},
+                                    {45, 46, 47, 48, 49}}}}});
+
+    std::shared_ptr<Tensor> weight1 =
+        std::make_shared<Tensor>(Array4D<int, 3, 1, 3, 3>{
+            {{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
+             {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
+             {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
+
+    std::shared_ptr<Tensor> bias1 =
+        std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
 
     SECTION("Test implicit") {
-        std::shared_ptr<GraphView> g =
-                Sequential({
-                    Conv(1, 3, {3, 3}, "conv1"),
-                    Conv(3, 4, {1, 1}, "conv2"),
-                    Conv(4, 3, {1, 1}, "conv3"),
-                    FC(27, 5, false, "fc")});
+        std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"),
+                                                   Conv(3, 4, {1, 1}, "conv2"),
+                                                   Conv(4, 3, {1, 1}, "conv3"),
+                                                   FC(27, 5, false, "fc")});
 
         g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
         g->getNode("conv1")->getOperator()->setInput(1, weight1);
         g->getNode("conv1")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> weight2 =
-                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
-                                                                   {{{4}}, {{5}}, {{6}}},
-                                                                   {{{7}}, {{8}}, {{9}}},
-                                                                   {{{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
+        std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>(
+            Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                      {{{4}}, {{5}}, {{6}}},
+                                      {{{7}}, {{8}}, {{9}}},
+                                      {{{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias2 =
+            std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
         g->getNode("conv2")->getOperator()->setInput(1, weight2);
         g->getNode("conv2")->getOperator()->setInput(2, bias2);
         // *(g->getNode("conv2")->getOperator()->input(1, weight2);
 
         std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
-                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
-                                          {{{5}}, {{6}}, {{7}}, {{8}}},
-                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+            Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
+                                      {{{5}}, {{6}}, {{7}}, {{8}}},
+                                      {{{9}}, {{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias3 =
+            std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
         g->getNode("conv3")->getOperator()->setInput(1, weight3);
         g->getNode("conv3")->getOperator()->setInput(2, bias3);
 
-        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
-                Array2D<int, 5, 27>{{{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
-                                     {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
-                                     {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
-                                      9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
-                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
-                                      6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
-                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
-                                      3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
-        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+        std::shared_ptr<Tensor> weightfc =
+            std::make_shared<Tensor>(Array2D<int, 5, 27>{
+                {{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                  15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
+                 {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                  12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+                 {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
+                  9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
+                 {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
+                  6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
+                 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
+                  3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
+        std::shared_ptr<Tensor> biasfc =
+            std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
         g->getNode("fc")->getOperator()->setInput(1, weightfc);
         g->getNode("fc")->getOperator()->setInput(2, biasfc);
 
@@ -101,94 +105,140 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") {
         REQUIRE_NOTHROW(scheduler.forward());
         scheduler.saveSchedulingDiagram("schedulingSequential");
 
-        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
-                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
-                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
-                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
-                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
-                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
-                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
-                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
-                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
-                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
-                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
-                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
-                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
-                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
-                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
-                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
-                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
-                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
-                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<
+            Tensor>(Array4D<int, 2, 3, 3, 3>{
+            {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
+              {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
+              {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
+             {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
+              {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
+              {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935},
+                                        {10689, 11607, 12525},
+                                        {15279, 16197, 17115}},
+                                       {{13786, 15838, 17890},
+                                        {24046, 26098, 28150},
+                                        {34306, 36358, 38410}},
+                                       {{21473, 24659, 27845},
+                                        {37403, 40589, 43775},
+                                        {53333, 56519, 59705}},
+                                       {{29160, 33480, 37800},
+                                        {50760, 55080, 59400},
+                                        {72360, 76680, 81000}}},
+                                      {{{29049, 29967, 30885},
+                                        {33639, 34557, 35475},
+                                        {38229, 39147, 40065}},
+                                       {{65086, 67138, 69190},
+                                        {75346, 77398, 79450},
+                                        {85606, 87658, 89710}},
+                                       {{101123, 104309, 107495},
+                                        {117053, 120239, 123425},
+                                        {132983, 136169, 139355}},
+                                       {{137160, 141480, 145800},
+                                        {158760, 163080, 167400},
+                                        {180360, 184680, 189000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451},
+                                        {374031, 405891, 437751},
+                                        {533331, 565191, 597051}},
+                                       {{496804, 570568, 644332},
+                                        {865624, 939388, 1013152},
+                                        {1234444, 1308208, 1381972}},
+                                       {{778877, 894545, 1010213},
+                                        {1357217, 1472885, 1588553},
+                                        {1935557, 2051225, 2166893}}},
+                                      {{{1011231, 1043091, 1074951},
+                                        {1170531, 1202391, 1234251},
+                                        {1329831, 1361691, 1393551}},
+                                       {{2340904, 2414668, 2488432},
+                                        {2709724, 2783488, 2857252},
+                                        {3078544, 3152308, 3226072}},
+                                       {{3670577, 3786245, 3901913},
+                                        {4248917, 4364585, 4480253},
+                                        {4827257, 4942925, 5058593}}}}});
 
         Tensor expectedOutput4 = Array2D<int, 2, 5>{
-                {{205050376, 198925904, 181355097, 196978090, 238868348},
-                {598467376, 561797804, 560823897, 593043790, 698672948}}};
-        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+            {{205050376, 198925904, 181355097, 196978090, 238868348},
+             {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv1")->getOperator())
+                ->getOutput(0);
         REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12));
-        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other2 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv2")->getOperator())
+                ->getOutput(0);
         REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12));
-        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
-        REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
-        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other3 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv3")->getOperator())
+                ->getOutput(0);
+        REQUIRE(
+            approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other4 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("fc")->getOperator())
+                ->getOutput(0);
         REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12));
     }
 
     SECTION("Half") {
-        Tensor refTensor = Array2D<float, 3, 2>{{{0.0, 1.0},{2.1, 3.4},{5000.0, 1.0e5}}};
+        Tensor refTensor =
+            Array2D<float, 3, 2>{{{0.0, 1.0}, {2.1, 3.4}, {5000.0, 1.0e5}}};
         Tensor tensor(DataType::Float16);
         tensor.copyCastFrom(refTensor);
-        REQUIRE(approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0));
+        REQUIRE(
+            approxEq<float, half_float::half>(refTensor, tensor, 1.0e-3, 0.0));
     }
 
     SECTION("Test explicit") {
-        std::shared_ptr<GraphView> g =
-                Sequential({
-                    Conv(1, 3, {3, 3}, "conv1"),
-                    Conv(3, 4, {1, 1}, "conv2"),
-                    Conv(4, 3, {1, 1}, "conv3"),
-                    FC(27, 5, false, "fc")});
+        std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"),
+                                                   Conv(3, 4, {1, 1}, "conv2"),
+                                                   Conv(4, 3, {1, 1}, "conv3"),
+                                                   FC(27, 5, false, "fc")});
 
         g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
         g->getNode("conv1")->getOperator()->setInput(1, weight1);
         g->getNode("conv1")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> weight2 =
-                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
-                                                                   {{{4}}, {{5}}, {{6}}},
-                                                                   {{{7}}, {{8}}, {{9}}},
-                                                                   {{{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
+        std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>(
+            Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                      {{{4}}, {{5}}, {{6}}},
+                                      {{{7}}, {{8}}, {{9}}},
+                                      {{{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias2 =
+            std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
         g->getNode("conv2")->getOperator()->setInput(1, weight2);
         g->getNode("conv2")->getOperator()->setInput(2, bias2);
         // *(g->getNode("conv2")->getOperator()->input(1, weight2);
 
         std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
-                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
-                                          {{{5}}, {{6}}, {{7}}, {{8}}},
-                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+            Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
+                                      {{{5}}, {{6}}, {{7}}, {{8}}},
+                                      {{{9}}, {{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias3 =
+            std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
         g->getNode("conv3")->getOperator()->setInput(1, weight3);
         g->getNode("conv3")->getOperator()->setInput(2, bias3);
 
-        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
-                Array2D<int, 5, 27>{{{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
-                                     {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
-                                     {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
-                                      9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
-                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
-                                      6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
-                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
-                                      3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
-        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+        std::shared_ptr<Tensor> weightfc =
+            std::make_shared<Tensor>(Array2D<int, 5, 27>{
+                {{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                  15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
+                 {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                  12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+                 {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
+                  9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
+                 {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
+                  6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
+                 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
+                  3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
+        std::shared_ptr<Tensor> biasfc =
+            std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
         g->getNode("fc")->getOperator()->setInput(1, weightfc);
         g->getNode("fc")->getOperator()->setInput(2, biasfc);
 
@@ -205,42 +255,84 @@ TEST_CASE("[cpu/castmove] CastMove(forward)") {
         REQUIRE_NOTHROW(scheduler.forward());
         scheduler.saveSchedulingDiagram("schedulingSequential");
 
-        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
-                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
-                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
-                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
-                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
-                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
-                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
-                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
-                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
-                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
-                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
-                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
-                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
-                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
-                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
-                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
-                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
-                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
-                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<
+            Tensor>(Array4D<int, 2, 3, 3, 3>{
+            {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
+              {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
+              {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
+             {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
+              {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
+              {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935},
+                                        {10689, 11607, 12525},
+                                        {15279, 16197, 17115}},
+                                       {{13786, 15838, 17890},
+                                        {24046, 26098, 28150},
+                                        {34306, 36358, 38410}},
+                                       {{21473, 24659, 27845},
+                                        {37403, 40589, 43775},
+                                        {53333, 56519, 59705}},
+                                       {{29160, 33480, 37800},
+                                        {50760, 55080, 59400},
+                                        {72360, 76680, 81000}}},
+                                      {{{29049, 29967, 30885},
+                                        {33639, 34557, 35475},
+                                        {38229, 39147, 40065}},
+                                       {{65086, 67138, 69190},
+                                        {75346, 77398, 79450},
+                                        {85606, 87658, 89710}},
+                                       {{101123, 104309, 107495},
+                                        {117053, 120239, 123425},
+                                        {132983, 136169, 139355}},
+                                       {{137160, 141480, 145800},
+                                        {158760, 163080, 167400},
+                                        {180360, 184680, 189000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451},
+                                        {374031, 405891, 437751},
+                                        {533331, 565191, 597051}},
+                                       {{496804, 570568, 644332},
+                                        {865624, 939388, 1013152},
+                                        {1234444, 1308208, 1381972}},
+                                       {{778877, 894545, 1010213},
+                                        {1357217, 1472885, 1588553},
+                                        {1935557, 2051225, 2166893}}},
+                                      {{{1011231, 1043091, 1074951},
+                                        {1170531, 1202391, 1234251},
+                                        {1329831, 1361691, 1393551}},
+                                       {{2340904, 2414668, 2488432},
+                                        {2709724, 2783488, 2857252},
+                                        {3078544, 3152308, 3226072}},
+                                       {{3670577, 3786245, 3901913},
+                                        {4248917, 4364585, 4480253},
+                                        {4827257, 4942925, 5058593}}}}});
 
         Tensor expectedOutput4 = Array2D<int, 2, 5>{
-                {{205050376, 198925904, 181355097, 196978090, 238868348},
-                {598467376, 561797804, 560823897, 593043790, 698672948}}};
-        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+            {{205050376, 198925904, 181355097, 196978090, 238868348},
+             {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv1")->getOperator())
+                ->getOutput(0);
         REQUIRE(approxEq<float, int>(*other1, *expectedOutput1, 0.0, 1.0e-12));
-        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other2 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv2")->getOperator())
+                ->getOutput(0);
         REQUIRE(approxEq<int>(*other2, *expectedOutput2, 0.0, 1.0e-12));
-        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
-        REQUIRE(approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
-        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other3 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv3")->getOperator())
+                ->getOutput(0);
+        REQUIRE(
+            approxEq<double, int>(*other3, *expectedOutput3, 0.0, 1.0e-12));
+        std::shared_ptr<Tensor> other4 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("fc")->getOperator())
+                ->getOutput(0);
         REQUIRE(approxEq<int>(*other4, expectedOutput4, 0.0, 1.0e-12));
     }
 }
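A note on the approxEq<T1, T2>(t1, t2, relative, absolute) checks used throughout: the semantics presumed here are a standard mixed absolute/relative elementwise tolerance, with the authoritative definition in aidge/utils/TensorUtils.hpp. A sketch under that assumption:

```cpp
// Presumed semantics of the elementwise check (see
// aidge/utils/TensorUtils.hpp for the actual definition):
// |a - b| <= absolute + relative * |b| must hold for every element.
#include <cmath>
#include <cstddef>

template <typename T1, typename T2>
bool approxEqSketch(const T1* a, const T2* b, std::size_t n,
                    double relative, double absolute) {
    for (std::size_t i = 0; i < n; ++i) {
        const double lhs = static_cast<double>(a[i]);
        const double rhs = static_cast<double>(b[i]);
        if (std::fabs(lhs - rhs) > absolute + relative * std::fabs(rhs))
            return false;
    }
    return true;
}
```

With relative = 0.0 and absolute = 1.0e-12, as in the REQUIREs above, this degenerates to near-exact equality, which integer arithmetic through the cast/move chain should preserve; the Half section instead uses relative = 1.0e-3 to absorb the Float16 rounding.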
diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index 78a10c308a60f026b83ea64cfbd25a848099eb90..7e2dd7a5227c132173525581e07c438bd08907f8 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -14,81 +14,84 @@
 #include <string>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
+#include "aidge/graph/Node.hpp"
 #include "aidge/graph/OpArgs.hpp"
 #include "aidge/operator/Memorize.hpp"
-#include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/scheduler/ParallelScheduler.hpp"
+#include "aidge/scheduler/SequentialScheduler.hpp"
 
 #include "aidge/backend/cpu.hpp"
 #include "aidge/recipes/GraphViewHelper.hpp"
 
-
 namespace Aidge {
 
 TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
-    std::shared_ptr<Tensor> inputTensor =
-            std::make_shared<Tensor>(Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
-                                                                 {5, 6, 7, 8, 9},
-                                                                 {10, 11, 12, 13, 14},
-                                                                 {15, 16, 17, 18, 19},
-                                                                 {20, 21, 22, 23, 24}}},
-                                                               {{{25, 26, 27, 28, 29},
-                                                                 {30, 31, 32, 33, 34},
-                                                                 {35, 36, 37, 38, 39},
-                                                                 {40, 41, 42, 43, 44},
-                                                                 {45, 46, 47, 48, 49}}}}});
-
-    std::shared_ptr<Tensor> weight1 = std::make_shared<Tensor>(
-            Array4D<int, 3, 1, 3, 3>{{{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
-                                      {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
-                                      {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
-
-    std::shared_ptr<Tensor> bias1 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+    std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>(
+        Array4D<int, 2, 1, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                    {5, 6, 7, 8, 9},
+                                    {10, 11, 12, 13, 14},
+                                    {15, 16, 17, 18, 19},
+                                    {20, 21, 22, 23, 24}}},
+                                  {{{25, 26, 27, 28, 29},
+                                    {30, 31, 32, 33, 34},
+                                    {35, 36, 37, 38, 39},
+                                    {40, 41, 42, 43, 44},
+                                    {45, 46, 47, 48, 49}}}}});
+
+    std::shared_ptr<Tensor> weight1 =
+        std::make_shared<Tensor>(Array4D<int, 3, 1, 3, 3>{
+            {{{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}},
+             {{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}}},
+             {{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}}}});
+
+    std::shared_ptr<Tensor> bias1 =
+        std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
 
     SECTION("Test Sequential graph") {
-        std::shared_ptr<GraphView> g =
-                Sequential({
-                    Conv(1, 3, {3, 3}, "conv1"),
-                    Conv(3, 4, {1, 1}, "conv2"),
-                    Conv(4, 3, {1, 1}, "conv3"),
-                    FC(27, 5, false, "fc")});
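+        // Plain four-layer chain: the 3x3 conv maps the 5x5 inputs to 3x3,
+        // the 1x1 convs keep that size, so the FC sees 3 * 3 * 3 = 27
+        // features per batch item.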
+        std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"),
+                                                   Conv(3, 4, {1, 1}, "conv2"),
+                                                   Conv(4, 3, {1, 1}, "conv3"),
+                                                   FC(27, 5, false, "fc")});
 
         g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
         g->getNode("conv1")->getOperator()->setInput(1, weight1);
         g->getNode("conv1")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> weight2 =
-                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
-                                                                   {{{4}}, {{5}}, {{6}}},
-                                                                   {{{7}}, {{8}}, {{9}}},
-                                                                   {{{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
+        std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>(
+            Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                      {{{4}}, {{5}}, {{6}}},
+                                      {{{7}}, {{8}}, {{9}}},
+                                      {{{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias2 =
+            std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
         g->getNode("conv2")->getOperator()->setInput(1, weight2);
         g->getNode("conv2")->getOperator()->setInput(2, bias2);
         // *(g->getNode("conv2")->getOperator()->input(1, weight2);
 
         std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
-                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
-                                          {{{5}}, {{6}}, {{7}}, {{8}}},
-                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+            Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
+                                      {{{5}}, {{6}}, {{7}}, {{8}}},
+                                      {{{9}}, {{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias3 =
+            std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
         g->getNode("conv3")->getOperator()->setInput(1, weight3);
         g->getNode("conv3")->getOperator()->setInput(2, bias3);
 
-        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
-                Array2D<int, 5, 27>{{{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
-                                     {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
-                                     {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
-                                      9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
-                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
-                                      6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
-                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
-                                      3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
-        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+        std::shared_ptr<Tensor> weightfc =
+            std::make_shared<Tensor>(Array2D<int, 5, 27>{
+                {{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                  15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
+                 {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                  12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+                 {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
+                  9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
+                 {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
+                  6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
+                 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
+                  3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
+        std::shared_ptr<Tensor> biasfc =
+            std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
         g->getNode("fc")->getOperator()->setInput(1, weightfc);
         g->getNode("fc")->getOperator()->setInput(2, biasfc);
 
@@ -100,101 +103,150 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
         REQUIRE_NOTHROW(scheduler.forward());
         scheduler.saveSchedulingDiagram("schedulingSequential");
 
-        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
-                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
-                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
-                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
-                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
-                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
-                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
-                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
-                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
-                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
-                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
-                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
-                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
-                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
-                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
-                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
-                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
-                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
-                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
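+        // Reference tensors for the conv1..conv3 outputs, precomputed for
+        // the fixed integer weights and inputs defined above.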
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<
+            Tensor>(Array4D<int, 2, 3, 3, 3>{
+            {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
+              {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
+              {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
+             {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
+              {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
+              {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935},
+                                        {10689, 11607, 12525},
+                                        {15279, 16197, 17115}},
+                                       {{13786, 15838, 17890},
+                                        {24046, 26098, 28150},
+                                        {34306, 36358, 38410}},
+                                       {{21473, 24659, 27845},
+                                        {37403, 40589, 43775},
+                                        {53333, 56519, 59705}},
+                                       {{29160, 33480, 37800},
+                                        {50760, 55080, 59400},
+                                        {72360, 76680, 81000}}},
+                                      {{{29049, 29967, 30885},
+                                        {33639, 34557, 35475},
+                                        {38229, 39147, 40065}},
+                                       {{65086, 67138, 69190},
+                                        {75346, 77398, 79450},
+                                        {85606, 87658, 89710}},
+                                       {{101123, 104309, 107495},
+                                        {117053, 120239, 123425},
+                                        {132983, 136169, 139355}},
+                                       {{137160, 141480, 145800},
+                                        {158760, 163080, 167400},
+                                        {180360, 184680, 189000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451},
+                                        {374031, 405891, 437751},
+                                        {533331, 565191, 597051}},
+                                       {{496804, 570568, 644332},
+                                        {865624, 939388, 1013152},
+                                        {1234444, 1308208, 1381972}},
+                                       {{778877, 894545, 1010213},
+                                        {1357217, 1472885, 1588553},
+                                        {1935557, 2051225, 2166893}}},
+                                      {{{1011231, 1043091, 1074951},
+                                        {1170531, 1202391, 1234251},
+                                        {1329831, 1361691, 1393551}},
+                                       {{2340904, 2414668, 2488432},
+                                        {2709724, 2783488, 2857252},
+                                        {3078544, 3152308, 3226072}},
+                                       {{3670577, 3786245, 3901913},
+                                        {4248917, 4364585, 4480253},
+                                        {4827257, 4942925, 5058593}}}}});
 
         Tensor expectedOutput4 = Array2D<int, 2, 5>{
-                {{205050376, 198925904, 181355097, 196978090, 238868348},
-                {598467376, 561797804, 560823897, 593043790, 698672948}}};
-        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+            {{205050376, 198925904, 181355097, 196978090, 238868348},
+             {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv1")->getOperator())
+                ->getOutput(0);
         bool equal1 = (*other1 == *expectedOutput1);
         REQUIRE(equal1);
-        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other2 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv2")->getOperator())
+                ->getOutput(0);
         bool equal2 = (*other2 == *expectedOutput2);
         REQUIRE(equal2);
-        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other3 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv3")->getOperator())
+                ->getOutput(0);
         bool equal3 = (*other3 == *expectedOutput3);
         REQUIRE(equal3);
-        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other4 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("fc")->getOperator())
+                ->getOutput(0);
         bool equal4 = (*other4 == expectedOutput4);
         REQUIRE(equal4);
     }
 
     SECTION("Test Parallel graph") {
-        std::shared_ptr<GraphView> g =
-                Sequential({Conv(1, 3, {3, 3}, "inputConv"),
-                            Parallel({
-                                Sequential({
-                                    Parallel({
-                                        Conv(3, 3, {1, 1}, "conv1.1"),
-                                        Conv(3, 3, {1, 1}, "conv1.2")}),
-                                    Add("add1")}),
-                                Conv(3, 3, {1, 1}, "conv1.3")}),
-                            Add("add2"),
-                            Conv(3, 2, {1, 1}, "conv2"),
-                            FC(18, 5, false, "out")});
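+        // Diamond topology: inputConv feeds (conv1.1 + conv1.2 -> add1) and
+        // conv1.3 in parallel; add2 merges both branches before conv2/out.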
+        std::shared_ptr<GraphView> g = Sequential(
+            {Conv(1, 3, {3, 3}, "inputConv"),
+             Parallel({Sequential({Parallel({Conv(3, 3, {1, 1}, "conv1.1"),
+                                             Conv(3, 3, {1, 1}, "conv1.2")}),
+                                   Add("add1")}),
+                       Conv(3, 3, {1, 1}, "conv1.3")}),
+             Add("add2"),
+             Conv(3, 2, {1, 1}, "conv2"),
+             FC(18, 5, false, "out")});
 
         g->getNode("inputConv")->getOperator()->setInput(0, inputTensor);
         g->getNode("inputConv")->getOperator()->setInput(1, weight1);
         g->getNode("inputConv")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> conv11Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{
-                {{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}, {{{7}}, {{8}}, {{9}}}}});
+        std::shared_ptr<Tensor> conv11Weight = std::make_shared<Tensor>(
+            Array4D<int, 3, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                      {{{4}}, {{5}}, {{6}}},
+                                      {{{7}}, {{8}}, {{9}}}}});
         g->getNode("conv1.1")->getOperator()->setInput(1, conv11Weight);
         g->getNode("conv1.1")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> conv12Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{
-                {{{{11}}, {{12}}, {{13}}}, {{{14}}, {{15}}, {{16}}}, {{{17}}, {{18}}, {{19}}}}});
+        std::shared_ptr<Tensor> conv12Weight = std::make_shared<Tensor>(
+            Array4D<int, 3, 3, 1, 1>{{{{{11}}, {{12}}, {{13}}},
+                                      {{{14}}, {{15}}, {{16}}},
+                                      {{{17}}, {{18}}, {{19}}}}});
         g->getNode("conv1.2")->getOperator()->setInput(1, conv12Weight);
         g->getNode("conv1.2")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> conv13Weight = std::make_shared<Tensor>(Array4D<int, 3, 3, 1, 1>{
-                {{{{21}}, {{22}}, {{23}}}, {{{24}}, {{25}}, {{26}}}, {{{27}}, {{28}}, {{29}}}}});
+        std::shared_ptr<Tensor> conv13Weight = std::make_shared<Tensor>(
+            Array4D<int, 3, 3, 1, 1>{{{{{21}}, {{22}}, {{23}}},
+                                      {{{24}}, {{25}}, {{26}}},
+                                      {{{27}}, {{28}}, {{29}}}}});
         g->getNode("conv1.3")->getOperator()->setInput(1, conv13Weight);
         g->getNode("conv1.3")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> conv2Weight = std::make_shared<Tensor>(
-                Array4D<int, 2, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}}});
-        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 2>{{1, 2}});
+        std::shared_ptr<Tensor> conv2Weight =
+            std::make_shared<Tensor>(Array4D<int, 2, 3, 1, 1>{
+                {{{{1}}, {{2}}, {{3}}}, {{{4}}, {{5}}, {{6}}}}});
+        std::shared_ptr<Tensor> bias2 =
+            std::make_shared<Tensor>(Array1D<int, 2>{{1, 2}});
         g->getNode("conv2")->getOperator()->setInput(1, conv2Weight);
         g->getNode("conv2")->getOperator()->setInput(2, bias2);
 
-        std::shared_ptr<Tensor> fcWeight = std::make_shared<Tensor>(
-                Array2D<int, 5, 18>{{{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3},
-                                     {4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1},
-                                     {2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4},
-                                     {5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2},
-                                     {3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}}});
-        std::shared_ptr<Tensor> fcBias = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
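+        // conv2 emits 2 channels of 3x3 maps, hence the 18-input FC below.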
+        std::shared_ptr<Tensor> fcWeight =
+            std::make_shared<Tensor>(Array2D<int, 5, 18>{
+                {{1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3},
+                 {4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1},
+                 {2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4},
+                 {5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2},
+                 {3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}}});
+        std::shared_ptr<Tensor> fcBias =
+            std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
         g->getNode("out")->getOperator()->setInput(1, fcWeight);
         g->getNode("out")->getOperator()->setInput(2, fcBias);
 
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
-                Array2D<int, 2, 5>{{{124324368, 130692907, 133325056, 125044620, 142843879},
-                                    {369195468, 394615207, 382643056, 379441320, 416291779}}});
+        std::shared_ptr<Tensor> expectedOutput =
+            std::make_shared<Tensor>(Array2D<int, 2, 5>{
+                {{124324368, 130692907, 133325056, 125044620, 142843879},
+                 {369195468, 394615207, 382643056, 379441320, 416291779}}});
 
         g->setBackend("cpu");
         g->setDataType(Aidge::DataType::Int32);
@@ -202,22 +254,21 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
         SequentialScheduler scheduler(g);
         REQUIRE_NOTHROW(scheduler.forward());
         scheduler.saveSchedulingDiagram("schedulingSequential");
-        std::shared_ptr<Tensor> result =
-                std::static_pointer_cast<Tensor>(g->getNode("out")->getOperator()->getRawOutput(0));
+        std::shared_ptr<Tensor> result = std::static_pointer_cast<Tensor>(
+            g->getNode("out")->getOperator()->getRawOutput(0));
         bool equal = (*result == *expectedOutput);
         REQUIRE(equal);
     }
 
-    SECTION("Test Residual graph") {
-    }
+    SECTION("Test Residual graph") {}
 
     SECTION("Test Recurrent graph (sequential)") {
         std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
+            Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
         std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
+            Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
         std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
+            Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
 
         auto add1 = Add("add1");
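+        // Memorize("mem1") carries state between scheduling steps; with end
+        // step 3 the loop accumulates the input three times before add2
+        // adds the bias, which matches the expected output below.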
         auto mem = Memorize(3, "mem1");
@@ -245,23 +296,22 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
         scheduler.saveSchedulingDiagram("schedulingRecurrent_seq");
 
         std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
-        std::shared_ptr<Tensor> result =
-                std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0));
+            Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
+        std::shared_ptr<Tensor> result = std::static_pointer_cast<Tensor>(
+            g->getNode("add2")->getOperator()->getRawOutput(0));
         result->print();
         expectedOutput->print();
         bool equal = (*result == *expectedOutput);
         REQUIRE(equal);
     }
 
-
     SECTION("Test Recurrent graph (parallel)") {
         std::shared_ptr<Tensor> in = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
+            Array2D<int, 2, 3>{{{1, 2, 3}, {4, 5, 6}}});
         std::shared_ptr<Tensor> initTensor = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
+            Array2D<int, 2, 3>{{{0, 0, 0}, {1, 1, 1}}});
         std::shared_ptr<Tensor> biasTensor = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
+            Array2D<int, 2, 3>{{{2, 0, 0}, {1, 0, 0}}});
 
         auto add1 = Add("add1");
         auto mem = Memorize(3, "mem1");
@@ -288,9 +338,9 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
         scheduler.saveSchedulingDiagram("schedulingRecurrent_par");
 
         std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(
-                Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
-        std::shared_ptr<Tensor> result =
-                std::static_pointer_cast<Tensor>(g->getNode("add2")->getOperator()->getRawOutput(0));
+            Array2D<int, 2, 3>{{{5, 6, 9}, {14, 16, 19}}});
+        std::shared_ptr<Tensor> result = std::static_pointer_cast<Tensor>(
+            g->getNode("add2")->getOperator()->getRawOutput(0));
         result->print();
         expectedOutput->print();
         bool equal = (*result == *expectedOutput);
@@ -298,54 +348,57 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
     }
 
     SECTION("Test ConnectInput graph") {
-        std::shared_ptr<GraphView> g =
-                Sequential({
-                    Conv(1, 3, {3, 3}, "conv1"),
-                    Conv(3, 4, {1, 1}, "conv2"),
-                    Conv(4, 3, {1, 1}, "conv3"),
-                    FC(27, 5, false, "fc")});
+        std::shared_ptr<GraphView> g = Sequential({Conv(1, 3, {3, 3}, "conv1"),
+                                                   Conv(3, 4, {1, 1}, "conv2"),
+                                                   Conv(4, 3, {1, 1}, "conv3"),
+                                                   FC(27, 5, false, "fc")});
 
         // g->getNode("conv1")->getOperator()->setInput(0, inputTensor);
         g->getNode("conv1")->getOperator()->setInput(1, weight1);
         g->getNode("conv1")->getOperator()->setInput(2, bias1);
 
-        std::shared_ptr<Tensor> weight2 =
-                std::make_shared<Tensor>(Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
-                                                                   {{{4}}, {{5}}, {{6}}},
-                                                                   {{{7}}, {{8}}, {{9}}},
-                                                                   {{{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias2 = std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
+        std::shared_ptr<Tensor> weight2 = std::make_shared<Tensor>(
+            Array4D<int, 4, 3, 1, 1>{{{{{1}}, {{2}}, {{3}}},
+                                      {{{4}}, {{5}}, {{6}}},
+                                      {{{7}}, {{8}}, {{9}}},
+                                      {{{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias2 =
+            std::make_shared<Tensor>(Array1D<int, 4>{{1, 2, 3, 4}});
         g->getNode("conv2")->getOperator()->setInput(1, weight2);
         g->getNode("conv2")->getOperator()->setInput(2, bias2);
         // *(g->getNode("conv2")->getOperator()->input(1, weight2);
 
         std::shared_ptr<Tensor> weight3 = std::make_shared<Tensor>(
-                Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
-                                          {{{5}}, {{6}}, {{7}}, {{8}}},
-                                          {{{9}}, {{10}}, {{11}}, {{12}}}}});
-        std::shared_ptr<Tensor> bias3 = std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
+            Array4D<int, 3, 4, 1, 1>{{{{{1}}, {{2}}, {{3}}, {{4}}},
+                                      {{{5}}, {{6}}, {{7}}, {{8}}},
+                                      {{{9}}, {{10}}, {{11}}, {{12}}}}});
+        std::shared_ptr<Tensor> bias3 =
+            std::make_shared<Tensor>(Array1D<int, 3>{{1, 2, 3}});
         g->getNode("conv3")->getOperator()->setInput(1, weight3);
         g->getNode("conv3")->getOperator()->setInput(2, bias3);
 
-        std::shared_ptr<Tensor> weightfc = std::make_shared<Tensor>(
-                Array2D<int, 5, 27>{{{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
-                                      15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
-                                     {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
-                                      12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
-                                     {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
-                                      9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
-                                     {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
-                                      6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
-                                     {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
-                                      3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
-        std::shared_ptr<Tensor> biasfc = std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
+        std::shared_ptr<Tensor> weightfc =
+            std::make_shared<Tensor>(Array2D<int, 5, 27>{
+                {{1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+                  15, 1, 2, 3, 4, 5, 6, 7, 8, 9,  10, 11, 12},
+                 {13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+                  12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+                 {10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5, 6, 7, 8,
+                  9,  10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6},
+                 {7, 8, 9, 10, 11, 12, 13, 14, 15, 1,  2, 3, 4, 5,
+                  6, 7, 8, 9,  10, 11, 12, 13, 14, 15, 1, 2, 3},
+                 {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2,
+                  3, 4, 5, 6, 7, 8, 9,  10, 11, 12, 13, 14, 15}}});
+        std::shared_ptr<Tensor> biasfc =
+            std::make_shared<Tensor>(Array1D<int, 5>{{1, 2, 3, 4, 5}});
         g->getNode("fc")->getOperator()->setInput(1, weightfc);
         g->getNode("fc")->getOperator()->setInput(2, biasfc);
 
         // input->addChild(g);
         g->setDataType(Aidge::DataType::Int32);
         g->setBackend("cpu");
-        std::vector<std::vector<Aidge::DimSize_t>> dims = {inputTensor->dims()};
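+        // conv1 input 0 is intentionally left unconnected in this section;
+        // the input tensor is expected to be supplied when the scheduler
+        // runs, so shapes are propagated from the explicit dims instead.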
+        std::vector<std::vector<Aidge::DimSize_t>> dims = {
+            inputTensor->dims()};
         g->forwardDims(dims);
         SequentialScheduler scheduler(g);
 
@@ -354,87 +407,132 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") {
 
         scheduler.saveSchedulingDiagram("schedulingSequential");
 
-        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
-                  {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
-                  {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
-                 {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
-                  {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
-                  {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(Array4D<int, 2, 4, 3, 3>{
-                {{{{6099, 7017, 7935}, {10689, 11607, 12525}, {15279, 16197, 17115}},
-                  {{13786, 15838, 17890}, {24046, 26098, 28150}, {34306, 36358, 38410}},
-                  {{21473, 24659, 27845}, {37403, 40589, 43775}, {53333, 56519, 59705}},
-                  {{29160, 33480, 37800}, {50760, 55080, 59400}, {72360, 76680, 81000}}},
-                 {{{29049, 29967, 30885}, {33639, 34557, 35475}, {38229, 39147, 40065}},
-                  {{65086, 67138, 69190}, {75346, 77398, 79450}, {85606, 87658, 89710}},
-                  {{101123, 104309, 107495}, {117053, 120239, 123425}, {132983, 136169, 139355}},
-                  {{137160, 141480, 145800}, {158760, 163080, 167400}, {180360, 184680, 189000}}}}});
-
-        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(Array4D<int, 2, 3, 3, 3>{
-                {{{{214731, 246591, 278451}, {374031, 405891, 437751}, {533331, 565191, 597051}},
-                  {{496804, 570568, 644332}, {865624, 939388, 1013152}, {1234444, 1308208, 1381972}},
-                  {{778877, 894545, 1010213}, {1357217, 1472885, 1588553}, {1935557, 2051225, 2166893}}},
-                 {{{1011231, 1043091, 1074951}, {1170531, 1202391, 1234251}, {1329831, 1361691, 1393551}},
-                  {{2340904, 2414668, 2488432}, {2709724, 2783488, 2857252}, {3078544, 3152308, 3226072}},
-                  {{3670577, 3786245, 3901913}, {4248917, 4364585, 4480253}, {4827257, 4942925, 5058593}}}}});
+        std::shared_ptr<Tensor> expectedOutput1 = std::make_shared<
+            Tensor>(Array4D<int, 2, 3, 3, 3>{
+            {{{{367, 412, 457}, {592, 637, 682}, {817, 862, 907}},
+              {{854, 980, 1106}, {1484, 1610, 1736}, {2114, 2240, 2366}},
+              {{1341, 1548, 1755}, {2376, 2583, 2790}, {3411, 3618, 3825}}},
+             {{{1492, 1537, 1582}, {1717, 1762, 1807}, {1942, 1987, 2032}},
+              {{4004, 4130, 4256}, {4634, 4760, 4886}, {5264, 5390, 5516}},
+              {{6516, 6723, 6930}, {7551, 7758, 7965}, {8586, 8793, 9000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput2 = std::make_shared<Tensor>(
+            Array4D<int, 2, 4, 3, 3>{{{{{6099, 7017, 7935},
+                                        {10689, 11607, 12525},
+                                        {15279, 16197, 17115}},
+                                       {{13786, 15838, 17890},
+                                        {24046, 26098, 28150},
+                                        {34306, 36358, 38410}},
+                                       {{21473, 24659, 27845},
+                                        {37403, 40589, 43775},
+                                        {53333, 56519, 59705}},
+                                       {{29160, 33480, 37800},
+                                        {50760, 55080, 59400},
+                                        {72360, 76680, 81000}}},
+                                      {{{29049, 29967, 30885},
+                                        {33639, 34557, 35475},
+                                        {38229, 39147, 40065}},
+                                       {{65086, 67138, 69190},
+                                        {75346, 77398, 79450},
+                                        {85606, 87658, 89710}},
+                                       {{101123, 104309, 107495},
+                                        {117053, 120239, 123425},
+                                        {132983, 136169, 139355}},
+                                       {{137160, 141480, 145800},
+                                        {158760, 163080, 167400},
+                                        {180360, 184680, 189000}}}}});
+
+        std::shared_ptr<Tensor> expectedOutput3 = std::make_shared<Tensor>(
+            Array4D<int, 2, 3, 3, 3>{{{{{214731, 246591, 278451},
+                                        {374031, 405891, 437751},
+                                        {533331, 565191, 597051}},
+                                       {{496804, 570568, 644332},
+                                        {865624, 939388, 1013152},
+                                        {1234444, 1308208, 1381972}},
+                                       {{778877, 894545, 1010213},
+                                        {1357217, 1472885, 1588553},
+                                        {1935557, 2051225, 2166893}}},
+                                      {{{1011231, 1043091, 1074951},
+                                        {1170531, 1202391, 1234251},
+                                        {1329831, 1361691, 1393551}},
+                                       {{2340904, 2414668, 2488432},
+                                        {2709724, 2783488, 2857252},
+                                        {3078544, 3152308, 3226072}},
+                                       {{3670577, 3786245, 3901913},
+                                        {4248917, 4364585, 4480253},
+                                        {4827257, 4942925, 5058593}}}}});
 
         Tensor expectedOutput4 = Array2D<int, 2, 5>{
-                {{205050376, 198925904, 181355097, 196978090, 238868348},
-                {598467376, 561797804, 560823897, 593043790, 698672948}}};
-        std::shared_ptr<Tensor> other1 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv1")->getOperator())->getOutput(0);
+            {{205050376, 198925904, 181355097, 196978090, 238868348},
+             {598467376, 561797804, 560823897, 593043790, 698672948}}};
+        std::shared_ptr<Tensor> other1 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv1")->getOperator())
+                ->getOutput(0);
         bool equal1 = (*other1 == *expectedOutput1);
         REQUIRE(equal1);
-        std::shared_ptr<Tensor> other2 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv2")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other2 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv2")->getOperator())
+                ->getOutput(0);
         bool equal2 = (*other2 == *expectedOutput2);
         REQUIRE(equal2);
-        std::shared_ptr<Tensor> other3 = std::static_pointer_cast<OperatorTensor>(g->getNode("conv3")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other3 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("conv3")->getOperator())
+                ->getOutput(0);
         bool equal3 = (*other3 == *expectedOutput3);
         REQUIRE(equal3);
-        std::shared_ptr<Tensor> other4 = std::static_pointer_cast<OperatorTensor>(g->getNode("fc")->getOperator())->getOutput(0);
+        std::shared_ptr<Tensor> other4 =
+            std::static_pointer_cast<OperatorTensor>(
+                g->getNode("fc")->getOperator())
+                ->getOutput(0);
         bool equal4 = (*other4 == expectedOutput4);
         REQUIRE(equal4);
     }
 }
 
-TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward]") {
+TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)",
+          "[scheduler][backward]") {
 
     // create GraphView
-    std::shared_ptr<GraphView> gv = Sequential({ReLU("relu0"), Sqrt("srqt0"), ReLU("relu1")});
-
-    std::shared_ptr<Tensor> inputTensor =
-            std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f,  1.0f,  2.0f,  3.0f,  4.0f},
-                                                                 {5.0f,  6.0f,  7.0f,  8.0f,  9.0f},
-                                                                {10.0f, 11.0f, 12.0f, 13.0f, 14.0f},
-                                                                {15.0f, 16.0f, 17.0f, 18.0f, 19.0f},
-                                                                {20.0f, 21.0f, 22.0f, 23.0f, 24.0f}}},
-                                                              {{{25.0f, 26.0f, 27.0f, 28.0f, 29.0f},
-                                                                {30.0f, 31.0f, 32.0f, 33.0f, 34.0f},
-                                                                {35.0f, 36.0f, 37.0f, 38.0f, 39.0f},
-                                                                {40.0f, 41.0f, 42.0f, 43.0f, 44.0f},
-                                                                {45.0f, 46.0f, 47.0f, 48.0f, 49.0f}}}}});
+    std::shared_ptr<GraphView> gv =
+        Sequential({ReLU("relu0"), Sqrt("sqrt0"), ReLU("relu1")});
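+    // With non-negative inputs both ReLUs are identities, so the forward
+    // pass reduces to an elementwise square root.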
+
+    std::shared_ptr<Tensor> inputTensor = std::make_shared<Tensor>(
+        Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 2.0f, 3.0f, 4.0f},
+                                      {5.0f, 6.0f, 7.0f, 8.0f, 9.0f},
+                                      {10.0f, 11.0f, 12.0f, 13.0f, 14.0f},
+                                      {15.0f, 16.0f, 17.0f, 18.0f, 19.0f},
+                                      {20.0f, 21.0f, 22.0f, 23.0f, 24.0f}}},
+                                    {{{25.0f, 26.0f, 27.0f, 28.0f, 29.0f},
+                                      {30.0f, 31.0f, 32.0f, 33.0f, 34.0f},
+                                      {35.0f, 36.0f, 37.0f, 38.0f, 39.0f},
+                                      {40.0f, 41.0f, 42.0f, 43.0f, 44.0f},
+                                      {45.0f, 46.0f, 47.0f, 48.0f, 49.0f}}}}});
     auto label = inputTensor;
     // implem already set to default
     auto myProd = Producer(inputTensor, "prod");
-    myProd -> addChild(gv);
-    gv -> compile("cpu", DataType::Float32);
+    myProd->addChild(gv);
+    gv->compile("cpu", DataType::Float32);
 
     SequentialScheduler scheduler(gv);
     scheduler.forward();
     auto outNode = gv->getOrderedOutputs()[0].first;
-    std::shared_ptr<Tensor> predictedOutput = std::dynamic_pointer_cast<OperatorTensor>(outNode->getOperator())->getOutput(0);
-    std::shared_ptr<Tensor> targetOutput =
-          std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 1.0f, 2.0f, 2.0f},
-                                                                 {2.0f, 2.0f, 3.0f, 3.0f, 3.0f},
-                                                                 {3.0f, 3.0f, 3.0f, 4.0f, 4.0f},
-                                                                 {4.0f, 4.0f, 4.0f, 4.0f, 4.0f},
-                                                                 {4.0f, 5.0f, 5.0f, 5.0f, 5.0f}}},
-                                                               {{{5.0f, 5.0f, 5.0f, 5.0f, 5.0f},
-                                                                 {5.0f, 6.0f, 6.0f, 6.0f, 6.0f},
-                                                                 {6.0f, 6.0f, 6.0f, 6.0f, 6.0f},
-                                                                 {6.0f, 6.0f, 6.0f, 7.0f, 7.0f},
-                                                                 {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}});
+    std::shared_ptr<Tensor> predictedOutput =
+        std::dynamic_pointer_cast<OperatorTensor>(outNode->getOperator())
+            ->getOutput(0);
+    std::shared_ptr<Tensor> targetOutput = std::make_shared<Tensor>(
+        Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 1.0f, 2.0f, 2.0f},
+                                      {2.0f, 2.0f, 3.0f, 3.0f, 3.0f},
+                                      {3.0f, 3.0f, 3.0f, 4.0f, 4.0f},
+                                      {4.0f, 4.0f, 4.0f, 4.0f, 4.0f},
+                                      {4.0f, 5.0f, 5.0f, 5.0f, 5.0f}}},
+                                    {{{5.0f, 5.0f, 5.0f, 5.0f, 5.0f},
+                                      {5.0f, 6.0f, 6.0f, 6.0f, 6.0f},
+                                      {6.0f, 6.0f, 6.0f, 6.0f, 6.0f},
+                                      {6.0f, 6.0f, 6.0f, 7.0f, 7.0f},
+                                      {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}});
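+    // Seed the backward pass by using the target tensor directly as the
+    // output gradient, then check that backward() runs without throwing.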
     predictedOutput->setGrad(targetOutput);
     REQUIRE_NOTHROW(scheduler.backward());
 }