diff --git a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
index 42505d385fde7e72e09531f1607287ffc6978f75..8ec1669b92a5116999413cf55a8c5113363ef330 100644
--- a/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
+++ b/unit_tests/operator/Test_ConstantOfShapeImpl.cpp
@@ -9,32 +9,27 @@
  *
  ********************************************************************************/
 
-#include <algorithm>
-#include <chrono>
-#include <cmath>
-#include <cstddef> // std::size_t
-#include <cstdint> // std::uint16_t
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::int64_t, std::uint16_t
 #include <memory>
-#include <numeric> // std::accumulate
-#include <ostream>
-#include <random> // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
 
-#include "catch2/internal/catch_compiler_capabilities.hpp"
-#include "catch2/internal/catch_enforce.hpp"
 #include <catch2/catch_test_macros.hpp>
 #include <catch2/generators/catch_generators_random.hpp>
 
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
+#include "aidge/filler/Filler.hpp"
 #include "aidge/operator/ConstantOfShape.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
-#include <aidge/data/Data.hpp>
-#include <aidge/data/half.hpp>
-#include <aidge/filler/Filler.hpp>
-#include <aidge/operator/OperatorTensor.hpp>
-#include <aidge/operator/Reshape.hpp>
-#include <aidge/utils/TensorUtils.hpp>
-#include <aidge/utils/Types.h>
+#include "aidge/utils/Types.h"
 
 namespace Aidge {
 TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
@@ -62,7 +57,7 @@ TEST_CASE("[cpu/operator] ConstantOfShape", "[ConstantOfShape][CPU]") {
     result->setDataType(DataType::Int64);
     result->setBackend("cpu");
     for (DimSize_t i = 0; i < result->size(); ++i) {
-      result->set<int64_t>(i, input_tensor_values_dist(gen));
+      result->set<std::int64_t>(i, input_tensor_values_dist(gen));
     }
     return result;
   };
diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp
index 5d7dfdf12032d4c444e38cda6d2a4298fc552b14..b03fe4aa91e96299f2a748026ee8ca5e5d57fb5c 100644
--- a/unit_tests/operator/Test_DivImpl.cpp
+++ b/unit_tests/operator/Test_DivImpl.cpp
@@ -9,17 +9,26 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
-#include <chrono>
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <functional>  // std::multiplies
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
 
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/DivImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Div.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
 namespace Aidge {
@@ -117,8 +126,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
 
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -212,8 +221,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
@@ -308,8 +317,8 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
     }
 }
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
index d6e934b4dc8d84e8a595eb74d1af9d2c68c892d1..daef47b32ffcca880a1bf2438e9ee9c35adbb2c8 100644
--- a/unit_tests/operator/Test_MatMulImpl.cpp
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -9,21 +9,26 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
-#include <cstddef>  // std::size_t
-#include <cstdint>  // std::uint16_t
-#include <chrono>
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock, std::chrono::duration
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
 #include <memory>
-#include <random>   // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
 
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/MatMul.hpp"
 #include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
-#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
-
 namespace Aidge {
 
 TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
@@ -106,8 +111,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             delete[] bigArray2;
             delete[] res;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
-        std::cout << "total time: " << duration.count() << std::endl;
+        fmt::print("INFO: number of multiplications over time spent: {}\n", (totalComputation / duration.count()));
+        fmt::print("INFO: total time: {} μs\n", duration.count());
     }
 
     SECTION("3-D Tensors") {
@@ -174,8 +179,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             delete[] bigArray2;
             delete[] res;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
-        std::cout << "total time: " << duration.count() << std::endl;
+        fmt::print("INFO: number of multiplications over time spent: {}\n", (totalComputation / duration.count()));
+        fmt::print("INFO: total time: {} μs\n", duration.count());
     }
 
     SECTION("4-D Tensors") {
@@ -244,8 +249,8 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             delete[] bigArray2;
             delete[] res;
         }
-        std::cout << "multiplications over time spent: " << totalComputation/duration.count() << std::endl;
-        std::cout << "total time: " << duration.count() << std::endl;
+        fmt::print("INFO: number of multiplications over time spent: {}\n", (totalComputation / duration.count()));
+        fmt::print("INFO: total time: {} μs\n", duration.count());
     }
 
     SECTION("+2-D / 1-D") {
diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp
index 3378861d0d3d7e74e7867c2765a0b09069fa8caf..925b9f2059518d434b74a0e2fd0cde79b334c54e 100644
--- a/unit_tests/operator/Test_MulImpl.cpp
+++ b/unit_tests/operator/Test_MulImpl.cpp
@@ -9,351 +9,338 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
-#include <chrono>
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <functional>  // std::multiplies
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
 
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/MulImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Mul.hpp"
+#include "aidge/utils/ArrayHelpers.hpp"
+#include "aidge/utils/Log.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
 namespace Aidge {
 
-    TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
-    {
-        std::shared_ptr<Node> myMul = Mul();
-        auto op = std::static_pointer_cast<OperatorTensor>(myMul->getOperator());
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
+TEST_CASE("[CPU/Operator] Mul Backward", "[Mul][CPU][Backward]")
+{
+    using aif32 = cpptype_t<DataType::Float32>;
+    std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>();
+    op->setDataType(DataType::Float32);
+    op->setBackend("cpu");
 
-        SECTION("Case 1: 2D and 1D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array2D<float,2,3>(
+    SECTION("Case 1: 2D and 1D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array2D<aif32,2,3>(
+            {
                 {
-                    {
-                        {1,2,3},{4,5,6}
-                    }
+                    {1,2,3},{4,5,6}
                 }
-            ));
-
-            const auto T1 = std::make_shared<Tensor>(Array1D<float,3>(
-                {0.1,0.2,0.3}
-            ));
+            }
+        ));
 
-            T0->setDataType(DataType::Float32);
-            T0->setBackend("cpu");
-            T1->setDataType(DataType::Float32);
-            T1->setBackend("cpu");
+        const auto T1 = std::make_shared<Tensor>(Array1D<aif32,3>(
+            {0.1,0.2,0.3}
+        ));
 
-            op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<float,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}})));
+        op->getOutput(0)->setGrad(std::make_shared<Tensor>(Array2D<aif32,2,3>({{{1.0,1.0,1.0},{1.0,1.0,1.0}}})));
 
-            op->associateInput(0,T0);
-            op->associateInput(1,T1);
-            op->forwardDims();
+        op->associateInput(0,T0);
+        op->associateInput(1,T1);
+        op->forwardDims();
 
-            myMul->forward();
-            myMul->backward();
+        op->forward();
+        op->backward();
 
-            auto T0Grad = std::make_shared<Tensor>(Array2D<float, 2,3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}}));
-            auto T1Grad = std::make_shared<Tensor>(Array1D<float, 3>({5,7,9}));
+        const Tensor T0Grad = Array2D<aif32, 2, 3>({{{0.1,0.2,0.3},{0.1, 0.2, 0.3}}});
+        const Tensor T1Grad = Array1D<aif32, 3>({5,7,9});
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *T0Grad));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *T1Grad));
-        }
+        REQUIRE(approxEq<aif32>(*(op->getInput(0)->grad()), T0Grad));
+        REQUIRE(approxEq<aif32>(*(op->getInput(1)->grad()), T1Grad));
+    }
 
-        SECTION("Case 2: 3D and 1D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
+    SECTION("Case 2: 3D and 1D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array3D<aif32,2,2,3>(
+            {
                 {
                     {
-                        {
-                            {1.0, 2.0, 3.0},
-                            {4.0, 5.0, 6.0}
-                        },
-                        {
-                            {7.0, 8.0, 9.0},
-                            {10.0, 11.0, 12.0}
-                        }
+                        {1.0, 2.0, 3.0},
+                        {4.0, 5.0, 6.0}
+                    },
+                    {
+                        {7.0, 8.0, 9.0},
+                        {10.0, 11.0, 12.0}
                     }
                 }
-            ));
-
-            const auto T1 = std::make_shared<Tensor>(Array1D<float, 3>({0.3,0.2,0.1}));
+            }
+        ));
 
-            const auto newGrad = std::make_shared<Tensor>(Array3D<float,2,2,3>(
-                    {
-                        {
-                            {
-                                {1, 1, 1},
-                                {1, 1, 1}
-                            },
-                            {
-                                {1, 1, 1},
-                                {1, 1, 1}
-                            }
-                        }
-                    }
-                ));
+        const auto T1 = std::make_shared<Tensor>(Array1D<aif32, 3>({0.3,0.2,0.1}));
 
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,2,3>(
+        const auto newGrad = std::make_shared<Tensor>(Array3D<aif32,2,2,3>(
                 {
                     {
                         {
-                            {0.3, 0.2, 0.1},
-                            {0.3, 0.2, 0.1}
+                            {1, 1, 1},
+                            {1, 1, 1}
                         },
                         {
-                            {0.3, 0.2, 0.1},
-                            {0.3, 0.2, 0.1}
+                            {1, 1, 1},
+                            {1, 1, 1}
                         }
                     }
                 }
             ));
 
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float,3>(
-                {22.0, 26.0, 30.0}
-            ));
-
-            for(auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
+        const Tensor expectedGrad0 = Array3D<aif32,2,2,3>(
             {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
+                {
+                    {
+                        {0.3, 0.2, 0.1},
+                        {0.3, 0.2, 0.1}
+                    },
+                    {
+                        {0.3, 0.2, 0.1},
+                        {0.3, 0.2, 0.1}
+                    }
+                }
             }
+        );
 
-            op->associateInput(0, T0);
-            op->associateInput(1, T1);
-            op->getOutput(0)->setGrad(newGrad);
-            op->forwardDims();
+        const Tensor expectedGrad1 = Array1D<aif32,3>(
+            {22.0, 26.0, 30.0}
+        );
 
-            myMul->backward();
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
-        }
+        op->backward();
+
+        REQUIRE(approxEq<aif32>(*(op->getInput(0)->grad()), expectedGrad0));
+        REQUIRE(approxEq<aif32>(*(op->getInput(1)->grad()), expectedGrad1));
+    }
 
-        SECTION("Case 3: 4D and 2D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
+    SECTION("Case 3: 4D and 2D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array4D<aif32,2, 2, 3, 3>(
+            {
                 {
                     {
                         {
-                            {
-                                {1.0, 2.0, 3.0},
-                                {4.0, 5.0, 6.0},
-                                {7.0, 8.0, 9.0}
-                            },
-                            {
-                                {10.0, 11.0, 12.0},
-                                {13.0, 14.0, 15.0},
-                                {16.0, 17.0, 18.0}
-                            }
+                            {1.0, 2.0, 3.0},
+                            {4.0, 5.0, 6.0},
+                            {7.0, 8.0, 9.0}
                         },
                         {
-                            {
-                                {19.0, 20.0, 21.0},
-                                {22.0, 23.0, 24.0},
-                                {25.0, 26.0, 27.0}
-                            },
-                            {
-                                {28.0, 29.0, 30.0},
-                                {31.0, 32.0, 33.0},
-                                {34.0, 35.0, 36.0}
-                            }
+                            {10.0, 11.0, 12.0},
+                            {13.0, 14.0, 15.0},
+                            {16.0, 17.0, 18.0}
+                        }
+                    },
+                    {
+                        {
+                            {19.0, 20.0, 21.0},
+                            {22.0, 23.0, 24.0},
+                            {25.0, 26.0, 27.0}
+                        },
+                        {
+                            {28.0, 29.0, 30.0},
+                            {31.0, 32.0, 33.0},
+                            {34.0, 35.0, 36.0}
                         }
                     }
                 }
-            ));
+            }
+        ));
 
-            const auto T1 = std::make_shared<Tensor>(Array2D<float, 3,3>(
+        const auto T1 = std::make_shared<Tensor>(Array2D<aif32, 3,3>(
+            {
                 {
-                    {
-                        {0.5,0.3,0.1},
-                        {0.4,0.2,0.6},
-                        {0.7,0.8,0.9}
-                    }
+                    {0.5,0.3,0.1},
+                    {0.4,0.2,0.6},
+                    {0.7,0.8,0.9}
                 }
-            ));
+            }
+        ));
 
-            const auto newGrad = std::make_shared<Tensor>(Array4D<float,2, 2, 3, 3>(
+        const auto newGrad = std::make_shared<Tensor>(Array4D<aif32,2, 2, 3, 3>(
+            {
                 {
                     {
                         {
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            },
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            }
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0}
                         },
                         {
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            },
-                            {
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0},
-                                {1.0, 1.0, 1.0}
-                            }
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0}
                         }
-                    }
-                }
-            ));
-
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array4D<float,2,2,3,3>(
-                {
+                    },
                     {
                         {
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            },
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            }
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0}
                         },
                         {
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            },
-                            {
-                                {0.5, 0.3, 0.1},
-                                {0.4, 0.2, 0.6},
-                                {0.7, 0.8, 0.9}
-                            }
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0},
+                            {1.0, 1.0, 1.0}
                         }
                     }
                 }
-            ));
+            }
+        ));
 
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 3>(
+        const Tensor expectedGrad0 = Array4D<aif32,2,2,3,3>(
+            {
                 {
                     {
-                        {58.0, 62.0, 66.0},
-                        {70.0, 74.0, 78.0},
-                        {82.0, 86.0, 90.0}
+                        {
+                            {0.5, 0.3, 0.1},
+                            {0.4, 0.2, 0.6},
+                            {0.7, 0.8, 0.9}
+                        },
+                        {
+                            {0.5, 0.3, 0.1},
+                            {0.4, 0.2, 0.6},
+                            {0.7, 0.8, 0.9}
+                        }
+                    },
+                    {
+                        {
+                            {0.5, 0.3, 0.1},
+                            {0.4, 0.2, 0.6},
+                            {0.7, 0.8, 0.9}
+                        },
+                        {
+                            {0.5, 0.3, 0.1},
+                            {0.4, 0.2, 0.6},
+                            {0.7, 0.8, 0.9}
+                        }
                     }
                 }
-            ));
+            }
+        );
 
-            for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
+        const Tensor expectedGrad1 = Array2D<aif32,3, 3>(
             {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
+                {
+                    {58.0, 62.0, 66.0},
+                    {70.0, 74.0, 78.0},
+                    {82.0, 86.0, 90.0}
+                }
             }
+        );
 
-            op->associateInput(0, T0);
-            op->associateInput(1, T1);
-            op->getOutput(0)->setGrad(newGrad);
-            op->forwardDims();
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
 
-            myMul->backward();
+        op->backward();
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
-        }
+        REQUIRE(approxEq<aif32>(*(op->getInput(0)->grad()), expectedGrad0));
+        REQUIRE(approxEq<aif32>(*(op->getInput(1)->grad()), expectedGrad1));
+    }
 
-        SECTION("Case 4: 3D and 2D tensors") {
-            const auto T0 = std::make_shared<Tensor>(Array3D<float, 2, 3, 4>(
+    SECTION("Case 4: 3D and 2D tensors") {
+        const auto T0 = std::make_shared<Tensor>(Array3D<aif32, 2, 3, 4>(
+            {
                 {
                     {
-                        {
-                            {1.0, 2.0, 3.0, 4.0},
-                            {5.0, 6.0, 7.0, 8.0},
-                            {9.0, 10.0, 11.0, 12.0},
-                        },
-                        {
-                            {13.0, 14.0, 15.0, 16.0},
-                            {17.0, 18.0, 19.0, 20.0},
-                            {21.0, 22.0, 23.0, 24.0},
-                        }
-                    }
-                }
-            ));
-
-            const auto T1 = std::make_shared<Tensor>(Array2D<float, 3, 4>(
-                {
+                        {1.0, 2.0, 3.0, 4.0},
+                        {5.0, 6.0, 7.0, 8.0},
+                        {9.0, 10.0, 11.0, 12.0},
+                    },
                     {
-                        {0.1, 0.2, 0.3, 0.4},
-                        {0.5, 0.6, 0.7, 0.8},
-                        {0.9, 1.0, 1.1, 1.2}
+                        {13.0, 14.0, 15.0, 16.0},
+                        {17.0, 18.0, 19.0, 20.0},
+                        {21.0, 22.0, 23.0, 24.0},
                     }
                 }
-            ));
+            }
+        ));
 
-            const auto newGrad = std::make_shared<Tensor>(Array3D<float, 2,3,4>(
+        const auto T1 = std::make_shared<Tensor>(Array2D<aif32, 3, 4>(
+            {
                 {
-                    {
-                        {
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                        },
-                        {
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                            {1.0, 1.0, 1.0, 1.0},
-                        }
-                    }
+                    {0.1, 0.2, 0.3, 0.4},
+                    {0.5, 0.6, 0.7, 0.8},
+                    {0.9, 1.0, 1.1, 1.2}
                 }
-            ));
+            }
+        ));
 
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float,2,3,4>(
+        const auto newGrad = std::make_shared<Tensor>(Array3D<aif32, 2,3,4>(
+            {
                 {
                     {
-                        {
-                            {0.1, 0.2, 0.3, 0.4},
-                            {0.5, 0.6, 0.7, 0.8},
-                            {0.9, 1.0, 1.1, 1.2}
-                        },
-                        {
-                            {0.1, 0.2, 0.3, 0.4},
-                            {0.5, 0.6, 0.7, 0.8},
-                            {0.9, 1.0, 1.1, 1.2}
-                        }
+                        {1.0, 1.0, 1.0, 1.0},
+                        {1.0, 1.0, 1.0, 1.0},
+                        {1.0, 1.0, 1.0, 1.0},
+                    },
+                    {
+                        {1.0, 1.0, 1.0, 1.0},
+                        {1.0, 1.0, 1.0, 1.0},
+                        {1.0, 1.0, 1.0, 1.0},
                     }
                 }
-            ));
+            }
+        ));
 
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array2D<float,3, 4>(
+        const Tensor expectedGrad0 = Array3D<aif32,2,3,4>(
+            {
                 {
                     {
-                        {14.0, 16.0, 18.0, 20.0},
-                        {22.0, 24.0, 26.0, 28.0},
-                        {30.0, 32.0, 34.0, 36.0}
+                        {0.1, 0.2, 0.3, 0.4},
+                        {0.5, 0.6, 0.7, 0.8},
+                        {0.9, 1.0, 1.1, 1.2}
+                    },
+                    {
+                        {0.1, 0.2, 0.3, 0.4},
+                        {0.5, 0.6, 0.7, 0.8},
+                        {0.9, 1.0, 1.1, 1.2}
                     }
                 }
-            ));
+            }
+        );
 
-            for(const auto T: {T0, T1, newGrad, expectedGrad0, expectedGrad1})
+        const Tensor expectedGrad1 = Array2D<aif32,3,4>(
             {
-                T->setBackend("cpu") ;
-                T->setDataType(DataType::Float32);
+                {
+                    {14.0, 16.0, 18.0, 20.0},
+                    {22.0, 24.0, 26.0, 28.0},
+                    {30.0, 32.0, 34.0, 36.0}
+                }
             }
+        );
 
-            op->associateInput(0, T0);
-            op->associateInput(1, T1);
-            op->getOutput(0)->setGrad(newGrad);
-            op->forwardDims();
+        op->associateInput(0, T0);
+        op->associateInput(1, T1);
+        op->getOutput(0)->setGrad(newGrad);
+        op->forwardDims();
 
-            myMul->backward();
+        op->backward();
 
-            REQUIRE(approxEq<float>(*(op->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(op->getInput(1)->grad()), *expectedGrad1));
-        }
+        REQUIRE(approxEq<aif32>(*(op->getInput(0)->grad()), expectedGrad0));
+        REQUIRE(approxEq<aif32>(*(op->getInput(1)->grad()), expectedGrad1));
     }
+}
 
 TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
     constexpr std::uint16_t NBTRIALS = 10;
@@ -366,8 +353,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
     std::uniform_int_distribution<int> boolDist(0,1);
 
     // Create MatMul Operator
-    std::shared_ptr<Node> myMul = Mul();
-    auto op = std::static_pointer_cast<OperatorTensor>(myMul-> getOperator());
+    std::shared_ptr<Mul_Op> op = std::make_shared<Mul_Op>();
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
 
@@ -441,7 +427,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
 
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
-                myMul->forward();
+                op->forward();
                 end = std::chrono::system_clock::now();
                 duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
@@ -451,8 +437,8 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 delete[] array1;
                 delete[] result;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "μs" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
 
 
@@ -568,7 +554,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
-                myMul->forward();
+                op->forward();
                 end = std::chrono::system_clock::now();
                 duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
@@ -582,8 +568,8 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
@@ -663,7 +649,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 // compute result
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
-                myMul->forward();
+                op->forward();
                 end = std::chrono::system_clock::now();
                 duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
@@ -678,8 +664,8 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
     }
 }
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
index cb5d8872c9c7242bb4aa4efca388d53b578417f9..8238da3970740f4b8d6095d7a28c000319ea004e 100644
--- a/unit_tests/operator/Test_PowImpl.cpp
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -9,18 +9,26 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
-#include <cmath>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
-#include <chrono>
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock, std::chrono::duration
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <functional>  // std::multiplies
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
 
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/PowImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Pow.hpp"
+#include "aidge/utils/ArrayHelpers.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
 namespace Aidge {
@@ -118,8 +126,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
 
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -213,8 +221,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
@@ -309,8 +317,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
     }
 
@@ -440,7 +448,7 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                     }
                 }
             ));
-            const auto expectedGrad0 = std::make_shared<Tensor>(Array3D<float, 2, 2, 3>(
+            const Tensor expectedGrad0 = Array3D<float, 2, 2, 3>(
                 {
                     {
                         {
@@ -453,18 +461,13 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                         }
                     }
                 }
-            ));
-            const auto expectedGrad1 = std::make_shared<Tensor>(Array1D<float, 3>(
+            );
+            const Tensor expectedGrad1 = Array1D<float, 3>(
                 {
                     {14.14779854, 22.99299049, 33.56402588}
                 }
-            ));
+            );
 
-            for(const auto T: {input0, input1, gradOut, expectedGrad0, expectedGrad1})
-            {
-                    T->setBackend("cpu") ;
-                    T->setDataType(DataType::Float32);
-            }
             std::shared_ptr<Node> powOp = Pow();
             auto opr = std::static_pointer_cast<OperatorTensor>(powOp-> getOperator());
             opr->setDataType(DataType::Float32);
@@ -475,8 +478,8 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
             powOp->forward();
 
             powOp->backward();
-            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), *expectedGrad0));
-            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), *expectedGrad1));
+            REQUIRE(approxEq<float>(*(opr->getInput(0)->grad()), expectedGrad0));
+            REQUIRE(approxEq<float>(*(opr->getInput(1)->grad()), expectedGrad1));
         }
     }
 }
diff --git a/unit_tests/operator/Test_RoundImpl.cpp b/unit_tests/operator/Test_RoundImpl.cpp
index b4cf9ffbedc18b35b42ebbc05971f86e0fa584e3..8b5dd53a79242a38063f178807d5b6b40f2c0e96 100644
--- a/unit_tests/operator/Test_RoundImpl.cpp
+++ b/unit_tests/operator/Test_RoundImpl.cpp
@@ -9,15 +9,23 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
-#include <chrono>
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock, std::chrono::duration
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <functional>  // std::multiplies
 #include <memory>
-#include <numeric>   
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
-#include <iomanip>
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
+
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/RoundImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Round.hpp"
 #include "aidge/utils/TensorUtils.hpp"
@@ -29,7 +37,7 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") {
     // Create a random number generator
     std::random_device rd;
     std::mt19937 gen(rd());
-    std::uniform_real_distribution<float> valueDist(-15, 15); 
+    std::uniform_real_distribution<float> valueDist(-15, 15);
     std::uniform_int_distribution<std::size_t> dimSizeDist(std::size_t(2), std::size_t(5));
     std::uniform_int_distribution<std::size_t> nbDimsDist(std::size_t(1), std::size_t(3));
 
@@ -59,7 +67,7 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") {
             std::size_t number_of_operation = 0;
 
             for (std::uint16_t trial = 0; trial < NBTRIALS; ++trial) {
-                
+
                 // generate 2 random Tensors
                 const std::size_t nbDims = nbDimsDist(gen);
                 std::vector<std::size_t> dims;
@@ -72,7 +80,7 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") {
                 // without broadcasting
                 float* array0 = new float[nb_elements];
                 float* result = new float[nb_elements];
-                
+
                 for (std::size_t i = 0; i < nb_elements; ++i) {
                     array0[i] = valueDist(gen);
                     result[i] = std::nearbyint(array0[i]);
@@ -86,29 +94,22 @@ TEST_CASE("[cpu/operator] Round_Test", "[Round][CPU]") {
                 // results
                 Tres->resize(dims);
                 Tres -> getImpl() -> setRawPtr(result, nb_elements);
-                
+
                 op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myRound->forward();
                 end = std::chrono::system_clock::now();
                 duration += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
 
-                bool is_eq = approxEq<float>(*(op->getOutput(0)), *Tres);
-
-                auto Output = *(op->getOutput(0));
-                
-                auto prt = Output.getImpl()->rawPtr();
-
-                REQUIRE(is_eq);
-                
+                REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
 
                 delete[] array0;
                 delete[] result;
 
 
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {} μs\n", duration.count());
         }
     }
 } // namespace Aidge
diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp
index 44666ae631152c8898e24f7003b0c2ede8c67b84..471ae560a35b480945d7e5c85fb93bbbc8d459f6 100644
--- a/unit_tests/operator/Test_SubImpl.cpp
+++ b/unit_tests/operator/Test_SubImpl.cpp
@@ -9,17 +9,26 @@
  *
  ********************************************************************************/
 
-#include <catch2/catch_test_macros.hpp>
-#include <cstddef>   // std::size_t
-#include <cstdint>   // std::uint16_t
-#include <chrono>
-#include <iostream>
+#include <chrono>      // std::micro, std::chrono::time_point,
+                       // std::chrono::system_clock
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::uint16_t
+#include <functional>  // std::multiplies
 #include <memory>
-#include <numeric>   // std::accumulate
-#include <random>    // std::random_device, std::mt19937, std::uniform_real_distribution
+#include <numeric>     // std::accumulate
+#include <random>      // std::random_device, std::mt19937
+                       // std::uniform_int_distribution, std::uniform_real_distribution
+#include <vector>
+
+#include <catch2/catch_test_macros.hpp>
+#include <fmt/core.h>
 
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
+#include "aidge/data/Data.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/Sub.hpp"
+#include "aidge/operator/OperatorTensor.hpp"
 #include "aidge/utils/TensorUtils.hpp"
 
 namespace Aidge {
@@ -117,8 +126,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
 
                 // with broadcasting
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {}μs\n", duration.count());
         }
 
         SECTION("+1-D Tensor / +1-D Tensor - broadcasting") {
@@ -212,8 +221,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 const std::size_t nb_elements = std::accumulate(dimsOut.cbegin(), dimsOut.cend(), std::size_t(1), std::multiplies<std::size_t>());
                 number_of_operation += nb_elements;
             }
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {}μs\n", duration.count());
         }
         SECTION("+1-D Tensor / 1-D Tensor") {
             std::size_t number_of_operation = 0;
@@ -308,8 +317,8 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 number_of_operation += nb_elements;
             }
 
-            std::cout << "number of elements over time spent: " << (number_of_operation / duration.count())<< std::endl;
-            std::cout << "total time: " << duration.count() << "Î¼s" << std::endl;
+            fmt::print("INFO: number of elements over time spent: {}\n", (number_of_operation / duration.count()));
+            fmt::print("INFO: total time: {}μs\n", duration.count());
         }
     }
 }