diff --git a/README.md b/README.md index 74eb50826bf6f88a0ded363138adba04827390d0..865cb08a17ebf8638cb2ac56773a4f464860b8ae 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,11 @@ So far be sure to have the correct requirements to use this library ## Pip installation -You will need to install first the aidge_core library before installing aidge_cpu. -Also, make sure that the install path was set before installing aidge_core library. -Then run in your python environnement : +You will first need to install the ``aidge_core`` library before installing ``aidge_backend_cpu``. + +If you have set a custom install path for the ``aidge_core`` library, make sure to use the same one here. + +Then run in your Python environment: ``` bash pip install . -v ``` @@ -46,4 +48,4 @@ Important: this command can also be run with `make`. To compile the CPU library with the python binding + the associated unitary tests, run ``` make cpu_with_pybind_tests -``` \ No newline at end of file +``` diff --git a/aidge_backend_cpu/unit_tests/test_recipies.py b/aidge_backend_cpu/unit_tests/test_recipies.py new file mode 100644 index 0000000000000000000000000000000000000000..60949adf245f4f4a7ed316879fb307131f70739a --- /dev/null +++ b/aidge_backend_cpu/unit_tests/test_recipies.py @@ -0,0 +1,77 @@ +""" +Copyright (c) 2023 CEA-List + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. + +SPDX-License-Identifier: EPL-2.0 +""" + +import unittest +import aidge_core +import aidge_backend_cpu + +from functools import reduce +import numpy as np + +class test_recipies(unittest.TestCase): + def setUp(self): + pass + + def tearDown(self): + pass + + def test_fuse_batchnorm(self): + dims = [1, 1, 10, 10] + size = reduce((lambda x, y: x*y), dims) + + input_data = np.arange(size).reshape(dims).astype(np.float32) + input_tensor = aidge_core.Tensor(input_data) + + input_node = aidge_core.Producer(input_tensor, "X") + conv = aidge_core.Conv2D(1, 1, [3, 3], name="Conv0") + bn = aidge_core.BatchNorm2D(name="BN0") + + graph_view = aidge_core.sequential([conv, bn]) + + # Set non-trivial values for the Conv and BatchNorm parameters + input_node.add_child(graph_view) + input_node.get_operator().set_datatype(aidge_core.DataType.Float32) + input_node.get_operator().set_backend("cpu") + graph_view.set_datatype(aidge_core.DataType.Float32) + graph_view.set_backend("cpu") + + np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32) + np_bias = np.arange(1).reshape([1, 1]).astype(np.float32) + + np_scale = np.array([0.05]).astype(np.float32) + np_shift = np.array([0.05]).astype(np.float32) + np_mean = np.array([0.05]).astype(np.float32) + np_var = np.array([0.05]).astype(np.float32) + conv.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_weights)) + conv.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_bias)) + bn.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_scale)) + bn.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_shift)) + bn.input(3)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_mean)) + bn.input(4)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_var)) + scheduler0 = aidge_core.SequentialScheduler(graph_view) + scheduler0.forward() + + for outNode in graph_view.get_output_nodes(): + output_aidge0 = outNode.get_operator().output(0) + + aidge_core.fuse_batchnorm(graph_view) + scheduler1 = aidge_core.SequentialScheduler(graph_view) + scheduler1.forward() + + for outNode in graph_view.get_output_nodes(): + output_aidge1 = outNode.get_operator().output(0) + + self.assertTrue(aidge_core.approx_eq(output_aidge0, output_aidge1, 0.000001, 0.0001)) + +if __name__ == '__main__': + unittest.main() + + +
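For reference, the identity behind the `approx_eq` check above is the standard Conv+BatchNorm folding: per output channel, `fuse_batchnorm` is expected to rescale the convolution weights by `scale / sqrt(var + eps)` and rebase the bias, so the fused graph reproduces the Conv→BatchNorm output up to floating-point rounding. A minimal sketch of that algebra (illustrative helper, not the actual `aidge_core` implementation):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Fold  y = scale * (conv(x, w, b) - mean) / sqrt(var + eps) + shift
// into an equivalent convolution with weights w' and bias b'.
void foldBatchNormIntoConv(std::vector<float> &weights,      // [outCh][inCh*kH*kW], flattened, updated in place
                           std::vector<float> &bias,         // [outCh], updated in place
                           const std::vector<float> &scale,  // BatchNorm scale (gamma)
                           const std::vector<float> &shift,  // BatchNorm shift (beta)
                           const std::vector<float> &mean,   // BatchNorm running mean
                           const std::vector<float> &var,    // BatchNorm running variance
                           float eps = 1.0e-5f) {
    const std::size_t outCh = bias.size();
    const std::size_t weightsPerCh = weights.size() / outCh;
    for (std::size_t ch = 0; ch < outCh; ++ch) {
        const float factor = scale[ch] / std::sqrt(var[ch] + eps);
        for (std::size_t k = 0; k < weightsPerCh; ++k) {
            weights[ch * weightsPerCh + k] *= factor;          // w' = w * factor
        }
        bias[ch] = (bias[ch] - mean[ch]) * factor + shift[ch]; // b' = (b - mean) * factor + shift
    }
}
```

Because the two graphs are algebraically identical and differ only by rounding, the test only needs the small `0.000001` / `0.0001` tolerances.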
diff --git a/aidge_backend_cpu/unit_tests/test_tensor.py b/aidge_backend_cpu/unit_tests/test_tensor.py index 1d12fc0cbadf71f04226a98e2e65984abc7e3254..438b6acd51791a52c9e308fb1aceaefb2a45fb29 100644 --- a/aidge_backend_cpu/unit_tests/test_tensor.py +++ b/aidge_backend_cpu/unit_tests/test_tensor.py @@ -45,5 +45,17 @@ class test_tensor(unittest.TestCase): self.assertTrue(i_t == i_n) # TODO : May need to change this to a difference for i,j in zip(t.dims(), np_array.shape): self.assertEqual(i,j) + + def test_get_set(self): + dims = [2,2,2] + + np_array = np.arange(8).reshape(dims) + # Numpy -> Tensor + t = aidge_core.Tensor(np_array) + for i in range(8): + self.assertEqual(t[i], i) + t[i] = 5 + self.assertEqual(t[i], 5) + if __name__ == '__main__': unittest.main() diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 336d549a71f625667e7e3d368819400396b893e1..1f45d700f6fc9f1d69682cb2de601979049c0af6 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -21,6 +21,7 @@ #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/ProducerImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index dfcb8afa79c98438ae261a244aee94f4ede6c0b3..012ff5af1c15e73fe76114a23ec62f9ef023bce2 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -47,6 +47,10 @@ class TensorImpl_cpu : public TensorImpl { return mData.data(); }; + void* getRaw(std::size_t idx) { + return static_cast<void*>(static_cast<T *>(rawPtr()) + idx); + }; + virtual ~TensorImpl_cpu() = default; void setRawPtr(void *ptr) override final { diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index 490598599aedf24b26865ce6a1ddb3fe32044b1b..221e36dcfac44e21d1b1a35674ca21403b4b57ab 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -20,7 +20,7 @@ namespace Aidge { template <class I1, class O> void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); O* output = static_cast<O*>(output_); @@ -32,7 +32,7 @@ void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* inp template <class I1, class I2, class O> void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); const I2* input2 = static_cast<const I2*>(input2_); O* output = static_cast<O*>(output_); @@ -45,7 +45,7 @@ void 
AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* inp template <class I1, class I2, class I3, class O> void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, const void* input3_, void* output_) { - // FIXME: missing Add parameters as arguments + // FIXME: missing Add attributes as arguments const I1* input1 = static_cast<const I1*>(input1_); const I2* input2 = static_cast<const I2*>(input2_); const I3* input3 = static_cast<const I3*>(input3_); diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp index 8373cb84a550efd8741a2dbc04c1e94ad37fe611..cfbcadfe6b719369618955a14c4cde5733ef6773 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp @@ -29,11 +29,11 @@ namespace Aidge { class AvgPoolingImpl2DForward_cpu : public Registrable<AvgPoolingImpl2DForward_cpu, std::tuple<DataType, DataType>, - void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; class AvgPoolingImpl2DBackward_cpu : public Registrable<AvgPoolingImpl2DBackward_cpu, std::tuple<DataType, DataType>, - void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; + void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {}; class AvgPoolingImpl2D_cpu : public OperatorImpl { private: diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp index 776e020f1a20056db345c8e845fd73bb31b4138b..60b4923bdc18674da52be9bd07d9947fb9790f0d 100644 --- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp @@ -26,51 +26,51 @@ namespace Aidge { * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. * @tparam I Input data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param output_ Output Tensor. 
*/ template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters ¶ms, +void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); O *output = static_cast<O *>(output_); // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(attrs)[0] + std::get<2>(attrs)[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(attrs)[1] + std::get<2>(attrs)[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1])/ + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { for (std::size_t ch = 0; ch < dims[1]; ++ch) { const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(std::get<2>(attrs)[0] - ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(std::get<2>(attrs)[1] - oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? 
std::get<1>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; + const std::size_t ix = ox * std::get<0>(attrs)[0]; + const std::size_t iy = oy * std::get<0>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { output[oIndexFull] += static_cast<O>( diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp index d9f25b4a8e38510f82fc5afe9ed4b656197a47d5..30557f6cbba05829b3cc9e17364ae4d933a568cf 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp @@ -29,7 +29,7 @@ namespace Aidge { class BatchNormImpl2DForward_cpu : public Registrable<BatchNormImpl2DForward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Parameters &, + void(const BatchNorm_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, @@ -41,7 +41,7 @@ class BatchNormImpl2DForward_cpu class BatchNormImpl2DBackward_cpu : public Registrable<BatchNormImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType>, - void(const BatchNorm_Op<2>::Parameters &, + void(const BatchNorm_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp index eedb80bde60d65b53bac70cc33ca83eb4f0121e7..486829e782ae2173332a7efa6646bb7bba322252 100644 --- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param scale_ const scale Tensor. @@ -37,9 +37,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, +void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const P *scale = static_cast<const P *>(scale_); const P *shift = static_cast<const P *>(shift_); @@ -52,12 +52,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶m const DimSize_t featureMapSize = dims[2]*dims[3]; - if ((freeze == true) || (std::get<1>(params) == 0.0f)) { + if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) { for (std::size_t batch = 0; batch < nbBatch; ++batch) { for (std::size_t ch = 0; ch < nbChannels; ++ch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); + const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs))); for (std::size_t feature = 0; feature<featureMapSize; ++feature) { output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; @@ -81,10 +81,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶m const I inputMean = sum / static_cast<I>(nbDataPerChannel); const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; - batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); - batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); + batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs); + batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs); - const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); + const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs))); for (std::size_t batch = 0; batch < nbBatch; ++batch) { const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; for (std::size_t feature = 0; feature<featureMapSize; ++feature) { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp index 0d21c676d797b2fc4e95c4aea47674c8fca5eef4..2826b635590c5d19f34c8e4beee20fc8dba2183b 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp @@ -29,12 +29,12 @@ namespace Aidge { class ConvDepthWiseImpl2DForward_cpu : public Registrable<ConvDepthWiseImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvDepthWiseImpl2DBackward_cpu : public Registrable<ConvDepthWiseImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const 
void *, + void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, + const void *, const void *, void *)> {}; class ConvDepthWiseImpl2D_cpu : public OperatorImpl { diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp index ee2d82e00376c5a2cc5a075565e35eb8885c021e..669bdbc898528b0f96a59dd3c6f8e438ae1291e4 100644 --- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param weights_ const weight Tensor. @@ -35,9 +35,9 @@ namespace Aidge { * @param output_ Output Tensor. */ template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims, +void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); @@ -46,52 +46,52 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameter // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(attrs)[0] + std::get<4>(attrs)[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(attrs)[1] + std::get<4>(attrs)[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, ch, Xin, Yin) // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { - const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; + for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) { + const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize; B biasVal = (biases != nullptr) ? 
biases[ch] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; + const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(std::get<4>(attrs)[0] - ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(std::get<4>(attrs)[1] - oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]) - std::get<4>(attrs)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]) - std::get<4>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * 
input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * + output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp index 1f3dffe43b966bc37887f267cc56760a899476f9..b9411fe0f1ac079d9857cc8f2178fc98fadc3a77 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp @@ -29,12 +29,12 @@ namespace Aidge { class ConvImpl2DForward_cpu : public Registrable<ConvImpl2DForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvImpl2DBackward_cpu : public Registrable<ConvImpl2DBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, + void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, const void *, const void *, void *)> {}; class ConvImpl2D_cpu : public OperatorImpl { diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp index bc2f10099f42cba91be8d089b66dc176fdeb7c10..9d4d6dfdfcc114e47e478089c4d5a42c2bee0f28 100644 --- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp @@ -27,7 +27,7 @@ namespace Aidge { * @tparam W Weight data type. * @tparam B Bias data type. * @tparam O Output data type. - * @param params tuple of Parameters from the Operator + * @param attrs tuple of Attributes from the Operator * @param dims Array of input dimensions. * @param input_ const input Tensor. * @param weights_ const weight Tensor. @@ -35,9 +35,9 @@ namespace Aidge { * @param output_ Output Tensor. 
*/ template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, +void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims, const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments + // FIXME: missing convolution attributes as arguments const I *input = static_cast<const I *>(input_); const W *weights = static_cast<const W *>(weights_); const B *biases = static_cast<const B *>(biases_); @@ -45,34 +45,34 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const s /* // output H size const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0])); + static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0])); // output W size const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1])); + static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1])); // TODO: kernel computation // output (Xout, Yout, outCh, batch) // input (Xin, Yin, inCh, batch) // weight (kernelX, kernelY, inCh, outCh) - // does not take Dilation parameter into account + // does not take Dilation attribute into account for (std::size_t ox = 0; ox < oxSize; ++ox) { for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; + const std::size_t ix = ox * std::get<0>(attrs)[0]; + const std::size_t iy = oy * std::get<0>(attrs)[1]; - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); + for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { + const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox)); B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); for (std::size_t batch = 0; batch < dims[3]; ++batch) { output[oIndex + batch] = biasVal; } for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { - for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { - for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { + for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) { + for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) { const std::size_t wIndex = - outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); + outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx)); std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); for (std::size_t batch = 0; batch < dims[3]; ++batch) { output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; @@ -88,53 +88,53 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const s // output H size const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(attrs)[0] + std::get<5>(attrs)[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) / + static_cast<float>(std::get<0>(attrs)[0]))); // output W size const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(attrs)[1] + std::get<5>(attrs)[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) / + static_cast<float>(std::get<0>(attrs)[1]))); // TODO: kernel computation // output (batch, outCh, Xout, Yout) // input (batch, inCh, Xin, Yin) // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation parameter into account + // does not take Dilation attribute into account using signedsize = std::make_signed<std::size_t>::type; for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; + for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) { + const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize; B biasVal = (biases != nullptr) ? biases[outCh] : B(0); std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; + const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1]; for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); + const signedsize difx = static_cast<signedsize>(std::get<5>(attrs)[0] - ox * std::get<0>(attrs)[0]); const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? 
std::get<4>(params)[0] : dims[2] + difx); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx); for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); + const signedsize dify = static_cast<signedsize>(std::get<5>(attrs)[1] - oy * std::get<0>(attrs)[1]); const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify); const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]) - std::get<5>(attrs)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]) - std::get<5>(attrs)[1]; if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + 
+ weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); } else { for (std::size_t sx = sxMin; sx < sxMax; ++sx) { for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * + output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] * input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; } } diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp index c69cc0b08a58877108c78d6f12c29e9089c2f665..1dfa40439dbba9cdd4fe3436fea30f771678c1ff 100644 --- a/include/aidge/backend/cpu/operator/FCImpl.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl.hpp @@ -26,11 +26,11 @@ namespace Aidge { // compute kernel registry for forward and backward class FCImplForward_cpu : public Registrable<FCImplForward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t, + void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t, const void *, const void *, const void *, void *)> {}; class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu, std::tuple<DataType, DataType, DataType, DataType>, - void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t, + void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t, const void *, const void *, const void *, void *)> {}; class FCImpl_cpu : public OperatorImpl { diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp index d6acb7dfea3415a8d67384745e16ecdd8bf06324..91e2558a7ef1079cbc9fb11f78fab53ef4246149 100644 --- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp @@ -19,17 +19,17 @@ namespace Aidge { // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims, // const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments +// // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); // const B* biases = static_cast<const B*>(biases_); // O* output = static_cast<O*>(output_); -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { // std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// const B bias = std::get<1>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] = bias; // } @@ -39,10 +39,10 @@ namespace Aidge { // for (std::size_t iy = 0; iy < dims[1]; ++iy) { // for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { // const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { // const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + -// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) + +// outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; // for (std::size_t batch = 0; batch < dims[3]; ++batch) { // output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; // } @@ -53,9 +53,9 @@ namespace Aidge { // } // template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, +// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims, // const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments +// // FIXME: missing FC attributes as arguments // const I* input = static_cast<const I*>(input_); // const W* weights = static_cast<const W*>(weights_); // const B* biases = static_cast<const B*>(biases_); @@ -63,9 +63,9 @@ namespace Aidge { // // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) { // std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// const B bias = std::get<1>(attrs) ? 
B(0) : biases[outIdx]; // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // output[oIndex + batch] = bias; // } @@ -74,8 +74,8 @@ namespace Aidge { // for (std::size_t batch = 0; batch < dims[0]; ++batch) { // const std::size_t oIndex = dims[1] * batch; // for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) { +// std::size_t wIndex = i * std::get<0>(attrs) + outCh; // (iIndex*std::get<0>(attrs) + oIndex)/dims[3]; // output[oIndex + outCh] += weights[wIndex] * input[i + batch]; // } // } @@ -83,29 +83,29 @@ namespace Aidge { // } template <class I, class W, class B, class O> -void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, +void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, const void* input_, const void* weights_, const void* biases_, void* output_) { - // FIXME: missing FC parameters as arguments + // FIXME: missing FC attributes as arguments const I* input = static_cast<const I*>(input_); const W* weights = static_cast<const W*>(weights_); const B* biases = static_cast<const B*>(biases_); O* output = static_cast<O*>(output_); - if (std::get<1>(params)) { - std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); + if (std::get<1>(attrs)) { + std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0)); } else { for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); + std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs))); } } for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < std::get<0>(params); ++out) { - output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, + for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { + output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, input + (batch + 1)*oneInputSize, weights + out*oneInputSize, - output[out + batch*std::get<0>(params)]); + output[out + batch*std::get<0>(attrs)]); } } } diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp index abe167bea16de01f861beb9701f747d39f265d9d..386ef999fddbda184edee88723d213f53ff62ded 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp @@ -24,10 +24,10 @@ namespace Aidge { // compute kernel registry for forward and backward class LeakyReLUImplForward_cpu - : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { }; class LeakyReLUImplBackward_cpu - : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> { }; class LeakyReLUImpl_cpu : public OperatorImpl { diff --git 
a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp index ff9a8ac6a8f968f244429b330401d794f16fac01..761b9579c3c3dc187e4b0fac24812fa77f916e65 100644 --- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp @@ -18,14 +18,14 @@ namespace Aidge { template <class I, class O> -void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, +void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs, std::size_t inputLenght, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - I negativeSlope = static_cast<I>(std::get<0>(params)); + I negativeSlope = static_cast<I>(std::get<0>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bf8e31efd253ee8855f3473ef0b4a60c59a04b5f --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp @@ -0,0 +1,76 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_H_ + +#include <array> +#include <memory> +#include <vector> + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/MatMul.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +// class MatMul_Op; + +// compute kernel registry for forward and backward +class MatMulImplForward_cpu + : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>, + void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, + const void *, const void *, void *)> {}; +class MatMulImplBackward_cpu + : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, + void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t, + const void *, const void *, void *)> {}; + +class MatMulImpl_cpu : public OperatorImpl { +private: + const MatMul_Op &mOp; + std::array<NbElts_t, 2> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData; + +public: + MatMulImpl_cpu(const MatMul_Op &op) + : mOp(op), + mNbConsumedData({0, 0}), + mNbProducedData({0}) + { + // ctor + } + + static std::unique_ptr<MatMulImpl_cpu> create(const MatMul_Op &op) + { + return std::make_unique<MatMulImpl_cpu>(op); + } + +public: + NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; + NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, + const std::vector<DimSize_t> & /*inputsSize*/) const override final; + NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + + void updateConsummerProducer() override final; + + void forward(); + void backward(); +}; + 
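+// Note (inferred from the pattern used by the other CPU operators): the static
+// Registrar<MatMul_Op> below adds MatMulImpl_cpu::create to MatMul_Op's
+// implementation registry under the "cpu" key, so a MatMul node picks up this
+// implementation when its backend is set to "cpu". The anonymous namespace gives
+// the registrar object internal linkage, i.e. one registration per translation unit.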
+namespace { +static Registrar<MatMul_Op> registrarMatMulImpl_cpu("cpu", Aidge::MatMulImpl_cpu::create); +} +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bc52779eff274379a853ea84fb839c9486652433 --- /dev/null +++ b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp @@ -0,0 +1,59 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <algorithm> +#include <numeric> + +#include "aidge/backend/cpu/operator/MatMulImpl.hpp" + +namespace Aidge { + +template <class I, class W, class O> +void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize, + const void* input_, const void* weights_, void* output_) { + // FIXME: missing MatMul attributes as arguments + const I* input = static_cast<const I*>(input_); + const W* weights = static_cast<const W*>(weights_); + O* output = static_cast<O*>(output_); + + + std::fill(output, output+(batchSize*std::get<0>(attrs)), O(0)); + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t out = 0; out < std::get<0>(attrs); ++out) { + output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize, + input + (batch + 1)*oneInputSize, + weights + out*oneInputSize, + output[out + batch*std::get<0>(attrs)]); + } + } +} + + +namespace { +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>); +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>); +static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>); +} // namespace + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp index cb11b3016b9f694cc518f20a62ea143a94a58afe..37549349b9f5ffbf443d976135db05b4cec209b7 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp @@ -18,16 +18,17 @@ #include "aidge/utils/Types.h" #include <memory> #include <vector> +#include <array> namespace Aidge { // class Scaling_Op; // compute kernel registry for forward and backward class ScalingImplForward_cpu - : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, 
DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { }; class ScalingImplBackward_cpu - : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Parameters&, std::size_t, const void*, void*)> { + : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> { }; class ScalingImpl_cpu : public OperatorImpl { @@ -47,7 +48,7 @@ class ScalingImpl_cpu : public OperatorImpl { public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final; + NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp index c5b06290ee04ecf9759f418cd26d83e889fcc84e..8fe13bce3a4c470d77b083603d3b889a46fda71f 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp @@ -18,14 +18,14 @@ namespace Aidge { template <class I, class O> -void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Parameters& params, +void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs, std::size_t inputLenght, const void* input_, void* output_) { const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); - I scalingFactor = static_cast<I>(std::get<0>(params)); + const I scalingFactor = static_cast<I>(std::get<0>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] * scalingFactor; diff --git a/setup.py b/setup.py index 0b0f66e9132d66cdb6385d7f8c6c69ae0cc5d0e3..16305afdfdfa5de2e328460d9e96c77eb96a9d98 100644 --- a/setup.py +++ b/setup.py @@ -62,11 +62,11 @@ class CMakeBuild(build_ext): os.chdir(str(build_temp)) - # Impose to use the executable of the python + # Impose to use the executable of the python # used to launch setup.py to setup PythonInterp param_py = "-DPYTHON_EXECUTABLE=" + sys.executable - - install_path = f"{build_temp}/install" if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] + + install_path = os.path.join(sys.prefix, "lib", "libAidge") if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"] self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}']) if not self.dry_run: @@ -83,11 +83,11 @@ class CMakeBuild(build_ext): for file in files: if file.endswith('.so') and (root != str(aidge_package.absolute())): currentFile=os.path.join(root, file) - shutil.copy(currentFile, str(aidge_package.absolute())) + shutil.copy(currentFile, str(aidge_package.absolute())) # Copy version.txt in aidge_package os.chdir(os.path.dirname(__file__)) - shutil.copy("version.txt", str(aidge_package.absolute())) + shutil.copy("version.txt", str(aidge_package.absolute())) if __name__ == '__main__': diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index d3da42185237a59146af17199e34a00dbebd6d96..be7923339308073c26b60ee0349a44037769765a 100644 
diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp
index d3da42185237a59146af17199e34a00dbebd6d96..be7923339308073c26b60ee0349a44037769765a 100644
--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -99,6 +99,7 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOInd
 Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx; // avoid unused warning

     const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
@@ -167,6 +168,7 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOInd
 Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx; // avoid unused warning

     const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 6c434a5c38853a1dee66db5be95b6b1bfdde8162..b1f82bbb4323a402d698d772966409e1a8f7224b 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -70,7 +70,7 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() {
         Registrar<AvgPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                mOp.getInput(0)->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index a0d4d032ded9ede1b2dba307aa967af330167d25..90ee2b7a2361166109568e317a1788137150a8d1 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -76,7 +76,7 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
                                                          mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                mOp.getInput(0)->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getInput(1)->getImpl()->rawPtr(),
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 3e920cf68366b82bce8df29c8aea0c838e6a1364..7801f64ef46ced22d95af47b8b0e8cc9888a81da 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -77,7 +77,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
                                                                        mOp.getInput(2)->dataType(),
                                                                        mOp.getOutput(0)->dataType()});
     // Call kernel
-    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    kernelFunc(mOp.getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index b4ddf80929923a9c2c5998ac8614ebb0d3afe000..edab4432fd5792f27ea158f265641855532d6d0b 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -75,7 +75,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
                                                               mOp.getInput(2)->dataType(),
                                                               mOp.getOutput(0)->dataType()});
     // Call kernel
-    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    kernelFunc(mOp.getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
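The `(void) outputIdx;` lines added above replace the GCC-specific `__attribute__((unused))` seen earlier in the Scaling header: when `NDEBUG` strips the `assert`, the parameter would otherwise trigger `-Wunused-parameter`. The cast-to-void idiom is portable and has no runtime effect; a self-contained illustration (the function below is a toy, not an Aidge API):

```cpp
#include <cassert>

// Toy example: outputIdx is only used inside the assert, which vanishes
// under NDEBUG; the (void) cast keeps the compiler quiet in that build.
int requiredMemory(int outputIdx) {
    assert(outputIdx == 0 && "operator has only one output");
    (void) outputIdx; // avoid unused warning
    return 1;
}
```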
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 086902be0ab1c2027a8c62c143bc27921e5e9e1b..3cf1ccf6e951ea05521ef67c99a3e628e0f620f5 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -98,7 +98,7 @@ void Aidge::FCImpl_cpu::forward()
     // Call kernel
     // if (mOp.getInput(0)->nbDims() == 4) {
     //     kernelFunc(
-    //         mOp.getParams(),
+    //         mOp.getStaticAttributes(),
     //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
     //         mOp.getInput(0)->getImpl()->rawPtr(),
     //         mOp.mInputs[1]->getImpl()->rawPtr(),
@@ -107,7 +107,7 @@ void Aidge::FCImpl_cpu::forward()
     // }
     // else
     kernelFunc(
-        mOp.getParams(),
+        mOp.getStaticAttributes(),
         mOp.getInput(0)->dims()[0],
         mOp.getInput(0)->sizeM1(),
         mOp.getInput(0)->getImpl()->rawPtr(),
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index f6a44d381081c7c7f1dcbbf02d91212168cc07aa..316d3641bb960ed8850a94f40186b77cc8522b58 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -65,7 +65,7 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
         mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..972e1f0fc87ad00afe670d77afc8617137076a08
--- /dev/null
+++ b/src/operator/MatMulImpl.cpp
@@ -0,0 +1,121 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const
+{
+    assert(mOp.getInput(inputIdx) && "requires valid input");
+
+    // Requires the whole tensors
+    const auto &inputDims
+        = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
+
+    return std::accumulate(
+        inputDims.begin(),
+        inputDims.end(),
+        Aidge::NbElts_t(1),
+        std::multiplies<Aidge::NbElts_t>());
+}
+
+Aidge::NbElts_t
+    Aidge::MatMulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const
+{
+    // no amount of input data needs to be protected while computing
+    // the output for this kernel
+    return 0;
+}
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getRequiredMemory(
+    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
+{
+    // Requires the whole tensors, regardless of available data on inputs
+    assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
+
+    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
+    return std::accumulate(
+        outputDims.begin(),
+        outputDims.end(),
+        static_cast<NbElts_t>(1),
+        std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const
+{
+    assert((inputIdx != gk_IODefaultIndex) && (inputIdx < mNbConsumedData.size()));
+    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
+}
+
+Aidge::NbElts_t Aidge::MatMulImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const
+{
+    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
+    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
+}
+
+void Aidge::MatMulImpl_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]
+            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
+                                                                      // amount for a forward pass
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
+
+void Aidge::MatMulImpl_cpu::forward()
+{
+    // FIXME: uncomment the following code once memory handling works
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.mInputs[1] && "missing input #1");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(),
+         mOp.mInputs[1]->dataType(),
+         mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    // if (mOp.getInput(0)->nbDims() == 4) {
+    //     kernelFunc(
+    //         mOp.getStaticAttributes(),
+    //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    //         mOp.getInput(0)->getImpl()->rawPtr(),
+    //         mOp.mInputs[1]->getImpl()->rawPtr(),
+    //         mOp.mInputs[2]->getImpl()->rawPtr(),
+    //         mOp.getOutput(0)->getImpl()->rawPtr());
+    // }
+    // else
+    kernelFunc(
+        mOp.getStaticAttributes(),
+        mOp.getInput(0)->dims()[0],
+        mOp.getInput(0)->sizeM1(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.mInputs[1]->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+
+
+}
+
+void Aidge::MatMulImpl_cpu::backward()
+{
+    printf("Not implemented yet.\n");
+}
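The registered kernel itself lives in `MatMulImpl_forward_kernels.hpp`, which this patch does not include. Judging from the call site above (attributes, batch size, flattened input size, then raw pointers), a naive reference implementation would look roughly like the sketch below; here the output count is taken as an explicit parameter, whereas the real kernel presumably reads it from the attributes:

```cpp
#include <cstddef>

// Naive row-major matmul consistent with the call site sketched above:
// input is (batch x inSize), weight is (nbOutputs x inSize),
// output is (batch x nbOutputs). Illustrative only.
template <class I, class W, class O>
void matmul_ref(std::size_t nbOutputs, std::size_t batch, std::size_t inSize,
                const I* input, const W* weight, O* output) {
    for (std::size_t b = 0; b < batch; ++b) {
        for (std::size_t o = 0; o < nbOutputs; ++o) {
            O acc = O(0);
            for (std::size_t i = 0; i < inSize; ++i)
                acc += static_cast<O>(input[b * inSize + i]) *
                       static_cast<O>(weight[o * inSize + i]);
            output[b * nbOutputs + o] = acc;
        }
    }
}
```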
diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp
index 0770a0ccc1434d03fc26b07c425053cd7c09bee6..84cd6ee33a8316a24bae472c74c039dabe0afba3 100644
--- a/src/operator/ScalingImpl.cpp
+++ b/src/operator/ScalingImpl.cpp
@@ -10,16 +10,14 @@
  ********************************************************************************/

 #include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include <numeric>    // std::accumulate
+#include <functional> // std::multiplies

 #include "aidge/operator/Scaling.hpp"

 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
 #include <vector>

 // FIXME: replace whole Tensor with minimum needed data quantity
@@ -38,7 +36,9 @@ Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIn
     return 0;
 }

-Aidge::NbElts_t Aidge::ScalingImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::ScalingImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t> &inputsSize) const {
+    (void) outputIdx;
+    (void) inputsSize;
     const auto& outputDims = mOp.getOutput(0)->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -68,7 +68,7 @@ void Aidge::ScalingImpl_cpu::forward() {
         mOp.getOutput(0)->dataType()});

     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(mOp.getStaticAttributes(),
                std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
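Every `getRequiredMemory` in this patch reduces to the same idiom: fold the output dimensions with `std::accumulate` and `std::multiplies` to get the element count, which is exactly why `ScalingImpl.cpp` now includes `<functional>`. A stand-alone version of the idiom:

```cpp
#include <cassert>
#include <cstddef>
#include <functional> // std::multiplies
#include <numeric>    // std::accumulate
#include <vector>

// Number of elements in a tensor with the given dimensions.
std::size_t elementCount(const std::vector<std::size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(),
                           static_cast<std::size_t>(1),
                           std::multiplies<std::size_t>());
}

int main() {
    assert(elementCount({3, 3, 3, 2}) == 54); // e.g. the Add test tensors below
    return 0;
}
```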
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp
similarity index 78%
rename from unit_tests/Test_TensorImpl.cpp
rename to unit_tests/data/Test_TensorImpl.cpp
index d28505f7b6f2961e581cadee778bdb16364353de..6c75c4dc19ff1b646308858ad262441d43390122 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/data/Test_TensorImpl.cpp
@@ -41,12 +41,12 @@ TEST_CASE("Tensor creation") {
   }

   SECTION("get function") {
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 0})) == 1);
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 1})) == 2);
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 1, 1})) == 4);
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 0})) == 7);
-    x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) = 36;
-    REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) == 36);
+    REQUIRE(x.get<int>({0, 0, 0}) == 1);
+    REQUIRE(x.get<int>({0, 0, 1}) == 2);
+    REQUIRE(x.get<int>({0, 1, 1}) == 4);
+    REQUIRE(x.get<int>({1, 1, 0}) == 7);
+    x.get<int>({1, 1, 1}) = 36;
+    REQUIRE(x.get<int>({1, 1, 1}) == 36);
   }

   SECTION("Pretty printing for debug") { REQUIRE_NOTHROW(x.print()); }
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index e24d7ac6bd97586ebdeddce5ccb75807ddf530f0..18d98d169ddcb74310c5153d7c2c95103c395bb7 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -19,25 +19,25 @@ using namespace Aidge;

 TEST_CASE("[cpu/operator] Add(forward)") {
-    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-        {
-            {
-                {{20, 47},{21, 48},{22, 49}},
-                {{23, 50},{24, 51},{25, 52}},
-                {{26, 53},{27, 54},{28, 55}}
-            },
-            {
-                {{29, 56},{30, 57},{31, 58}},
-                {{32, 59},{33, 60},{34, 61}},
-                {{35, 62},{36, 63},{37, 64}}
-            },
-            {
-                {{38, 65},{39, 66},{40, 67}},
-                {{41, 68},{42, 69},{43, 70}},
-                {{44, 71},{45, 72},{46, 73}}
-            }
-        }
-    });
+    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        {                                       //
+            {                                   //
+                {{20, 47},{21, 48},{22, 49}},   //
+                {{23, 50},{24, 51},{25, 52}},   //
+                {{26, 53},{27, 54},{28, 55}}    //
+            },                                  //
+            {                                   //
+                {{29, 56},{30, 57},{31, 58}},   //
+                {{32, 59},{33, 60},{34, 61}},   //
+                {{35, 62},{36, 63},{37, 64}}    //
+            },                                  //
+            {                                   //
+                {{38, 65},{39, 66},{40, 67}},   //
+                {{41, 68},{42, 69},{43, 70}},   //
+                {{44, 71},{45, 72},{46, 73}}    //
+            }                                   //
+        }                                       //
+    });                                         //

     SECTION("One input") {
         std::shared_ptr<Node> myAdd = Add<1>();
@@ -51,7 +51,7 @@ TEST_CASE("[cpu/operator] Add(forward)") {
     }

     SECTION("Two inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
             {
                 {
                     {{40, 94},{42, 96},{44, 98}},
@@ -81,9 +81,9 @@ TEST_CASE("[cpu/operator] Add(forward)") {

         REQUIRE(*std::static_pointer_cast<Tensor>(myAdd->getOperator()->getOutput(0)) == *expectedOutput);
     }
-
+
     SECTION("Three inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
             {
                 {
                     {{ 60, 141},{ 63, 144},{ 66, 147}},
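The shortened `x.get<int>({0, 0, 1})` calls above compile because the coordinate parameter can now be initialized directly from a braced list, and `get` returns a reference, which is what lets the test assign through it (`x.get<int>({1, 1, 1}) = 36`). A minimal sketch of that shape of API, using a hypothetical `Grid3` type rather than the real `Tensor`:

```cpp
#include <array>
#include <cassert>
#include <cstddef>

// Hypothetical 2x2x2 container mimicking the Tensor::get coordinate API.
struct Grid3 {
    int data[2][2][2] = {};
    int& get(std::array<std::size_t, 3> c) { return data[c[0]][c[1]][c[2]]; }
};

int main() {
    Grid3 g;
    g.get({1, 1, 1}) = 36; // the braced list initializes the std::array argument
    assert(g.get({1, 1, 1}) == 36);
    return 0;
}
```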
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index 7096962e196c2ace4abf2b0b14aca8dfa37d3441..d5bd91ff75404a7b928c8919c64e06315b78206f 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -153,7 +153,7 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
         REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
     }

-    SECTION("Test construction parameter: negative_slop") {
+    SECTION("Test construction attribute: negative_slope") {
         std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
             {0.0f, 1.0f, 2.0f,-3.0f, 4.0f,-5.0f,-6.0f, 7.0f, 8.0f, 9.0f}
         });
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0da01b3287043e07e5b967df8882960cfb814f8f
--- /dev/null
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -0,0 +1,108 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/MatMul.hpp"
+
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul]") {
+    // Test MatMul forward with batch size = 2 and feature size = 75
+    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{
+        {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+         {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4,
+          5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8,
+          9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+          13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}});
+    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{
+        {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}});
+
+    std::shared_ptr<Node> myMatMul = MatMul(5, "mymatmul");
+    myMatMul->getOperator()->setDatatype(DataType::Int32);
+    myMatMul->getOperator()->setBackend("cpu");
+    myMatMul->getOperator()->associateInput(1, myWeights);
+
+    SECTION("2D input") {
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
+            {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+              19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+              38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+              57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
+             {75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+              90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+              105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+              120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+              135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
+        myMatMul->getOperator()->associateInput(0, myInput);
+        myMatMul->getOperator()->computeOutputDims();
+        myMatMul->forward();
+        REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput);
+    }
+    SECTION("4D input") {
+        std::shared_ptr<Tensor> myInput =
+            std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                                                 {5, 6, 7, 8, 9},
+                                                                 {10, 11, 12, 13, 14},
+                                                                 {15, 16, 17, 18, 19},
+                                                                 {20, 21, 22, 23, 24}},
+                                                                {{25, 26, 27, 28, 29},
+                                                                 {30, 31, 32, 33, 34},
+                                                                 {35, 36, 37, 38, 39},
+                                                                 {40, 41, 42, 43, 44},
+                                                                 {45, 46, 47, 48, 49}},
+                                                                {{50, 51, 52, 53, 54},
+                                                                 {55, 56, 57, 58, 59},
+                                                                 {60, 61, 62, 63, 64},
+                                                                 {65, 66, 67, 68, 69},
+                                                                 {70, 71, 72, 73, 74}}},
+                                                               {{{75, 76, 77, 78, 79},
+                                                                 {80, 81, 82, 83, 84},
+                                                                 {85, 86, 87, 88, 89},
+                                                                 {90, 91, 92, 93, 94},
+                                                                 {95, 96, 97, 98, 99}},
+                                                                {{100, 101, 102, 103, 104},
+                                                                 {105, 106, 107, 108, 109},
+                                                                 {110, 111, 112, 113, 114},
+                                                                 {115, 116, 117, 118, 119},
+                                                                 {120, 121, 122, 123, 124}},
+                                                                {{125, 126, 127, 128, 129},
+                                                                 {130, 131, 132, 133, 134},
+                                                                 {135, 136, 137, 138, 139},
+                                                                 {140, 141, 142, 143, 144},
+                                                                 {145, 146, 147, 148, 149}}}}});
+        myMatMul->getOperator()->associateInput(0, myInput);
+        myMatMul->getOperator()->computeOutputDims();
+        myMatMul->forward();
+        REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl;
+}
\ No newline at end of file
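A quick independent check of the constants in this new test: each weight row repeats the sequence 1..15 five times across its 75 entries, and the two input rows are 0..74 and 75..149, so the expected dot products can be recomputed by hand. The snippet below is a standalone sanity check, not part of the test suite:

```cpp
#include <cstdio>

int main() {
    int acc0 = 0, acc1 = 0;
    for (int i = 0; i < 75; ++i) {
        const int w = (i % 15) + 1; // weight rows repeat 1..15
        acc0 += i * w;              // first input row: 0..74
        acc1 += (i + 75) * w;       // second input row: 75..149
    }
    std::printf("%d %d\n", acc0, acc1); // prints: 23600 68600
    return 0;
}
```

The 4D case produces the same values because the 2x3x5x5 input flattens to the same two rows of 75 elements each.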
diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
similarity index 100%
rename from unit_tests/Test_Scheduler.cpp
rename to unit_tests/scheduler/Test_Scheduler.cpp