diff --git a/.gitlab/ci/_global.gitlab-ci.yml b/.gitlab/ci/_global.gitlab-ci.yml
index 1615b8974db11d93cb3305ce800e46cf5377bc33..331373fe0f27e7750183eb2e76fe83300cf316a8 100644
--- a/.gitlab/ci/_global.gitlab-ci.yml
+++ b/.gitlab/ci/_global.gitlab-ci.yml
@@ -9,6 +9,14 @@ variables:
   GIT_SSL_NO_VERIFY: 1
   DEBIAN_FRONTEND: noninteractive
 
+# See https://docs.gitlab.com/ee/ci/yaml/workflow.html#switch-between-branch-pipelines-and-merge-request-pipelines
+workflow:
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS
+      when: never
+    - if: $CI_COMMIT_BRANCH
+
 default:
   image: nvidia/cuda:12.2.0-devel-ubuntu22.04
   before_script:
diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml
index 68fcb6b4bc0dac08c4f0029ec1f2d3404226c1c2..e996def7984908ea751c5e033814e81b18a3d51b 100644
--- a/.gitlab/ci/build.gitlab-ci.yml
+++ b/.gitlab/ci/build.gitlab-ci.yml
@@ -23,17 +23,105 @@ build:ubuntu_cpp:
       - build_cpp/
       - install_cpp/
 
-build:ubuntu_python:
+build:ubuntu_cpp_g++10:
+  stage: build
+  needs: []
+  tags:
+    - docker
+
+  script:
+    # Download dependencies
+    # aidge_core
+    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
+    - unzip -o build_artifacts.zip -d .
+    - rm -rf build_cpp
+
+    # Build current module
+    - export CMAKE_PREFIX_PATH=../install_cpp
+    - apt install -y g++-10
+    - mkdir -p build_cpp
+    - mkdir -p install_cpp
+    - cd build_cpp
+    - export CXX=/usr/bin/g++-10
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
+    - make -j4 all install
+
+build:ubuntu_cpp_g++12:
+  stage: build
+  needs: []
+  tags:
+    - docker
+
+  script:
+    # Download dependencies
+    # aidge_core
+    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
+    - unzip -o build_artifacts.zip -d .
+    - rm -rf build_cpp
+
+    # Build current module
+    - export CMAKE_PREFIX_PATH=../install_cpp
+    - apt install -y g++-12
+    - mkdir -p build_cpp
+    - mkdir -p install_cpp
+    - cd build_cpp
+    - export CXX=/usr/bin/g++-12
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
+    - make -j4 all install
+
+build:ubuntu_cpp_clang12:
+  stage: build
+  needs: []
+  tags:
+    - docker
+
+  script:
+    # Download dependencies
+    # aidge_core
+    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
+    - unzip -o build_artifacts.zip -d .
+    - rm -rf build_cpp
+
+    # Build current module
+    - export CMAKE_PREFIX_PATH=../install_cpp
+    - apt install -y clang-12
+    - mkdir -p build_cpp
+    - mkdir -p install_cpp
+    - cd build_cpp
+    - export CXX=/usr/bin/clang++-12
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
+    - make -j4 all install
+
+build:ubuntu_cpp_clang15:
   stage: build
   needs: []
   tags:
     - docker
+
   script:
     # Download dependencies
-    # aidge_core (CPP)
+    # aidge_core
     - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
     - unzip -o build_artifacts.zip -d .
     - rm -rf build_cpp
+
+    # Build current module
+    - export CMAKE_PREFIX_PATH=../install_cpp
+    - apt install -y clang-15
+    - mkdir -p build_cpp
+    - mkdir -p install_cpp
+    - cd build_cpp
+    - export CXX=/usr/bin/clang++-15
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
+    - make -j4 all install
+
+build:ubuntu_python:
+  stage: build
+  needs: []
+  tags:
+    - docker
+  script:
+    # Download dependencies
     # aidge_core (Python)
     - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_python"'
     - unzip -o build_artifacts.zip -d .
@@ -41,8 +129,7 @@ build:ubuntu_python:
     - python3 -m pip install virtualenv
     - virtualenv venv
     - source venv/bin/activate
-    - export AIDGE_INSTALL=`pwd`/install
-    - export CMAKE_PREFIX_PATH=../install_cpp
+    - python3 -m pip install -r requirements.txt
     - python3 -m pip install .
   artifacts:
     expire_in: 1 week
@@ -84,3 +171,35 @@ build:windows_cpp:
     paths:
       - build_cpp/
       - install_cpp/
+
+build:windows_python:
+  stage: build
+  needs: []
+  tags:
+    - windows
+
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install git -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    # Download dependencies
+    # aidge_core (Python)
+    - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_python" -o build_artifacts.zip'
+    - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
+
+    - python -m pip install virtualenv
+    - virtualenv venv
+    - venv\Scripts\Activate.ps1
+    - python -m pip install -r requirements.txt
+    - python -m pip install .
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - venv/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51ee1f6d5df771fcccd1b05a45861eb2f1d3bbbe..51a6ebe10d7b8d03fcb94898de55734dbabf9b0c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,11 +12,6 @@ set(module_name _${project}) # target name
 
 project(${project})
 
-##############################################
-# Import utils CMakeLists
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
-include(PybindModuleCreation)
-
 ##############################################
 # Define options
 option(PYBIND "python binding" ON)
diff --git a/README.md b/README.md
index 74eb50826bf6f88a0ded363138adba04827390d0..865cb08a17ebf8638cb2ac56773a4f464860b8ae 100644
--- a/README.md
+++ b/README.md
@@ -14,9 +14,11 @@ So far be sure to have the correct requirements to use this library
 
 ## Pip installation
 
-You will need to install first the aidge_core library before installing aidge_cpu.
-Also, make sure that the install path was set before installing aidge_core library.
-Then run in your python environnement : 
+You must install the ``aidge_core`` library before installing ``aidge_backend_cpu``.
+
+If you have set a custom install path for the ``aidge_core`` library, make sure to use the same one here.
+
+Then run the following command in your Python environment:
 ``` bash
 pip install . -v
 ```
@@ -46,4 +48,4 @@ Important: this command can also be run with `make`.
 To compile the CPU library with the python binding + the associated unitary tests, run
 ```
 make cpu_with_pybind_tests
-```
\ No newline at end of file
+```
diff --git a/aidge_backend_cpu/unit_tests/test_recipies.py b/aidge_backend_cpu/unit_tests/test_recipies.py
new file mode 100644
index 0000000000000000000000000000000000000000..60949adf245f4f4a7ed316879fb307131f70739a
--- /dev/null
+++ b/aidge_backend_cpu/unit_tests/test_recipies.py
@@ -0,0 +1,77 @@
+"""
+Copyright (c) 2023 CEA-List
+
+This program and the accompanying materials are made available under the
+terms of the Eclipse Public License 2.0 which is available at
+http://www.eclipse.org/legal/epl-2.0.
+
+SPDX-License-Identifier: EPL-2.0
+"""
+
+import unittest
+import aidge_core
+import aidge_backend_cpu
+
+from functools import reduce
+import numpy as np
+
+class test_recipies(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        pass
+
+    def test_fuse_batchnorm(self):
+        dims = [1, 1, 10, 10]
+        size = reduce((lambda x, y: x*y), dims)
+
+        input_data =  np.arange(size).reshape(dims).astype(np.float32)
+        input_tensor = aidge_core.Tensor(input_data)
+
+        input_node = aidge_core.Producer(input_tensor, "X")
+        conv = aidge_core.Conv2D(1, 1, [3, 3], name="Conv0")
+        bn = aidge_core.BatchNorm2D(name="Add0")
+
+        graph_view = aidge_core.sequential([conv, bn])
+
+        # Connect the input producer, select Float32/CPU, then set fixed (deterministic) Conv and BatchNorm parameter values
+        input_node.add_child(graph_view)
+        input_node.get_operator().set_datatype(aidge_core.DataType.Float32)
+        input_node.get_operator().set_backend("cpu")
+        graph_view.set_datatype(aidge_core.DataType.Float32)
+        graph_view.set_backend("cpu")
+
+        np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32)
+        np_bias = np.arange(1).reshape([1, 1]).astype(np.float32)
+
+        np_scale = np.array([0.05]).astype(np.float32)
+        np_shift = np.array([0.05]).astype(np.float32)
+        np_mean = np.array([0.05]).astype(np.float32)
+        np_var = np.array([0.05]).astype(np.float32)
+        conv.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_weights))
+        conv.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_bias))
+        bn.input(1)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_scale))
+        bn.input(2)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_shift))
+        bn.input(3)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_mean))
+        bn.input(4)[0].get_operator().set_output_tensor(aidge_core.Tensor(np_var))
+        scheduler0 = aidge_core.SequentialScheduler(graph_view)
+        scheduler0.forward()
+
+        for outNode in graph_view.get_output_nodes():
+            output_aidge0 = outNode.get_operator().output(0)
+
+        aidge_core.fuse_batchnorm(graph_view)
+        scheduler1 = aidge_core.SequentialScheduler(graph_view)
+        scheduler1.forward()
+
+        for outNode in graph_view.get_output_nodes():
+            output_aidge1 = outNode.get_operator().output(0)
+
+        self.assertTrue(aidge_core.approx_eq(output_aidge0, output_aidge1, 0.000001, 0.0001))
+
+if __name__ == '__main__':
+    unittest.main()
+
+
+
diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py
index d8cf3e164da4bd34273905b0b0e156cf057635a5..3449ff513ef618e24788419c835b7277a1e751f1 100644
--- a/aidge_backend_cpu/unit_tests/test_scheduler.py
+++ b/aidge_backend_cpu/unit_tests/test_scheduler.py
@@ -55,6 +55,8 @@ class test_scheduler(unittest.TestCase):
         graph_view.set_datatype(aidge_core.DataType.Float32)
         graph_view.set_backend("cpu")
 
+        graph_view.forward_dims()
+
         scheduler = aidge_core.SequentialScheduler(graph_view)
         scheduler.generate_scheduling()
 
@@ -80,6 +82,8 @@ class test_scheduler(unittest.TestCase):
         graph_view.set_datatype(aidge_core.DataType.Float32)
         graph_view.set_backend("cpu")
 
+        graph_view.forward_dims()
+
         scheduler = aidge_core.SequentialScheduler(graph_view)
         scheduler.generate_scheduling()
 
diff --git a/aidge_backend_cpu/unit_tests/test_tensor.py b/aidge_backend_cpu/unit_tests/test_tensor.py
index 1d12fc0cbadf71f04226a98e2e65984abc7e3254..438b6acd51791a52c9e308fb1aceaefb2a45fb29 100644
--- a/aidge_backend_cpu/unit_tests/test_tensor.py
+++ b/aidge_backend_cpu/unit_tests/test_tensor.py
@@ -45,5 +45,17 @@ class test_tensor(unittest.TestCase):
             self.assertTrue(i_t == i_n) # TODO : May need to change this to a difference
         for i,j in zip(t.dims(), np_array.shape):
             self.assertEqual(i,j)
+
+    def test_get_set(self):
+        dims = [2,2,2]
+
+        np_array = np.arange(8).reshape(dims)
+        # Numpy -> Tensor
+        t = aidge_core.Tensor(np_array)
+        for i in range(8):
+            self.assertEqual(t[i], i)
+            t[i] = 5
+            self.assertEqual(t[i], 5)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 95b2f7b8e2ff70c9b9224bea1137ad74e469ffb8..5a7ac3958b76e94c8389b0287fdac40c8c3a5ad8 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -15,13 +15,22 @@
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
 #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/backend/cpu/operator/DivImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/backend/cpu/operator/MulImpl.hpp"
+#include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/backend/cpu/operator/PowImpl.hpp"
 #include "aidge/backend/cpu/operator/ProducerImpl.hpp"
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
+#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
 
 #endif /* AIDGE_CPU_IMPORTS_H_ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp
index 014939e106e5891c86b007f4bd3905c765ec8754..967b42333a8748999f99afcb6e52b9a15bf936e3 100644
--- a/include/aidge/backend/cpu/data/TensorImpl.hpp
+++ b/include/aidge/backend/cpu/data/TensorImpl.hpp
@@ -47,6 +47,10 @@ class TensorImpl_cpu : public TensorImpl {
         return mData.data();
     };
 
+   void* getRaw(std::size_t idx){
+       return  static_cast<void*>(static_cast<T *>(rawPtr()) + idx);
+   };
+
     virtual ~TensorImpl_cpu() = default;
 
     void setRawPtr(void *ptr) override final {
diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp
index 6e1cd03a3af81ee85f4f9e0e212af7c02089734e..9dbd21501462c010384248544b81bb9f26346604 100644
--- a/include/aidge/backend/cpu/operator/AddImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl.hpp
@@ -53,152 +53,51 @@ class AddImplBackward_cpu<3>
 
 template <DimIdx_t NUM>
 class AddImpl_cpu : public OperatorImpl {
-   private:
-    const Add_Op<NUM>& mOp;
-    std::array<NbElts_t, NUM> mNbConsumedData = {};
-    std::array<NbElts_t, 1> mNbProducedData = {};
-
-   public:
-    AddImpl_cpu(const Add_Op<NUM>& op) : mOp(op) {}
+public:
+    AddImpl_cpu(const Add_Op<NUM>& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<AddImpl_cpu<NUM>> create(const Add_Op<NUM>& op) {
         return std::make_unique<AddImpl_cpu<NUM>>(op);
     }
-
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final {
-        assert(mOp.getInput(inputIdx) && "requires valid input");
-
-        // Requires the whole tensors
-        const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-        return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
-    }
-
-    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final {
-        // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
-        return 0;
-    }
-
-    NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final {
-        // Requires the whole tensors, regardless of available data on inputs
-        assert(outputIdx == 0 && "operator has only one output");
-        (void) outputIdx;
-
-        const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-        return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
-    }
-
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final {
-        assert(inputIdx < mNbConsumedData.size());
-        return mNbConsumedData[inputIdx];
-    }
-
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final {
-        assert(outputIdx < mNbProducedData.size());
-        return mNbProducedData[outputIdx];
-    }
-    void updateConsummerProducer() override final;
-
-    void forward() {
-        // nothing
-    }
-
-    void backward() { printf("Not implemented yet.\n"); }
 };
 
 template <>
 class AddImpl_cpu<1> : public OperatorImpl {
-   private:
-    const Add_Op<1>& mOp;
-    std::array<NbElts_t, 1> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    AddImpl_cpu(const Add_Op<1>& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
+public:
+    AddImpl_cpu(const Add_Op<1>& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<AddImpl_cpu<1>> create(const Add_Op<1>& op) {
         return std::make_unique<AddImpl_cpu<1>>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
-
     NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
-
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/,
-                               const std::vector<DimSize_t> &/*inputsSize*/) const override final;
-
-    NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
-
-    NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 template <>
 class AddImpl_cpu<2> : public OperatorImpl {
-   private:
-    const Add_Op<2>& mOp;
-    std::array<NbElts_t, 2> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    AddImpl_cpu(const Add_Op<2>& op) : mOp(op), mNbConsumedData({0, 0}), mNbProducedData({0}) {}
+public:
+    AddImpl_cpu(const Add_Op<2>& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<AddImpl_cpu<2>> create(const Add_Op<2>& op) {
         return std::make_unique<AddImpl_cpu<2>>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/,
-                               const std::vector<DimSize_t>& /*inputsSize*/) const override final;
-
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-
-    NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 template <>
 class AddImpl_cpu<3> : public OperatorImpl {
-   private:
-    const Add_Op<3>& mOp;
-    std::array<NbElts_t, 3> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    AddImpl_cpu(const Add_Op<3>& op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
+public:
+    AddImpl_cpu(const Add_Op<3>& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<AddImpl_cpu<3>> create(const Add_Op<3>& op) {
         return std::make_unique<AddImpl_cpu<3>>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-
     NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
-
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
-
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
index 490598599aedf24b26865ce6a1ddb3fe32044b1b..221e36dcfac44e21d1b1a35674ca21403b4b57ab 100644
--- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
@@ -20,7 +20,7 @@ namespace Aidge {
 
 template <class I1, class O>
 void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) {
-    // FIXME: missing Add parameters as arguments
+    // FIXME: missing Add attributes as arguments
     const I1* input1 = static_cast<const I1*>(input1_);
     O* output = static_cast<O*>(output_);
 
@@ -32,7 +32,7 @@ void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* inp
 template <class I1, class I2, class O>
 void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
                                       void* output_) {
-    // FIXME: missing Add parameters as arguments
+    // FIXME: missing Add attributes as arguments
     const I1* input1 = static_cast<const I1*>(input1_);
     const I2* input2 = static_cast<const I2*>(input2_);
     O* output = static_cast<O*>(output_);
@@ -45,7 +45,7 @@ void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* inp
 template <class I1, class I2, class I3, class O>
 void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
                                       const void* input3_, void* output_) {
-    // FIXME: missing Add parameters as arguments
+    // FIXME: missing Add attributes as arguments
     const I1* input1 = static_cast<const I1*>(input1_);
     const I2* input2 = static_cast<const I2*>(input2_);
     const I3* input3 = static_cast<const I3*>(input3_);
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
index 8373cb84a550efd8741a2dbc04c1e94ad37fe611..e3c3a6a28b08386a3b93702f8ce64df68f703119 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
@@ -29,36 +29,22 @@ namespace Aidge {
 class AvgPoolingImpl2DForward_cpu
     : public Registrable<AvgPoolingImpl2DForward_cpu,
                          std::tuple<DataType, DataType>,
-                         void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                         void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
 class AvgPoolingImpl2DBackward_cpu
     : public Registrable<AvgPoolingImpl2DBackward_cpu,
                          std::tuple<DataType, DataType>,
-                         void(const AvgPooling_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                         void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
 
 class AvgPoolingImpl2D_cpu : public OperatorImpl {
-   private:
-    const AvgPooling_Op<2> &mOp;
-    std::array<NbElts_t, 1> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    AvgPoolingImpl2D_cpu(const AvgPooling_Op<2> &op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
+public:
+    AvgPoolingImpl2D_cpu(const AvgPooling_Op<2> &op) : OperatorImpl(op) {}
 
     static std::unique_ptr<AvgPoolingImpl2D_cpu> create(const AvgPooling_Op<2> &op) {
         return std::make_unique<AvgPoolingImpl2D_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
index 776e020f1a20056db345c8e845fd73bb31b4138b..ea46a540ad04b6227d6ec01c965e2eb99806d5e1 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
@@ -26,51 +26,51 @@ namespace Aidge {
  * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
  * @tparam I Input data type.
  * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
+ * @param attrs Tuple of Attributes from the Operator.
  * @param dims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters &params,
+void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
                                              const std::array<DimSize_t, 4> &dims,
                                              const void *input_,
                                              void *output_) {
-    // FIXME: missing convolution parameters as arguments
+    // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) /
+                                static_cast<float>(std::get<0>(attrs)[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) /
+                                static_cast<float>(std::get<0>(attrs)[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
     // input  (batch, ch, Xin, Yin)
     // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation parameter into account
+    // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
             const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
             const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]);
+                const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
                 const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx);
+                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]);
+                    const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
                     const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify);
+                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify);
                     const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const std::size_t ix = ox * std::get<0>(params)[0];
-                    const std::size_t iy = oy * std::get<0>(params)[1];
+                    const std::size_t ix = ox * std::get<0>(attrs)[0];
+                    const std::size_t iy = oy * std::get<0>(attrs)[1];
 
                     if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
                         output[oIndexFull] += static_cast<O>(
diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
index d9f25b4a8e38510f82fc5afe9ed4b656197a47d5..060e19b135c12832e8a7e8cc9c0db828d4a204d1 100644
--- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
@@ -29,7 +29,7 @@ namespace Aidge {
 class BatchNormImpl2DForward_cpu
     : public Registrable<BatchNormImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType>,
-                         void(const BatchNorm_Op<2>::Parameters &,
+                         void(const BatchNorm_Op<2>::Attrs &,
                               const std::array<DimSize_t, 4> &,
                               const void *,
                               const void *,
@@ -41,7 +41,7 @@ class BatchNormImpl2DForward_cpu
 class BatchNormImpl2DBackward_cpu
     : public Registrable<BatchNormImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType>,
-                         void(const BatchNorm_Op<2>::Parameters &,
+                         void(const BatchNorm_Op<2>::Attrs &,
                               const std::array<DimSize_t, 4> &,
                               const void *,
                               const void *,
@@ -51,29 +51,15 @@ class BatchNormImpl2DBackward_cpu
                               void *)> {};
 
 class BatchNormImpl2D_cpu : public OperatorImpl {
-   private:
-    const BatchNorm_Op<2> &mOp;
-    std::array<NbElts_t, 5> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    BatchNormImpl2D_cpu(const BatchNorm_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0, 0, 0}), mNbProducedData({0}) {}
+public:
+    BatchNormImpl2D_cpu(const BatchNorm_Op<2> &op) : OperatorImpl(op) {}
 
     static std::unique_ptr<BatchNormImpl2D_cpu> create(const BatchNorm_Op<2> &op) {
         return std::make_unique<BatchNormImpl2D_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
index eedb80bde60d65b53bac70cc33ca83eb4f0121e7..486829e782ae2173332a7efa6646bb7bba322252 100644
--- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
@@ -27,7 +27,7 @@ namespace Aidge {
  * @tparam W Weight data type.
  * @tparam B Bias data type.
  * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
+ * @param attrs tuple of Attributes from the Operator
  * @param dims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param scale_ const scale Tensor.
@@ -37,9 +37,9 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class P, class O>
-void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
                                        const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
-    // FIXME: missing convolution parameters as arguments
+    // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const P *scale = static_cast<const P *>(scale_);
     const P *shift = static_cast<const P *>(shift_);
@@ -52,12 +52,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &param
     const DimSize_t featureMapSize = dims[2]*dims[3];
 
 
-    if ((freeze == true) || (std::get<1>(params) == 0.0f)) {
+    if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) {
         for (std::size_t batch = 0; batch < nbBatch; ++batch) {
             for (std::size_t ch = 0; ch < nbChannels; ++ch) {
                 const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
                 std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
-                const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params)));
+                const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs)));
 
                 for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
                     output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
@@ -81,10 +81,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &param
             const I inputMean = sum / static_cast<I>(nbDataPerChannel);
             const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel)  - inputMean*inputMean;
 
-            batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params);
-            batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params);
+            batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs);
+            batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs);
 
-            const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params)));
+            const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs)));
             for (std::size_t batch = 0; batch < nbBatch; ++batch) {
                 const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
                 for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
index 0d21c676d797b2fc4e95c4aea47674c8fca5eef4..7b5dbfb0801fb314d91da15c8a9c4b80fe62eb35 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
@@ -29,38 +29,24 @@ namespace Aidge {
 class ConvDepthWiseImpl2DForward_cpu
     : public Registrable<ConvDepthWiseImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
+                         void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
                               const void *, const void *, void *)> {};
 class ConvDepthWiseImpl2DBackward_cpu
     : public Registrable<ConvDepthWiseImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const ConvDepthWise_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
+                         void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
                               const void *, const void *, void *)> {};
 
 class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
-   private:
-    const ConvDepthWise_Op<2> &mOp;
-    std::array<NbElts_t, 3> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
+public:
+    ConvDepthWiseImpl2D_cpu(const ConvDepthWise_Op<2> &op) : OperatorImpl(op) {}
 
     static std::unique_ptr<ConvDepthWiseImpl2D_cpu> create(const ConvDepthWise_Op<2> &op) {
         return std::make_unique<ConvDepthWiseImpl2D_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index ee2d82e00376c5a2cc5a075565e35eb8885c021e..5aa29ac55740d46bba873bb9d85a04cd004cc3bd 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -9,7 +9,7 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 
 #include "aidge/utils/Registrar.hpp"
@@ -27,7 +27,7 @@ namespace Aidge {
  * @tparam W Weight data type.
  * @tparam B Bias data type.
  * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
+ * @param attrs tuple of Attributes from the Operator
  * @param dims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param weights_ const weight Tensor.
@@ -35,9 +35,9 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
                                        const void *input_, const void *weights_, const void *biases_, void *output_) {
-    // FIXME: missing convolution parameters as arguments
+    // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
     const B *biases = static_cast<const B *>(biases_);
@@ -46,52 +46,52 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameter
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
+                                static_cast<float>(std::get<0>(attrs)[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
+                                static_cast<float>(std::get<0>(attrs)[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
     // input  (batch, ch, Xin, Yin)
     // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation parameter into account
+    // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) {
-            const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize;
+        for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) {
+            const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize;
             B biasVal = (biases != nullptr) ? biases[ch] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
             const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1];
+            const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]);
+                const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
                 const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx);
+                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]);
+                    const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
                     const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify);
+                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
                     const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0];
-                    const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1];
+                    const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
+                    const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
 
                     if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] +=  (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                        output[oIndexFull] +=  (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
                     } else {
                         for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                             for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] *
+                                output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] *
                                                         input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                             }
                         }
diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index 1f3dffe43b966bc37887f267cc56760a899476f9..3db91ab507456244676c990427287e5755ab019b 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -29,38 +29,25 @@ namespace Aidge {
 class ConvImpl2DForward_cpu
     : public Registrable<ConvImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
+                         void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
                               const void *, const void *, void *)> {};
 class ConvImpl2DBackward_cpu
     : public Registrable<ConvImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
+                         void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
                               const void *, const void *, void *)> {};
 
 class ConvImpl2D_cpu : public OperatorImpl {
-   private:
-    const Conv_Op<2> &mOp;
-    std::array<NbElts_t, 3> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
    public:
-    ConvImpl2D_cpu(const Conv_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
+    ConvImpl2D_cpu(const Conv_Op<2>& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<ConvImpl2D_cpu> create(const Conv_Op<2> &op) {
         return std::make_unique<ConvImpl2D_cpu>(op);
     }
 
    public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index bc2f10099f42cba91be8d089b66dc176fdeb7c10..03e2c35170432181c7a9b3934d61f0bd18471876 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -27,7 +27,7 @@ namespace Aidge {
  * @tparam W Weight data type.
  * @tparam B Bias data type.
  * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
+ * @param attrs tuple of Attributes from the Operator
  * @param dims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param weights_ const weight Tensor.
@@ -35,9 +35,9 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
                                        const void *input_, const void *weights_, const void *biases_, void *output_) {
-    // FIXME: missing convolution parameters as arguments
+    // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
     const B *biases = static_cast<const B *>(biases_);
@@ -45,34 +45,34 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const s
 /*
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0]));
+            static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
+                                static_cast<float>(std::get<0>(attrs)[0]));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1]));
+            static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
+                                static_cast<float>(std::get<0>(attrs)[1]));
 
     // TODO: kernel computation
     // output (Xout, Yout, outCh, batch)
     // input  (Xin, Yin, inCh, batch)
     // weight (kernelX, kernelY, inCh, outCh)
-    // does not take Dilation parameter into account
+    // does not take Dilation attribute into account
     for (std::size_t ox = 0; ox < oxSize; ++ox) {
         for (std::size_t oy = 0; oy < oySize; ++oy) {
-            const std::size_t ix = ox * std::get<0>(params)[0];
-            const std::size_t iy = oy * std::get<0>(params)[1];
+            const std::size_t ix = ox * std::get<0>(attrs)[0];
+            const std::size_t iy = oy * std::get<0>(attrs)[1];
 
-            for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
-                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox));
+            for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
+                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox));
                 B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
                 for (std::size_t batch = 0; batch < dims[3]; ++batch) {
                     output[oIndex + batch] = biasVal;
                 }
                 for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-                    for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
-                        for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
+                    for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) {
+                        for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) {
                             const std::size_t wIndex =
-                                    outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx));
+                                    outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx));
                             std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
                             for (std::size_t batch = 0; batch < dims[3]; ++batch) {
                                 output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
@@ -88,53 +88,53 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const s
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
+                                static_cast<float>(std::get<0>(attrs)[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
+                                static_cast<float>(std::get<0>(attrs)[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
     // input  (batch, inCh, Xin, Yin)
     // weight (outCh, inCh, kernelX, kernelY)
-    // does not take Dilation parameter into account
+    // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
-            const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize;
+        for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
+            const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize;
             B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
             for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
                 const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
-                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1];
+                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
                 for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]);
+                    const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
                     const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx);
+                    const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
                     for (std::size_t oy = 0; oy < oySize; ++oy) {
-                        const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]);
+                        const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
                         const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                        const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify);
+                        const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
                         const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                        const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0];
-                        const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1];
+                        const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
+                        const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
 
                         if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                            output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                            output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
                         } else {
                             for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                                 for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                    output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] *
+                                    output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] *
                                                             input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                                 }
                             }
diff --git a/include/aidge/backend/cpu/operator/DivImpl.hpp b/include/aidge/backend/cpu/operator/DivImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..655a9f6c8accb80fc85d8bc7bd9bf378d4f48a6b
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/DivImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_H_
+#define AIDGE_CPU_OPERATOR_DIVIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Div.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// Div_Op is expected to come from "aidge/operator/Div.hpp", included above.
+
+// Registries of CPU Div kernels; a kernel is registered under a tuple of three DataType values.
+class DivImplForward_cpu
+    : public Registrable<DivImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*,void*)> {
+};
+class DivImplBackward_cpu  // same key and kernel signature as the forward registry
+    : public Registrable<DivImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {
+};
+
+class DivImpl_cpu : public OperatorImpl {  // implementation object built from a Div_Op
+public:
+    DivImpl_cpu(const Div_Op& op) : OperatorImpl(op) {}  // forwards the operator to the base class
+    // Factory with the signature handed to the Registrar<Div_Op> below.
+    static std::unique_ptr<DivImpl_cpu> create(const Div_Op& op) {
+        return std::make_unique<DivImpl_cpu>(op);
+    }
+    // Both overrides are only declared here; their definitions live outside this header.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {  // unnamed namespace: every translation unit including this header gets its own registrar
+static Registrar<Div_Op> registrarDivImpl_cpu("cpu", Aidge::DivImpl_cpu::create);
+}
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e2ead9ca8de3ed8328b659906336766fbfbb6a47
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp
@@ -0,0 +1,64 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/DivImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief Element-wise division forward kernel: output[i] = input_1[i] / input_2[k].
+ * @tparam I1 Element type of the dividend; I2 of the divisor; O of the output.
+ * @param input1Length Number of elements read from input1_ and written to output_.
+ * @param input2Length Number of elements in input2_: either input1Length, 1, or a
+ *                     smaller count that is cycled over with the flat index.
+ * @param input1_ Dividend buffer, read as const I1*.
+ * @param input2_ Divisor buffer, read as const I2*.
+ * @param output_ Destination buffer, written as O*.
+ * NOTE(review): cycling with i % input2Length matches a per-channel divisor only if
+ * the channel is the innermost dimension of input_1 -- confirm against callers.
+ */
+template <class I1, class I2, class O>
+void DivImpl_cpu_forward_kernel(std::size_t input1Length,
+                                     std::size_t input2Length,
+                                     const void* input1_,
+                                     const void* input2_,
+                                     void* output_) {
+    const I1* dividend = static_cast<const I1*>(input1_);
+    const I2* divisor = static_cast<const I2*>(input2_);
+    O* output = static_cast<O*>(output_);
+    // An empty divisor would make the modulo below divide by zero (undefined behaviour).
+    if (input2Length == 0) { return; }
+    // Same-size, scalar and cyclic divisors all reduce to one rule: with equal lengths
+    // the index is i itself, and with a single divisor element i % 1 is always 0.
+    const bool sameSize = (input2Length == input1Length);
+    for (std::size_t i = 0; i < input1Length; ++i) {
+        output[i] = dividend[i] / divisor[sameSize ? i : (i % input2Length)];
+    }
+}
+
+namespace {  // one registrar per supported DataType triple, matching the kernel's <I1, I2, O> parameters
+static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::DivImpl_cpu_forward_kernel<float, float, float>);  // float / float -> float
+static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::DivImpl_cpu_forward_kernel<int, int, int>);  // int / int -> int, i.e. C++ integer division
+static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::DivImpl_cpu_forward_kernel<double, double, double>);  // double / double -> double
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_DIVIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp
index c69cc0b08a58877108c78d6f12c29e9089c2f665..5d79369077d06288e218b9002274e7e3d1880b59 100644
--- a/include/aidge/backend/cpu/operator/FCImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl.hpp
@@ -26,34 +26,22 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
                                                  std::tuple<DataType, DataType, DataType, DataType>,
-                                                 void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t,
+                                                 void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t,
                                                       const void *, const void *, const void *, void *)> {};
 class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
                                                   std::tuple<DataType, DataType, DataType, DataType>,
-                                                  void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t,
+                                                  void(const FC_Op::Attrs &, const DimSize_t, const DimSize_t,
                                                        const void *, const void *, const void *, void *)> {};
 
 class FCImpl_cpu : public OperatorImpl {
-   private:
-    const FC_Op &mOp;
-    std::array<NbElts_t, 3> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
+public:
+    FCImpl_cpu(const FC_Op &op) : OperatorImpl(op) {}
 
-   public:
-    FCImpl_cpu(const FC_Op &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
+    static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) {
+        return std::make_unique<FCImpl_cpu>(op);
+    }
 
-    static std::unique_ptr<FCImpl_cpu> create(const FC_Op &op) { return std::make_unique<FCImpl_cpu>(op); }
-
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-	void updateConsummerProducer() override final;
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
index d6acb7dfea3415a8d67384745e16ecdd8bf06324..91e2558a7ef1079cbc9fb11f78fab53ef4246149 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -19,17 +19,17 @@
 
 namespace Aidge {
 // template <class I, class W, class B, class O>
-// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims,
+// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 4>& dims,
 //                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
-//     // FIXME: missing FC parameters as arguments
+//     // FIXME: missing FC attributes as arguments
 //     const I* input = static_cast<const I*>(input_);
 //     const W* weights = static_cast<const W*>(weights_);
 //     const B* biases = static_cast<const B*>(biases_);
 //     O* output = static_cast<O*>(output_);
 
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
+//     for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
 //         std::size_t oIndex = outIdx * dims[3];
-//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
+//         const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
 //         for (std::size_t batch = 0; batch < dims[3]; ++batch) {
 //             output[oIndex + batch] = bias;
 //         }
@@ -39,10 +39,10 @@ namespace Aidge {
 //         for (std::size_t iy = 0; iy < dims[1]; ++iy) {
 //             for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
 //                 const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
-//                 for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
+//                 for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
 //                     const std::size_t oIndex = dims[3] * outCh;
-//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
-//                                           outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
+//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) +
+//                                           outCh;  // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
 //                     for (std::size_t batch = 0; batch < dims[3]; ++batch) {
 //                         output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
 //                     }
@@ -53,9 +53,9 @@ namespace Aidge {
 // }
 
 // template <class I, class W, class B, class O>
-// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims,
+// void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const std::array<DimSize_t, 2>& dims,
 //                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
-//     // FIXME: missing FC parameters as arguments
+//     // FIXME: missing FC attributes as arguments
 //     const I* input = static_cast<const I*>(input_);
 //     const W* weights = static_cast<const W*>(weights_);
 //     const B* biases = static_cast<const B*>(biases_);
@@ -63,9 +63,9 @@ namespace Aidge {
 
 //     // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
 
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
+//     for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
 //         std::size_t oIndex = outIdx * dims[0];
-//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
+//         const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
 //         for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //             output[oIndex + batch] = bias;
 //         }
@@ -74,8 +74,8 @@ namespace Aidge {
 //     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //         const std::size_t oIndex = dims[1] * batch;
 //         for (std::size_t i = 0; i < dims[1]; ++i) {
-//             for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
-//                 std::size_t wIndex = i * std::get<0>(params) + outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
+//             for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
+//                 std::size_t wIndex = i * std::get<0>(attrs) + outCh;  // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
 //                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
 //             }
 //         }
@@ -83,29 +83,29 @@ namespace Aidge {
 // }
 
 template <class I, class W, class B, class O>
-void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
+void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
-    // FIXME: missing FC parameters as arguments
+    // FIXME: missing FC attributes as arguments
     const I* input = static_cast<const I*>(input_);
     const W* weights = static_cast<const W*>(weights_);
     const B* biases = static_cast<const B*>(biases_);
     O* output = static_cast<O*>(output_);
 
-    if (std::get<1>(params)) {
-        std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
+    if (std::get<1>(attrs)) {
+        std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0));
     }
     else {
         for (std::size_t batch = 0; batch < batchSize; ++batch) {
-            std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
+            std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs)));
         }
     }
 
     for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(params); ++out) {
-            output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
+        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
+            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
                                                         input + (batch + 1)*oneInputSize,
                                                         weights + out*oneInputSize,
-                                                        output[out + batch*std::get<0>(params)]);
+                                                        output[out + batch*std::get<0>(attrs)]);
         }
     }
 }
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
index abe167bea16de01f861beb9701f747d39f265d9d..371e2905a81d1dc2e114f6044388d7e6686122f8 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
@@ -24,35 +24,22 @@ namespace Aidge {
 
 // compute kernel registry for forward and backward
 class LeakyReLUImplForward_cpu
-    : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> {
+    : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
 };
 class LeakyReLUImplBackward_cpu
-    : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Parameters&, std::size_t, const void*, void*)> {
+    : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
 };
 
 class LeakyReLUImpl_cpu : public OperatorImpl {
-   private:
-    const LeakyReLU_Op& mOp;
-    std::array<NbElts_t, 1> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    LeakyReLUImpl_cpu(const LeakyReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
+public:
+    LeakyReLUImpl_cpu(const LeakyReLU_Op& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<LeakyReLUImpl_cpu> create(const LeakyReLU_Op& op) {
         return std::make_unique<LeakyReLUImpl_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
index ff9a8ac6a8f968f244429b330401d794f16fac01..761b9579c3c3dc187e4b0fac24812fa77f916e65 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
@@ -18,14 +18,14 @@
 
 namespace Aidge {
 template <class I, class O>
-void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params,
+void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs,
                                      std::size_t inputLenght,
                                      const void* input_,
                                      void* output_) {
 
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
-    I negativeSlope = static_cast<I>(std::get<0>(params));
+    I negativeSlope = static_cast<I>(std::get<0>(attrs));
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
diff --git a/include/aidge/backend/cpu/operator/MatMulImpl.hpp b/include/aidge/backend/cpu/operator/MatMulImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..2e4b3157360065b0fa857a8bcdd85f1b7442ee63
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/MatMulImpl.hpp
@@ -0,0 +1,53 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_H_
+#define AIDGE_CPU_OPERATOR_MATMULIMPL_H_
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// MatMul_Op is expected to come from "aidge/operator/MatMul.hpp", included above.
+
+// Registries of CPU MatMul kernels; a kernel is registered under a tuple of three DataType values.
+class MatMulImplForward_cpu
+    : public Registrable<MatMulImplForward_cpu, std::tuple<DataType, DataType, DataType>,
+                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+                              const void *, const void *, void *)> {};
+class MatMulImplBackward_cpu  // same key and kernel signature as the forward registry
+    : public Registrable<MatMulImplBackward_cpu, std::tuple<DataType, DataType, DataType>,
+                         void(const MatMul_Op::Attrs &, const DimSize_t, const DimSize_t,
+                              const void *, const void *, void *)> {};
+
+class MatMulImpl_cpu : public OperatorImpl {  // implementation object built from a MatMul_Op
+public:
+    MatMulImpl_cpu(const MatMul_Op &op): OperatorImpl(op) {}  // forwards the operator to the base class
+    // Factory with the signature handed to the Registrar<MatMul_Op> below.
+    static std::unique_ptr<MatMulImpl_cpu> create(const MatMul_Op &op) {
+        return std::make_unique<MatMulImpl_cpu>(op);
+    }
+    // Only declared here; the definition lives outside this header.
+    void forward() override;
+};
+
+namespace {  // unnamed namespace: every translation unit including this header gets its own registrar
+static Registrar<MatMul_Op> registrarMatMulImpl_cpu("cpu", Aidge::MatMulImpl_cpu::create);
+}
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..bc52779eff274379a853ea84fb839c9486652433
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp
@@ -0,0 +1,58 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_
+
+#include <algorithm>
+#include <numeric>
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+
+namespace Aidge {
+
+// Forward kernel: for each batch row b and output index o, stores the inner product of
+// input row b with weights row o (both oneInputSize elements long) into the output.
+template <class I, class W, class O>
+void MatMulImpl_cpu_forward_kernel(const MatMul_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
+                                   const void* input_, const void* weights_, void* output_) {
+    const I* inputData = static_cast<const I*>(input_);
+    const W* weightData = static_cast<const W*>(weights_);
+    O* outputData = static_cast<O*>(output_);
+    // std::get<0>(attrs) is used as the number of output values per batch row.
+    std::fill(outputData, outputData+(batchSize*std::get<0>(attrs)), O(0));
+    for (std::size_t batch = 0; batch < batchSize; ++batch) {
+        const I* rowBegin = inputData + batch*oneInputSize;
+        const I* rowEnd = rowBegin + oneInputSize;
+        O* outRow = outputData + batch*std::get<0>(attrs);
+        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
+            // accumulation starts from the zero stored by the std::fill above
+            outRow[out] = std::inner_product(rowBegin, rowEnd, weightData + out*oneInputSize, outRow[out]);
+        }
+    }
+}
+
+
+namespace {  // one registrar per supported DataType triple; presumably (input, weights, output) -- TODO confirm
+static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::MatMulImpl_cpu_forward_kernel<float, float, float>);  // all-float instantiation
+static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::MatMulImpl_cpu_forward_kernel<int, int, int>);  // all-int instantiation
+static Registrar<MatMulImplForward_cpu> registrarMatMulImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::MatMulImpl_cpu_forward_kernel<double, double, double>);  // all-double instantiation
+}  // namespace
+
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_MATMULIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a96fcc226b927b135465ef9cf395d10f844a2646
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
@@ -0,0 +1,56 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_
+#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/MaxPooling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// MaxPooling_Op is expected to come from "aidge/operator/MaxPooling.hpp", included above.
+
+// Registries of CPU 2D max-pooling kernels; a kernel is registered under a pair of DataType values.
+class MaxPoolingImpl2DForward_cpu
+    : public Registrable<MaxPoolingImpl2DForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+class MaxPoolingImpl2DBackward_cpu  // same key and kernel signature as the forward registry
+    : public Registrable<MaxPoolingImpl2DBackward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+
+class MaxPoolingImpl2D_cpu : public OperatorImpl {  // implementation object built from a MaxPooling_Op<2>
+public:
+    MaxPoolingImpl2D_cpu(const MaxPooling_Op<2> &op) : OperatorImpl(op) {}  // forwards the operator to the base class
+    // Factory with the signature handed to the Registrar<MaxPooling_Op<2>> below.
+    static std::unique_ptr<MaxPoolingImpl2D_cpu> create(const MaxPooling_Op<2> &op) {
+        return std::make_unique<MaxPoolingImpl2D_cpu>(op);
+    }
+    // Both overrides are only declared here; their definitions live outside this header.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {  // unnamed namespace: every translation unit including this header gets its own registrar
+// add cpu backend to MaxPooling_Op<2> implementation registry
+static Registrar<MaxPooling_Op<2>> registrarMaxPoolingImpl2D_cpu("cpu", Aidge::MaxPoolingImpl2D_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..caa99e8678a72c7fd3c77fe8b7579ea739ac64c7
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
@@ -0,0 +1,215 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/data/Data.hpp"
+#include <array>
+#include <tuple>
+#include <cmath>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D max pooling on the CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param attrs Operator attributes; element 0 is read as the strides, element 1 as the kernel dimensions.
+ * @param dims Input dimensions, indexed by this kernel as (batch, channel, X, Y).
+ * @param input_ Input buffer, read as const I*.
+ * @param output_ Output buffer, written as O*.
+ */
+template <class I, class O>
+void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs,
+                                             const std::array<DimSize_t, 4> &dims,
+                                             const void *input_,
+                                             void *output_) {
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    std::array<DimSize_t, 2> strideDims  = std::get<0>(attrs);
+    std::array<DimSize_t, 2> kernelDims  = std::get<1>(attrs);
+
+    // output size along X (dims[2]): floor((in - kernel + stride) / stride)
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+    // output size along Y (dims[3]): same formula with the second kernel/stride entry
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
+
+    // output (batch, ch, Xout, Yout)
+    // input  (batch, ch, Xin, Yin)
+    // Neither padding nor dilation is handled here.
+    // Every (batch, ch) plane is pooled independently of the other channels, so the
+    // pooling window is scanned exactly once per output element.
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                // difx is never positive (no padding term), hence sxMin evaluates to 0
+                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                // clamp the window extent to the rows left between ix and the end of the input
+                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
+                for (std::size_t oy = 0; oy < oySize; ++oy) {
+                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
+                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
+                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const std::size_t ix = ox * strideDims[0];
+                    const std::size_t iy = oy * strideDims[1];
+
+                    // The first window element always replaces the initial value (valid == false),
+                    // so the 0 below is only written out when the window is empty.
+                    I poolValue(0.0);
+                    bool valid = false;
+
+                    for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                        for (std::size_t sx = sxMin; sx < sxMax; ++sx)
+                        {
+                            const I value = input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+
+                            if (!valid || value > poolValue) {
+                                poolValue = value;
+                                valid = true;
+                            }
+                        }
+                    }
+                    output[oIndexFull] = poolValue;
+                }
+            }
+        }
+    }
+}
+
+//N2D2 version
+/*
+template <class T>
+void N2D2::PoolCell_Frame_Kernels::forwardMax(const T* alpha,
+                                              const Tensor<T>&
+                                              inputs,
+                                              const Descriptor& desc,
+                                              const T* beta,
+                                              Tensor<T>& outputs,
+                                              Tensor<ArgMax>& argMax,
+                                              bool useArgMax,
+                                              const Tensor<bool>& maps)
+{
+    const unsigned int size = inputs.dimB() * outputs.dimZ();
+
+#if defined(_OPENMP) && _OPENMP >= 200805
+#pragma omp parallel for collapse(2) if (size > 16)
+#else
+#pragma omp parallel for if (inputs.dimB() > 4 && size > 16)
+#endif
+    for (int batchPos = 0; batchPos < (int)inputs.dimB(); ++batchPos) {
+        for (unsigned int output = 0; output < outputs.dimZ(); ++output) {
+            for (unsigned int oy = 0; oy < outputs.dimY(); ++oy) {
+                for (unsigned int ox = 0; ox < outputs.dimX(); ++ox) {
+                    const unsigned int sxMin = (unsigned int)std::max(
+                        desc.padding[0] - (int)(ox * desc.stride[0]), 0);
+                    const unsigned int syMin = (unsigned int)std::max(
+                        desc.padding[1] - (int)(oy * desc.stride[1]), 0);
+                    const unsigned int sxMax = Utils::clamp
+                        <int>(inputs.dimX() + desc.padding[0] - ox * desc.stride[0],
+                              0,
+                              desc.pool[0]);
+                    const unsigned int syMax = Utils::clamp
+                        <int>(inputs.dimY() + desc.padding[1] - oy * desc.stride[1],
+                              0,
+                              desc.pool[1]);
+
+                    const int ix = (int)(ox * desc.stride[0]) - desc.padding[0];
+                    const int iy = (int)(oy * desc.stride[1]) - desc.padding[1];
+
+                    T poolValue(0.0);
+
+                    // For each output, compute the pool value
+                    if (useArgMax) {
+                        const ArgMax inputMax
+                            = argMax(ox, oy, output, batchPos);
+
+                        if (inputMax.valid) {
+                            poolValue = inputs(inputMax.ix,
+                                               inputMax.iy,
+                                               inputMax.channel,
+                                               batchPos);
+                        }
+                    }
+                    else {
+                        unsigned int ixMax = 0;
+                        unsigned int iyMax = 0;
+                        unsigned int channelMax = 0;
+                        bool valid = false;
+
+                        for (unsigned int channel = 0; channel < inputs.dimZ();
+                             ++channel)
+                        {
+                            if (!maps.empty() && !maps(output, channel))
+                                continue;
+
+                            for (unsigned int sy = syMin; sy < syMax; ++sy) {
+                                for (unsigned int sx = sxMin; sx < sxMax; ++sx)
+                                {
+                                    const T value = inputs(ix + sx,
+                                                                 iy + sy,
+                                                                 channel,
+                                                                 batchPos);
+
+                                    if (!valid || value > poolValue) {
+                                        poolValue = value;
+                                        valid = true;
+
+                                        ixMax = ix + sx;
+                                        iyMax = iy + sy;
+                                        channelMax = channel;
+                                    }
+                                }
+                            }
+                        }
+
+                        argMax(ox, oy, output, batchPos)
+                            = ArgMax(ixMax, iyMax, channelMax, valid);
+                    }
+
+                    outputs(ox, oy, output, batchPos)
+                        = (*alpha) * poolValue
+                          + (*beta) * outputs(ox, oy, output, batchPos);
+                }
+            }
+        }
+    }
+}
+
+*/
+
+namespace {  // one registrar per supported DataType pair, matching the kernel's <I, O> parameters
+static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float32(
+        std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}),  // key spelled as an explicit std::tuple
+        Aidge::MaxPoolingImpl2D_cpu_forward_kernel<float, float>);
+static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},  // key passed as a braced list instead
+        Aidge::MaxPoolingImpl2D_cpu_forward_kernel<int, int>);
+static Registrar<MaxPoolingImpl2DForward_cpu> registrarMaxPoolingImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        Aidge::MaxPoolingImpl2D_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MulImpl.hpp b/include/aidge/backend/cpu/operator/MulImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..54361e4f5f7a361032c9f4928392f18f183724ac
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/MulImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_MULIMPL_H_
+#define AIDGE_CPU_OPERATOR_MULIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Mul.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Mul_Op;
+
+// Registry of forward kernels: key is three DataType values, kernel takes two sizes, two const buffers, one output buffer.
+class MulImplForward_cpu
+    : public Registrable<MulImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {};
+// Registry of backward kernels, declared with the same key type and kernel signature.
+class MulImplBackward_cpu
+    : public Registrable<MulImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {};
+
+// Operator implementation registered for Mul_Op under the "cpu" key.
+class MulImpl_cpu : public OperatorImpl {
+public:
+    MulImpl_cpu(const Mul_Op& op) : OperatorImpl(op) {}
+    // Factory passed to the Mul_Op registrar at the bottom of this namespace.
+    static std::unique_ptr<MulImpl_cpu> create(const Mul_Op& op) {
+        return std::make_unique<MulImpl_cpu>(op);
+    }
+    // Only declared in this header; the definitions are provided elsewhere.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Mul_Op> registrarMulImpl_cpu("cpu", Aidge::MulImpl_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_MULIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9caef8b88af3ca779309b60eba984a72db35f84a
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
@@ -0,0 +1,64 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/MulImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief CPU forward kernel: output_[i] = input1_[i] * input2_[j] for every i < input1Length.
+ *        j is i when both lengths match, 0 when input2Length is 1, otherwise i % input2Length.
+ * @note input2Length must not be 0 unless input1Length is 0 too: it is used as a modulo divisor.
+ * @tparam I1,I2,O Element types that input1_, input2_ and output_ are cast to.
+ */
+template <class I1, class I2, class O>
+void MulImpl_cpu_forward_kernel(std::size_t input1Length, std::size_t input2Length,
+                                const void* input1_, const void* input2_, void* output_) {
+    const I1* lhs = static_cast<const I1*>(input1_);
+    const I2* rhs = static_cast<const I2*>(input2_);
+    O* dst = static_cast<O*>(output_);
+
+    if (input1Length == input2Length) {
+        // Equal lengths: operands are paired index by index.
+        for (std::size_t idx = 0; idx < input1Length; ++idx) {
+            dst[idx] = lhs[idx] * rhs[idx];
+        }
+    } else if (input2Length == 1) {
+        // Single-element second operand: the same factor is applied everywhere.
+        for (std::size_t idx = 0; idx < input1Length; ++idx) {
+            dst[idx] = lhs[idx] * rhs[0];
+        }
+    } else {
+        // input_2 is 1d and of size the number of channels of input_1: cycle through it.
+        for (std::size_t idx = 0; idx < input1Length; ++idx) {
+            dst[idx] = lhs[idx] * rhs[idx % input2Length];
+        }
+    }
+}
+
+namespace {
+// Register the float, int and double instantiations of MulImpl_cpu_forward_kernel
+// in the MulImplForward_cpu registry; each key lists the DataType matching the
+// kernel's three template arguments, in order.
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32}, Aidge::MulImpl_cpu_forward_kernel<float, float, float>);
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::MulImpl_cpu_forward_kernel<int, int, int>);
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::MulImpl_cpu_forward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_MULIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9d93828f5817043f4f5cb07166db213c02866ca1
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -0,0 +1,58 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_PADIMPL_H_
+#define AIDGE_CPU_OPERATOR_PADIMPL_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Pad.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// class Pad_Op;
+
+// Registries of 2D Pad CPU kernels, keyed by a pair of DataType values. A kernel takes the
+// operator attributes, a 4-entry dimension array, a const input buffer and an output buffer.
+class PadImpl2DForward_cpu
+    : public Registrable<PadImpl2DForward_cpu, std::tuple<DataType, DataType>,
+                         void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+// Backward counterpart with the same key type and kernel signature.
+class PadImpl2DBackward_cpu
+    : public Registrable<PadImpl2DBackward_cpu, std::tuple<DataType, DataType>,
+                         void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+
+// Operator implementation registered for Pad_Op<2> under the "cpu" key.
+class PadImpl2D_cpu : public OperatorImpl {
+public:
+    PadImpl2D_cpu(const Pad_Op<2> &op) : OperatorImpl(op) {}
+
+    // Factory passed to the Pad_Op<2> registrar at the bottom of this namespace.
+    static std::unique_ptr<PadImpl2D_cpu> create(const Pad_Op<2> &op) {
+        return std::make_unique<PadImpl2D_cpu>(op);
+    }
+    // Only declared in this header; the definitions are provided elsewhere.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Pad_Op<2> implementation registry
+static Registrar<Pad_Op<2>> registrarPadImpl2D_cpu("cpu", Aidge::PadImpl2D_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8b793257d2f3f126793316d463fe2542512da939
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
@@ -0,0 +1,110 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/utils/Types.h"
+#include <cmath>
+#include <array>
+#include <algorithm>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D Padding on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param attrs Operator attributes: <0> four border widths, <1> PadBorderType, <2> fill value.
+ * @param dims Array of input dimensions: dims[0] and dims[1] bound the batch and channel
+ *             loops, dims[2] and dims[3] are the extents addressed by iy and ix.
+ * @param input_ const input Tensor.
+ * @param output_ Output Tensor; every element of the padded output is written.
+ */
+template <class I, class O>
+void PadImpl2D_cpu_forward_kernel(const Pad_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+                                       const void *input_, void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+    // Padded extents: the input extent plus the two border widths attached to that axis.
+    const std::size_t oySize = dims[2] + std::get<0>(attrs)[0] + std::get<0>(attrs)[1];
+    const std::size_t oxSize = dims[3] + std::get<0>(attrs)[2] + std::get<0>(attrs)[3];
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
+            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
+
+            for (unsigned int oy = 0; oy < oySize; ++oy) {
+                for (unsigned int ox = 0; ox < oxSize; ++ox) {
+                    // NOTE(review): ox is the slow index here and in every input read below - confirm the intended plane layout.
+                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    // Start from the fill value; it is kept for Constant positions outside the input.
+                    O outputValue = std::get<2>(attrs);
+                    if (std::get<1>(attrs) == PadBorderType::Constant) {
+                        // NOTE(review): entries [3] and [1] serve as leading offsets in all modes - confirm the attribute order.
+                        int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]);
+                        int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]);
+                        if (ix >= 0  && ix < static_cast<int>(dims[3]) && iy >= 0  && iy < static_cast<int>(dims[2])) {
+                            outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                        }
+                    }
+                    else if (std::get<1>(attrs) == PadBorderType::Edge) {
+                        // Clamp the shifted coordinate to the nearest valid input index.
+                        int ix = std::max(0, std::min(static_cast<int>(dims[3]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])));
+                        int iy = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])));
+                        outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                    }
+                    else if (std::get<1>(attrs) == PadBorderType::Reflect) {
+                        int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]);
+                        int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]);
+                        // Mirror about index 0 (edge not repeated); assumes each border is at most extent - 1.
+                        if (ix < 0)
+                            ix = 0 - ix;
+                        if (iy < 0)
+                            iy = 0 - iy;
+                        // Mirror about the last index: 2*(extent-1) - i stays >= 0 for overshoots up to extent - 1.
+                        if (ix >= static_cast<int>(dims[3]))
+                            ix = 2 * (static_cast<int>(dims[3]) - 1) - ix;
+                        if (iy >= static_cast<int>(dims[2]))
+                            iy = 2 * (static_cast<int>(dims[2]) - 1) - iy;
+                        outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                    }
+                    else if (std::get<1>(attrs) == PadBorderType::Wrap) {
+                        // Adding the extent before % keeps the dividend non-negative while the offset does not exceed the extent.
+                        int ix = (static_cast<int>(dims[3]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])) % static_cast<int>(dims[3]);
+                        int iy = (static_cast<int>(dims[2]) + static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])) % static_cast<int>(dims[2]);
+                        outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
+                    }
+
+                    output[oIndexFull] = outputValue;
+                }
+            }
+        }
+    }
+}
+
+namespace {
+// Register the float, int and double instantiations of PadImpl2D_cpu_forward_kernel
+// in the PadImpl2DForward_cpu registry; each key lists the DataType matching the
+// kernel's two template arguments, in order.
+static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::PadImpl2D_cpu_forward_kernel<float, float>);
+static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::PadImpl2D_cpu_forward_kernel<int, int>);
+static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::PadImpl2D_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c33fbf0ed4adf4a0206ce8ed32ffdce2cd9ad17c
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/PowImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_
+#define AIDGE_CPU_OPERATOR_POWIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Pow.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Pow_Op;
+
+// Registry of forward kernels: key is three DataType values, kernel takes two sizes, two const buffers, one output buffer.
+class PowImplForward_cpu
+    : public Registrable<PowImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {};
+// Registry of backward kernels, declared with the same key type and kernel signature.
+class PowImplBackward_cpu
+    : public Registrable<PowImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {};
+
+// Operator implementation registered for Pow_Op under the "cpu" key.
+class PowImpl_cpu : public OperatorImpl {
+public:
+    PowImpl_cpu(const Pow_Op& op) : OperatorImpl(op) {}
+    // Factory passed to the Pow_Op registrar at the bottom of this namespace.
+    static std::unique_ptr<PowImpl_cpu> create(const Pow_Op& op) {
+        return std::make_unique<PowImpl_cpu>(op);
+    }
+    // Only declared in this header; the definitions are provided elsewhere.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Pow_Op> registrarPowImpl_cpu("cpu", Aidge::PowImpl_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c9c5db7e9aef07d24ba8f80c94b8f2494865e004
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp
@@ -0,0 +1,66 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include <cmath>
+
+#include "aidge/backend/cpu/operator/PowImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief CPU forward kernel: output_[i] = std::pow(input1_[i], input2_[j]) for every i < input1Length.
+ *        j is i when both lengths match, 0 when input2Length is 1, otherwise i % input2Length.
+ * @note input2Length must not be 0 unless input1Length is 0 too: it is used as a modulo divisor.
+ * @note The value returned by std::pow is converted to O on assignment.
+ * @tparam I1,I2,O Element types that input1_, input2_ and output_ are cast to.
+ */
+template <class I1, class I2, class O>
+void PowImpl_cpu_forward_kernel(std::size_t input1Length, std::size_t input2Length,
+                                const void* input1_, const void* input2_, void* output_) {
+    const I1* base = static_cast<const I1*>(input1_);
+    const I2* exponent = static_cast<const I2*>(input2_);
+    O* dst = static_cast<O*>(output_);
+
+    if (input1Length == input2Length) {
+        // Equal lengths: one exponent per base element.
+        for (std::size_t idx = 0; idx < input1Length; ++idx) {
+            dst[idx] = std::pow(base[idx], exponent[idx]);
+        }
+    } else if (input2Length == 1) {
+        // Single-element second operand: the same exponent is applied everywhere.
+        for (std::size_t idx = 0; idx < input1Length; ++idx) {
+            dst[idx] = std::pow(base[idx], exponent[0]);
+        }
+    } else {
+        // input_2 is 1d and of size the number of channels of input_1: cycle through it.
+        for (std::size_t idx = 0; idx < input1Length; ++idx) {
+            dst[idx] = std::pow(base[idx], exponent[idx % input2Length]);
+        }
+    }
+}
+
+namespace {
+// Register the float, int and double instantiations of PowImpl_cpu_forward_kernel
+// in the PowImplForward_cpu registry; each key lists the DataType matching the
+// kernel's three template arguments, in order.
+static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32}, Aidge::PowImpl_cpu_forward_kernel<float, float, float>);
+static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::PowImpl_cpu_forward_kernel<int, int, int>);
+static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::PowImpl_cpu_forward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ProducerImpl.hpp b/include/aidge/backend/cpu/operator/ProducerImpl.hpp
index 032172dbf0995fc62ce631aa5eba1cabf2374ad3..19361f1903e8737562dba63b24f3410e6eba1e5b 100644
--- a/include/aidge/backend/cpu/operator/ProducerImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ProducerImpl.hpp
@@ -21,31 +21,19 @@
 
 namespace Aidge {
 class ProducerImpl_cpu : public OperatorImpl {
-   private:
-    const Producer_Op &mOp;
-
-   public:
-    ProducerImpl_cpu(const Producer_Op &op) : mOp(op) {}
+public:
+    ProducerImpl_cpu(const Producer_Op &op) : OperatorImpl(op) {}
 
     static std::unique_ptr<ProducerImpl_cpu> create(const Producer_Op &op) {
         return std::make_unique<ProducerImpl_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
-static Registrar<Producer_Op> registrarProducer1DImpl_cpu("cpu", Aidge::ProducerImpl_cpu::create);
+static Registrar<Producer_Op> registrarProducerImpl_cpu("cpu", Aidge::ProducerImpl_cpu::create);
 }  // namespace
 }  // namespace Aidge
 
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
index 537bdeeaf89b388a82e819330649c2ae3445c590..6596c1c9052ca8f919c3cb2fa7ef5a2fa1f823d4 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
@@ -31,28 +31,15 @@ class ReLUImplBackward_cpu
 };
 
 class ReLUImpl_cpu : public OperatorImpl {
-   protected:
-    const ReLU_Op& mOp;
-    std::array<NbElts_t, 1> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    ReLUImpl_cpu(const ReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
+public:
+    ReLUImpl_cpu(const ReLU_Op& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<ReLUImpl_cpu> create(const ReLU_Op& op) {
         return std::make_unique<ReLUImpl_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..e336adb003769afd97770fd3dd65796b5bbf6a2d
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_H_
+#define AIDGE_CPU_OPERATOR_SCALINGIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Scaling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <array>
+
+namespace Aidge {
+// class Scaling_Op;
+
+// Registry of forward kernels: key is two DataType values, kernel takes the attributes, a size, a const buffer, an output buffer.
+class ScalingImplForward_cpu
+    : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> {};
+// Registry of backward kernels, declared with the same key type and kernel signature.
+class ScalingImplBackward_cpu
+    : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> {};
+
+class ScalingImpl_cpu : public OperatorImpl {
+public:
+    ScalingImpl_cpu(const Scaling_Op& op) : OperatorImpl(op) {}
+    // Factory passed to the Scaling_Op registrar at the bottom of this namespace.
+    static std::unique_ptr<ScalingImpl_cpu> create(const Scaling_Op& op) {
+        return std::make_unique<ScalingImpl_cpu>(op);
+    }
+    // Only declared in this header; the definitions are provided elsewhere.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Scaling_Op> registrarScalingImpl_cpu("cpu", Aidge::ScalingImpl_cpu::create);
+}
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8fe13bce3a4c470d77b083603d3b889a46fda71f
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
@@ -0,0 +1,45 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief CPU forward kernel: output_[i] = input_[i] * factor for every i < inputLenght, where
+ *        factor is attribute <0> of attrs converted to I (NOTE(review): an Int32 kernel would truncate a fractional factor).
+ */
+template <class I, class O>
+void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs, std::size_t inputLenght,
+                                    const void* input_, void* output_) {
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+    // Held by value: casting to `const I&` would bind to a converted temporary when the attribute type is not I.
+    const I scalingFactor = static_cast<I>(std::get<0>(attrs));
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        output[i] = input[i] * scalingFactor;
+    }
+}
+
+namespace {
+static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::ScalingImpl_cpu_forward_kernel<float, float>);
+static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::ScalingImpl_cpu_forward_kernel<int, int>);
+static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::ScalingImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
index 08567ab98e55233f1f578e82cb39ac5681f0a839..995f57f7c0168328e1982315358201c9f8940235 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
@@ -31,28 +31,15 @@ class SoftmaxImplBackward_cpu
 };
 
 class SoftmaxImpl_cpu : public OperatorImpl {
-   private:
-    const Softmax_Op& mOp;
-    std::array<NbElts_t, 1> mNbConsumedData;
-    std::array<NbElts_t, 1> mNbProducedData;
-
-   public:
-    SoftmaxImpl_cpu(const Softmax_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
+public:
+    SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op) {}
 
     static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) {
         return std::make_unique<SoftmaxImpl_cpu>(op);
     }
 
-   public:
-    NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-    void forward();
-
-    void backward();
+    void forward() override;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1880408cd52f537c6d4965438ece88151d4df6e3
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_H_
+#define AIDGE_CPU_OPERATOR_SQRTIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Sqrt.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Sqrt_Op;
+
+// Registry of forward kernels: key is two DataType values, kernel takes a size, a const buffer and an output buffer.
+class SqrtImplForward_cpu
+    : public Registrable<SqrtImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
+// Registry of backward kernels, declared with the same key type and kernel signature.
+class SqrtImplBackward_cpu
+    : public Registrable<SqrtImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
+
+// Operator implementation registered for Sqrt_Op under the "cpu" key.
+class SqrtImpl_cpu : public OperatorImpl {
+public:
+    SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op) {}
+    // Factory passed to the Sqrt_Op registrar at the bottom of this namespace.
+    static std::unique_ptr<SqrtImpl_cpu> create(const Sqrt_Op& op) {
+        return std::make_unique<SqrtImpl_cpu>(op);
+    }
+    // Only declared in this header; the definitions are provided elsewhere.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Sqrt_Op> registrarSqrtImpl_cpu("cpu", Aidge::SqrtImpl_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a180fc2cc206ef27b52d506a981f9f50f7bf8a3e
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp
@@ -0,0 +1,44 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include <cmath>
+
+#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief CPU forward kernel: output_[i] = std::sqrt(input_[i]) for every i < inputLenght,
+ *        reading input_ as const I* and writing output_ as O* (the result is converted to O).
+ */
+template <class I, class O>
+void SqrtImpl_cpu_forward_kernel(std::size_t inputLenght, const void* input_, void* output_) {
+    const I* src = static_cast<const I*>(input_);
+    O* dst = static_cast<O*>(output_);
+    for (std::size_t idx = 0; idx < inputLenght; ++idx) {
+        dst[idx] = std::sqrt(src[idx]);
+    }
+}
+
+namespace {  // Registers the float, int and double instantiations of SqrtImpl_cpu_forward_kernel in SqrtImplForward_cpu.
+static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_forward_kernel<float, float>);
+static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_forward_kernel<int, int>);
+static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..08ec69e509b2b6c02e30f613abd83208de254f75
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SubImpl.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_
+#define AIDGE_CPU_OPERATOR_SUBIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Sub.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Sub_Op;
+
+// Registry of forward kernels: key is three DataType values, kernel takes two sizes, two const buffers, one output buffer.
+class SubImplForward_cpu
+    : public Registrable<SubImplForward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {};
+// Registry of backward kernels, declared with the same key type and kernel signature.
+class SubImplBackward_cpu
+    : public Registrable<SubImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const std::size_t, const void*, const void*, void*)> {};
+
+// Operator implementation registered for Sub_Op under the "cpu" key.
+class SubImpl_cpu : public OperatorImpl {
+public:
+    SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op) {}
+    // Factory passed to the Sub_Op registrar at the bottom of this namespace.
+    static std::unique_ptr<SubImpl_cpu> create(const Sub_Op& op) {
+        return std::make_unique<SubImpl_cpu>(op);
+    }
+    // Only declared in this header; the definitions are provided elsewhere.
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Sub_Op> registrarSubImpl_cpu("cpu", Aidge::SubImpl_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..08f2e24fa38d2739943279666187a55d7076a89b
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
@@ -0,0 +1,65 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
+
+namespace Aidge {
+// Subtracts input_2 from input_1 element by element into output (input1Length elements written).
+// input_2 is indexed by i when the lengths match, by 0 when it holds a single value, and by
+// i % input2Length otherwise. An empty input_2 writes nothing (avoids a modulo by zero).
+template <class I1, class I2, class O>
+void SubImpl_cpu_forward_kernel(std::size_t input1Length,
+                                std::size_t input2Length,
+                                const void* input1_,
+                                const void* input2_,
+                                void* output_) {
+    const I1* input_1 = static_cast<const I1*>(input1_);
+    const I2* input_2 = static_cast<const I2*>(input2_);
+    O* output = static_cast<O*>(output_);
+
+    if (input2Length == 0) {
+        return;  // nothing to subtract; `i % input2Length` below would be undefined
+    }
+    if (input2Length == input1Length) {
+        for (std::size_t i = 0; i < input1Length; ++i) {
+            output[i] = input_1[i] - input_2[i];
+        }
+    } else if (input2Length == 1) {
+        for (std::size_t i = 0; i < input1Length; ++i) {
+            output[i] = input_1[i] - input_2[0];
+        }
+    } else {
+        // input_2 is 1d and of size the number of channels of input_1: reuse it cyclically
+        for (std::size_t i = 0; i < input1Length; ++i) {
+            output[i] = input_1[i] - input_2[i % input2Length];
+        }
+    }
+}
+
+namespace {  // forward kernels registered per data-type triple; only same-type combinations are provided here
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::SubImpl_cpu_forward_kernel<float, float, float>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::SubImpl_cpu_forward_kernel<int, int, int>);  // NOTE(review): assumes `int` is 32 bits wide -- confirm for all targets
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::SubImpl_cpu_forward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ */
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..24ce15ab7ead32f98c7ac3edcd34bb2010ff4326
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+numpy
diff --git a/setup.py b/setup.py
index 0b0f66e9132d66cdb6385d7f8c6c69ae0cc5d0e3..80500f3165dd87eb7b6dd73c78b89806cc8a874a 100644
--- a/setup.py
+++ b/setup.py
@@ -62,15 +62,17 @@ class CMakeBuild(build_ext):
 
         os.chdir(str(build_temp))
 
-        # Impose to use the executable of the python 
+        # Impose to use the executable of the python
         # used to launch setup.py to setup PythonInterp
         param_py = "-DPYTHON_EXECUTABLE=" + sys.executable
-        
-        install_path = f"{build_temp}/install" if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"]
 
-        self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}'])
+        compile_type = 'Debug'
+        install_path = os.path.join(sys.prefix, "lib", "libAidge")  if "AIDGE_INSTALL" not in os.environ else os.environ["AIDGE_INSTALL"]
+
+        self.spawn(['cmake', str(cwd), param_py, '-DTEST=OFF', f'-DCMAKE_INSTALL_PREFIX:PATH={install_path}', f'-DCMAKE_BUILD_TYPE={compile_type}'])
         if not self.dry_run:
-            self.spawn(['make', 'all', 'install', '-j', max_jobs])
+            self.spawn(['cmake', '--build', '.', '--config', compile_type, '-j', max_jobs])
+            self.spawn(['cmake', '--install', '.', '--config', compile_type])
         os.chdir(str(cwd))
 
         aidge_package = build_lib / (get_project_name())
@@ -81,13 +83,13 @@ class CMakeBuild(build_ext):
         # Copy all shared object files from build_temp/lib to aidge_package
         for root, _, files in os.walk(build_temp.absolute()):
             for file in files:
-                if file.endswith('.so') and (root != str(aidge_package.absolute())):
+                if (file.endswith('.so') or file.endswith('.pyd')) and (root != str(aidge_package.absolute())):
                     currentFile=os.path.join(root, file)
-                    shutil.copy(currentFile, str(aidge_package.absolute())) 
+                    shutil.copy(currentFile, str(aidge_package.absolute()))
 
         # Copy version.txt in aidge_package
         os.chdir(os.path.dirname(__file__))
-        shutil.copy("version.txt", str(aidge_package.absolute()))    
+        shutil.copy("version.txt", str(aidge_package.absolute()))
 
 
 if __name__ == '__main__':
@@ -100,13 +102,13 @@ if __name__ == '__main__':
         long_description_content_type="text/markdown",
         long_description="\n".join(DOCLINES[2:]),
         classifiers=[c for c in CLASSIFIERS.split('\n') if c],
-        platforms=["Linux"],
         packages=find_packages(where="."),
         include_package_data=True,
         ext_modules=[CMakeExtension(get_project_name())],
         cmdclass={
             'build_ext': CMakeBuild,
         },
+        install_requires=['aidge_core'],
         zip_safe=False,
 
     )
diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp
index d3da42185237a59146af17199e34a00dbebd6d96..4be0078199671bc09af73a5f9dbfcd0ff2e61bed 100644
--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -25,38 +25,12 @@
 // AddImpl_cpu<1>
 //////////////////////////////////
 
-Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
-    assert(mOp.getInput(0) && "requires valid input");
-    // Requires the whole tensors
-    return static_cast<int>(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size());
-}
-
 Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
-}
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
-    return mNbConsumedData[0];
-}
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
-    return mNbProducedData[0];
-}
-void Aidge::AddImpl_cpu<1>::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
-}
 void Aidge::AddImpl_cpu<1>::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
 
     // Find the correct kernel type
@@ -68,11 +42,6 @@ void Aidge::AddImpl_cpu<1>::forward() {
     kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-}
-
-void Aidge::AddImpl_cpu<1>::backward() {
-    printf("Not implemented yet.\n");
 }
 
 
@@ -80,67 +49,26 @@ void Aidge::AddImpl_cpu<1>::backward() {
 // AddImpl_cpu<2>
 //////////////////////////////////
 
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(),
-                            NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
+    // this implementation of add can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-
-    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(),
-                        NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
-    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
-    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
-}
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
-    return mNbProducedData[0];
-}
-void Aidge::AddImpl_cpu<2>::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
-}
 void Aidge::AddImpl_cpu<2>::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
-    assert(mOp.mInputs[1] && "missing input #1");
+    assert(mOp.getInput(1) && "missing input #1");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<AddImplForward_cpu<2>>::create({
         mOp.getInput(0)->dataType(),
-        mOp.mInputs[1]->dataType(),
+        mOp.getInput(1)->dataType(),
         mOp.getOutput(0)->dataType()});
 
     // Call kernel
     kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
-        mOp.mInputs[1]->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-}
-
-void Aidge::AddImpl_cpu<2>::backward() {
-    printf("Not implemented yet.\n");
 }
 
 
@@ -148,69 +76,27 @@ void Aidge::AddImpl_cpu<2>::backward() {
 // AddImpl_cpu<3>
 //////////////////////////////////
 
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(),
-                            Aidge::NbElts_t(1), std::multiplies<Aidge::NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
+    // this implementation of add can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-
-    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(),
-                        NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
-    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
-    return mNbConsumedData[inputIdx];
-}
-
-Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
-    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
-    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
-}
-void Aidge::AddImpl_cpu<3>::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
-}
 void Aidge::AddImpl_cpu<3>::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
-    assert(mOp.mInputs[1] && "missing input #1");
-    assert(mOp.mInputs[2] && "missing input #2");
+    assert(mOp.getInput(1) && "missing input #1");
+    assert(mOp.getInput(2) && "missing input #2");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<AddImplForward_cpu<3>>::create({
         mOp.getInput(0)->dataType(),
-        mOp.mInputs[1]->dataType(),
-        mOp.mInputs[2]->dataType(),
+        mOp.getInput(1)->dataType(),
+        mOp.getInput(2)->dataType(),
         mOp.getOutput(0)->dataType()});
 
     // Call kernel
     kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
-        mOp.mInputs[1]->getImpl()->rawPtr(),
-        mOp.mInputs[2]->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getInput(2)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-}
-
-void Aidge::AddImpl_cpu<3>::backward() {
-    printf("Not implemented yet.\n");
 }
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 6c434a5c38853a1dee66db5be95b6b1bfdde8162..ae93934c23ce9bbc97d071be2f258e04ec8ae877 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -20,49 +20,12 @@
 #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp"
 
-Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if
-    // there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                                                           const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-    (void) outputIdx;
-
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
-    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
-    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
-}
-
-Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
-    assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
-    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
-}
-void Aidge::AvgPoolingImpl2D_cpu::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                                           // amount for a forward pass
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
 void Aidge::AvgPoolingImpl2D_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
 
     // Find the correct kernel type
@@ -70,11 +33,8 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() {
             Registrar<AvgPoolingImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes(),
                mOp.getInput(0)->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
-
 }
-
-void Aidge::AvgPoolingImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index a0d4d032ded9ede1b2dba307aa967af330167d25..c9d52b767b03008d19209e34fa9a6f2749a63450 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -19,50 +19,12 @@
 #include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
 #include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp"
 
-Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if
-    // there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                                                              const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-    (void) outputIdx;
-
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
-    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
-    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
-}
-
-Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
-    assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
-    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
-}
-void Aidge::BatchNormImpl2D_cpu::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
 void Aidge::BatchNormImpl2D_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
     assert(mOp.getInput(1) && "missing input #1");
     assert(mOp.getInput(2) && "missing input #2");
@@ -76,7 +38,7 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
                                                           mOp.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(dynamic_cast<const BatchNorm_Op<2>&>(mOp).getStaticAttributes(),
                mOp.getInput(0)->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getInput(1)->getImpl()->rawPtr(),
@@ -85,8 +47,4 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
                mOp.getInput(4)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr(),
                true);
-
-
 }
-
-void Aidge::BatchNormImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 3e920cf68366b82bce8df29c8aea0c838e6a1364..5ac109e2f282ce55c8a274597be08561c2baf5c8 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -21,50 +21,12 @@
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
 
-Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if
-    // there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                                                           const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-    (void) outputIdx;
-
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
-    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
-    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
-}
-
-Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
-    assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
-    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
-}
-void Aidge::ConvDepthWiseImpl2D_cpu::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
 void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
     assert(mOp.getInput(1) && "missing input #1");
     assert(mOp.getInput(2) && "missing input #2");
@@ -77,9 +39,7 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
                                                           mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
 }
-
-void Aidge::ConvDepthWiseImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index b4ddf80929923a9c2c5998ac8614ebb0d3afe000..347d427908502b9976c2943417775bcbf0d3b344 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -21,48 +21,11 @@
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
 
-Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if
-    // there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                                                         const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-    (void) outputIdx;
-
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
-    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
-    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
-}
-
-Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
-    assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
-    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
-}
-void Aidge::ConvImpl2D_cpu::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
 void Aidge::ConvImpl2D_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -75,11 +38,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
                                                           mOp.getInput(2)->dataType(), mOp.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+    kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
-
-
 }
-
-void Aidge::ConvImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f7cbc7d20b9126ab318a6989ebf627491cb247aa
--- /dev/null
+++ b/src/operator/DivImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Div.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/DivImpl.hpp"
+#include "aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // Reports 0 protected elements whatever the input index: this implementation can be in-place
+    return 0;
+}
+
+void Aidge::DivImpl_cpu::forward() {  // checks the inputs, then dispatches to the registered forward kernel
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+    // input #1 must be a single value, match input #0's size, or be 1-D with the size of input #0's last dimension
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either be a tensor of size 1, the number of channels of input #0 or the same size of input #0");
+
+    // Look up the kernel registered for this (input #0, input #1, output) data-type triple
+    auto kernelFunc = Registrar<DivImplForward_cpu>::create({
+        mOp.getInput(0)->dataType(),
+        mOp.getInput(1)->dataType(),
+        mOp.getOutput(0)->dataType()});
+
+    // Run it on the raw buffers; the two leading arguments are the element counts of input #0 and input #1
+    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
+        std::static_pointer_cast<Tensor>(mOp.getInput(1))->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index 086902be0ab1c2027a8c62c143bc27921e5e9e1b..77ce50281cf4db94a492fce88a6d73eabde1bae5 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -21,84 +21,23 @@
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
 
-Aidge::NbElts_t Aidge::FCImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const
-{
-    assert(mOp.getInput(inputIdx) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto &inputDims
-        = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
-
-    return std::accumulate(
-        inputDims.begin(),
-        inputDims.end(),
-        Aidge::NbElts_t(1),
-        std::multiplies<Aidge::NbElts_t>());
-}
-
-Aidge::NbElts_t
-    Aidge::FCImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const
-{
-    // for the direct convolution algorithm, convolutions can be in-place, if
-    // there is no padding!
-    return 0;
-}
-
-Aidge::NbElts_t Aidge::FCImpl_cpu::getRequiredMemory(
-    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
-{
-    // Requires the whole tensors, regardless of available data on inputs
-    assert(outputIdx == 0 && "operator has only one output");
-    (void) outputIdx;
-
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(
-        outputDims.begin(),
-        outputDims.end(),
-        static_cast<NbElts_t>(1),
-        std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::FCImpl_cpu::getNbConsumedData(Aidge::IOIndex_t inputIdx) const
-{
-    assert((inputIdx != gk_IODefaultIndex) && (inputIdx < mNbConsumedData.size()));
-    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
-}
-
-Aidge::NbElts_t Aidge::FCImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx) const
-{
-    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
-    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
-}
-
-void Aidge::FCImpl_cpu::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]
-            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
-                                              // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
-
 void Aidge::FCImpl_cpu::forward()
 {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
-    assert(mOp.mInputs[1] && "missing input #1");
-    assert(mOp.mInputs[2] && "missing input #2");
+    assert(mOp.getInput(1) && "missing input #1");
+    assert(mOp.getInput(2) && "missing input #2");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<FCImplForward_cpu>::create(
         {mOp.getInput(0)->dataType(),
-         mOp.mInputs[1]->dataType(),
-         mOp.mInputs[2]->dataType(),
+         mOp.getInput(1)->dataType(),
+         mOp.getInput(2)->dataType(),
          mOp.getOutput(0)->dataType()});
 
     // Call kernel
     // if (mOp.getInput(0)->nbDims() == 4) {
     //     kernelFunc(
-    //         mOp.getParams(),
+    //         mOp.getStaticAttributes(),
     //         std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
     //         mOp.getInput(0)->getImpl()->rawPtr(),
     //         mOp.mInputs[1]->getImpl()->rawPtr(),
@@ -107,18 +46,11 @@ void Aidge::FCImpl_cpu::forward()
     // }
     // else
     kernelFunc(
-        mOp.getParams(),
+        dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
         mOp.getInput(0)->dims()[0],
         mOp.getInput(0)->sizeM1(),
         mOp.getInput(0)->getImpl()->rawPtr(),
-        mOp.mInputs[1]->getImpl()->rawPtr(),
-        mOp.mInputs[2]->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getInput(2)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-
-}
-
-void Aidge::FCImpl_cpu::backward()
-{
-    printf("Not implemented yet.\n");
 }
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index f6a44d381081c7c7f1dcbbf02d91212168cc07aa..c81acf60f0171bd819bfd760565e59d361401e29 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -21,42 +21,12 @@
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp"
 
-// FIXME: replace whole Tensor with minimum needed data quantity
-Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
-    assert(mOp.getInput(0) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto& inputDims = mOp.getInput(0)->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(),
-                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    const auto& outputDims = mOp.getOutput(0)->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(),
-                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
-    return mNbConsumedData[0];
-}
-
-Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
-    return mNbProducedData[0];
-}
-void Aidge::LeakyReLUImpl_cpu::updateConsummerProducer(){
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
-}
 void Aidge::LeakyReLUImpl_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
 
     // Find the correct kernel type
@@ -65,12 +35,8 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
         mOp.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(mOp.getParams(),
+    kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(),
         std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
 }
-
-void Aidge::LeakyReLUImpl_cpu::backward() {
-    printf("Not implemented yet.\n");
-}
diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f4812629c4bcf7b699d3eca66ff4e884df0c04d6
--- /dev/null
+++ b/src/operator/MatMulImpl.cpp
@@ -0,0 +1,55 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/MatMul.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+#include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp"
+
+void Aidge::MatMulImpl_cpu::forward()
+{
+    // Forward pass: look up the kernel registered for the data types of
+    // (input #0, input #1, output #0) and run it on the raw tensor buffers.
+    const auto input0 = mOp.getInput(0);
+    const auto input1 = mOp.getInput(1);
+
+    assert(input0 && "missing input #0");
+    assert(input1 && "missing input #1");
+
+    // The output is only looked up once both inputs are known to be present.
+    const auto output0 = mOp.getOutput(0);
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<MatMulImplForward_cpu>::create(
+        {input0->dataType(),
+         input1->dataType(),
+         output0->dataType()});
+
+    // dynamic_cast to a reference throws std::bad_cast if mOp is not a MatMul_Op.
+    const auto& attributes = dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes();
+
+    // Call kernel: operator attributes, first dimension of input #0, input #0's
+    // sizeM1(), then the raw data pointers of input #0, input #1 and output #0.
+    kernelFunc(
+        attributes,
+        input0->dims()[0],
+        input0->sizeM1(),
+        input0->getImpl()->rawPtr(),
+        input1->getImpl()->rawPtr(),
+        output0->getImpl()->rawPtr());
+}
diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c5127c1e4577b3da44716cdc34358a8906b9cbb0
--- /dev/null
+++ b/src/operator/MaxPoolingImpl.cpp
@@ -0,0 +1,40 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/operator/MaxPooling.hpp"
+
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
+#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // Returns 0 whatever the input index: this implementation can be in-place, so no input data is protected
+    return 0;
+}
+
+void Aidge::MaxPoolingImpl2D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Select the kernel registered for the (input #0, output #0) data types
+    auto kernelFunc = Registrar<MaxPoolingImpl2DForward_cpu>::create(
+        {mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Hand it the operator attributes, input #0's dims<4>() and the raw input/output buffers
+    const auto& attributes = dynamic_cast<const MaxPooling_Op<2>&>(mOp).getStaticAttributes();
+    kernelFunc(attributes, mOp.getInput(0)->dims<4>(),
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b6eb245cf0b1afc8893dfbab13d3294b945b3e0e
--- /dev/null
+++ b/src/operator/MulImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Mul.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/MulImpl.hpp"
+#include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // Returns 0 whatever the input index: this implementation can be in-place, so no input data is protected
+    return 0;
+}
+
+void Aidge::MulImpl_cpu::forward() {
+    // Runs the forward kernel registered for the (input #0, input #1, output #0) data types.
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    // Accepted sizes for input #1: one element, input #0's element count, or 1-D matching input #0's last dimension
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either be a tensor of size 1, the number of channels of input #0 or the same size of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<MulImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(), mOp.getInput(1)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel with both element counts and the raw input/output buffers
+    kernelFunc(mOp.getInput(0)->size(),
+        mOp.getInput(1)->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7c2af9e2161ddc4567b702690b8f268fe1af1b6c
--- /dev/null
+++ b/src/operator/PadImpl.cpp
@@ -0,0 +1,45 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/operator/Conv.hpp"
+
+#include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
+    assert(inputIdx == 0 && "operator has only one input");
+    (void) inputIdx;  // keeps the parameter "used" when assert() is compiled out
+
+    // Padding cannot be in-place: data that has not been consumed yet must not be overwritten.
+    // The protected amount is the number of elements by which the output exceeds the input.
+    const auto nbInputElts = std::static_pointer_cast<Tensor>(mOp.getInput(0))->size();
+    const auto nbOutputElts = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
+    return nbOutputElts - nbInputElts;
+}
+
+void Aidge::PadImpl2D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Select the kernel registered for the (input #0, output #0) data types
+    auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Hand it the operator attributes, input #0's dims<4>() and the raw input/output buffers
+    const auto& attributes = dynamic_cast<const Pad_Op<2>&>(mOp).getStaticAttributes();
+    kernelFunc(attributes, std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
+               mOp.getInput(0)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..52a4f46956e0d0f348583a23772c519a64ca857d
--- /dev/null
+++ b/src/operator/PowImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Pow.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/PowImpl.hpp"
+#include "aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::PowImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // Returns 0 whatever the input index: this implementation can be in-place, so no input data is protected
+    return 0;
+}
+
+void Aidge::PowImpl_cpu::forward() {
+    // Runs the forward kernel registered for the (input #0, input #1, output #0) data types.
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    // Accepted sizes for input #1: one element, input #0's element count, or 1-D matching input #0's last dimension
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either be a tensor of size 1, the number of channels of input #0 or the same size of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<PowImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(), mOp.getInput(1)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel with both element counts and the raw input/output buffers
+    kernelFunc(mOp.getInput(0)->size(),
+        mOp.getInput(1)->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/ProducerImpl.cpp b/src/operator/ProducerImpl.cpp
index 664f3745414380fbaf5654ab035ba2ab957da87b..404d95ef685fea3c5796e396a2c5e17c60ce53bc 100644
--- a/src/operator/ProducerImpl.cpp
+++ b/src/operator/ProducerImpl.cpp
@@ -19,57 +19,16 @@
 
 #include "aidge/backend/cpu/operator/ProducerImpl.hpp"
 
-
-std::size_t Aidge::ProducerImpl_cpu::getNbRequiredData(
-    Aidge::IOIndex_t /*inputIdx*/) const
-{
-    return 0;
-}
-
-
-Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbConsumedData(
-    Aidge::IOIndex_t /*inputIdx*/) const
-{
-    return 0;
-}
-
-
-std::size_t Aidge::ProducerImpl_cpu::getNbRequiredProtected(
-    Aidge::IOIndex_t /*inputIdx*/) const
-{
-    return 0;
-}
-
-
-std::size_t Aidge::ProducerImpl_cpu::getRequiredMemory(
-    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
+Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbProducedData(
+    Aidge::IOIndex_t outputIdx) const
 {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
     (void) outputIdx;
 
-    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(
-        outputDims.begin(),
-        outputDims.end(),
-        NbElts_t(1),
-        std::multiplies<NbElts_t>());
-}
-
-Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbProducedData(
-    Aidge::IOIndex_t /*outputIdx*/) const
-{
-    return getRequiredMemory(0, {});
-}
-void Aidge::ProducerImpl_cpu::updateConsummerProducer(){
+    return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size();
 }
 
 void Aidge::ProducerImpl_cpu::forward()
 {
 }
-
-
-void Aidge::ProducerImpl_cpu::backward()
-{
-    printf("Not implemented yet.\n");
-}
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index cea50bc1e72cfa8e60cdd0f1839c03bcd568e052..647898d3f0495a74fe7c1dd48dba446bd92cb7b5 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -21,42 +21,12 @@
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp"
 
-// FIXME: replace whole Tensor with minimum needed data quantity
-Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
-    assert(mOp.getInput(0) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(),
-                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(),
-                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
-    return mNbConsumedData[0];
-}
-
-Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
-    return mNbProducedData[0];
-}
-void Aidge::ReLUImpl_cpu::updateConsummerProducer(){
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
-}
 void Aidge::ReLUImpl_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
 
     // Find the correct kernel type
@@ -68,9 +38,4 @@ void Aidge::ReLUImpl_cpu::forward() {
     kernelFunc(mOp.getInput(0)->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-}
-
-void Aidge::ReLUImpl_cpu::backward() {
-    printf("Not implemented yet.\n");
 }
diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..39c1326dd677a704795f625440e385d3f3a6465c
--- /dev/null
+++ b/src/operator/ScalingImpl.cpp
@@ -0,0 +1,41 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <numeric>    // std::accumulate
+#include <functional> // std::multiplies
+
+#include "aidge/operator/Scaling.hpp"
+
+#include "aidge/backend/cpu/operator/ScalingImpl.hpp"
+#include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp"
+#include "aidge/utils/Types.h"
+#include <vector>
+
+Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // Returns 0 whatever the input index: this implementation can be in-place, so no input data is protected
+    return 0;
+}
+
+void Aidge::ScalingImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Select the kernel registered for the (input #0, output #0) data types
+    auto kernelFunc = Registrar<ScalingImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Hand it the operator attributes, input #0's element count and the raw input/output buffers
+    const auto& attributes = dynamic_cast<const Scaling_Op&>(mOp).getStaticAttributes();
+    const auto nbElements = std::static_pointer_cast<Tensor>(mOp.getInput(0))->size();
+    kernelFunc(attributes, nbElements,
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index 03e8f9305617f6a7ced878470e3c94ba625f5b22..45b455a3f361587848e33864872f497493315a78 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -21,45 +21,12 @@
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp"
 
-// FIXME: replace whole Tensor with minimum needed data quantity
-Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
-    assert(mOp.getInput(0) && "requires valid input");
-
-    // Requires the whole tensors
-    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims();
-
-    return std::accumulate(inputDims.begin(), inputDims.end(),
-                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
-}
-
 Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // for the direct convolution algorithm, convolutions can be in-place, if there is no padding!
+    // this implementation can be in-place
     return 0;
 }
 
-Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
-    return std::accumulate(outputDims.begin(), outputDims.end(),
-                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
-}
-
-Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
-    return mNbConsumedData[0];
-}
-
-Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
-    return mNbProducedData[0];
-}
-void Aidge::SoftmaxImpl_cpu::updateConsummerProducer(){
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
 void Aidge::SoftmaxImpl_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
     assert(mOp.getInput(0)->nbDims()>1);
 
@@ -77,13 +44,4 @@ void Aidge::SoftmaxImpl_cpu::forward() {
                featureSize,
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
-
-
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
-}
-
-void Aidge::SoftmaxImpl_cpu::backward() {
-    printf("Not implemented yet.\n");
 }
diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..75d1d2fb20b6748c931124847198b3168d9bdba7
--- /dev/null
+++ b/src/operator/SqrtImpl.cpp
@@ -0,0 +1,41 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Sqrt.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
+#include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // Returns 0 whatever the input index: this implementation can be in-place, so no input data is protected
+    return 0;
+}
+
+void Aidge::SqrtImpl_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+
+    // Select the kernel registered for the (input #0, output #0) data types
+    auto kernelFunc = Registrar<SqrtImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Hand it input #0's element count and the raw input/output buffers
+    const auto nbElements = mOp.getInput(0)->size();
+    kernelFunc(nbElements,
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
\ No newline at end of file
diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6d87821d89ff84aa1046a9ecf0fdd83dcc5dda53
--- /dev/null
+++ b/src/operator/SubImpl.cpp
@@ -0,0 +1,51 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Sub.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/SubImpl.hpp"
+#include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp"
+
+Aidge::NbElts_t Aidge::SubImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // Returns 0 whatever the input index: this implementation can be in-place, so no input data is protected
+    return 0;
+}
+
+void Aidge::SubImpl_cpu::forward() {
+    // Runs the forward kernel registered for the (input #0, input #1, output #0) data types.
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(1) && "missing input #1");
+
+    // Accepted sizes for input #1: one element, input #0's element count, or 1-D matching input #0's last dimension
+    assert(((mOp.getInput(1)->size() == 1) ||
+            (mOp.getInput(1)->size() == mOp.getInput(0)->size()) ||
+            (mOp.getInput(1)->nbDims() == 1 && mOp.getInput(1)->size() == mOp.getInput(0)->dims()[mOp.getInput(0)->nbDims()-1])
+           ) &&
+           "input #1 must either be a tensor of size 1, the number of channels of input #0 or the same size of input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<SubImplForward_cpu>::create(
+        {mOp.getInput(0)->dataType(), mOp.getInput(1)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel with both element counts and the raw input/output buffers
+    kernelFunc(mOp.getInput(0)->size(),
+        mOp.getInput(1)->size(),
+        mOp.getInput(0)->getImpl()->rawPtr(),
+        mOp.getInput(1)->getImpl()->rawPtr(),
+        mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/data/Test_TensorImpl.cpp
similarity index 86%
rename from unit_tests/Test_TensorImpl.cpp
rename to unit_tests/data/Test_TensorImpl.cpp
index f55e25f55359cbcbcb9a53e916b345d1fb5a6b22..ceedd8c10f22c2afb0331eccafa11c748628fd7d 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/data/Test_TensorImpl.cpp
@@ -41,12 +41,12 @@ TEST_CASE("Tensor creation") {
     }
 
     SECTION("get function") {
-      REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 0})) == 1);
-      REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 0, 1})) == 2);
-      REQUIRE(x.get<int>(std::array<std::size_t, 3>({0, 1, 1})) == 4);
-      REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 0})) == 7);
-      x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) = 36;
-      REQUIRE(x.get<int>(std::array<std::size_t, 3>({1, 1, 1})) == 36);
+      REQUIRE(x.get<int>({0, 0, 0}) == 1);
+      REQUIRE(x.get<int>({0, 0, 1}) == 2);
+      REQUIRE(x.get<int>({0, 1, 1}) == 4);
+      REQUIRE(x.get<int>({1, 1, 0}) == 7);
+      x.get<int>({1, 1, 1}) = 36;
+      REQUIRE(x.get<int>({1, 1, 1}) == 36);
     }
 
     SECTION("Pretty printing for debug") { REQUIRE_NOTHROW(x.print()); }
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index e24d7ac6bd97586ebdeddce5ccb75807ddf530f0..18d98d169ddcb74310c5153d7c2c95103c395bb7 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -19,25 +19,25 @@
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Add(forward)") {
-    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { 
-        {
-            {
-                {{20, 47},{21, 48},{22, 49}},
-                {{23, 50},{24, 51},{25, 52}},
-                {{26, 53},{27, 54},{28, 55}}
-            },
-            {
-                {{29, 56},{30, 57},{31, 58}},
-                {{32, 59},{33, 60},{34, 61}},
-                {{35, 62},{36, 63},{37, 64}}
-            },
-            {
-                {{38, 65},{39, 66},{40, 67}},
-                {{41, 68},{42, 69},{43, 70}},
-                {{44, 71},{45, 72},{46, 73}}
-            }
-        }
-    });
+    std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {  // fixture for the sections below, whose expected outputs start at 2x and 3x these values ({40, 94} and {60, 141})
+        {                                       // NOTE(review): the bare trailing '//' look like formatter guards pinning this layout -- confirm
+            {                                   //
+                {{20, 47},{21, 48},{22, 49}},   // first of each pair counts up from 20, second from 47
+                {{23, 50},{24, 51},{25, 52}},   //
+                {{26, 53},{27, 54},{28, 55}}    //
+            },                                  //
+            {                                   //
+                {{29, 56},{30, 57},{31, 58}},   //
+                {{32, 59},{33, 60},{34, 61}},   //
+                {{35, 62},{36, 63},{37, 64}}    //
+            },                                  //
+            {                                   //
+                {{38, 65},{39, 66},{40, 67}},   //
+                {{41, 68},{42, 69},{43, 70}},   //
+                {{44, 71},{45, 72},{46, 73}}    // last pair: 46 and 73
+            }                                   //
+        }                                       //
+    });                                         //
 
     SECTION("One input") {
         std::shared_ptr<Node> myAdd = Add<1>();
@@ -51,7 +51,7 @@ TEST_CASE("[cpu/operator] Add(forward)") {
     }
 
     SECTION("Two inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { 
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
             {
                 {
                     {{40,  94},{42,  96},{44,  98}},
@@ -81,9 +81,9 @@ TEST_CASE("[cpu/operator] Add(forward)") {
 
         REQUIRE(*std::static_pointer_cast<Tensor>(myAdd->getOperator()->getOutput(0)) == *expectedOutput);
     }
-    
+
     SECTION("Three inputs") {
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { 
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
             {
                 {
                     {{ 60, 141},{ 63, 144},{ 66, 147}},
diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp
index 23ff1aaebcfb79a4d4b1abc4f1a77f1c6de63b21..891f0e94b02d07d41751728e83fa9b42e4b89be8 100644
--- a/unit_tests/operator/Test_ConvImpl.cpp
+++ b/unit_tests/operator/Test_ConvImpl.cpp
@@ -156,165 +156,8 @@ TEST_CASE("[cpu/operator] Conv(forward)") {
         // myConv->getOperator()->getOutput(0)->print();
         REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput);
     }
-    SECTION("test Padding") {
-        std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1});
-        myConv->getOperator()->setDatatype(DataType::Int32);
-        myConv->getOperator()->setBackend("cpu");
-        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
-            {
-                {
-                    {{  0,   1,   2},
-                    {  3,   4,   5},
-                    {  6,   7,   8}},
-                    {{  9,  10,  11},
-                    { 12,  13,  14},
-                    { 15,  16,  17}},
-                    {{ 18,  19,  20},
-                    { 21,  22,  23},
-                    { 24,  25,  26}}
-                },
-                {
-                    {{ 27,  28,  29},
-                    { 30,  31,  32},
-                    { 33,  34,  35}},
-                    {{ 36,  37,  38},
-                    { 39,  40,  41},
-                    { 42,  43,  44}},
-                    {{ 45,  46,  47},
-                    { 48,  49,  50},
-                    { 51,  52,  53}}
-                },
-                {
-                    {{ 54,  55,  56},
-                    { 57,  58,  59},
-                    { 60,  61,  62}},
-                    {{ 63,  64,  65},
-                    { 66,  67,  68},
-                    { 69,  70,  71}},
-                    {{ 72,  73,  74},
-                    { 75,  76,  77},
-                    { 78,  79,  80}}
-                },
-                {
-                    {{ 81,  82,  83},
-                    { 84,  85,  86},
-                    { 87,  88,  89}},
-                    {{ 90,  91,  92},
-                    { 93,  94,  95},
-                    { 96,  97,  98}},
-                    {{ 99, 100, 101},
-                    {102, 103, 104},
-                    {105, 106, 107}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
-        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
-            {
-                {
-                    {{  0,   1,   2,   3,   4},
-                    {  5,   6,   7,   8,   9},
-                    { 10,  11,  12,  13,  14},
-                    { 15,  16,  17,  18,  19},
-                    { 20,  21,  22,  23,  24}},
-
-                    {{ 25,  26,  27,  28,  29},
-                    { 30,  31,  32,  33,  34},
-                    { 35,  36,  37,  38,  39},
-                    { 40,  41,  42,  43,  44},
-                    { 45,  46,  47,  48,  49}},
-
-                    {{ 50,  51,  52,  53,  54},
-                    { 55,  56,  57,  58,  59},
-                    { 60,  61,  62,  63,  64},
-                    { 65,  66,  67,  68,  69},
-                    { 70,  71,  72,  73,  74}}
-                },
-                {
-                    {{ 75,  76,  77,  78,  79},
-                    { 80,  81,  82,  83,  84},
-                    { 85,  86,  87,  88,  89},
-                    { 90,  91,  92,  93,  94},
-                    { 95,  96,  97,  98,  99}},
-
-                    {{100, 101, 102, 103, 104},
-                    {105, 106, 107, 108, 109},
-                    {110, 111, 112, 113, 114},
-                    {115, 116, 117, 118, 119},
-                    {120, 121, 122, 123, 124}},
-
-                    {{125, 126, 127, 128, 129},
-                    {130, 131, 132, 133, 134},
-                    {135, 136, 137, 138, 139},
-                    {140, 141, 142, 143, 144},
-                    {145, 146, 147, 148, 149}}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { 
-            {
-                {
-                    {{  6895,  10225,  10486,  10747,   7063},
-                     { 10303,  15226,  15577,  15928,  10429},
-                     { 11518,  16981,  17332,  17683,  11554},
-                     { 12733,  18736,  19087,  19438,  12679},
-                     {  8047,  11791,  11998,  12205,   7927}},
-
-                    {{ 15960,  24069,  24816,  25563,  17100},
-                     { 25119,  37818,  38898,  39978,  26703},
-                     { 28764,  43218,  44298,  45378,  30258},
-                     { 32409,  48618,  49698,  50778,  33813},
-                     { 21972,  32925,  33618,  34311,  22824}},
-
-                    {{ 25041,  37929,  39162,  40395,  27153},
-                     { 39951,  60426,  62235,  64044,  42993},
-                     { 46026,  69471,  71280,  73089,  48978},
-                     { 52101,  78516,  80325,  82134,  54963},
-                     { 35913,  54075,  55254,  56433,  37737}},
-
-                    {{ 34104,  51771,  53490,  55209,  37188},
-                     { 54765,  83016,  85554,  88092,  59265},
-                     { 63270,  95706,  98244, 100782,  67680},
-                     { 71775, 108396, 110934, 113472,  76095},
-                     { 49836,  75207,  76872,  78537,  52632}}
-                },
-                {
-                    {{ 20395,  29800,  30061,  30322,  19663},
-                     { 28528,  41551,  41902,  42253,  27304},
-                     { 29743,  43306,  43657,  44008,  28429},
-                     { 30958,  45061,  45412,  45763,  29554},
-                     { 18847,  27316,  27523,  27730,  17827}},
-
-                    {{ 53760,  80094,  80841,  81588,  54000},
-                     { 79794, 118818, 119898, 120978,  80028},
-                     { 83439, 124218, 125298, 126378,  83583},
-                     { 87084, 129618, 130698, 131778,  87138},
-                     { 57072,  84900,  85593,  86286,  57024}},
-
-                    {{ 87141, 130404, 131637, 132870,  88353},
-                     {131076, 196101, 197910, 199719, 132768},
-                     {137151, 205146, 206955, 208764, 138753},
-                     {143226, 214191, 216000, 217809, 144738},
-                     { 95313, 142500, 143679, 144858,  96237}},
-
-                    {{120504, 180696, 182415, 184134, 122688},
-                     {182340, 273366, 275904, 278442, 185490},
-                     {190845, 286056, 288594, 291132, 193905},
-                     {199350, 298746, 301284, 303822, 202320},
-                     {133536, 200082, 201747, 203412, 135432}}
-                }
-            }
-        });
-        myConv->getOperator()->associateInput(0,myInput);
-        myConv->getOperator()->associateInput(1,myWeights);
-        myConv->getOperator()->associateInput(2,myBias);
-        myConv->getOperator()->computeOutputDims();
-        myConv->forward();
-
-        REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput);
-    }
     SECTION("Point-wise") {
-        std::shared_ptr<Node> myConv = Conv(3,4,{1,1}, "myconv", {1,1}, {0,0,0,0});
+        std::shared_ptr<Node> myConv = Conv(3,4,{1,1}, "myconv", {1,1});
         myConv->getOperator()->setDatatype(DataType::Float32);
         myConv->getOperator()->setBackend("cpu");
         myConv->getOperator()->input(0) = Array4D<float,2,3,3,3> {
diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c33319c88b63ee834bbcb388bbbe0775699edbd7
--- /dev/null
+++ b/unit_tests/operator/Test_DivImpl.cpp
@@ -0,0 +1,207 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Div.hpp"
+#include "aidge/backend/cpu.hpp"
+
+#include <cmath>
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Div(forward)") {
+    SECTION("2D Tensor by Singleton") {  // a 2x2 dividend divided by a single-element (1x1) divisor
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.07607108, 0.44075000},
+                {0.19494885, 0.20071143}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,1,1>{{0.5}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {  // every entry of input_1 divided by 0.5
+            {
+                {0.15214217, 0.88150001},
+                {0.38989770, 0.40142286}
+            }
+        });
+
+        std::shared_ptr<Node> myDiv = Div();
+        myDiv->getOperator()->setDatatype(DataType::Float32);
+        myDiv->getOperator()->setBackend("cpu");
+        myDiv->getOperator()->associateInput(0, input_1);  // input #0 is the dividend
+        myDiv->getOperator()->associateInput(1, input_2);  // input #1 is the divisor
+        myDiv->getOperator()->computeOutputDims();
+        myDiv->forward();
+
+        REQUIRE(myDiv->getOperator()->getOutput(0)->size() == expectedOutput->size());  // fail cleanly instead of reading past a wrongly sized output below
+        float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {  // bound taken from the tensor rather than a hard-coded count
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);  // absolute tolerance of 1e-5 per element
+        }
+    }
+
+    SECTION("2D Tensors") {  // element-wise division of two 2x2 tensors
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.79780143, 0.49322051},
+                {0.84239346, 0.83737719}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,2,2>{
+            {
+                {0.59088874, 0.78858775},
+                {0.42879432, 0.17615074}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {  // input_1 / input_2, entry by entry
+            {
+                {1.35017204, 0.62544787},
+                {1.96456301, 4.75375366}
+            }
+        });
+
+        std::shared_ptr<Node> myDiv = Div();
+        myDiv->getOperator()->setDatatype(DataType::Float32);
+        myDiv->getOperator()->setBackend("cpu");
+        myDiv->getOperator()->associateInput(0, input_1);  // input #0 is the dividend
+        myDiv->getOperator()->associateInput(1, input_2);  // input #1 is the divisor
+        myDiv->getOperator()->computeOutputDims();
+        myDiv->forward();
+
+        REQUIRE(myDiv->getOperator()->getOutput(0)->size() == expectedOutput->size());  // fail cleanly instead of reading past a wrongly sized output below
+        float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {  // bound taken from the tensor rather than a hard-coded count
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);  // absolute tolerance of 1e-5 per element
+        }
+    }
+
+    SECTION("3D Tensor by 1D Tensor") {  // a 2x2x3 dividend divided by a 3-element divisor
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.24180168, 0.44319558, 0.06437260},
+                 {0.21270001, 0.34570599, 0.44151264}},
+
+                {{0.62294692, 0.98043168, 0.18628585},
+                 {0.33591706, 0.03432965, 0.32130069}}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array1D<float,3>{
+            {0.63475525, 0.58620811, 0.69340748}
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> {  // each row of 3 is divided entry-wise by input_2
+            {
+                {{0.38093686, 0.75603795, 0.09283517},
+                 {0.33508980, 0.58973253, 0.63672900}},
+
+                {{0.98139703, 1.67249763, 0.26865280},
+                 {0.52920723, 0.05856223, 0.46336490}}
+            }
+        });
+
+        std::shared_ptr<Node> myDiv = Div();
+        myDiv->getOperator()->setDatatype(DataType::Float32);
+        myDiv->getOperator()->setBackend("cpu");
+        myDiv->getOperator()->associateInput(0, input_1);  // input #0 is the dividend
+        myDiv->getOperator()->associateInput(1, input_2);  // input #1 is the divisor
+        myDiv->getOperator()->computeOutputDims();
+        myDiv->forward();
+
+        REQUIRE(myDiv->getOperator()->getOutput(0)->size() == expectedOutput->size());  // fail cleanly instead of reading past a wrongly sized output below
+        float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {  // bound taken from the tensor rather than a hard-coded count
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);  // absolute tolerance of 1e-5 per element
+        }
+    }
+
+    SECTION("4D Tensor") {  // a 2x3x3x3 dividend divided by a single-element (1x1) divisor
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
+            {
+                {
+                    {{0.25675946, 0.36265653, 0.22386390},
+                     {0.30483031, 0.97449398, 0.73871714},
+                     {0.36169255, 0.04510212, 0.27525920}},
+
+                    {{0.73255682, 0.03885978, 0.24181491},
+                     {0.14465559, 0.86070061, 0.88848090},
+                     {0.74408931, 0.87412918, 0.19800508}},
+
+                    {{0.43551809, 0.73437816, 0.37513995},
+                     {0.25414777, 0.06396711, 0.98708153},
+                     {0.02140611, 0.84974837, 0.62108254}}
+                },
+                {
+                    {{0.86227137, 0.69357753, 0.41814715},
+                     {0.76048166, 0.46306920, 0.05907208},
+                     {0.76625377, 0.91793799, 0.92988223}},
+
+                    {{0.34362513, 0.85009813, 0.21107805},
+                     {0.65575773, 0.38140792, 0.48540717},
+                     {0.10045588, 0.85803932, 0.23778951}},
+
+                    {{0.30316389, 0.04176688, 0.17290735},
+                     {0.07942408, 0.48647392, 0.39440966},
+                     {0.26543915, 0.92589515, 0.83948994}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,1,1>{{3.0}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {  // every entry of input_1 divided by 3.0
+            {
+                {
+                    {{0.08558649, 0.12088551, 0.07462130},
+                     {0.10161010, 0.32483134, 0.24623905},
+                     {0.12056419, 0.01503404, 0.09175307}},
+
+                    {{0.24418561, 0.01295326, 0.08060497},
+                     {0.04821853, 0.28690019, 0.29616031},
+                     {0.24802977, 0.29137638, 0.06600169}},
+
+                    {{0.14517270, 0.24479271, 0.12504666},
+                     {0.08471593, 0.02132237, 0.32902718},
+                     {0.00713537, 0.28324947, 0.20702751}}
+                },
+                {
+                    {{0.28742379, 0.23119251, 0.13938238},
+                     {0.25349388, 0.15435641, 0.01969069},
+                     {0.25541791, 0.30597934, 0.30996075}},
+
+                    {{0.11454171, 0.28336605, 0.07035935},
+                     {0.21858591, 0.12713598, 0.16180240},
+                     {0.03348529, 0.28601310, 0.07926317}},
+
+                    {{0.10105463, 0.01392229, 0.05763578},
+                     {0.02647469, 0.16215797, 0.13146989},
+                     {0.08847972, 0.30863172, 0.27982998}}
+                }
+            }
+        });
+
+        std::shared_ptr<Node> myDiv = Div();
+        myDiv->getOperator()->setDatatype(DataType::Float32);
+        myDiv->getOperator()->setBackend("cpu");
+        myDiv->getOperator()->associateInput(0, input_1);  // input #0 is the dividend
+        myDiv->getOperator()->associateInput(1, input_2);  // input #1 is the divisor
+        myDiv->getOperator()->computeOutputDims();
+        myDiv->forward();
+        REQUIRE(myDiv->getOperator()->getOutput(0)->size() == expectedOutput->size());  // fail cleanly instead of reading past a wrongly sized output below
+        float* resPtr = static_cast<float*>(myDiv->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) {  // bound taken from the tensor rather than a hard-coded count
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001);  // absolute tolerance of 1e-5 per element
+        }
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index 7096962e196c2ace4abf2b0b14aca8dfa37d3441..d5bd91ff75404a7b928c8919c64e06315b78206f 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -153,7 +153,7 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
         REQUIRE(*myLeakyReLU->getOperator()->getOutput(0) == *expectedOutput);
     }
 
-    SECTION("Test construction parameter: negative_slop") {
+    SECTION("Test construction attribute: negative_slop") {
         std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<float,10> {
             {0.0f, 1.0f, 2.0f,-3.0f, 4.0f,-5.0f,-6.0f, 7.0f, 8.0f, 9.0f}
         });
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0da01b3287043e07e5b967df8882960cfb814f8f
--- /dev/null
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -0,0 +1,108 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/MatMul.hpp"
+
+#include "aidge/backend/cpu/operator/MatMulImpl.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul]") {
+    // MatMul forward with batch size 2 and 75 features per sample; the weights are 5x75 and the expected output is 2x5.
+    std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array2D<int, 5, 75>{  // all five rows are identical: 1..15 repeated five times
+            {{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
+              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
+              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
+             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
+              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
+              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
+             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
+              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
+              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
+             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
+              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
+              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
+             {1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,
+              5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,
+              9,  10, 11, 12, 13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+              13, 14, 15, 1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}}});
+    std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array2D<int, 2, 5>{  // 23600 / 68600 = dot product of input row 0 / row 1 with one weight row
+            {{23600, 23600, 23600, 23600, 23600}, {68600, 68600, 68600, 68600, 68600}}});
+
+    std::shared_ptr<Node> myMatMul = MatMul(5, "mymatmul");  // presumably 5 is the number of output features (matches the 5 weight rows) -- TODO confirm
+    myMatMul->getOperator()->setDatatype(DataType::Int32);
+    myMatMul->getOperator()->setBackend("cpu");
+    myMatMul->getOperator()->associateInput(1, myWeights);  // weights go on input #1; each section below supplies input #0
+
+    SECTION("2D input") {  // input given directly as 2x75: row 0 holds 0..74, row 1 holds 75..149
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array2D<int, 2, 75>{
+                {{0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18,
+                  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+                  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
+                  57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74},
+                 {75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
+                  90,  91,  92,  93,  94,  95,  96,  97,  98,  99,  100, 101, 102, 103, 104,
+                  105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+                  120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+                  135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149}}});
+        myMatMul->getOperator()->associateInput(0, myInput);
+        myMatMul->getOperator()->computeOutputDims();
+        myMatMul->forward();
+        REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput);
+    }
+    SECTION("4D input") {  // the same 150 values laid out as 2x3x5x5; the same 2x5 result as the 2D case is expected
+        std::shared_ptr<Tensor> myInput =
+                std::make_shared<Tensor>(Array4D<int, 2, 3, 5, 5>{{{{{0, 1, 2, 3, 4},
+                                                                     {5, 6, 7, 8, 9},
+                                                                     {10, 11, 12, 13, 14},
+                                                                     {15, 16, 17, 18, 19},
+                                                                     {20, 21, 22, 23, 24}},
+                                                                    {{25, 26, 27, 28, 29},
+                                                                     {30, 31, 32, 33, 34},
+                                                                     {35, 36, 37, 38, 39},
+                                                                     {40, 41, 42, 43, 44},
+                                                                     {45, 46, 47, 48, 49}},
+                                                                    {{50, 51, 52, 53, 54},
+                                                                     {55, 56, 57, 58, 59},
+                                                                     {60, 61, 62, 63, 64},
+                                                                     {65, 66, 67, 68, 69},
+                                                                     {70, 71, 72, 73, 74}}},
+                                                                   {{{75, 76, 77, 78, 79},
+                                                                     {80, 81, 82, 83, 84},
+                                                                     {85, 86, 87, 88, 89},
+                                                                     {90, 91, 92, 93, 94},
+                                                                     {95, 96, 97, 98, 99}},
+                                                                    {{100, 101, 102, 103, 104},
+                                                                     {105, 106, 107, 108, 109},
+                                                                     {110, 111, 112, 113, 114},
+                                                                     {115, 116, 117, 118, 119},
+                                                                     {120, 121, 122, 123, 124}},
+                                                                    {{125, 126, 127, 128, 129},
+                                                                     {130, 131, 132, 133, 134},
+                                                                     {135, 136, 137, 138, 139},
+                                                                     {140, 141, 142, 143, 144},
+                                                                     {145, 146, 147, 148, 149}}}}});
+        myMatMul->getOperator()->associateInput(0, myInput);
+        myMatMul->getOperator()->computeOutputDims();
+        myMatMul->forward();
+        REQUIRE(*std::static_pointer_cast<Tensor>(myMatMul->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    // std::cout << static_cast<Tensor>((*myMatMul->getOperator())["weight"])[0][0][0][0] << std::endl;
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..83fa7eaa670399c8d6c085a14db08fa35df9de8c
--- /dev/null
+++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp
@@ -0,0 +1,82 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <memory>
+#include <cstdlib>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/MaxPooling.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using namespace Aidge;
+
+
+TEST_CASE("[cpu/operator] MaxPooling(forward)") {  // checks the CPU MaxPooling forward pass against hand-written expected maxima
+    std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<float,2,2,5,5> { //NCHW
+        {
+            {
+                {{-0.3848,  0.2166, -0.4373,  0.6142,  0.5277},
+                 {0.7995,  0.3638, -1.4589, -1.0843,  1.0918},
+                 {0.7147,  0.0936, -1.2902,  1.2037,  0.4874},
+                 {-0.5981,  2.1184, -0.9175,  1.3859,  0.3305},
+                 {-1.7700,  0.0563, -0.3914,  0.0538, -0.3955}},
+
+                {{-3.1409, -0.4554,  0.0524,  2.2291,  0.4859},
+                 {-0.7465, -0.6567, -2.3703, -0.6386, -1.4152},
+                 { 2.2329, -0.5850,  0.0700,  1.2838, -1.7363},
+                 { 0.2139,  0.0624, -1.0689, -0.8221, -0.8038},
+                 { 0.1886, -0.7840, -0.2313,  0.2651, -1.6244}}
+            },
+            {
+                {{ 0.4371,  1.6417,  0.9129,  0.6325,  0.5438},
+                 {-2.3552, -0.8850, -0.0232, -0.5462, -1.2011},
+                 {1.7653, -1.6668, -1.0814,  0.6182,  1.2071},
+                 {0.9541, -0.5133,  0.8664, -0.8892,  1.4585},
+                 {1.0220, -0.5107,  0.1829, -0.2301, -0.4268}},
+
+                {{ 1.0429,  0.6279, -0.2875,  0.7187, -0.1500},
+                 {1.6041,  2.9635,  1.4172, -0.7517,  0.5441},
+                 {-0.2276,  0.0857,  0.6776, -0.1389, -0.0614},
+                 {-0.1547, -0.3435,  0.0650, -0.5095, -1.8073},
+                 {1.7217,  0.3999, -0.5953,  1.0604, -0.4126}}
+            }
+        }
+    });
+    SECTION("Stride") {
+        std::shared_ptr<Node> myMaxPool = MaxPooling({2,2}, "mycdw", {2,2});  // presumably (kernel dims, node name, stride dims) -- TODO confirm
+        myMaxPool->getOperator()->setDatatype(DataType::Float32);
+        myMaxPool->getOperator()->setBackend("cpu");
+
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<float,2,2,2,2> {  // maxima of the 2x2 windows at stride 2 over rows/cols 0-3; row/col 4 is in no window
+            {
+                {
+                    {{  0.7995,  0.6142},
+                     { 2.1184,  1.3859}},
+                    {{ -0.4554,  2.2291},
+                     {  2.2329,  1.2838}}
+                },
+                {
+                    {{1.6417,  0.9129},
+                     {1.7653,  0.8664}},
+                    {{2.9635,  1.4172},
+                     {0.0857,  0.6776}}
+                }
+            }
+        });
+        myMaxPool->getOperator()->associateInput(0,myInput);
+        myMaxPool->getOperator()->computeOutputDims();
+        myMaxPool->forward();
+        // myMaxPool->getOperator()->getOutput(0)->print();
+        REQUIRE(*(myMaxPool->getOperator()->getOutput(0)) == *myOutput);  // every expected value is a literal that also appears in myInput
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cea62f998cfc538d1d5800639e461eb4d15cb270
--- /dev/null
+++ b/unit_tests/operator/Test_MulImpl.cpp
@@ -0,0 +1,129 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <cmath>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Mul.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Mul(forward)") {
+    SECTION("2D Tensor by Singleton") { // every element of a 2x2 tensor scaled by a 1x1 tensor
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.38977361, 0.34064174},
+                {0.00427264, 0.90872520}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,1,1>{{3.0}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { // input_1 * 3.0
+            {
+                {1.16932082, 1.02192521},
+                {0.01281792, 2.72617555}
+            }
+        });
+
+        std::shared_ptr<Node> myMul = Mul();
+        const auto mulOp = myMul->getOperator();  // fetched once, reused for setup and readback
+        mulOp->setDatatype(DataType::Float32);
+        mulOp->setBackend("cpu");
+        mulOp->associateInput(0, input_1);
+        mulOp->associateInput(1, input_2);
+        mulOp->computeOutputDims();
+        myMul->forward();
+        // Compare the raw float buffers; std::fabs (from <cmath>) always works in floating point.
+        const float* resPtr = static_cast<const float*>(mulOp->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<const float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 4; ++i) {  // 4 = 2x2 elements of expectedOutput
+            REQUIRE(std::fabs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+    }
+
+    SECTION("2D Tensors") { // element-wise product of two 2x2 tensors
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.38977361, 0.34064174},
+                {0.00427264, 0.90872520}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,2,2>{
+            {
+                {0.02362096, 0.24084556},
+                {0.94690859, 0.13512510}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> { // input_1[i] * input_2[i]
+            {
+                {0.00920683, 0.08204205},
+                {0.00404580, 0.12279158}
+            }
+        });
+
+        std::shared_ptr<Node> myMul = Mul();
+        const auto mulOp = myMul->getOperator();  // fetched once, reused for setup and readback
+        mulOp->setDatatype(DataType::Float32);
+        mulOp->setBackend("cpu");
+        mulOp->associateInput(0, input_1);
+        mulOp->associateInput(1, input_2);
+        mulOp->computeOutputDims();
+        myMul->forward();
+        // Compare the raw float buffers; std::fabs (from <cmath>) always works in floating point.
+        const float* resPtr = static_cast<const float*>(mulOp->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<const float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 4; ++i) {  // 4 = 2x2 elements of expectedOutput
+            REQUIRE(std::fabs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+    }
+
+    SECTION("3D Tensor by 1D Tensor") { // 2x2x3 tensor multiplied by a 3-element tensor
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.33647752, 0.89360154, 0.46586215},
+                 {0.71518236, 0.71481097, 0.97991812}},
+
+                {{0.17393428, 0.56849813, 0.18489265},
+                 {0.78397650, 0.00348300, 0.65758008}}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array1D<float,3>{
+            {0.15380561, 0.51063120, 0.93031412}
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> { // each 3-element row * input_2
+            {
+                {{0.05175213, 0.45630082, 0.43339813},
+                 {0.10999906, 0.36500478, 0.91163164}},
+
+                {{0.02675207, 0.29029289, 0.17200825},
+                 {0.12057999, 0.00177853, 0.61175603}}
+            }
+        });
+
+        std::shared_ptr<Node> myMul = Mul();
+        const auto mulOp = myMul->getOperator();  // fetched once, reused for setup and readback
+        mulOp->setDatatype(DataType::Float32);
+        mulOp->setBackend("cpu");
+        mulOp->associateInput(0, input_1);
+        mulOp->associateInput(1, input_2);
+        mulOp->computeOutputDims();
+        myMul->forward();
+        // Compare the raw float buffers; std::fabs (from <cmath>) always works in floating point.
+        const float* resPtr = static_cast<const float*>(mulOp->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<const float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < 12; ++i) {  // 12 = 2x2x3 elements of expectedOutput
+            REQUIRE(std::fabs(resPtr[i] - expectedPtr[i]) < 0.00001);
+        }
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b603e165392f1a861dc1b40d50b70a53c9256870
--- /dev/null
+++ b/unit_tests/operator/Test_PadImpl.cpp
@@ -0,0 +1,569 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <cstdlib>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Pad.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Pad(forward)") {
+    SECTION("Symmetric Pad") { // constant border of width 1 on every side: 5x5 planes become 7x7
+        const int pv = 0; // pad value expected in every border cell of myOutput
+
+        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
+        myPad->getOperator()->setDatatype(DataType::Int32);
+        myPad->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW, values 0..149 in increasing order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { // NCHW, each input plane framed by pv
+            {
+                {
+                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
+                    { pv,   0,   1,   2,   3,   4,  pv},
+                    { pv,   5,   6,   7,   8,   9,  pv},
+                    { pv,  10,  11,  12,  13,  14,  pv},
+                    { pv,  15,  16,  17,  18,  19,  pv},
+                    { pv,  20,  21,  22,  23,  24,  pv},
+                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
+
+                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
+                    { pv,  25,  26,  27,  28,  29,  pv},
+                    { pv,  30,  31,  32,  33,  34,  pv},
+                    { pv,  35,  36,  37,  38,  39,  pv},
+                    { pv,  40,  41,  42,  43,  44,  pv},
+                    { pv,  45,  46,  47,  48,  49,  pv},
+                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
+
+                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
+                    { pv,  50,  51,  52,  53,  54,  pv},
+                    { pv,  55,  56,  57,  58,  59,  pv},
+                    { pv,  60,  61,  62,  63,  64,  pv},
+                    { pv,  65,  66,  67,  68,  69,  pv},
+                    { pv,  70,  71,  72,  73,  74,  pv},
+                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}}
+                },
+                {
+                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
+                    { pv,  75,  76,  77,  78,  79,  pv},
+                    { pv,  80,  81,  82,  83,  84,  pv},
+                    { pv,  85,  86,  87,  88,  89,  pv},
+                    { pv,  90,  91,  92,  93,  94,  pv},
+                    { pv,  95,  96,  97,  98,  99,  pv},
+                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
+
+                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
+                    {pv,  100, 101, 102, 103, 104,  pv},
+                    {pv,  105, 106, 107, 108, 109,  pv},
+                    {pv,  110, 111, 112, 113, 114,  pv},
+                    {pv,  115, 116, 117, 118, 119,  pv},
+                    {pv,  120, 121, 122, 123, 124,  pv},
+                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}},
+
+                    {{ pv,  pv,   pv,   pv,   pv,   pv,  pv},
+                    {pv,  125, 126, 127, 128, 129,  pv},
+                    {pv,  130, 131, 132, 133, 134,  pv},
+                    {pv,  135, 136, 137, 138, 139,  pv},
+                    {pv,  140, 141, 142, 143, 144,  pv},
+                    {pv,  145, 146, 147, 148, 149,  pv},
+                    { pv,  pv,   pv,   pv,   pv,   pv,  pv}}
+                }
+            }
+        });
+
+        myPad->getOperator()->associateInput(0,myInput);
+        myPad->getOperator()->computeOutputDims();
+        myPad->forward();
+        // myPad->getOperator()->getOutput(0)->print();
+        REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    SECTION("Asymmetric Pad") { // one pv row added on top and one pv column on the right: 5x5 planes become 6x6
+        const int pv = 0; // pad value expected in every border cell of myOutput
+
+        std::shared_ptr<Node> myPad = Pad<2>({1, 0, 0, 1}, "mypad", PadBorderType::Constant, static_cast<double>(pv));
+        myPad->getOperator()->setDatatype(DataType::Int32);
+        myPad->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW, values 0..149 in increasing order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,6,6> { // NCHW, top row and right column hold pv
+            {
+                {
+                    {{ pv,   pv,   pv,   pv,   pv,  pv},
+                    { 0,   1,   2,   3,   4,  pv},
+                    { 5,   6,   7,   8,   9,  pv},
+                    { 10,  11,  12,  13,  14,  pv},
+                    { 15,  16,  17,  18,  19,  pv},
+                    { 20,  21,  22,  23,  24,  pv}},
+
+                    {{ pv,   pv,   pv,   pv,   pv,  pv},
+                    { 25,  26,  27,  28,  29,  pv},
+                    { 30,  31,  32,  33,  34,  pv},
+                    { 35,  36,  37,  38,  39,  pv},
+                    { 40,  41,  42,  43,  44,  pv},
+                    { 45,  46,  47,  48,  49,  pv}},
+
+                    {{ pv,   pv,   pv,   pv,   pv,  pv},
+                    { 50,  51,  52,  53,  54,  pv},
+                    { 55,  56,  57,  58,  59,  pv},
+                    { 60,  61,  62,  63,  64,  pv},
+                    { 65,  66,  67,  68,  69,  pv},
+                    { 70,  71,  72,  73,  74,  pv}}
+                },
+                {
+                    {{ pv,   pv,   pv,   pv,   pv,  pv},
+                    { 75,  76,  77,  78,  79,  pv},
+                    { 80,  81,  82,  83,  84,  pv},
+                    { 85,  86,  87,  88,  89,  pv},
+                    { 90,  91,  92,  93,  94,  pv},
+                    { 95,  96,  97,  98,  99,  pv}},
+
+                    {{ pv,   pv,   pv,   pv,   pv,  pv},
+                    { 100, 101, 102, 103, 104,  pv},
+                    { 105, 106, 107, 108, 109,  pv},
+                    { 110, 111, 112, 113, 114,  pv},
+                    { 115, 116, 117, 118, 119,  pv},
+                    { 120, 121, 122, 123, 124,  pv}},
+
+                    {{ pv,   pv,   pv,   pv,   pv,  pv},
+                    { 125, 126, 127, 128, 129,  pv},
+                    { 130, 131, 132, 133, 134,  pv},
+                    { 135, 136, 137, 138, 139,  pv},
+                    { 140, 141, 142, 143, 144,  pv},
+                    { 145, 146, 147, 148, 149,  pv}}
+                }
+            }
+        });
+
+        myPad->getOperator()->associateInput(0,myInput);
+        myPad->getOperator()->computeOutputDims();
+        myPad->forward();
+        // myPad->getOperator()->getOutput(0)->print();
+        REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    SECTION("Pad Edge") { // width-1 border that repeats the nearest input element: 5x5 planes become 7x7
+        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Edge);
+        myPad->getOperator()->setDatatype(DataType::Int32);
+        myPad->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW, values 0..149 in increasing order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { // NCHW, border cells duplicate the adjacent edge value
+            {
+                {
+                    {{ 0,  0,   1,   2,   3,   4,  4},
+                    { 0,   0,   1,   2,   3,   4,  4},
+                    { 5,   5,   6,   7,   8,   9,  9},
+                    { 10,  10,  11,  12,  13,  14,  14},
+                    { 15,  15,  16,  17,  18,  19,  19},
+                    { 20,  20,  21,  22,  23,  24,  24},
+                    { 20,  20,  21,  22,  23,  24,  24}},
+
+                    {{ 25,  25,  26,  27,  28,  29,  29},
+                    { 25,  25,  26,  27,  28,  29,  29},
+                    { 30,  30,  31,  32,  33,  34,  34},
+                    { 35,  35,  36,  37,  38,  39,  39},
+                    { 40,  40,  41,  42,  43,  44,  44},
+                    { 45,  45,  46,  47,  48,  49,  49},
+                    { 45,  45,  46,  47,  48,  49, 49}},
+
+                    {{ 50,  50,  51,  52,  53,  54,  54},
+                    { 50,  50,  51,  52,  53,  54,  54},
+                    { 55,  55,  56,  57,  58,  59,  59},
+                    { 60,  60,  61,  62,  63,  64,  64},
+                    { 65,  65,  66,  67,  68,  69,  69},
+                    { 70,  70,  71,  72,  73,  74,  74},
+                    { 70,  70,  71,  72,  73,  74,  74}}
+                },
+                {
+                    {{ 75,  75,  76,  77,  78,  79,  79},
+                    { 75,  75,  76,  77,  78,  79,  79},
+                    { 80,  80,  81,  82,  83,  84,  84},
+                    { 85,  85,  86,  87,  88,  89,  89},
+                    { 90,  90,  91,  92,  93,  94,  94},
+                    { 95,  95,  96,  97,  98,  99,  99},
+                    { 95,  95,  96,  97,  98,  99,  99}},
+
+                    {{100,  100, 101, 102, 103, 104,  104},
+                    {100,  100, 101, 102, 103, 104,  104},
+                    {105,  105, 106, 107, 108, 109, 109},
+                    {110,  110, 111, 112, 113, 114,  114},
+                    {115,  115, 116, 117, 118, 119,  119},
+                    {120,  120, 121, 122, 123, 124,  124},
+                    {120,  120, 121, 122, 123, 124,  124}},
+
+                    {{125,  125, 126, 127, 128, 129,  129},
+                    {125,  125, 126, 127, 128, 129,  129},
+                    {130,  130, 131, 132, 133, 134,  134},
+                    {135,  135, 136, 137, 138, 139,  139},
+                    {140,  140, 141, 142, 143, 144,  144},
+                    {145,  145, 146, 147, 148, 149,  149},
+                    {145,  145, 146, 147, 148, 149,  149}}
+                }
+            }
+        });
+
+        myPad->getOperator()->associateInput(0,myInput);
+        myPad->getOperator()->computeOutputDims();
+        myPad->forward();
+        // myPad->getOperator()->getOutput(0)->print();
+        REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    SECTION("Pad Reflect") { // width-1 Reflect border on all four sides: 5x5 planes become 7x7
+        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Reflect);
+        myPad->getOperator()->setDatatype(DataType::Int32);
+        myPad->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW, values 0..149 in increasing order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { // NCHW, top row / left column mirror input row 1 / column 1
+            {
+                {
+                    {
+                    { 6, 5, 6, 7, 8, 9, 5},
+                    { 1, 0, 1, 2, 3, 4, 0},
+                    { 6, 5, 6, 7, 8, 9, 5},
+                    { 11, 10, 11, 12, 13, 14, 10},
+                    { 16, 15, 16, 17, 18, 19, 15},
+                    { 21, 20, 21, 22, 23, 24, 20},
+                    { 1, 0, 1, 2, 3, 4, 0}
+                    },
+                    {
+                    { 31, 30, 31, 32, 33, 34, 30},
+                    { 26, 25, 26, 27, 28, 29, 25},
+                    { 31, 30, 31, 32, 33, 34, 30},
+                    { 36, 35, 36, 37, 38, 39, 35},
+                    { 41, 40, 41, 42, 43, 44, 40},
+                    { 46, 45, 46, 47, 48, 49, 45},
+                    { 26, 25, 26, 27, 28, 29, 25}
+                    },
+                    {
+                    { 56, 55, 56, 57, 58, 59, 55},
+                    { 51, 50, 51, 52, 53, 54, 50},
+                    { 56, 55, 56, 57, 58, 59, 55},
+                    { 61, 60, 61, 62, 63, 64, 60},
+                    { 66, 65, 66, 67, 68, 69, 65},
+                    { 71, 70, 71, 72, 73, 74, 70},
+                    { 51, 50, 51, 52, 53, 54, 50}
+                    }
+                },
+                {
+                    {
+                    { 81, 80, 81, 82, 83, 84, 80},
+                    { 76, 75, 76, 77, 78, 79, 75},
+                    { 81, 80, 81, 82, 83, 84, 80},
+                    { 86, 85, 86, 87, 88, 89, 85},
+                    { 91, 90, 91, 92, 93, 94, 90},
+                    { 96, 95, 96, 97, 98, 99, 95},
+                    { 76, 75, 76, 77, 78, 79, 75}
+                    },
+                    {
+                    { 106, 105, 106, 107, 108, 109, 105},
+                    { 101, 100, 101, 102, 103, 104, 100},
+                    { 106, 105, 106, 107, 108, 109, 105},
+                    { 111, 110, 111, 112, 113, 114, 110},
+                    { 116, 115, 116, 117, 118, 119, 115},
+                    { 121, 120, 121, 122, 123, 124, 120},
+                    { 101, 100, 101, 102, 103, 104, 100}
+                    },
+                    {
+                    { 131, 130, 131, 132, 133, 134, 130},
+                    { 126, 125, 126, 127, 128, 129, 125},
+                    { 131, 130, 131, 132, 133, 134, 130},
+                    { 136, 135, 136, 137, 138, 139, 135},
+                    { 141, 140, 141, 142, 143, 144, 140},
+                    { 146, 145, 146, 147, 148, 149, 145},
+                    { 126, 125, 126, 127, 128, 129, 125}
+                    }
+                }
+            }
+        });
+        // NOTE(review): bottom/right border repeats input row 0 / col 0 instead of mirroring row 3 / col 3 - confirm.
+        myPad->getOperator()->associateInput(0,myInput);
+        myPad->getOperator()->computeOutputDims();
+        myPad->forward();
+        // myPad->getOperator()->getOutput(0)->print();
+        REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput);
+    }
+
+    SECTION("Pad Wrap") { // width-1 border taken from the opposite side of each plane: 5x5 planes become 7x7
+        std::shared_ptr<Node> myPad = Pad<2>({1, 1, 1, 1}, "mypad", PadBorderType::Wrap);
+        myPad->getOperator()->setDatatype(DataType::Int32);
+        myPad->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW, values 0..149 in increasing order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,3,7,7> { // NCHW, e.g. the top border row is the last input row
+            {
+                {
+                    {{ 24,  20,  21,  22,  23,  24,  20},
+                    { 4,   0,   1,   2,   3,   4,  0},
+                    { 9,   5,   6,   7,   8,   9,  5},
+                    { 14,  10,  11,  12,  13,  14,  10},
+                    { 19,  15,  16,  17,  18,  19,  15},
+                    { 24,  20,  21,  22,  23,  24,  20},
+                    { 4,   0,   1,   2,   3,   4,  0}},
+
+                    {{ 49,  45,  46,  47,  48,  49, 45},
+                    { 29,  25,  26,  27,  28,  29,  25},
+                    { 34,  30,  31,  32,  33,  34,  30},
+                    { 39,  35,  36,  37,  38,  39,  35},
+                    { 44,  40,  41,  42,  43,  44,  40},
+                    { 49,  45,  46,  47,  48,  49,  45},
+                    { 29,  25,  26,  27,  28,  29,  25}},
+
+                    {{ 74,  70,  71,  72,  73,  74,  70},
+                    { 54,  50,  51,  52,  53,  54,  50},
+                    { 59,  55,  56,  57,  58,  59,  55},
+                    { 64,  60,  61,  62,  63,  64,  60},
+                    { 69,  65,  66,  67,  68,  69,  65},
+                    { 74,  70,  71,  72,  73,  74,  70},
+                    { 54,  50,  51,  52,  53,  54,  50}}
+                },
+                {
+                    {{ 99,  95,  96,  97,  98,  99,  95},
+                    { 79,  75,  76,  77,  78,  79,  75},
+                    { 84,  80,  81,  82,  83,  84,  80},
+                    { 89,  85,  86,  87,  88,  89,  85},
+                    { 94,  90,  91,  92,  93,  94,  90},
+                    { 99,  95,  96,  97,  98,  99,  95},
+                    { 79,  75,  76,  77,  78,  79,  75}},
+
+                    {{124,  120, 121, 122, 123, 124,  120},
+                    {104,  100, 101, 102, 103, 104,  100},
+                    {109,  105, 106, 107, 108, 109, 105},
+                    {114,  110, 111, 112, 113, 114,  110},
+                    {119,  115, 116, 117, 118, 119,  115},
+                    {124,  120, 121, 122, 123, 124,  120},
+                    {104,  100, 101, 102, 103, 104,  100}},
+
+                    {{149,  145, 146, 147, 148, 149,  145},
+                    {129,  125, 126, 127, 128, 129,  125},
+                    {134,  130, 131, 132, 133, 134,  130},
+                    {139,  135, 136, 137, 138, 139,  135},
+                    {144,  140, 141, 142, 143, 144,  140},
+                    {149,  145, 146, 147, 148, 149,  145},
+                    {129,  125, 126, 127, 128, 129,  125}}
+                }
+            }
+        });
+
+        myPad->getOperator()->associateInput(0,myInput);
+        myPad->getOperator()->computeOutputDims();
+        myPad->forward();
+        // myPad->getOperator()->getOutput(0)->print();
+        REQUIRE(*(myPad->getOperator()->getOutput(0)) == *myOutput);
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_PaddedConv.cpp b/unit_tests/operator/Test_PaddedConv.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e41be85ab00faae1af7239c43b74a34f558a663c
--- /dev/null
+++ b/unit_tests/operator/Test_PaddedConv.cpp
@@ -0,0 +1,319 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <cstdlib>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/MetaOperator.hpp"
+#include "aidge/operator/MetaOperatorDefs.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] PaddedConv(forward)") { // Forward pass of a PaddedConv node on the "cpu" backend with Int32 data, checked against precomputed tensors.
+    SECTION("Classic Conv") { // Only the four leading arguments are passed; the expected output is 3x3 per channel.
+        std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv"); // presumably (in_channels, out_channels, kernel dims, name) -- consistent with the 4x3x3x3 weights below; TODO confirm against MetaOperatorDefs.hpp
+        myConv->getOperator()->setDatatype(DataType::Int32);
+        myConv->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { // 4 x 3 x 3 x 3, filled with 0..107 in order
+            {
+                {
+                    {{  0,   1,   2},
+                    {  3,   4,   5},
+                    {  6,   7,   8}},
+                    {{  9,  10,  11},
+                    { 12,  13,  14},
+                    { 15,  16,  17}},
+                    {{ 18,  19,  20},
+                    { 21,  22,  23},
+                    { 24,  25,  26}}
+                },
+                {
+                    {{ 27,  28,  29},
+                    { 30,  31,  32},
+                    { 33,  34,  35}},
+                    {{ 36,  37,  38},
+                    { 39,  40,  41},
+                    { 42,  43,  44}},
+                    {{ 45,  46,  47},
+                    { 48,  49,  50},
+                    { 51,  52,  53}}
+                },
+                {
+                    {{ 54,  55,  56},
+                    { 57,  58,  59},
+                    { 60,  61,  62}},
+                    {{ 63,  64,  65},
+                    { 66,  67,  68},
+                    { 69,  70,  71}},
+                    {{ 72,  73,  74},
+                    { 75,  76,  77},
+                    { 78,  79,  80}}
+                },
+                {
+                    {{ 81,  82,  83},
+                    { 84,  85,  86},
+                    { 87,  88,  89}},
+                    {{ 90,  91,  92},
+                    { 93,  94,  95},
+                    { 96,  97,  98}},
+                    {{ 99, 100, 101},
+                    {102, 103, 104},
+                    {105, 106, 107}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); // four values, bound to input slot 2 below
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW: 2 x 3 x 5 x 5, filled with 0..149 in order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { // precomputed expected result, 2 x 4 x 3 x 3
+            {
+                {
+                    {{ 15226,  15577,  15928},
+                    { 16981,  17332,  17683},
+                    { 18736,  19087,  19438}},
+                    {{ 37818,  38898,  39978},
+                    { 43218,  44298,  45378},
+                    { 48618,  49698,  50778}},
+                    {{ 60426,  62235,  64044},
+                    { 69471,  71280,  73089},
+                    { 78516,  80325,  82134}},
+                    {{ 83016,  85554,  88092},
+                    { 95706,  98244, 100782},
+                    {108396, 110934, 113472}}
+                },
+                {
+                    {{ 41551,  41902,  42253},
+                    { 43306,  43657,  44008},
+                    { 45061,  45412,  45763}},
+                    {{118818, 119898, 120978},
+                    {124218, 125298, 126378},
+                    {129618, 130698, 131778}},
+                    {{196101, 197910, 199719},
+                    {205146, 206955, 208764},
+                    {214191, 216000, 217809}},
+                    {{273366, 275904, 278442},
+                    {286056, 288594, 291132},
+                    {298746, 301284, 303822}}
+                }
+            }
+        });
+        // Input slots as used here: 0 = data, 1 = weights, 2 = bias.
+        myConv->getOperator()->associateInput(0,myInput);
+        myConv->getOperator()->associateInput(1,myWeights);
+        myConv->getOperator()->associateInput(2,myBias);
+        myConv->getOperator()->computeOutputDims();
+        myConv->forward();
+        // Integer data: the whole output tensor is compared with operator==, no tolerance involved.
+        REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput);
+    }
+    SECTION("test Padding") { // Same weights, bias and input as "Classic Conv"; two extra arguments yield a 5x5 output per channel.
+        std::shared_ptr<Node> myConv = PaddedConv(3,4,{3,3}, "myconv", {1,1}, {1,1,1,1}); // presumably {1,1} = stride and {1,1,1,1} = per-border padding -- TODO confirm against PaddedConv's signature
+        myConv->getOperator()->setDatatype(DataType::Int32);
+        myConv->getOperator()->setBackend("cpu");
+        std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { // 4 x 3 x 3 x 3, filled with 0..107 in order
+            {
+                {
+                    {{  0,   1,   2},
+                    {  3,   4,   5},
+                    {  6,   7,   8}},
+                    {{  9,  10,  11},
+                    { 12,  13,  14},
+                    { 15,  16,  17}},
+                    {{ 18,  19,  20},
+                    { 21,  22,  23},
+                    { 24,  25,  26}}
+                },
+                {
+                    {{ 27,  28,  29},
+                    { 30,  31,  32},
+                    { 33,  34,  35}},
+                    {{ 36,  37,  38},
+                    { 39,  40,  41},
+                    { 42,  43,  44}},
+                    {{ 45,  46,  47},
+                    { 48,  49,  50},
+                    { 51,  52,  53}}
+                },
+                {
+                    {{ 54,  55,  56},
+                    { 57,  58,  59},
+                    { 60,  61,  62}},
+                    {{ 63,  64,  65},
+                    { 66,  67,  68},
+                    { 69,  70,  71}},
+                    {{ 72,  73,  74},
+                    { 75,  76,  77},
+                    { 78,  79,  80}}
+                },
+                {
+                    {{ 81,  82,  83},
+                    { 84,  85,  86},
+                    { 87,  88,  89}},
+                    {{ 90,  91,  92},
+                    { 93,  94,  95},
+                    { 96,  97,  98}},
+                    {{ 99, 100, 101},
+                    {102, 103, 104},
+                    {105, 106, 107}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); // four values, bound to input slot 2 below
+        std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { // NCHW: 2 x 3 x 5 x 5, filled with 0..149 in order
+            {
+                {
+                    {{  0,   1,   2,   3,   4},
+                    {  5,   6,   7,   8,   9},
+                    { 10,  11,  12,  13,  14},
+                    { 15,  16,  17,  18,  19},
+                    { 20,  21,  22,  23,  24}},
+
+                    {{ 25,  26,  27,  28,  29},
+                    { 30,  31,  32,  33,  34},
+                    { 35,  36,  37,  38,  39},
+                    { 40,  41,  42,  43,  44},
+                    { 45,  46,  47,  48,  49}},
+
+                    {{ 50,  51,  52,  53,  54},
+                    { 55,  56,  57,  58,  59},
+                    { 60,  61,  62,  63,  64},
+                    { 65,  66,  67,  68,  69},
+                    { 70,  71,  72,  73,  74}}
+                },
+                {
+                    {{ 75,  76,  77,  78,  79},
+                    { 80,  81,  82,  83,  84},
+                    { 85,  86,  87,  88,  89},
+                    { 90,  91,  92,  93,  94},
+                    { 95,  96,  97,  98,  99}},
+
+                    {{100, 101, 102, 103, 104},
+                    {105, 106, 107, 108, 109},
+                    {110, 111, 112, 113, 114},
+                    {115, 116, 117, 118, 119},
+                    {120, 121, 122, 123, 124}},
+
+                    {{125, 126, 127, 128, 129},
+                    {130, 131, 132, 133, 134},
+                    {135, 136, 137, 138, 139},
+                    {140, 141, 142, 143, 144},
+                    {145, 146, 147, 148, 149}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,5,5> { // 2 x 4 x 5 x 5; the inner 3x3 of every channel equals the "Classic Conv" expected values
+            {
+                {
+                    {{  6895,  10225,  10486,  10747,   7063},
+                     { 10303,  15226,  15577,  15928,  10429},
+                     { 11518,  16981,  17332,  17683,  11554},
+                     { 12733,  18736,  19087,  19438,  12679},
+                     {  8047,  11791,  11998,  12205,   7927}},
+
+                    {{ 15960,  24069,  24816,  25563,  17100},
+                     { 25119,  37818,  38898,  39978,  26703},
+                     { 28764,  43218,  44298,  45378,  30258},
+                     { 32409,  48618,  49698,  50778,  33813},
+                     { 21972,  32925,  33618,  34311,  22824}},
+
+                    {{ 25041,  37929,  39162,  40395,  27153},
+                     { 39951,  60426,  62235,  64044,  42993},
+                     { 46026,  69471,  71280,  73089,  48978},
+                     { 52101,  78516,  80325,  82134,  54963},
+                     { 35913,  54075,  55254,  56433,  37737}},
+
+                    {{ 34104,  51771,  53490,  55209,  37188},
+                     { 54765,  83016,  85554,  88092,  59265},
+                     { 63270,  95706,  98244, 100782,  67680},
+                     { 71775, 108396, 110934, 113472,  76095},
+                     { 49836,  75207,  76872,  78537,  52632}}
+                },
+                {
+                    {{ 20395,  29800,  30061,  30322,  19663},
+                     { 28528,  41551,  41902,  42253,  27304},
+                     { 29743,  43306,  43657,  44008,  28429},
+                     { 30958,  45061,  45412,  45763,  29554},
+                     { 18847,  27316,  27523,  27730,  17827}},
+
+                    {{ 53760,  80094,  80841,  81588,  54000},
+                     { 79794, 118818, 119898, 120978,  80028},
+                     { 83439, 124218, 125298, 126378,  83583},
+                     { 87084, 129618, 130698, 131778,  87138},
+                     { 57072,  84900,  85593,  86286,  57024}},
+
+                    {{ 87141, 130404, 131637, 132870,  88353},
+                     {131076, 196101, 197910, 199719, 132768},
+                     {137151, 205146, 206955, 208764, 138753},
+                     {143226, 214191, 216000, 217809, 144738},
+                     { 95313, 142500, 143679, 144858,  96237}},
+
+                    {{120504, 180696, 182415, 184134, 122688},
+                     {182340, 273366, 275904, 278442, 185490},
+                     {190845, 286056, 288594, 291132, 193905},
+                     {199350, 298746, 301284, 303822, 202320},
+                     {133536, 200082, 201747, 203412, 135432}}
+                }
+            }
+        });
+        // Input slots as used here: 0 = data, 1 = weights, 2 = bias.
+        myConv->getOperator()->associateInput(0,myInput);
+        myConv->getOperator()->associateInput(1,myWeights);
+        myConv->getOperator()->associateInput(2,myBias);
+        myConv->getOperator()->computeOutputDims();
+        myConv->forward();
+        // Integer data: the whole output tensor is compared with operator==, no tolerance involved.
+        REQUIRE(*(myConv->getOperator()->getOutput(0)) == *myOutput);
+    }
+}
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7293198f411510904ee73aced47b69dfc37374af
--- /dev/null
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -0,0 +1,203 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Pow.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Pow(forward)") { // Element-wise power on the "cpu" backend (Float32), checked against precomputed results.
+    SECTION("2D Tensor by Singleton") { // every element of a 2x2 base raised to the single exponent 2.0
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.42139274, 0.51524192},
+                {0.85247433, 0.13432795}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,1,1>{{2.0}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.17757183, 0.26547423},
+                {0.72671247, 0.01804400}
+            }
+        });
+        // Build the node, bind base (input 0) and exponent (input 1), then run one forward pass.
+        std::shared_ptr<Node> myPow = Pow();
+        myPow->getOperator()->setDatatype(DataType::Float32);
+        myPow->getOperator()->setBackend("cpu");
+        myPow->getOperator()->associateInput(0, input_1);
+        myPow->getOperator()->associateInput(1, input_2);
+        myPow->getOperator()->computeOutputDims();
+        myPow->forward();
+        REQUIRE(myPow->getOperator()->getOutput(0)->size() == expectedOutput->size()); // guard: the raw-pointer walk below must stay inside both buffers
+        const float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) { // element count taken from the tensor instead of a hard-coded literal
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); // absolute tolerance 1e-5; NOTE(review): std::abs(float) comes from <cmath>, which this file does not include directly
+        }
+
+    }
+
+    SECTION("3D Tensor by 1D Tensor") { // 2x2x3 base, exponent vector of length 3 (same length as the base's last dimension)
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.87519985, 0.10536593, 0.20268351},
+                 {0.75532353, 0.95977652, 0.03897029}},
+
+                {{0.67554104, 0.35499334, 0.27741563},
+                 {0.94270861, 0.48397779, 0.35532343}}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array1D<float,3>{
+            {0.39333701, 0.08719915, 0.16713941}
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.94891787, 0.82182676, 0.76584703},
+                 {0.89549923, 0.99642646, 0.58137459}},
+
+                {{0.85702944, 0.91364944, 0.80709606},
+                 {0.97706109, 0.93867886, 0.84118503}}
+            }
+        });
+        // Build the node, bind base (input 0) and exponent (input 1), then run one forward pass.
+        std::shared_ptr<Node> myPow = Pow();
+        myPow->getOperator()->setDatatype(DataType::Float32);
+        myPow->getOperator()->setBackend("cpu");
+        myPow->getOperator()->associateInput(0, input_1);
+        myPow->getOperator()->associateInput(1, input_2);
+        myPow->getOperator()->computeOutputDims();
+        myPow->forward();
+        REQUIRE(myPow->getOperator()->getOutput(0)->size() == expectedOutput->size()); // guard: the raw-pointer walk below must stay inside both buffers
+        const float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) { // element count taken from the tensor instead of a hard-coded literal
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); // absolute tolerance 1e-5
+        }
+
+    }
+
+    SECTION("2D Tensors") { // base and exponent are both 2x2: one exponent per element
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.79780143, 0.49322051},
+                {0.84239346, 0.83737719}
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,2,2>{
+            {
+                {0.59088874, 0.78858775},
+                {0.42879432, 0.17615074}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.87504572, 0.57271165},
+                {0.92909741, 0.96922028}
+            }
+        });
+        // Build the node, bind base (input 0) and exponent (input 1), then run one forward pass.
+        std::shared_ptr<Node> myPow = Pow();
+        myPow->getOperator()->setDatatype(DataType::Float32);
+        myPow->getOperator()->setBackend("cpu");
+        myPow->getOperator()->associateInput(0, input_1);
+        myPow->getOperator()->associateInput(1, input_2);
+        myPow->getOperator()->computeOutputDims();
+        myPow->forward();
+        REQUIRE(myPow->getOperator()->getOutput(0)->size() == expectedOutput->size()); // guard: the raw-pointer walk below must stay inside both buffers
+        const float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) { // element count taken from the tensor instead of a hard-coded literal
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); // absolute tolerance 1e-5
+        }
+
+    }
+
+    SECTION("4D Tensor") { // 2x3x3x3 base raised to the single exponent 2.0
+        std::shared_ptr<Tensor> input_1 = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
+            {
+                {
+                    {{0.80191749, 0.45388508, 0.86550850},
+                     {0.47226250, 0.55809456, 0.59451854},
+                     {0.45497441, 0.02653158, 0.44041735}},
+                    {{0.30726379, 0.73146582, 0.46462774},
+                     {0.30268502, 0.78075552, 0.65154958},
+                     {0.91332406, 0.62448132, 0.53238851}},
+                    {{0.13917381, 0.43061519, 0.30198061},
+                     {0.12880909, 0.08995515, 0.29609048},
+                     {0.46449280, 0.47559714, 0.24193990}}
+                },
+                {
+                    {{0.87349969, 0.51625526, 0.16921073},
+                     {0.95035923, 0.10066575, 0.56729180},
+                     {0.84686232, 0.05965143, 0.03635806}},
+                    {{0.61107808, 0.59954077, 0.45627308},
+                     {0.84114522, 0.77186388, 0.37427086},
+                     {0.13415480, 0.00617349, 0.84260136}},
+                    {{0.55090177, 0.57292056, 0.29158932},
+                     {0.67131883, 0.96988875, 0.69545972},
+                     {0.80979776, 0.18238151, 0.19527155}}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> input_2 =  std::make_shared<Tensor>(Array2D<float,1,1>{{2.0}});
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
+            {
+                {
+                    {{6.43071651e-01, 2.06011668e-01, 7.49104977e-01},
+                     {2.23031864e-01, 3.11469525e-01, 3.53452295e-01},
+                     {2.07001716e-01, 7.03924568e-04, 1.93967447e-01}},
+
+                    {{9.44110379e-02, 5.35042226e-01, 2.15878934e-01},
+                     {9.16182250e-02, 6.09579206e-01, 4.24516857e-01},
+                     {8.34160864e-01, 3.89976919e-01, 2.83437520e-01}},
+
+                    {{1.93693489e-02, 1.85429439e-01, 9.11922902e-02},
+                     {1.65917836e-02, 8.09192937e-03, 8.76695737e-02},
+                     {2.15753555e-01, 2.26192638e-01, 5.85349165e-02}}
+                },
+                {
+                    {{7.63001740e-01, 2.66519487e-01, 2.86322720e-02},
+                     {9.03182685e-01, 1.01335924e-02, 3.21819991e-01},
+                     {7.17175782e-01, 3.55829368e-03, 1.32190844e-03}},
+
+                    {{3.73416424e-01, 3.59449148e-01, 2.08185121e-01},
+                     {7.07525253e-01, 5.95773816e-01, 1.40078679e-01},
+                     {1.79975089e-02, 3.81119971e-05, 7.09977031e-01}},
+
+                    {{3.03492755e-01, 3.28237981e-01, 8.50243345e-02},
+                     {4.50668961e-01, 9.40684199e-01, 4.83664215e-01},
+                     {6.55772448e-01, 3.32630165e-02, 3.81309800e-02}}
+                }
+            }
+        });
+        // Build the node, bind base (input 0) and exponent (input 1), then run one forward pass.
+        std::shared_ptr<Node> myPow = Pow();
+        myPow->getOperator()->setDatatype(DataType::Float32);
+        myPow->getOperator()->setBackend("cpu");
+        myPow->getOperator()->associateInput(0, input_1);
+        myPow->getOperator()->associateInput(1, input_2);
+        myPow->getOperator()->computeOutputDims();
+        myPow->forward();
+        REQUIRE(myPow->getOperator()->getOutput(0)->size() == expectedOutput->size()); // guard: the raw-pointer walk below must stay inside both buffers
+        const float* resPtr = static_cast<float*>(myPow->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) { // element count taken from the tensor instead of a hard-coded literal
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); // absolute tolerance 1e-5
+        }
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_SqrtImpl.cpp b/unit_tests/operator/Test_SqrtImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cf17499aba50359547218adc6b3938176e729ed3
--- /dev/null
+++ b/unit_tests/operator/Test_SqrtImpl.cpp
@@ -0,0 +1,121 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Sqrt.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Sqrt(forward)") { // Element-wise square root on the "cpu" backend (Float32), checked against precomputed results.
+    SECTION("2D Tensor") { // 2x2 input that includes an exact square (16) and zero
+        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {16.00000000,  0.62226844},
+                { 0.00000000,  1.84539008}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {4.00000000, 0.78883994},
+                {0.00000000, 1.35845140}
+            }
+        });
+        // Build the node, bind the single input, then run one forward pass.
+        std::shared_ptr<Node> mySqrt = Sqrt();
+        mySqrt->getOperator()->setDatatype(DataType::Float32);
+        mySqrt->getOperator()->setBackend("cpu");
+        mySqrt->getOperator()->associateInput(0,input);
+        mySqrt->getOperator()->computeOutputDims();
+        mySqrt->forward();
+        REQUIRE(mySqrt->getOperator()->getOutput(0)->size() == expectedOutput->size()); // guard: the raw-pointer walk below must stay inside both buffers
+        const float* resPtr = static_cast<float*>(mySqrt->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) { // element count taken from the tensor instead of a hard-coded literal
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); // absolute tolerance 1e-5; NOTE(review): std::abs(float) comes from <cmath>, which this file does not include directly
+        }
+
+    }
+
+    SECTION("4D Tensor") { // 2x3x3x3 input with values in (0, 1)
+        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
+            {
+                {
+                    {{0.06218481, 0.46850157, 0.60914326},
+                     {0.57470602, 0.09943211, 0.59992820},
+                     {0.99623793, 0.54931718, 0.89343822}},
+                    {{0.75176072, 0.38237786, 0.84824580},
+                     {0.10619396, 0.11959118, 0.93499404},
+                     {0.65563291, 0.02913034, 0.17093092}},
+                    {{0.36303985, 0.92073035, 0.79146117},
+                     {0.88962847, 0.94561219, 0.92033130},
+                     {0.52903181, 0.13397896, 0.76086712}}
+                },
+                {
+                    {{0.31242222, 0.80526417, 0.48411584},
+                     {0.84375203, 0.65408552, 0.55028963},
+                     {0.77546734, 0.06203610, 0.83163154}},
+                    {{0.46342927, 0.53631741, 0.39145601},
+                     {0.14204198, 0.84214240, 0.94185621},
+                     {0.05068624, 0.99889028, 0.38464361}},
+                    {{0.37591159, 0.51769549, 0.30288595},
+                     {0.96883464, 0.35154045, 0.55648762},
+                     {0.13022375, 0.73467660, 0.02705121}}
+                }
+            }
+        });
+
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<float,2,3,3,3> {
+            {
+                {
+                    {{0.24936883, 0.6844717,  0.7804763},
+                     {0.75809366, 0.31532857, 0.7745503},
+                     {0.9981172,  0.7411593,  0.9452186}},
+                    {{0.86704135, 0.6183671,  0.9210026},
+                     {0.32587415, 0.34581956, 0.9669509},
+                     {0.80971164, 0.17067613, 0.41343793}},
+                    {{0.60252786, 0.9595469,  0.88964105},
+                     {0.9432012,  0.97242594, 0.95933896},
+                     {0.7273457,  0.36603138, 0.87227696}}
+                },
+                {
+                    {{0.55894744, 0.89736515, 0.69578433},
+                     {0.91855973, 0.8087555,  0.7418151},
+                     {0.88060623, 0.24907047, 0.91193837}},
+                    {{0.6807564,  0.73233694, 0.6256645},
+                     {0.37688458, 0.9176832,  0.9704928},
+                     {0.22513604, 0.99944496, 0.62019646}},
+                    {{0.6131163,  0.7195106,  0.5503507},
+                     {0.984294,   0.59290844, 0.745981},
+                     {0.3608653,  0.8571328,  0.16447252}}
+                }
+            }
+        });
+        // Build the node, bind the single input, then run one forward pass.
+        std::shared_ptr<Node> mySqrt = Sqrt();
+        mySqrt->getOperator()->setDatatype(DataType::Float32);
+        mySqrt->getOperator()->setBackend("cpu");
+        mySqrt->getOperator()->associateInput(0,input);
+        mySqrt->getOperator()->computeOutputDims();
+        mySqrt->forward();
+        REQUIRE(mySqrt->getOperator()->getOutput(0)->size() == expectedOutput->size()); // guard: the raw-pointer walk below must stay inside both buffers
+        const float* resPtr = static_cast<float*>(mySqrt->getOperator()->getOutput(0)->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expectedOutput->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expectedOutput->size(); ++i) { // element count taken from the tensor instead of a hard-coded literal
+            REQUIRE(std::abs(resPtr[i]-expectedPtr[i]) < 0.00001); // absolute tolerance 1e-5
+        }
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d741602cf02958a88bb41bbd2927577027acb069
--- /dev/null
+++ b/unit_tests/operator/Test_SubImpl.cpp
@@ -0,0 +1,129 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Sub.hpp"
+#include "aidge/backend/cpu.hpp"
+
+#include <cmath>
+#include <memory>
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Sub(forward)") {
+    // Shared check used by every section below. It builds a Sub node, runs it
+    // on the "cpu" backend with Float32 data (input 0 minus input 1), and then
+    // compares the node's first output with `expected`:
+    //   - both tensors must hold the same number of elements;
+    //   - every pair of matching elements must differ by less than `tolerance`.
+    const auto checkSub = [](const std::shared_ptr<Tensor>& lhs,
+                             const std::shared_ptr<Tensor>& rhs,
+                             const std::shared_ptr<Tensor>& expected) {
+        // Maximum absolute difference accepted between two matching elements.
+        constexpr double tolerance = 0.00001;
+
+        std::shared_ptr<Node> mySub = Sub();
+        const auto op = mySub->getOperator();
+        op->setDatatype(DataType::Float32);
+        op->setBackend("cpu");
+        op->associateInput(0, lhs);
+        op->associateInput(1, rhs);
+        op->computeOutputDims();
+        mySub->forward();
+
+        // Compare element counts first: the loop below indexes both raw
+        // buffers and must not run past the end of a too-small output.
+        const auto output = op->getOutput(0);
+        REQUIRE(output->size() == expected->size());
+
+        // The raw storage of both tensors is read as a flat array of float.
+        const float* resPtr = static_cast<float*>(output->getImpl()->rawPtr());
+        const float* expectedPtr = static_cast<float*>(expected->getImpl()->rawPtr());
+        for (std::size_t i = 0; i < expected->size(); ++i) {
+            REQUIRE(std::abs(resPtr[i] - expectedPtr[i]) < tolerance);
+        }
+    };
+
+    SECTION("2D Tensor by Singleton") {
+        // The second operand is a 1x1 tensor; the expected values are every
+        // element of input_1 minus that single value.
+        const auto input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.34234560, 0.92812711},
+                {0.73706615, 0.69953883}
+            }
+        });
+        const auto input_2 = std::make_shared<Tensor>(Array2D<float,1,1>{{2.5}});
+        const auto expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {-2.15765429, -1.57187295},
+                {-1.76293385, -1.80046117}
+            }
+        });
+
+        checkSub(input_1, input_2, expectedOutput);
+    }
+
+    SECTION("2D Tensors") {
+        // Both operands have the same 2x2 shape; the expected values are the
+        // element-wise differences input_1 - input_2.
+        const auto input_1 = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {0.34234560, 0.92812711},
+                {0.73706615, 0.69953883}
+            }
+        });
+        const auto input_2 = std::make_shared<Tensor>(Array2D<float,2,2>{
+            {
+                {0.61729127, 0.83004373},
+                {0.72002089, 0.52473849}
+            }
+        });
+        const auto expectedOutput = std::make_shared<Tensor>(Array2D<float,2,2> {
+            {
+                {-0.27494568,  0.09808338},
+                {0.01704526,  0.17480034}
+            }
+        });
+
+        checkSub(input_1, input_2, expectedOutput);
+    }
+
+    SECTION("3D Tensor by 1D Tensor") {
+        // The second operand holds 3 values; the expected values subtract it
+        // from each innermost row of 3 values of input_1.
+        const auto input_1 = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.84181279, 0.20655948, 0.09750116},
+                 {0.37723488, 0.73120135, 0.04666907}},
+
+                {{0.91483921, 0.93985939, 0.58823180},
+                 {0.39963132, 0.67879969, 0.33209187}}
+            }
+        });
+        const auto input_2 = std::make_shared<Tensor>(Array1D<float,3>{
+            {0.04784805, 0.91903114, 0.38606840}
+        });
+        const auto expectedOutput = std::make_shared<Tensor>(Array3D<float,2,2,3> {
+            {
+                {{0.79396474, -0.71247166, -0.28856725},
+                 {0.32938683, -0.18782979, -0.33939934}},
+
+                {{0.86699116,  0.02082825,  0.20216340},
+                 {0.35178328, -0.24023145, -0.05397654}}
+            }
+        });
+
+        checkSub(input_1, input_2, expectedOutput);
+    }
+}
\ No newline at end of file
diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
similarity index 100%
rename from unit_tests/Test_Scheduler.cpp
rename to unit_tests/scheduler/Test_Scheduler.cpp