diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 651acd6fe1a58edc0b6f2c446e48e4bc4e4a8750..3efb308fa0f78dce35973ccb47d1303d7c8634af 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -10,9 +10,12 @@ stages:
   - build
   # Unit test stage
   - test
+  # Code coverage
+  - coverage
 
 include:
   - local: '/.gitlab/ci/_global.gitlab-ci.yml'
   - local: '/.gitlab/ci/static_analysis.gitlab-ci.yml'
   - local: '/.gitlab/ci/build.gitlab-ci.yml'
   - local: '/.gitlab/ci/test.gitlab-ci.yml'
+  - local: '/.gitlab/ci/coverage.gitlab-ci.yml'
diff --git a/.gitlab/ci/_global.gitlab-ci.yml b/.gitlab/ci/_global.gitlab-ci.yml
index 6f34fe701df035e68ce49825fde0ff88449a9637..1615b8974db11d93cb3305ce800e46cf5377bc33 100644
--- a/.gitlab/ci/_global.gitlab-ci.yml
+++ b/.gitlab/ci/_global.gitlab-ci.yml
@@ -9,5 +9,8 @@ variables:
   GIT_SSL_NO_VERIFY: 1
   DEBIAN_FRONTEND: noninteractive
 
-
-image: n2d2-ci/ubuntu20.04/cpu:latest
\ No newline at end of file
+default:
+  image: nvidia/cuda:12.2.0-devel-ubuntu22.04
+  before_script:
+    - apt update
+    - apt install -y cmake cppcheck python-is-python3 pip git gcovr unzip curl
diff --git a/.gitlab/ci/build.gitlab-ci.yml b/.gitlab/ci/build.gitlab-ci.yml
index 620bc325977dc6f5dd2372f6f48ed8cc688b3388..68fcb6b4bc0dac08c4f0029ec1f2d3404226c1c2 100644
--- a/.gitlab/ci/build.gitlab-ci.yml
+++ b/.gitlab/ci/build.gitlab-ci.yml
@@ -1,61 +1,86 @@
 build:ubuntu_cpp:
   stage: build
+  needs: []
   tags:
     - docker
-  image: n2d2-ci/ubuntu20.04/cpu:latest
-
   script:
-    - INSTALL_PATH="$CI_PROJECT_DIR/install_cpp"
-    - mkdir -p $INSTALL_PATH
-    - mkdir -p build_cpp
+    # Download dependencies
+    # aidge_core
+    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
+    - unzip -o build_artifacts.zip -d .
+    - rm -rf build_cpp
 
-    # Clone and compile dependencies
-    - MODULE_NAME="aidge_core"
-    - BASE_URL=`echo $CI_REPOSITORY_URL | sed "s;\/*$CI_PROJECT_PATH.*;;"`
-    - REPO_URL="$BASE_URL/aidge/$MODULE_NAME.git"
-    - git clone $REPO_URL $MODULE_NAME
-    - mkdir -p $MODULE_NAME/build
-    - cd $MODULE_NAME/build
-    - cmake -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON ..
-    - make -j4 all install
-    - cd ../..
-    
     # Build current module
+    - export CMAKE_PREFIX_PATH=../install_cpp
+    - mkdir -p build_cpp
     - cd build_cpp
-    - cmake -DCMAKE_INSTALL_PREFIX:PATH=$INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON ..
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug -DWERROR=ON -DCOVERAGE=ON ..
     - make -j4 all install
 
   artifacts:
+    expire_in: 1 week
     paths:
       - build_cpp/
       - install_cpp/
 
 build:ubuntu_python:
   stage: build
+  needs: []
   tags:
     - docker
-  image: n2d2-ci/ubuntu20.04/cpu:latest
-
   script:
-    - export AIDGE_INSTALL=`pwd`/install
+    # Download dependencies
+    # aidge_core (CPP)
+    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_cpp"'
+    - unzip -o build_artifacts.zip -d .
+    - rm -rf build_cpp
+    # aidge_core (Python)
+    - 'curl --location --output build_artifacts.zip "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:ubuntu_python"'
+    - unzip -o build_artifacts.zip -d .
 
-    # Create virtaul env
     - python3 -m pip install virtualenv
     - virtualenv venv
     - source venv/bin/activate
+    - export AIDGE_INSTALL=`pwd`/install
+    - export CMAKE_PREFIX_PATH=../install_cpp
+    - python3 -m pip install .
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - venv/
 
-    # Clone dependencies
-    - MODULE_NAME="aidge_core"
-    - BASE_URL=`echo $CI_REPOSITORY_URL | sed "s;\/*$CI_PROJECT_PATH.*;;"`
-    - REPO_URL="$BASE_URL/aidge/$MODULE_NAME.git"
-    - git clone $REPO_URL $MODULE_NAME
+build:windows_cpp:
+  stage: build
+  needs: []
+  tags:
+    - windows
+
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install git -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    # Download dependencies
+    # aidge_core
+    - 'curl "https://gitlab.eclipse.org/api/v4/projects/5139/jobs/artifacts/main/download?job=build:windows_cpp" -o build_artifacts.zip'
+    - Expand-Archive -Path .\build_artifacts.zip -DestinationPath . -Force
+    - Remove-Item .\build_cpp\ -Recurse
 
-    # Pip install dependancy
-    - cd $MODULE_NAME
-    - python3 -m pip install . -v
+    - $env:CMAKE_PREFIX_PATH = '../install_cpp'
+    - mkdir -p build_cpp
+    - cd build_cpp
+    - cmake -DCMAKE_INSTALL_PREFIX:PATH=../install_cpp -DCMAKE_BUILD_TYPE=Debug ..
+    - cmake --build . -j2
+    - cmake --install . --config Debug
 
-    - cd ..
-    - python3 -m pip install . -v
   artifacts:
+    expire_in: 1 week
     paths:
-      - venv/
\ No newline at end of file
+      - build_cpp/
+      - install_cpp/
diff --git a/.gitlab/ci/coverage.gitlab-ci.yml b/.gitlab/ci/coverage.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..33547fc3f52771c456fba3d34a6e8d96eebafd8a
--- /dev/null
+++ b/.gitlab/ci/coverage.gitlab-ci.yml
@@ -0,0 +1,41 @@
+coverage:ubuntu_cpp:
+  stage: coverage
+  needs: ["build:ubuntu_cpp"]
+  tags:
+    - docker
+  script:
+    - cd build_cpp
+    - ctest --output-on-failure
+    # HTML report for visualization
+    - gcovr --html-details --exclude-unreachable-branches -o coverage.html --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/'
+    # Cobertura XML report for GitLab integration
+    - gcovr --xml-pretty --exclude-unreachable-branches --print-summary -o coverage.xml --root ${CI_PROJECT_DIR} --filter '\.\./include/' --filter '\.\./src/'
+  coverage: /^\s*lines:\s*\d+.\d+\%/
+  artifacts:
+    name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA}
+    expire_in: 2 days
+    reports:
+      coverage_report:
+        coverage_format: cobertura
+        path: build_cpp/coverage.xml
+
+coverage:ubuntu_python:
+  stage: coverage
+  needs: ["build:ubuntu_python"]
+  tags:
+    - docker
+  script:
+    - source venv/bin/activate
+    - python3 -m pip install numpy coverage
+    - cd ${CI_PROJECT_NAME}
+    # Retrieve the installation path of the module, since it is installed with pip.
+    - export MODULE_LOCATION=`python -c "import ${CI_PROJECT_NAME} as _; print(_.__path__[0])"`
+    - python3 -m coverage run --source=$MODULE_LOCATION -m unittest discover -s unit_tests/ -v -b
+    - python3 -m coverage report
+    - python3 -m coverage xml
+  coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
+  artifacts:
+    reports:
+      coverage_report:
+        coverage_format: cobertura
+        path: ${CI_PROJECT_NAME}/coverage.xml
diff --git a/.gitlab/ci/static_analysis.gitlab-ci.yml b/.gitlab/ci/static_analysis.gitlab-ci.yml
index 7490b5af51ca970c1b892408ac0023d8cd945cfa..0ea9b711885442e7f260ae86e313464b592127a0 100644
--- a/.gitlab/ci/static_analysis.gitlab-ci.yml
+++ b/.gitlab/ci/static_analysis.gitlab-ci.yml
@@ -26,8 +26,8 @@ static_analysis:python:
   script:
     - pip install pylint
     - pip install pylint-gitlab
-    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabCodeClimateReporter aidge_backend_cpu/ > codeclimate.json
-    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabPagesHtmlReporter aidge_backend_cpu/ > pylint.html
+    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabCodeClimateReporter ${CI_PROJECT_NAME}/ > codeclimate.json
+    - pylint --rcfile=.pylintrc --exit-zero --output-format=pylint_gitlab.GitlabPagesHtmlReporter ${CI_PROJECT_NAME}/ > pylint.html
     - mkdir -p public/python/$CI_COMMIT_REF_NAME
     - mv pylint.html public/python/$CI_COMMIT_REF_NAME/
   artifacts:
diff --git a/.gitlab/ci/test.gitlab-ci.yml b/.gitlab/ci/test.gitlab-ci.yml
index 2ad635dff79715f2e3a487b0a41d6f1b132ff641..05f567dd7430b0d3a801612ca5353a39288285d2 100644
--- a/.gitlab/ci/test.gitlab-ci.yml
+++ b/.gitlab/ci/test.gitlab-ci.yml
@@ -3,22 +3,47 @@ test:ubuntu_cpp:
   needs: ["build:ubuntu_cpp"]
   tags:
     - docker
-  image: n2d2-ci/ubuntu20.04/cpu:latest
   script:
     - cd build_cpp
-    - ctest --output-on-failure
+    - ctest --output-junit ctest-results.xml --output-on-failure
+  artifacts:
+    reports:
+      junit: build_cpp/ctest-results.xml
 
 test:ubuntu_python:
   stage: test
   needs: ["build:ubuntu_python"]
   tags:
     - docker
-  image: n2d2-ci/ubuntu20.04/cpu:latest
   script:
     - source venv/bin/activate
-    - cd aidge_backend_cpu
-    - python3 -m pip install numpy
+    - cd ${CI_PROJECT_NAME}
+    - python3 -m pip install numpy unittest-xml-reporting
     - python3 -m pip list
     # Run on discovery all tests located in core/unit_tests/python and discard the stdout 
     # only to show the errors/warnings and the results of the tests
-    - python3 -m unittest discover -s unit_tests/ -v -b 1> /dev/null
+    - python3 -m xmlrunner discover -s unit_tests/ -v -b --output-file xmlrunner-results.xml
+  artifacts:
+    reports:
+      junit: ${CI_PROJECT_NAME}/xmlrunner-results.xml
+
+test:windows_cpp:
+  stage: test
+  needs: ["build:windows_cpp"]
+  tags:
+    - windows
+  image: buildtools
+  before_script:
+    # Install Chocolatey
+    - Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
+    # Install dependencies
+    - choco install cmake.install --installargs '"ADD_CMAKE_TO_PATH=System"' -Y
+    - choco install python -Y
+    # Update PATH
+    - $env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
+  script:
+    - cd build_cpp
+    - ctest --output-junit ctest-results.xml --output-on-failure
+  artifacts:
+    reports:
+      junit: build_cpp/ctest-results.xml
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d973a04ec5136347c3ddc7fc92989e65b0f34a42..51ee1f6d5df771fcccd1b05a45861eb2f1d3bbbe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,6 +21,17 @@ include(PybindModuleCreation)
 # Define options
 option(PYBIND "python binding" ON)
 option(WERROR "Warning as error" OFF)
+option(TEST "Enable tests" ON)
+option(COVERAGE "Enable coverage" OFF)
+
+##############################################
+# Import utils CMakeLists
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
+include(PybindModuleCreation)
+
+if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    Include(CodeCoverage)
+endif()
 
 ##############################################
 # Find system dependencies
@@ -49,9 +60,9 @@ target_include_directories(${module_name}
 )
 
 # PYTHON BINDING
-generate_python_binding(${project} ${module_name})
-
 if (PYBIND)
+    generate_python_binding(${project} ${module_name})
+
     # Handles Python + pybind11 headers dependencies
     target_link_libraries(${module_name}
         PUBLIC 
@@ -63,20 +74,15 @@ endif()
 
 target_compile_features(${module_name} PRIVATE cxx_std_14)
 
-if(WERROR)
-    target_compile_options(${module_name} PRIVATE
+target_compile_options(${module_name} PRIVATE
     $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-    -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Werror>)
-    target_compile_options(${module_name} PRIVATE
+    -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow $<$<BOOL:${WERROR}>:-Werror>>)
+target_compile_options(${module_name} PRIVATE
     $<$<CXX_COMPILER_ID:MSVC>:
     /W4>)
-else()
-    target_compile_options(${module_name} PRIVATE
-        $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-        -Wall -Wextra -Wold-style-cast -Winline -pedantic -Werror=narrowing -Wshadow -Wpedantic>)
-        target_compile_options(${module_name} PRIVATE
-        $<$<CXX_COMPILER_ID:MSVC>:
-        /W4>)
+
+if(CMAKE_COMPILER_IS_GNUCXX AND COVERAGE)
+    append_coverage_compiler_flags()
 endif()
 
 ##############################################
@@ -129,6 +135,7 @@ export(EXPORT ${project}-targets
 
 ##############################################
 ## Add test
-enable_testing()
-add_subdirectory(unit_tests)
-
+if(TEST)
+    enable_testing()
+    add_subdirectory(unit_tests)
+endif()
diff --git a/README.md b/README.md
index 0a0fe37f8672fde09055d3356951579bd1b56d6c..74eb50826bf6f88a0ded363138adba04827390d0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+![Pipeline status](https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/badges/master/pipeline.svg?ignore_skipped=true) ![C++ coverage](https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/badges/master/coverage.svg?job=coverage:ubuntu_cpp&key_text=C%2B%2B+coverage&key_width=90) ![Python coverage](https://gitlab.eclipse.org/eclipse/aidge/aidge_backend_cpu/badges/master/coverage.svg?job=coverage:ubuntu_python&key_text=Python+coverage&key_width=100)
+
 # Aidge CPU library
 
 You can find in this folder the library that implements the CPU operators. <br>
diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py
index bc76620390b7563f0088f4c600b612bbe827b170..d8cf3e164da4bd34273905b0b0e156cf057635a5 100644
--- a/aidge_backend_cpu/unit_tests/test_scheduler.py
+++ b/aidge_backend_cpu/unit_tests/test_scheduler.py
@@ -36,7 +36,54 @@ class test_scheduler(unittest.TestCase):
         for i in range(len(expected_out)):
             self.assertEqual(expected_out[i], out_tensor[i])
 
+    def test_sequential_scheduling(self):
+        input_data =  np.array([]).astype(np.float32)
+        input_tensor = aidge_core.Tensor(input_data)
 
+        input_node = aidge_core.Producer(input_tensor, "X")
+
+        graph_view = aidge_core.sequential([
+            aidge_core.FC(50, name='0'),
+            aidge_core.FC(50, name='1'),
+            aidge_core.FC(10, name='2'),
+        ])
+        EXPECTED_SCHEDULE = ['0', '1', '2']
+
+        input_node.add_child(graph_view)
+        input_node.get_operator().set_datatype(aidge_core.DataType.Float32)
+        input_node.get_operator().set_backend("cpu")
+        graph_view.set_datatype(aidge_core.DataType.Float32)
+        graph_view.set_backend("cpu")
+
+        scheduler = aidge_core.SequentialScheduler(graph_view)
+        scheduler.generate_scheduling()
+
+        self.assertListEqual([i.name() for i in scheduler.get_static_scheduling()], EXPECTED_SCHEDULE)
+
+
+    def test_parallel_scheduling(self):
+        input_data =  np.array([]).astype(np.float32)
+        input_tensor = aidge_core.Tensor(input_data)
+
+        input_node = aidge_core.Producer(input_tensor, "X")
+        graph_view = aidge_core.sequential([
+            aidge_core.FC(50, name='0'),
+            aidge_core.parallel([aidge_core.FC(50, name='1'), aidge_core.FC(50, name='3')]),
+            aidge_core.Add(name='2'),
+        ])
+
+        EXPECTED_SCHEDULE = [['0', '1', '3', '2'],  ['0', '3', '1', '2']] # Both schedulings are valid!
+
+        input_node.add_child(graph_view)
+        input_node.get_operator().set_datatype(aidge_core.DataType.Float32)
+        input_node.get_operator().set_backend("cpu")
+        graph_view.set_datatype(aidge_core.DataType.Float32)
+        graph_view.set_backend("cpu")
+
+        scheduler = aidge_core.SequentialScheduler(graph_view)
+        scheduler.generate_scheduling()
+
+        self.assertTrue([i.name() for i in scheduler.get_static_scheduling()] in EXPECTED_SCHEDULE)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/cmake/CodeCoverage.cmake b/cmake/CodeCoverage.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..d4a039fd0e511238df1c0e0502c7588409099289
--- /dev/null
+++ b/cmake/CodeCoverage.cmake
@@ -0,0 +1,742 @@
+# Copyright (c) 2012 - 2017, Lars Bilke
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+#    may be used to endorse or promote products derived from this software without
+#    specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# CHANGES:
+#
+# 2012-01-31, Lars Bilke
+# - Enable Code Coverage
+#
+# 2013-09-17, Joakim Söderberg
+# - Added support for Clang.
+# - Some additional usage instructions.
+#
+# 2016-02-03, Lars Bilke
+# - Refactored functions to use named parameters
+#
+# 2017-06-02, Lars Bilke
+# - Merged with modified version from github.com/ufz/ogs
+#
+# 2019-05-06, Anatolii Kurotych
+# - Remove unnecessary --coverage flag
+#
+# 2019-12-13, FeRD (Frank Dana)
+# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor
+#   of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments.
+# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY
+# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list
+# - Set lcov basedir with -b argument
+# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be
+#   overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().)
+# - Delete output dir, .info file on 'make clean'
+# - Remove Python detection, since version mismatches will break gcovr
+# - Minor cleanup (lowercase function names, update examples...)
+#
+# 2019-12-19, FeRD (Frank Dana)
+# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets
+#
+# 2020-01-19, Bob Apthorpe
+# - Added gfortran support
+#
+# 2020-02-17, FeRD (Frank Dana)
+# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters
+#   in EXCLUDEs, and remove manual escaping from gcovr targets
+#
+# 2021-01-19, Robin Mueller
+# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run
+# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional
+#   flags to the gcovr command
+#
+# 2020-05-04, Mihchael Davis
+#     - Add -fprofile-abs-path to make gcno files contain absolute paths
+#     - Fix BASE_DIRECTORY not working when defined
+#     - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines
+#
+# 2021-05-10, Martin Stump
+#     - Check if the generator is multi-config before warning about non-Debug builds
+#
+# 2022-02-22, Marko Wehle
+#     - Change gcovr output from -o <filename> for --xml <filename> and --html <filename> output respectively.
+#       This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt".
+#
+# 2022-09-28, Sebastian Mueller
+#     - fix append_coverage_compiler_flags_to_target to correctly add flags
+#     - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent)
+#
+# USAGE:
+#
+# 1. Copy this file into your cmake modules path.
+#
+# 2. Add the following line to your CMakeLists.txt (best inside an if-condition
+#    using a CMake option() to enable it just optionally):
+#      include(CodeCoverage)
+#
+# 3. Append necessary compiler flags for all supported source files:
+#      append_coverage_compiler_flags()
+#    Or for specific target:
+#      append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME)
+#
+# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og
+#
+# 4. If you need to exclude additional directories from the report, specify them
+#    using full paths in the COVERAGE_EXCLUDES variable before calling
+#    setup_target_for_coverage_*().
+#    Example:
+#      set(COVERAGE_EXCLUDES
+#          '${PROJECT_SOURCE_DIR}/src/dir1/*'
+#          '/path/to/my/src/dir2/*')
+#    Or, use the EXCLUDE argument to setup_target_for_coverage_*().
+#    Example:
+#      setup_target_for_coverage_lcov(
+#          NAME coverage
+#          EXECUTABLE testrunner
+#          EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*")
+#
+# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set
+#     relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR)
+#     Example:
+#       set(COVERAGE_EXCLUDES "dir1/*")
+#       setup_target_for_coverage_gcovr_html(
+#           NAME coverage
+#           EXECUTABLE testrunner
+#           BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src"
+#           EXCLUDE "dir2/*")
+#
+# 5. Use the functions described below to create a custom make target which
+#    runs your test executable and produces a code coverage report.
+#
+# 6. Build a Debug build:
+#      cmake -DCMAKE_BUILD_TYPE=Debug ..
+#      make
+#      make my_coverage_target
+#
+
+include(CMakeParseArguments)
+
+option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE)
+
+# Check prereqs
+find_program( GCOV_PATH gcov )
+find_program( LCOV_PATH  NAMES lcov lcov.bat lcov.exe lcov.perl)
+find_program( FASTCOV_PATH NAMES fastcov fastcov.py )
+find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat )
+find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test)
+find_program( CPPFILT_PATH NAMES c++filt )
+
+if(NOT GCOV_PATH)
+    message(FATAL_ERROR "gcov not found! Aborting...")
+endif() # NOT GCOV_PATH
+
+# Check supported compiler (Clang, GNU and Flang)
+get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
+foreach(LANG ${LANGUAGES})
+  if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
+    if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3)
+      message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...")
+    endif()
+  elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
+         AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
+    message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
+  endif()
+endforeach()
+
+set(COVERAGE_COMPILER_FLAGS "-g --coverage"
+    CACHE INTERNAL "")
+if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)")
+    include(CheckCXXCompilerFlag)
+    check_cxx_compiler_flag(-fprofile-abs-path HAVE_fprofile_abs_path)
+    if(HAVE_fprofile_abs_path)
+        set(COVERAGE_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path")
+    endif()
+endif()
+
+set(CMAKE_Fortran_FLAGS_COVERAGE
+    ${COVERAGE_COMPILER_FLAGS}
+    CACHE STRING "Flags used by the Fortran compiler during coverage builds."
+    FORCE )
+set(CMAKE_CXX_FLAGS_COVERAGE
+    ${COVERAGE_COMPILER_FLAGS}
+    CACHE STRING "Flags used by the C++ compiler during coverage builds."
+    FORCE )
+set(CMAKE_C_FLAGS_COVERAGE
+    ${COVERAGE_COMPILER_FLAGS}
+    CACHE STRING "Flags used by the C compiler during coverage builds."
+    FORCE )
+set(CMAKE_EXE_LINKER_FLAGS_COVERAGE
+    ""
+    CACHE STRING "Flags used for linking binaries during coverage builds."
+    FORCE )
+set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE
+    ""
+    CACHE STRING "Flags used by the shared libraries linker during coverage builds."
+    FORCE )
+mark_as_advanced(
+    CMAKE_Fortran_FLAGS_COVERAGE
+    CMAKE_CXX_FLAGS_COVERAGE
+    CMAKE_C_FLAGS_COVERAGE
+    CMAKE_EXE_LINKER_FLAGS_COVERAGE
+    CMAKE_SHARED_LINKER_FLAGS_COVERAGE )
+
+get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG))
+    message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading")
+endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)
+
+if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
+    link_libraries(gcov)
+endif()
+
+# Defines a target for running and collection code coverage information
+# Builds dependencies, runs the given executable and outputs reports.
+# NOTE! The executable should always have a ZERO as exit code otherwise
+# the coverage generation will not complete.
+#
+# setup_target_for_coverage_lcov(
+#     NAME testrunner_coverage                    # New target name
+#     EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
+#     DEPENDENCIES testrunner                     # Dependencies to build first
+#     BASE_DIRECTORY "../"                        # Base directory for report
+#                                                 #  (defaults to PROJECT_SOURCE_DIR)
+#     EXCLUDE "src/dir1/*" "src/dir2/*"           # Patterns to exclude (can be relative
+#                                                 #  to BASE_DIRECTORY, with CMake 3.4+)
+#     NO_DEMANGLE                                 # Don't demangle C++ symbols
+#                                                 #  even if c++filt is found
+# )
+function(setup_target_for_coverage_lcov)
+
+    set(options NO_DEMANGLE SONARQUBE)
+    set(oneValueArgs BASE_DIRECTORY NAME)
+    set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS)
+    cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(NOT LCOV_PATH)
+        message(FATAL_ERROR "lcov not found! Aborting...")
+    endif() # NOT LCOV_PATH
+
+    if(NOT GENHTML_PATH)
+        message(FATAL_ERROR "genhtml not found! Aborting...")
+    endif() # NOT GENHTML_PATH
+
+    # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
+    if(DEFINED Coverage_BASE_DIRECTORY)
+        get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
+    else()
+        set(BASEDIR ${PROJECT_SOURCE_DIR})
+    endif()
+
+    # Collect excludes (CMake 3.4+: Also compute absolute paths)
+    set(LCOV_EXCLUDES "")
+    foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES})
+        if(CMAKE_VERSION VERSION_GREATER 3.4)
+            get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
+        endif()
+        list(APPEND LCOV_EXCLUDES "${EXCLUDE}")
+    endforeach()
+    list(REMOVE_DUPLICATES LCOV_EXCLUDES)
+
+    # Conditional arguments
+    if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE})
+      set(GENHTML_EXTRA_ARGS "--demangle-cpp")
+    endif()
+
+    # Setting up commands which will be run to generate coverage data.
+    # Cleanup lcov
+    set(LCOV_CLEAN_CMD
+        ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory .
+        -b ${BASEDIR} --zerocounters
+    )
+    # Create baseline to make sure untouched files show up in the report
+    set(LCOV_BASELINE_CMD
+        ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b
+        ${BASEDIR} -o ${Coverage_NAME}.base
+    )
+    # Run tests
+    set(LCOV_EXEC_TESTS_CMD
+        ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
+    )
+    # Capturing lcov counters and generating report
+    set(LCOV_CAPTURE_CMD
+        ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . -b
+        ${BASEDIR} --capture --output-file ${Coverage_NAME}.capture
+    )
+    # add baseline counters
+    set(LCOV_BASELINE_COUNT_CMD
+        ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base
+        -a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total
+    )
+    # filter collected data to final coverage report
+    set(LCOV_FILTER_CMD
+        ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove
+        ${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info
+    )
+    # Generate HTML output
+    set(LCOV_GEN_HTML_CMD
+        ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o
+        ${Coverage_NAME} ${Coverage_NAME}.info
+    )
+    if(${Coverage_SONARQUBE})
+        # Generate SonarQube output
+        set(GCOVR_XML_CMD
+            ${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
+            ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
+        )
+        set(GCOVR_XML_CMD_COMMAND
+            COMMAND ${GCOVR_XML_CMD}
+        )
+        set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml)
+        set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.")
+    endif()
+
+
+    if(CODE_COVERAGE_VERBOSE)
+        message(STATUS "Executed command report")
+        message(STATUS "Command to clean up lcov: ")
+        string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}")
+        message(STATUS "${LCOV_CLEAN_CMD_SPACED}")
+
+        message(STATUS "Command to create baseline: ")
+        string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}")
+        message(STATUS "${LCOV_BASELINE_CMD_SPACED}")
+
+        message(STATUS "Command to run the tests: ")
+        string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}")
+        message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}")
+
+        message(STATUS "Command to capture counters and generate report: ")
+        string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}")
+        message(STATUS "${LCOV_CAPTURE_CMD_SPACED}")
+
+        message(STATUS "Command to add baseline counters: ")
+        string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}")
+        message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}")
+
+        message(STATUS "Command to filter collected data: ")
+        string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}")
+        message(STATUS "${LCOV_FILTER_CMD_SPACED}")
+
+        message(STATUS "Command to generate lcov HTML output: ")
+        string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}")
+        message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}")
+
+        if(${Coverage_SONARQUBE})
+            message(STATUS "Command to generate SonarQube XML output: ")
+            string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}")
+            message(STATUS "${GCOVR_XML_CMD_SPACED}")
+        endif()
+    endif()
+
+    # Setup target
+    add_custom_target(${Coverage_NAME}
+        COMMAND ${LCOV_CLEAN_CMD}
+        COMMAND ${LCOV_BASELINE_CMD}
+        COMMAND ${LCOV_EXEC_TESTS_CMD}
+        COMMAND ${LCOV_CAPTURE_CMD}
+        COMMAND ${LCOV_BASELINE_COUNT_CMD}
+        COMMAND ${LCOV_FILTER_CMD}
+        COMMAND ${LCOV_GEN_HTML_CMD}
+        ${GCOVR_XML_CMD_COMMAND}
+
+        # Set output files as GENERATED (will be removed on 'make clean')
+        BYPRODUCTS
+            ${Coverage_NAME}.base
+            ${Coverage_NAME}.capture
+            ${Coverage_NAME}.total
+            ${Coverage_NAME}.info
+            ${GCOVR_XML_CMD_BYPRODUCTS}
+            ${Coverage_NAME}/index.html
+        WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+        DEPENDS ${Coverage_DEPENDENCIES}
+        VERBATIM # Protect arguments to commands
+        COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report."
+    )
+
+    # Show where to find the lcov info report
+    add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
+        COMMAND ;
+        COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info."
+        ${GCOVR_XML_CMD_COMMENT}
+    )
+
+    # Show info where to find the report
+    add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
+        COMMAND ;
+        COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report."
+    )
+
+endfunction() # setup_target_for_coverage_lcov
+
+# Defines a target for running and collecting code coverage information
+# Builds dependencies, runs the given executable and outputs reports.
+# NOTE! The executable should always have a ZERO as exit code otherwise
+# the coverage generation will not complete.
+#
+# setup_target_for_coverage_gcovr_xml(
+#     NAME ctest_coverage                    # New target name
+#     EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
+#     DEPENDENCIES executable_target         # Dependencies to build first
+#     BASE_DIRECTORY "../"                   # Base directory for report
+#                                            #  (defaults to PROJECT_SOURCE_DIR)
+#     EXCLUDE "src/dir1/*" "src/dir2/*"      # Patterns to exclude (can be relative
+#                                            #  to BASE_DIRECTORY, with CMake 3.4+)
+# )
+# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the
+# GCOVR command.
+function(setup_target_for_coverage_gcovr_xml)
+
+    set(options NONE)
+    set(oneValueArgs BASE_DIRECTORY NAME)
+    set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES)
+    cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(NOT GCOVR_PATH)
+        message(FATAL_ERROR "gcovr not found! Aborting...")
+    endif() # NOT GCOVR_PATH
+
+    # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
+    if(DEFINED Coverage_BASE_DIRECTORY)
+        get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
+    else()
+        set(BASEDIR ${PROJECT_SOURCE_DIR})
+    endif()
+
+    # Collect excludes (CMake 3.4+: Also compute absolute paths)
+    set(GCOVR_EXCLUDES "")
+    foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES})
+        if(CMAKE_VERSION VERSION_GREATER 3.4)
+            get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
+        endif()
+        list(APPEND GCOVR_EXCLUDES "${EXCLUDE}")
+    endforeach()
+    list(REMOVE_DUPLICATES GCOVR_EXCLUDES)
+
+    # Combine excludes to several -e arguments
+    set(GCOVR_EXCLUDE_ARGS "")
+    foreach(EXCLUDE ${GCOVR_EXCLUDES})
+        list(APPEND GCOVR_EXCLUDE_ARGS "-e")
+        list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}")
+    endforeach()
+
+    # Set up commands which will be run to generate coverage data
+    # Run tests
+    set(GCOVR_XML_EXEC_TESTS_CMD
+        ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
+    )
+    # Running gcovr
+    set(GCOVR_XML_CMD
+        ${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
+        ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
+    )
+
+    if(CODE_COVERAGE_VERBOSE)
+        message(STATUS "Executed command report")
+
+        message(STATUS "Command to run tests: ")
+        string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}")
+        message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}")
+
+        message(STATUS "Command to generate gcovr XML coverage data: ")
+        string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}")
+        message(STATUS "${GCOVR_XML_CMD_SPACED}")
+    endif()
+
+    add_custom_target(${Coverage_NAME}
+        COMMAND ${GCOVR_XML_EXEC_TESTS_CMD}
+        COMMAND ${GCOVR_XML_CMD}
+
+        BYPRODUCTS ${Coverage_NAME}.xml
+        WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+        DEPENDS ${Coverage_DEPENDENCIES}
+        VERBATIM # Protect arguments to commands
+        COMMENT "Running gcovr to produce Cobertura code coverage report."
+    )
+
+    # Show info where to find the report
+    add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
+        COMMAND ;
+        COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml."
+    )
+endfunction() # setup_target_for_coverage_gcovr_xml
+
+# Defines a target for running and collecting code coverage information
+# Builds dependencies, runs the given executable and outputs reports.
+# NOTE! The executable should always have a ZERO as exit code otherwise
+# the coverage generation will not complete.
+#
+# setup_target_for_coverage_gcovr_html(
+#     NAME ctest_coverage                    # New target name
+#     EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
+#     DEPENDENCIES executable_target         # Dependencies to build first
+#     BASE_DIRECTORY "../"                   # Base directory for report
+#                                            #  (defaults to PROJECT_SOURCE_DIR)
+#     EXCLUDE "src/dir1/*" "src/dir2/*"      # Patterns to exclude (can be relative
+#                                            #  to BASE_DIRECTORY, with CMake 3.4+)
+# )
+# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the
+# GCOVR command.
+function(setup_target_for_coverage_gcovr_html)
+
+    set(options NONE)
+    set(oneValueArgs BASE_DIRECTORY NAME)
+    set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES)
+    cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(NOT GCOVR_PATH)
+        message(FATAL_ERROR "gcovr not found! Aborting...")
+    endif() # NOT GCOVR_PATH
+
+    # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
+    if(DEFINED Coverage_BASE_DIRECTORY)
+        get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
+    else()
+        set(BASEDIR ${PROJECT_SOURCE_DIR})
+    endif()
+
+    # Collect excludes (CMake 3.4+: Also compute absolute paths)
+    set(GCOVR_EXCLUDES "")
+    foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES})
+        if(CMAKE_VERSION VERSION_GREATER 3.4)
+            get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR})
+        endif()
+        list(APPEND GCOVR_EXCLUDES "${EXCLUDE}")
+    endforeach()
+    list(REMOVE_DUPLICATES GCOVR_EXCLUDES)
+
+    # Combine excludes to several -e arguments
+    set(GCOVR_EXCLUDE_ARGS "")
+    foreach(EXCLUDE ${GCOVR_EXCLUDES})
+        list(APPEND GCOVR_EXCLUDE_ARGS "-e")
+        list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}")
+    endforeach()
+
+    # Set up commands which will be run to generate coverage data
+    # Run tests
+    set(GCOVR_HTML_EXEC_TESTS_CMD
+        ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}
+    )
+    # Create folder
+    set(GCOVR_HTML_FOLDER_CMD
+        ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME}
+    )
+    # Running gcovr
+    set(GCOVR_HTML_CMD
+        ${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS}
+        ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR}
+    )
+
+    if(CODE_COVERAGE_VERBOSE)
+        message(STATUS "Executed command report")
+
+        message(STATUS "Command to run tests: ")
+        string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}")
+        message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}")
+
+        message(STATUS "Command to create a folder: ")
+        string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}")
+        message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}")
+
+        message(STATUS "Command to generate gcovr HTML coverage data: ")
+        string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}")
+        message(STATUS "${GCOVR_HTML_CMD_SPACED}")
+    endif()
+
+    add_custom_target(${Coverage_NAME}
+        COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD}
+        COMMAND ${GCOVR_HTML_FOLDER_CMD}
+        COMMAND ${GCOVR_HTML_CMD}
+
+        BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html  # report directory
+        WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+        DEPENDS ${Coverage_DEPENDENCIES}
+        VERBATIM # Protect arguments to commands
+        COMMENT "Running gcovr to produce HTML code coverage report."
+    )
+
+    # Show info where to find the report
+    add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
+        COMMAND ;
+        COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report."
+    )
+
+endfunction() # setup_target_for_coverage_gcovr_html
+
+# Defines a target for running and collecting code coverage information
+# Builds dependencies, runs the given executable and outputs reports.
+# NOTE! The executable should always have a ZERO as exit code otherwise
+# the coverage generation will not complete.
+#
+# setup_target_for_coverage_fastcov(
+#     NAME testrunner_coverage                    # New target name
+#     EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR
+#     DEPENDENCIES testrunner                     # Dependencies to build first
+#     BASE_DIRECTORY "../"                        # Base directory for report
+#                                                 #  (defaults to PROJECT_SOURCE_DIR)
+#     EXCLUDE "src/dir1/" "src/dir2/"             # Patterns to exclude.
+#     NO_DEMANGLE                                 # Don't demangle C++ symbols
+#                                                 #  even if c++filt is found
+#     SKIP_HTML                                   # Don't create html report
+#     POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json  # E.g. for stripping source dir from file paths
+# )
+function(setup_target_for_coverage_fastcov)
+
+    set(options NO_DEMANGLE SKIP_HTML)
+    set(oneValueArgs BASE_DIRECTORY NAME)
+    set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD)
+    cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    if(NOT FASTCOV_PATH)
+        message(FATAL_ERROR "fastcov not found! Aborting...")
+    endif()
+
+    if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH)
+        message(FATAL_ERROR "genhtml not found! Aborting...")
+    endif()
+
+    # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR
+    if(Coverage_BASE_DIRECTORY)
+        get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE)
+    else()
+        set(BASEDIR ${PROJECT_SOURCE_DIR})
+    endif()
+
+    # Collect excludes (Patterns, not paths, for fastcov)
+    set(FASTCOV_EXCLUDES "")
+    foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES})
+        list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}")
+    endforeach()
+    list(REMOVE_DUPLICATES FASTCOV_EXCLUDES)
+
+    # Conditional arguments
+    if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE})
+        set(GENHTML_EXTRA_ARGS "--demangle-cpp")
+    endif()
+
+    # Set up commands which will be run to generate coverage data
+    set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS})
+
+    set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH}
+        --search-directory ${BASEDIR}
+        --process-gcno
+        --output ${Coverage_NAME}.json
+        --exclude ${FASTCOV_EXCLUDES}
+    )
+
+    set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH}
+        -C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info
+    )
+
+    if(Coverage_SKIP_HTML)
+        set(FASTCOV_HTML_CMD ";")
+    else()
+        set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS}
+            -o ${Coverage_NAME} ${Coverage_NAME}.info
+        )
+    endif()
+
+    set(FASTCOV_POST_CMD ";")
+    if(Coverage_POST_CMD)
+        set(FASTCOV_POST_CMD ${Coverage_POST_CMD})
+    endif()
+
+    if(CODE_COVERAGE_VERBOSE)
+        message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):")
+
+        message("   Running tests:")
+        string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}")
+        message("     ${FASTCOV_EXEC_TESTS_CMD_SPACED}")
+
+        message("   Capturing fastcov counters and generating report:")
+        string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}")
+        message("     ${FASTCOV_CAPTURE_CMD_SPACED}")
+
+        message("   Converting fastcov .json to lcov .info:")
+        string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}")
+        message("     ${FASTCOV_CONVERT_CMD_SPACED}")
+
+        if(NOT Coverage_SKIP_HTML)
+            message("   Generating HTML report: ")
+            string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}")
+            message("     ${FASTCOV_HTML_CMD_SPACED}")
+        endif()
+        if(Coverage_POST_CMD)
+            message("   Running post command: ")
+            string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}")
+            message("     ${FASTCOV_POST_CMD_SPACED}")
+        endif()
+    endif()
+
+    # Setup target
+    add_custom_target(${Coverage_NAME}
+
+        # Cleanup fastcov
+        COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH}
+            --search-directory ${BASEDIR}
+            --zerocounters
+
+        COMMAND ${FASTCOV_EXEC_TESTS_CMD}
+        COMMAND ${FASTCOV_CAPTURE_CMD}
+        COMMAND ${FASTCOV_CONVERT_CMD}
+        COMMAND ${FASTCOV_HTML_CMD}
+        COMMAND ${FASTCOV_POST_CMD}
+
+        # Set output files as GENERATED (will be removed on 'make clean')
+        BYPRODUCTS
+             ${Coverage_NAME}.info
+             ${Coverage_NAME}.json
+             ${Coverage_NAME}/index.html  # report directory
+
+        WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+        DEPENDS ${Coverage_DEPENDENCIES}
+        VERBATIM # Protect arguments to commands
+        COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report."
+    )
+
+    set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info and ${Coverage_NAME}.json.")
+    if(NOT Coverage_SKIP_HTML)
+        string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.")
+    endif()
+    # Show where to find the fastcov info report
+    add_custom_command(TARGET ${Coverage_NAME} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG}
+    )
+
+endfunction() # setup_target_for_coverage_fastcov
+
+function(append_coverage_compiler_flags)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE)
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE)
+    message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}")
+endfunction() # append_coverage_compiler_flags
+
+# Setup coverage for specific library
+function(append_coverage_compiler_flags_to_target name)
+    separate_arguments(_flag_list NATIVE_COMMAND "${COVERAGE_COMPILER_FLAGS}")
+    target_compile_options(${name} PRIVATE ${_flag_list})
+    if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
+        target_link_libraries(${name} PRIVATE gcov)
+    endif()
+endfunction()
diff --git a/cmake/PybindModuleCreation.cmake b/cmake/PybindModuleCreation.cmake
index 18f4abc38e2537c3f4d949f08772a57b90758cb0..87e70fc38c9e4ec4ddb44cbe5d7fb2a31c2e94d6 100644
--- a/cmake/PybindModuleCreation.cmake
+++ b/cmake/PybindModuleCreation.cmake
@@ -1,23 +1,21 @@
 function(generate_python_binding name target_to_bind) 
-    if (PYBIND)
-        add_definitions(-DPYBIND)
-        Include(FetchContent)
+    add_definitions(-DPYBIND)
+    Include(FetchContent)
 
-        FetchContent_Declare(
-        PyBind11
-        GIT_REPOSITORY https://github.com/pybind/pybind11.git
-        GIT_TAG        v2.10.4 # or a later release
-        )
+    FetchContent_Declare(
+    PyBind11
+    GIT_REPOSITORY https://github.com/pybind/pybind11.git
+    GIT_TAG        v2.10.4 # or a later release
+    )
 
-        # Use the New FindPython mode, recommanded. Requires CMake 3.15+
-        find_package(Python COMPONENTS Interpreter Development)
-        FetchContent_MakeAvailable(PyBind11)
+    # Use the New FindPython mode, recommended. Requires CMake 3.15+
+    find_package(Python COMPONENTS Interpreter Development)
+    FetchContent_MakeAvailable(PyBind11)
 
-        message(STATUS "Creating binding for module ${name}")
-        file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
+    message(STATUS "Creating binding for module ${name}")
+    file(GLOB_RECURSE pybind_src_files "python_binding/*.cpp")
 
-        pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO EXTRA recquired for pip install
-        target_include_directories(${name} PUBLIC "python_binding")
-        target_link_libraries(${name} PUBLIC ${target_to_bind})        
-    endif()
+    pybind11_add_module(${name} MODULE ${pybind_src_files} "NO_EXTRAS") # NO_EXTRAS required for pip install
+    target_include_directories(${name} PUBLIC "python_binding")
+    target_link_libraries(${name} PUBLIC ${target_to_bind})
 endfunction()
diff --git a/include/aidge/aidge_backend_cpu.hpp b/include/aidge/aidge_backend_cpu.hpp
deleted file mode 100644
index ce723a528fef7a1e62851a854b06feba34525f09..0000000000000000000000000000000000000000
--- a/include/aidge/aidge_backend_cpu.hpp
+++ /dev/null
@@ -1,27 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_IMPORTS_H__
-#define __AIDGE_CPU_IMPORTS_H__
-
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/operator/AddImpl.hpp"
-#include "aidge/operator/AvgPoolingImpl.hpp"
-#include "aidge/operator/BatchNormImpl.hpp"
-#include "aidge/operator/ConvDepthWiseImpl.hpp"
-#include "aidge/operator/ConvImpl.hpp"
-#include "aidge/operator/FCImpl.hpp"
-#include "aidge/operator/LeakyReLUImpl.hpp"
-#include "aidge/operator/ProducerImpl.hpp"
-#include "aidge/operator/ReLUImpl.hpp"
-#include "aidge/operator/SoftmaxImpl.hpp"
-
-#endif /* __AIDGE_CPU_IMPORTS_H__ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..95b2f7b8e2ff70c9b9224bea1137ad74e469ffb8
--- /dev/null
+++ b/include/aidge/backend/cpu.hpp
@@ -0,0 +1,27 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_IMPORTS_H_
+#define AIDGE_CPU_IMPORTS_H_
+
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/operator/AddImpl.hpp"
+#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/ProducerImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
+
+#endif /* AIDGE_CPU_IMPORTS_H_ */
\ No newline at end of file
diff --git a/include/aidge/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp
similarity index 91%
rename from include/aidge/data/TensorImpl.hpp
rename to include/aidge/backend/cpu/data/TensorImpl.hpp
index c3cc19547f49d4200af3244c67daba33b5d6618a..014939e106e5891c86b007f4bd3905c765ec8754 100644
--- a/include/aidge/data/TensorImpl.hpp
+++ b/include/aidge/backend/cpu/data/TensorImpl.hpp
@@ -1,75 +1,74 @@
-#ifndef __AIDGE_CPU_DATA_TENSORIMPL_H__
-#define __AIDGE_CPU_DATA_TENSORIMPL_H__
-
-#include "aidge/backend/TensorImpl.hpp"
-#include "aidge/data/Tensor.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-
-namespace Aidge {
-template <class T>
-class TensorImpl_cpu : public TensorImpl {
-   private:
-    const Tensor &mTensor;  // Impl needs to access Tensor information, but is not
-                            // supposed to change it!
-    std::vector<T> mData;
-
-   public:
-    static constexpr const char *Backend = "cpu";
-
-    TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
-
-    bool operator==(const TensorImpl &otherImpl) const override final {
-        std::size_t i = 0;
-        for (; i < mTensor.size() &&
-               mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i];
-             ++i) {
-        }
-        return i == mTensor.size();
-    }
-
-    static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) {
-        return std::make_unique<TensorImpl_cpu<T>>(tensor);
-    }
-
-    // native interface
-    const std::vector<T> &data() const { return mData; }
-
-    std::size_t scalarSize() const override { return sizeof(T); }
-
-    void copy(const void *src, NbElts_t length, std::size_t offset = 0) override {
-        std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
-                  static_cast<T *>(rawPtr())+offset);
-    }
-
-    void *rawPtr() override {
-        lazyInit(mData);
-        return mData.data();
-    };
-
-    virtual ~TensorImpl_cpu() = default;
-
-    void setRawPtr(void *ptr) override final {
-        T *newPtr = static_cast<T *>(ptr);
-        mData = std::vector<T>(newPtr, newPtr + mTensor.size());
-    };
-
-   private:
-    void lazyInit(std::vector<T> &data) {
-        assert(mTensor.dataType() == NativeType<T>::type);
-
-        if (data.size() != mTensor.size()) data.resize(mTensor.size());
-    }
-};
-
-namespace {
-static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
-        {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
-        {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
-static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
-        {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_DATA_TENSORIMPL_H__ */
+#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_
+#define AIDGE_CPU_DATA_TENSORIMPL_H_
+
+#include "aidge/backend/TensorImpl.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+template <class T>
+class TensorImpl_cpu : public TensorImpl {
+   private:
+    const Tensor &mTensor;  // Impl needs to access Tensor information, but is not
+                            // supposed to change it!
+    std::vector<T> mData;
+
+   public:
+    static constexpr const char *Backend = "cpu";
+
+    TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
+
+    bool operator==(const TensorImpl &otherImpl) const override final {
+        std::size_t i = 0;
+        for (; i < mTensor.size() &&
+               mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i];
+             ++i) {
+        }
+        return i == mTensor.size();
+    }
+
+    static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) {
+        return std::make_unique<TensorImpl_cpu<T>>(tensor);
+    }
+
+    // native interface
+    const std::vector<T> &data() const { return mData; }
+
+    std::size_t scalarSize() const override { return sizeof(T); }
+
+    void copy(const void *src, NbElts_t length, std::size_t offset = 0) override {
+        std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
+                  static_cast<T *>(rawPtr())+offset);
+    }
+
+    void *rawPtr() override {
+        lazyInit(mData);
+        return mData.data();
+    };
+
+    virtual ~TensorImpl_cpu() = default;
+
+    void setRawPtr(void *ptr) override final {
+        T *newPtr = static_cast<T *>(ptr);
+        mData = std::vector<T>(newPtr, newPtr + mTensor.size());
+    };
+
+   private:
+    void lazyInit(std::vector<T> &data) {
+        assert(mTensor.dataType() == NativeType<T>::type);
+
+        if (data.size() != mTensor.size()) data.resize(mTensor.size());
+    }
+};
+namespace {
+static Registrar<Tensor> registrarTensorImpl_cpu_Float64(
+        {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create);
+static Registrar<Tensor> registrarTensorImpl_cpu_Float32(
+        {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create);
+static Registrar<Tensor> registrarTensorImpl_cpu_Int32(
+        {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */
diff --git a/include/aidge/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp
similarity index 88%
rename from include/aidge/operator/AddImpl.hpp
rename to include/aidge/backend/cpu/operator/AddImpl.hpp
index 8bd954c0d1dba40fe666e5aad7be47a65033e607..6e1cd03a3af81ee85f4f9e0e212af7c02089734e 100644
--- a/include/aidge/operator/AddImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_ADDIMPL_H__
-#define __AIDGE_CPU_OPERATOR_ADDIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_H_
+#define AIDGE_CPU_OPERATOR_ADDIMPL_H_
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/Add.hpp"
@@ -79,9 +79,10 @@ class AddImpl_cpu : public OperatorImpl {
         return 0;
     }
 
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final {
+    NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final {
         // Requires the whole tensors, regardless of available data on inputs
         assert(outputIdx == 0 && "operator has only one output");
+        (void) outputIdx;
 
         const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
         return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
@@ -96,6 +97,7 @@ class AddImpl_cpu : public OperatorImpl {
         assert(outputIdx < mNbProducedData.size());
         return mNbProducedData[outputIdx];
     }
+    void updateConsummerProducer() override final;
 
     void forward() {
         // nothing
@@ -123,12 +125,13 @@ class AddImpl_cpu<1> : public OperatorImpl {
 
     NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
 
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx,
-                               __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/,
+                               const std::vector<DimSize_t> &/*inputsSize*/) const override final;
 
     NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
 
     NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -154,12 +157,13 @@ class AddImpl_cpu<2> : public OperatorImpl {
 
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
 
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx,
-                               __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/,
+                               const std::vector<DimSize_t>& /*inputsSize*/) const override final;
 
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
 
     NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -185,11 +189,12 @@ class AddImpl_cpu<3> : public OperatorImpl {
 
     NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
 
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
 
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
 
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -203,4 +208,4 @@ static Registrar<Add_Op<3>> registrarAddImpl3I_cpu("cpu", Aidge::AddImpl_cpu<3>:
 }  // namespace
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_ADDIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_H_ */
diff --git a/include/aidge/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
similarity index 92%
rename from include/aidge/operator/AddImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
index f968f94b5b5f5f7708a9f753a7d0a02e6274cb98..490598599aedf24b26865ce6a1ddb3fe32044b1b 100644
--- a/include/aidge/operator/AddImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
@@ -1,87 +1,87 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/AddImpl.hpp"
-
-namespace Aidge {
-
-template <class I1, class O>
-void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) {
-    // FIXME: missing Add parameters as arguments
-    const I1* input1 = static_cast<const I1*>(input1_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
-        output[oIndex] = input1[oIndex];
-    }
-}
-
-template <class I1, class I2, class O>
-void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
-                                      void* output_) {
-    // FIXME: missing Add parameters as arguments
-    const I1* input1 = static_cast<const I1*>(input1_);
-    const I2* input2 = static_cast<const I2*>(input2_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
-        output[oIndex] = input1[oIndex] + input2[oIndex];
-    }
-}
-
-template <class I1, class I2, class I3, class O>
-void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
-                                      const void* input3_, void* output_) {
-    // FIXME: missing Add parameters as arguments
-    const I1* input1 = static_cast<const I1*>(input1_);
-    const I2* input2 = static_cast<const I2*>(input2_);
-    const I3* input3 = static_cast<const I3*>(input3_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
-        output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex];
-    }
-}
-
-namespace {
-static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>);
-static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>);
-static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>);
-
-static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>);
-static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>);
-static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>);
-
-static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>);
-static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>);
-static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/AddImpl.hpp"
+
+namespace Aidge {
+
+template <class I1, class O>
+void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) {
+    // FIXME: missing Add parameters as arguments
+    const I1* input1 = static_cast<const I1*>(input1_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
+        output[oIndex] = input1[oIndex];
+    }
+}
+
+template <class I1, class I2, class O>
+void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
+                                      void* output_) {
+    // FIXME: missing Add parameters as arguments
+    const I1* input1 = static_cast<const I1*>(input1_);
+    const I2* input2 = static_cast<const I2*>(input2_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
+        output[oIndex] = input1[oIndex] + input2[oIndex];
+    }
+}
+
+template <class I1, class I2, class I3, class O>
+void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_,
+                                      const void* input3_, void* output_) {
+    // FIXME: missing Add parameters as arguments
+    const I1* input1 = static_cast<const I1*>(input1_);
+    const I2* input2 = static_cast<const I2*>(input2_);
+    const I3* input3 = static_cast<const I3*>(input3_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) {
+        output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex];
+    }
+}
+
+namespace {
+static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>);
+static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>);
+static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>);
+
+static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>);
+static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>);
+static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>);
+
+static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>);
+static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>);
+static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
similarity index 88%
rename from include/aidge/operator/AvgPoolingImpl.hpp
rename to include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
index 5cde8bbd7b482a70b234f988cb3f54178a2c50ee..8373cb84a550efd8741a2dbc04c1e94ad37fe611 100644
--- a/include/aidge/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H__
-#define __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_
+#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_
 
 #include <array>
 #include <memory>
@@ -51,9 +51,10 @@ class AvgPoolingImpl2D_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -66,4 +67,4 @@ static Registrar<AvgPooling_Op<2>> registrarAvgPoolingImpl2D_cpu("cpu", Aidge::A
 }  // namespace
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_H_ */
diff --git a/include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
similarity index 89%
rename from include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
index cf6cd0e6ec016239bb357510766ac199de418377..776e020f1a20056db345c8e845fd73bb31b4138b 100644
--- a/include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
@@ -1,114 +1,114 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/AvgPoolingImpl.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/data/Data.hpp"
-#include <array>
-#include <tuple>
-#include <cmath>
-
-namespace Aidge {
-/**
- * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
- * @tparam I Input data type.
- * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
- * @param dims Array of input dimensions.
- * @param input_ const input Tensor.
- * @param output_ Output Tensor.
- */
-template <class I, class O>
-void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters &params,
-                                             const std::array<DimSize_t, 4> &dims,
-                                             const void *input_,
-                                             void *output_) {
-    // FIXME: missing convolution parameters as arguments
-    const I *input = static_cast<const I *>(input_);
-    O *output = static_cast<O *>(output_);
-
-    
-    // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0])));
-    // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input  (batch, ch, Xin, Yin)
-    // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation parameter into account
-    using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
-            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]);
-                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (dims[2] + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx);
-                for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]);
-                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (dims[3] + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify);
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const std::size_t ix = ox * std::get<0>(params)[0];
-                    const std::size_t iy = oy * std::get<0>(params)[1];
-
-                    if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] += static_cast<O>(
-                                               input[iIndex + (ix+0)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+0)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+0)*dims[3] + (iy+2)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+1)*dims[3] + (iy+2)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+0)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+1)] +
-                                               input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9);
-                    } else {
-                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                            for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
-                            }
-                        }
-                        // padding not used
-                        output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin);
-                    }
-                }
-            }
-        }
-    }
-}
-
-namespace {
-static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32(
-        std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}),
-        Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>);
-static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32},
-        Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>);
-static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64},
-        Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/data/Data.hpp"
+#include <array>
+#include <tuple>
+#include <cmath>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param params tuple of Parameters from the Operator
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class O>
+void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters &params,
+                                             const std::array<DimSize_t, 4> &dims,
+                                             const void *input_,
+                                             void *output_) {
+    // FIXME: missing convolution parameters as arguments
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) /
+                                static_cast<float>(std::get<0>(params)[0])));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) /
+                                static_cast<float>(std::get<0>(params)[1])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input  (batch, ch, Xin, Yin)
+    // weight (outCh, ch, kernelX, kernelY)
+    // does not take Dilation parameter into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx);
+                for (std::size_t oy = 0; oy < oySize; ++oy) {
+                    const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]);
+                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? std::get<1>(params)[1] : dims[3] + dify);
+                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const std::size_t ix = ox * std::get<0>(params)[0];
+                    const std::size_t iy = oy * std::get<0>(params)[1];
+
+                    if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
+                        output[oIndexFull] += static_cast<O>(
+                                               input[iIndex + (ix+0)*dims[3] + (iy+0)] +
+                                               input[iIndex + (ix+0)*dims[3] + (iy+1)] +
+                                               input[iIndex + (ix+0)*dims[3] + (iy+2)] +
+                                               input[iIndex + (ix+1)*dims[3] + (iy+0)] +
+                                               input[iIndex + (ix+1)*dims[3] + (iy+1)] +
+                                               input[iIndex + (ix+1)*dims[3] + (iy+2)] +
+                                               input[iIndex + (ix+2)*dims[3] + (iy+0)] +
+                                               input[iIndex + (ix+2)*dims[3] + (iy+1)] +
+                                               input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9);
+                    } else {
+                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                            for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                                output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+                            }
+                        }
+                        // padding not used
+                        output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin);
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32(
+        std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}),
+        Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>);
+static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>);
+static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
similarity index 90%
rename from include/aidge/operator/BatchNormImpl.hpp
rename to include/aidge/backend/cpu/operator/BatchNormImpl.hpp
index 37d644f00f4a53b0f0b5c64928ec5c77e719ceb5..d9f25b4a8e38510f82fc5afe9ed4b656197a47d5 100644
--- a/include/aidge/operator/BatchNormImpl.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H__
-#define __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_
+#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_
 
 #include <array>
 #include <memory>
@@ -66,9 +66,10 @@ class BatchNormImpl2D_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &inputsSize) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -81,4 +82,4 @@ static Registrar<BatchNorm_Op<2>> registrarBatchNormImpl2D_cpu("cpu", Aidge::Bat
 }  // namespace
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_H_ */
diff --git a/include/aidge/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
similarity index 92%
rename from include/aidge/operator/BatchNormImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
index 77a8f0aa12c3b5c450dfd765626acbe7e6dfe995..eedb80bde60d65b53bac70cc33ca83eb4f0121e7 100644
--- a/include/aidge/operator/BatchNormImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
@@ -1,109 +1,109 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/BatchNormImpl.hpp"
-#include "aidge/utils/Types.h"
-#include <array>
-#include <cmath>
-#include <algorithm>
-
-namespace Aidge {
-/**
- * @brief Forward kernel for 2D BatchNormolution on CPU backend.
- * @tparam I Input data type.
- * @tparam W Weight data type.
- * @tparam B Bias data type.
- * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
- * @param dims Array of input dimensions.
- * @param input_ const input Tensor.
- * @param scale_ const scale Tensor.
- * @param shift_ const shift Tensor.
- * @param batchMean_ const mean Tensor.
- * @param batchVar_ const variance Tensor.
- * @param output_ Output Tensor.
- */
-template <class I, class P, class O>
-void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
-    // FIXME: missing convolution parameters as arguments
-    const I *input = static_cast<const I *>(input_);
-    const P *scale = static_cast<const P *>(scale_);
-    const P *shift = static_cast<const P *>(shift_);
-    P *batchMean = static_cast<P *>(batchMean_);
-    P *batchVar = static_cast<P *>(batchVar_);
-    O *output = static_cast<O *>(output_);
-
-    const DimSize_t nbBatch = dims[0];
-    const DimSize_t nbChannels = dims[1];
-    const DimSize_t featureMapSize = dims[2]*dims[3];
-
-
-    if ((freeze == true) || (std::get<1>(params) == 0.0f)) {
-        for (std::size_t batch = 0; batch < nbBatch; ++batch) {
-            for (std::size_t ch = 0; ch < nbChannels; ++ch) {
-                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
-                std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
-                const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params)));
-
-                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
-                    output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
-                }
-            }
-        }
-    } else {
-        const std::size_t nbDataPerChannel = nbBatch * featureMapSize;
-        for (std::size_t ch = 0; ch < nbChannels; ++ch) {
-            I sum = I(0);
-            I sumSquare = I(0);
-            for (std::size_t batch = 0; batch < nbBatch; ++batch) {
-                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
-                std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
-
-                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
-                    sum += input[ioIndex + feature];
-                    sumSquare += input[ioIndex + feature] * input[ioIndex + feature];
-                }
-            }
-            const I inputMean = sum / static_cast<I>(nbDataPerChannel);            
-            const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel)  - inputMean*inputMean;
-
-            batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params);
-            batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params);
-
-            const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params)));
-            for (std::size_t batch = 0; batch < nbBatch; ++batch) {
-                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
-                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
-                    output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var;
-                }
-            }
-        }
-    }
-}
-
-
-
-
-
-namespace {
-static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
+#include "aidge/utils/Types.h"
+#include <array>
+#include <cmath>
+#include <algorithm>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D BatchNormolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Parameters from the Operator
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param scale_ const scale Tensor.
+ * @param shift_ const shift Tensor.
+ * @param batchMean_ const mean Tensor.
+ * @param batchVar_ const variance Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class P, class O>
+void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+                                       const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
+    // FIXME: missing convolution parameters as arguments
+    const I *input = static_cast<const I *>(input_);
+    const P *scale = static_cast<const P *>(scale_);
+    const P *shift = static_cast<const P *>(shift_);
+    P *batchMean = static_cast<P *>(batchMean_);
+    P *batchVar = static_cast<P *>(batchVar_);
+    O *output = static_cast<O *>(output_);
+
+    const DimSize_t nbBatch = dims[0];
+    const DimSize_t nbChannels = dims[1];
+    const DimSize_t featureMapSize = dims[2]*dims[3];
+
+
+    if ((freeze == true) || (std::get<1>(params) == 0.0f)) {
+        for (std::size_t batch = 0; batch < nbBatch; ++batch) {
+            for (std::size_t ch = 0; ch < nbChannels; ++ch) {
+                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
+                std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
+                const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params)));
+
+                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
+                    output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
+                }
+            }
+        }
+    } else {
+        const std::size_t nbDataPerChannel = nbBatch * featureMapSize;
+        for (std::size_t ch = 0; ch < nbChannels; ++ch) {
+            I sum = I(0);
+            I sumSquare = I(0);
+            for (std::size_t batch = 0; batch < nbBatch; ++batch) {
+                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
+                std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
+
+                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
+                    sum += input[ioIndex + feature];
+                    sumSquare += input[ioIndex + feature] * input[ioIndex + feature];
+                }
+            }
+            const I inputMean = sum / static_cast<I>(nbDataPerChannel);
+            const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel)  - inputMean*inputMean;
+
+            batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params);
+            batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params);
+
+            const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params)));
+            for (std::size_t batch = 0; batch < nbBatch; ++batch) {
+                const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
+                for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
+                    output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var;
+                }
+            }
+        }
+    }
+}
+
+
+
+
+
+namespace {
+static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
similarity index 88%
rename from include/aidge/operator/ConvDepthWiseImpl.hpp
rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
index 64f5df8c4dc6994629b10b2021d6f35d745ed7b2..0d21c676d797b2fc4e95c4aea47674c8fca5eef4 100644
--- a/include/aidge/operator/ConvDepthWiseImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef  __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H__
-#define __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_
+#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_
 
 #include <array>
 #include <memory>
@@ -53,9 +53,10 @@ class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -68,4 +69,4 @@ static Registrar<ConvDepthWise_Op<2>> registrarConvDepthWiseImpl2D_cpu("cpu", Ai
 }  // namespace
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_H_ */
diff --git a/include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
similarity index 90%
rename from include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index 699a086457ee54f048182b9e318dbe1311b0c75c..ee2d82e00376c5a2cc5a075565e35eb8885c021e 100644
--- a/include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -1,118 +1,118 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H__
-#define  __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/ConvDepthWiseImpl.hpp"
-#include "aidge/utils/Types.h"
-#include <cmath>
-#include <array>
-#include <algorithm>
-
-namespace Aidge {
-/**
- * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
- * @tparam I Input data type.
- * @tparam W Weight data type.
- * @tparam B Bias data type.
- * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
- * @param dims Array of input dimensions.
- * @param input_ const input Tensor.
- * @param weights_ const weight Tensor.
- * @param biases_ const Biais Tensor.
- * @param output_ Output Tensor.
- */
-template <class I, class W, class B, class O>
-void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
-    // FIXME: missing convolution parameters as arguments
-    const I *input = static_cast<const I *>(input_);
-    const W *weights = static_cast<const W *>(weights_);
-    const B *biases = static_cast<const B *>(biases_);
-    O *output = static_cast<O *>(output_);
-
-    
-    // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0])));
-    // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input  (batch, ch, Xin, Yin)
-    // weight (outCh, ch, kernelX, kernelY)
-    // does not take Dilation parameter into account
-    using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) {
-            const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize;
-            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1];
-            for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]);
-                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (dims[2] + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx);
-                for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]);
-                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (dims[3] + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify);
-                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0];
-                    const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1];
-
-                    if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] +=  (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
-                    } else {
-                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                            for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * 
-                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-namespace {
-static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
-static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>);
-static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /*  __AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+#include "aidge/utils/Types.h"
+#include <cmath>
+#include <array>
+#include <algorithm>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D depthwise convolution (ConvDepthWise) on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Parameters from the Operator
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
+    // FIXME: missing convolution parameters as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) /
+                                static_cast<float>(std::get<0>(params)[0])));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) /
+                                static_cast<float>(std::get<0>(params)[1])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input  (batch, ch, Xin, Yin)
+    // weight (outCh, ch, kernelX, kernelY)
+    // does not take Dilation parameter into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) {
+            const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize;
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
+            const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx);
+                for (std::size_t oy = 0; oy < oySize; ++oy) {
+                    const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]);
+                    const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? std::get<3>(params)[1] : dims[3] + dify);
+                    const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                    const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0];
+                    const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1];
+
+                    if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
+                        output[oIndexFull] +=  (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                    } else {
+                        for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                            for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                                output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] *
+                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>);
+static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
similarity index 88%
rename from include/aidge/operator/ConvImpl.hpp
rename to include/aidge/backend/cpu/operator/ConvImpl.hpp
index 7bdeb0597d628c802270cd8af5a55c1362704483..1f3dffe43b966bc37887f267cc56760a899476f9 100644
--- a/include/aidge/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_CONVIMPL_H__
-#define __AIDGE_CPU_OPERATOR_CONVIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_H_
+#define AIDGE_CPU_OPERATOR_CONVIMPL_H_
 
 #include <array>
 #include <memory>
@@ -53,9 +53,10 @@ class ConvImpl2D_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -68,4 +69,4 @@ static Registrar<Conv_Op<2>> registrarConvImpl2D_cpu("cpu", Aidge::ConvImpl2D_cp
 }  // namespace
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_CONVIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
diff --git a/include/aidge/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
similarity index 92%
rename from include/aidge/operator/ConvImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index 8c2aedca4855c1272838604757e3b2727f11edb0..bc2f10099f42cba91be8d089b66dc176fdeb7c10 100644
--- a/include/aidge/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -1,162 +1,162 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/ConvImpl.hpp"
-#include "aidge/utils/Types.h"
-#include <cmath>
-#include <array>
-#include <algorithm>
-
-namespace Aidge {
-/**
- * @brief Forward kernel for 2D Convolution on CPU backend.
- * @tparam I Input data type.
- * @tparam W Weight data type.
- * @tparam B Bias data type.
- * @tparam O Output data type.
- * @param params tuple of Parameters from the Operator
- * @param dims Array of input dimensions.
- * @param input_ const input Tensor.
- * @param weights_ const weight Tensor.
- * @param biases_ const Biais Tensor.
- * @param output_ Output Tensor.
- */
-template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
-    // FIXME: missing convolution parameters as arguments
-    const I *input = static_cast<const I *>(input_);
-    const W *weights = static_cast<const W *>(weights_);
-    const B *biases = static_cast<const B *>(biases_);
-    O *output = static_cast<O *>(output_);
-/*
-    // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0]));
-    // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1]));
-
-    // TODO: kernel computation
-    // output (Xout, Yout, outCh, batch)
-    // input  (Xin, Yin, inCh, batch)
-    // weight (kernelX, kernelY, inCh, outCh)
-    // does not take Dilation parameter into account
-    for (std::size_t ox = 0; ox < oxSize; ++ox) {
-        for (std::size_t oy = 0; oy < oySize; ++oy) {
-            const std::size_t ix = ox * std::get<0>(params)[0];
-            const std::size_t iy = oy * std::get<0>(params)[1];
-
-            for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
-                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox));
-                B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-                for (std::size_t batch = 0; batch < dims[3]; ++batch) {
-                    output[oIndex + batch] = biasVal;
-                }
-                for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-                    for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
-                        for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
-                            const std::size_t wIndex =
-                                    outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx));
-                            std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
-                            for (std::size_t batch = 0; batch < dims[3]; ++batch) {
-                                output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-*/
-
-    
-    // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
-                                static_cast<float>(std::get<0>(params)[0])));
-    // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
-                                static_cast<float>(std::get<0>(params)[1])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input  (batch, inCh, Xin, Yin)
-    // weight (outCh, inCh, kernelX, kernelY)
-    // does not take Dilation parameter into account
-    using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
-            const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize;
-            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
-                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1];
-                for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]);
-                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    const std::size_t sxMax = (dims[2] + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx);
-                    for (std::size_t oy = 0; oy < oySize; ++oy) {
-                        const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]);
-                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                        const std::size_t syMax = (dims[3] + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify);
-                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                        const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0];
-                        const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1];
-
-                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                            output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
-                        } else {
-                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                    output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * 
-                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-namespace {
-static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
-static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>);
-static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/utils/Types.h"
+#include <cmath>
+#include <array>
+#include <algorithm>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Parameters from the Operator
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
+    // FIXME: missing convolution parameters as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+/*
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
+                                static_cast<float>(std::get<0>(params)[0]));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
+                                static_cast<float>(std::get<0>(params)[1]));
+
+    // TODO: kernel computation
+    // output (Xout, Yout, outCh, batch)
+    // input  (Xin, Yin, inCh, batch)
+    // weight (kernelX, kernelY, inCh, outCh)
+    // does not take Dilation parameter into account
+    for (std::size_t ox = 0; ox < oxSize; ++ox) {
+        for (std::size_t oy = 0; oy < oySize; ++oy) {
+            const std::size_t ix = ox * std::get<0>(params)[0];
+            const std::size_t iy = oy * std::get<0>(params)[1];
+
+            for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
+                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox));
+                B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+                for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                    output[oIndex + batch] = biasVal;
+                }
+                for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
+                    for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
+                        for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
+                            const std::size_t wIndex =
+                                    outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx));
+                            std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
+                            for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                                output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+*/
+
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
+                                static_cast<float>(std::get<0>(params)[0])));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
+                                static_cast<float>(std::get<0>(params)[1])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input  (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation parameter into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
+            const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize;
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
+            for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
+                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx);
+                    for (std::size_t oy = 0; oy < oySize; ++oy) {
+                        const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]);
+                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                        const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? std::get<4>(params)[1] : dims[3] + dify);
+                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                        const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0];
+                        const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1];
+
+                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
+                            output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                        } else {
+                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                                    output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] *
+                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>);
+static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp
similarity index 88%
rename from include/aidge/operator/FCImpl.hpp
rename to include/aidge/backend/cpu/operator/FCImpl.hpp
index 44f53a57f0cffe6717661c4d7f96647682b25571..c69cc0b08a58877108c78d6f12c29e9089c2f665 100644
--- a/include/aidge/operator/FCImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_FCIMPL_H__
-#define __AIDGE_CPU_OPERATOR_FCIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
+#define AIDGE_CPU_OPERATOR_FCIMPL_H_
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/FC.hpp"
@@ -47,10 +47,10 @@ class FCImpl_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-
+	void updateConsummerProducer() override final;
     void forward();
 
     void backward();
@@ -61,4 +61,4 @@ static Registrar<FC_Op> registrarFCImpl_cpu("cpu", Aidge::FCImpl_cpu::create);
 }
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_FCIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_FCIMPL_H_ */
diff --git a/include/aidge/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
similarity index 94%
rename from include/aidge/operator/FCImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
index a481e2d5f80ec9c722af7f00b688003c12a4e35a..d6acb7dfea3415a8d67384745e16ecdd8bf06324 100644
--- a/include/aidge/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -1,128 +1,128 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-#include <algorithm>
-
-#include "aidge/operator/FCImpl.hpp"
-
-namespace Aidge {
-// template <class I, class W, class B, class O>
-// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims,
-//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
-//     // FIXME: missing FC parameters as arguments
-//     const I* input = static_cast<const I*>(input_);
-//     const W* weights = static_cast<const W*>(weights_);
-//     const B* biases = static_cast<const B*>(biases_);
-//     O* output = static_cast<O*>(output_);
-
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
-//         std::size_t oIndex = outIdx * dims[3];
-//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
-//         for (std::size_t batch = 0; batch < dims[3]; ++batch) {
-//             output[oIndex + batch] = bias;
-//         }
-//     }
-
-//     for (std::size_t ix = 0; ix < dims[0]; ++ix) {
-//         for (std::size_t iy = 0; iy < dims[1]; ++iy) {
-//             for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-//                 const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
-//                 for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
-//                     const std::size_t oIndex = dims[3] * outCh;
-//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
-//                                           outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
-//                     for (std::size_t batch = 0; batch < dims[3]; ++batch) {
-//                         output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
-//                     }
-//                 }
-//             }
-//         }
-//     }
-// }
-
-// template <class I, class W, class B, class O>
-// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims,
-//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
-//     // FIXME: missing FC parameters as arguments
-//     const I* input = static_cast<const I*>(input_);
-//     const W* weights = static_cast<const W*>(weights_);
-//     const B* biases = static_cast<const B*>(biases_);
-//     O* output = static_cast<O*>(output_);
-
-//     // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
-
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
-//         std::size_t oIndex = outIdx * dims[0];
-//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
-//         for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-//             output[oIndex + batch] = bias;
-//         }
-//     }
-
-//     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-//         const std::size_t oIndex = dims[1] * batch;
-//         for (std::size_t i = 0; i < dims[1]; ++i) {
-//             for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
-//                 std::size_t wIndex = i * std::get<0>(params) + outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
-//                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
-//             }
-//         }
-//     }
-// }
-
-template <class I, class W, class B, class O>
-void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* weights_, const void* biases_, void* output_) {
-    // FIXME: missing FC parameters as arguments
-    const I* input = static_cast<const I*>(input_);
-    const W* weights = static_cast<const W*>(weights_);
-    const B* biases = static_cast<const B*>(biases_);
-    O* output = static_cast<O*>(output_);
-
-    if (std::get<1>(params)) {
-        std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
-    }
-    else {
-        for (std::size_t batch = 0; batch < batchSize; ++batch) {
-            std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
-        }
-    }
-
-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(params); ++out) {
-            output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
-                                                        input + (batch + 1)*oneInputSize,
-                                                        weights + out*oneInputSize,
-                                                        output[out + batch*std::get<0>(params)]);
-        }
-    }
-}
-
-
-namespace {
-static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>);
-static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>);
-static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>);
-}  // namespace
-
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+
+#include "aidge/backend/cpu/operator/FCImpl.hpp"
+
+namespace Aidge {
+// template <class I, class W, class B, class O>
+// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims,
+//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
+//     // FIXME: missing FC parameters as arguments
+//     const I* input = static_cast<const I*>(input_);
+//     const W* weights = static_cast<const W*>(weights_);
+//     const B* biases = static_cast<const B*>(biases_);
+//     O* output = static_cast<O*>(output_);
+
+//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
+//         std::size_t oIndex = outIdx * dims[3];
+//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
+//         for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+//             output[oIndex + batch] = bias;
+//         }
+//     }
+
+//     for (std::size_t ix = 0; ix < dims[0]; ++ix) {
+//         for (std::size_t iy = 0; iy < dims[1]; ++iy) {
+//             for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
+//                 const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
+//                 for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
+//                     const std::size_t oIndex = dims[3] * outCh;
+//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
+//                                           outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
+//                     for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+//                         output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
+//                     }
+//                 }
+//             }
+//         }
+//     }
+// }
+
+// template <class I, class W, class B, class O>
+// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims,
+//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
+//     // FIXME: missing FC parameters as arguments
+//     const I* input = static_cast<const I*>(input_);
+//     const W* weights = static_cast<const W*>(weights_);
+//     const B* biases = static_cast<const B*>(biases_);
+//     O* output = static_cast<O*>(output_);
+
+//     // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
+
+//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
+//         std::size_t oIndex = outIdx * dims[0];
+//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
+//         for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+//             output[oIndex + batch] = bias;
+//         }
+//     }
+
+//     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+//         const std::size_t oIndex = dims[1] * batch;
+//         for (std::size_t i = 0; i < dims[1]; ++i) {
+//             for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
+//                 std::size_t wIndex = i * std::get<0>(params) + outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
+//                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
+//             }
+//         }
+//     }
+// }
+
+template <class I, class W, class B, class O>
+void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
+                                   const void* input_, const void* weights_, const void* biases_, void* output_) {
+    // FIXME: missing FC parameters as arguments
+    const I* input = static_cast<const I*>(input_);
+    const W* weights = static_cast<const W*>(weights_);
+    const B* biases = static_cast<const B*>(biases_);
+    O* output = static_cast<O*>(output_);
+
+    if (std::get<1>(params)) {
+        std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
+    }
+    else {
+        for (std::size_t batch = 0; batch < batchSize; ++batch) {
+            std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
+        }
+    }
+
+    for (std::size_t batch = 0; batch < batchSize; ++batch) {
+        for (std::size_t out = 0; out < std::get<0>(params); ++out) {
+            output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
+                                                        input + (batch + 1)*oneInputSize,
+                                                        weights + out*oneInputSize,
+                                                        output[out + batch*std::get<0>(params)]);
+        }
+    }
+}
+
+
+namespace {
+static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>);
+static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>);
+static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>);
+}  // namespace
+
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
similarity index 86%
rename from include/aidge/operator/LeakyReLUImpl.hpp
rename to include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
index dd5bc4d9452374049ab3753a0331befa9b76d2e7..abe167bea16de01f861beb9701f747d39f265d9d 100644
--- a/include/aidge/operator/LeakyReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H__
-#define __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_
+#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/LeakyReLU.hpp"
@@ -46,10 +46,10 @@ class LeakyReLUImpl_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-
+    void updateConsummerProducer() override final;
     void forward();
 
     void backward();
@@ -60,4 +60,4 @@ static Registrar<LeakyReLU_Op> registrarLeakyReLUImpl_cpu("cpu", Aidge::LeakyReL
 }
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
diff --git a/include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
similarity index 85%
rename from include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
index e41a8f20ebd3c405f7adbc9ed4ded3080c9688ce..ff9a8ac6a8f968f244429b330401d794f16fac01 100644
--- a/include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
@@ -1,45 +1,45 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/LeakyReLUImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params,
-                                     std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-    I negativeSlope = static_cast<I>(std::get<0>(params));
-
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
-    }
-}
-
-namespace {
-static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>);
-static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>);
-static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params,
+                                     std::size_t inputLenght,
+                                     const void* input_,
+                                     void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+    I negativeSlope = static_cast<I>(std::get<0>(params));
+
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
+    }
+}
+
+namespace {
+static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>);
+static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>);
+static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/ProducerImpl.hpp b/include/aidge/backend/cpu/operator/ProducerImpl.hpp
similarity index 82%
rename from include/aidge/operator/ProducerImpl.hpp
rename to include/aidge/backend/cpu/operator/ProducerImpl.hpp
index d1376df3572c986f3c2369c72141680ab6291b0b..032172dbf0995fc62ce631aa5eba1cabf2374ad3 100644
--- a/include/aidge/operator/ProducerImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ProducerImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_PRODUCERIMPL_H__
-#define __AIDGE_CPU_OPERATOR_PRODUCERIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_
+#define AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_
 
 #include <memory>
 
@@ -34,9 +34,10 @@ class ProducerImpl_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t> &/*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
+    void updateConsummerProducer() override final;
 
     void forward();
 
@@ -48,4 +49,4 @@ static Registrar<Producer_Op> registrarProducer1DImpl_cpu("cpu", Aidge::Producer
 }  // namespace
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_PRODUCERIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_PRODUCERIMPL_H_ */
diff --git a/include/aidge/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
similarity index 86%
rename from include/aidge/operator/ReLUImpl.hpp
rename to include/aidge/backend/cpu/operator/ReLUImpl.hpp
index 905a76917a25f7db0e65748d28c67ef06f353170..537bdeeaf89b388a82e819330649c2ae3445c590 100644
--- a/include/aidge/operator/ReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_RELUIMPL_H__
-#define __AIDGE_CPU_OPERATOR_RELUIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_H_
+#define AIDGE_CPU_OPERATOR_RELUIMPL_H_
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/ReLU.hpp"
@@ -46,10 +46,10 @@ class ReLUImpl_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-
+    void updateConsummerProducer() override final;
     void forward();
 
     void backward();
@@ -60,4 +60,4 @@ static Registrar<ReLU_Op> registrarReLUImpl_cpu("cpu", Aidge::ReLUImpl_cpu::crea
 }
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_RELUIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */
diff --git a/include/aidge/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
similarity index 84%
rename from include/aidge/operator/ReLUImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
index 640455a43791c72fcb4832987e1a035239f746af..955099a6fe76352e6ea692b99a2a2d1561a30a6d 100644
--- a/include/aidge/operator/ReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
@@ -1,43 +1,43 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/operator/ReLUImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] > 0 ? input[i] : 0;
-    }
-}
-
-namespace {
-static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>);
-static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>);
-static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void ReLUImpl_cpu_forward_kernel(std::size_t inputLength,
+                                     const void* input_,
+                                     void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t i = 0; i < inputLength; ++i) {
+        output[i] = input[i] > 0 ? input[i] : 0;
+    }
+}
+
+namespace {
+static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>);
+static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>);
+static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
similarity index 86%
rename from include/aidge/operator/SoftmaxImpl.hpp
rename to include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
index c4d718bbadf09bfefbd4509ad0b99ffc144b4e61..08567ab98e55233f1f578e82cb39ac5681f0a839 100644
--- a/include/aidge/operator/SoftmaxImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H__
-#define __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H__
+#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
+#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
 
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/Softmax.hpp"
@@ -46,10 +46,10 @@ class SoftmaxImpl_cpu : public OperatorImpl {
    public:
     NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    NbElts_t getRequiredMemory(__attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t>& inputsSize) const override final;
+    NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final;
     NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final;
     NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-
+    void updateConsummerProducer() override final;
     void forward();
 
     void backward();
@@ -60,4 +60,4 @@ static Registrar<Softmax_Op> registrarSoftmaxImpl_cpu("cpu", Aidge::SoftmaxImpl_
 }
 }  // namespace Aidge
 
-#endif /* __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H__ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */
diff --git a/include/aidge/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp
similarity index 88%
rename from include/aidge/operator/SoftmaxImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp
index d1634e28a9b57cf2f2d486237947779b41e121bd..297a3a321667dfc8c5a2bb0e3fc3bebce8825950 100644
--- a/include/aidge/operator/SoftmaxImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp
@@ -1,64 +1,64 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H__
-#define __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H__
-
-#include "aidge/utils/Registrar.hpp"
-#include <cstddef>
-#include <cmath>
-#include "aidge/data/Data.hpp"
-#include "aidge/utils/Types.h"
-
-#include "aidge/operator/SoftmaxImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize,
-                                        const DimSize_t channelSize,
-                                        const DimSize_t featureSize,
-                                        const void* input_,
-                                        void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t feature = 0; feature < featureSize; ++feature) {
-            std::size_t ioIndex = batch*channelSize*featureSize + feature;
-
-            I sum(0.0);
-            for (std::size_t ch = 0; ch < channelSize; ++ch) {
-                output[ioIndex] = std::exp(input[ioIndex]);
-                sum += output[ioIndex];
-                ioIndex+=featureSize;
-            }
-
-            ioIndex = batch*channelSize*featureSize + feature;
-            for (std::size_t ch = 0; ch < channelSize; ++ch) {
-                output[ioIndex] /= sum;
-                ioIndex += featureSize;
-            }
-        }
-    }
-}
-
-namespace {
-static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>);
-static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>);
-static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* __AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H__ */
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include <cstddef>
+#include <cmath>
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize,
+                                        const DimSize_t channelSize,
+                                        const DimSize_t featureSize,
+                                        const void* input_,
+                                        void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t batch = 0; batch < batchSize; ++batch) {
+        for (std::size_t feature = 0; feature < featureSize; ++feature) {
+            std::size_t ioIndex = batch*channelSize*featureSize + feature;
+
+            I sum(0.0);
+            for (std::size_t ch = 0; ch < channelSize; ++ch) {
+                output[ioIndex] = std::exp(input[ioIndex]);
+                sum += output[ioIndex];
+                ioIndex+=featureSize;
+            }
+
+            ioIndex = batch*channelSize*featureSize + feature;
+            for (std::size_t ch = 0; ch < channelSize; ++ch) {
+                output[ioIndex] /= sum;
+                ioIndex += featureSize;
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>);
+static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>);
+static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */
diff --git a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp
index afe125154979849d61038d918a669679ac2a4b91..4a325bf51716ee6a920b3fcbde394b3e5b7c1d0f 100644
--- a/python_binding/pybind_cpu.cpp
+++ b/python_binding/pybind_cpu.cpp
@@ -1,6 +1,6 @@
 #include <pybind11/pybind11.h>
 // Need to call this header to register every impl
-#include "aidge/aidge_backend_cpu.hpp"
+#include "aidge/backend/cpu.hpp"
 
 namespace py = pybind11;
 
diff --git a/setup.ps1 b/setup.ps1
new file mode 100644
index 0000000000000000000000000000000000000000..748739834ff10802085d68c3360a87978cf7e8a7
--- /dev/null
+++ b/setup.ps1
@@ -0,0 +1,38 @@
+# Helper setup tool to automatically build aidge_backend_cpu on Windows.
+
+# Requirements
+################################################################################
+# aidge_core must be installed first in $env:AIDGE_INSTALL_PATH.
+
+# Enable or disable automatic installation of requirements
+# Run .\setup.ps1 -install_reqs:$false to disable it
+param ([bool]$install_reqs=$true)
+
+if (-not $env:AIDGE_INSTALL_PATH)
+{
+    Write-Error -Message "AIDGE_INSTALL_PATH environment variable must be set to aidge_core install path." -ErrorAction Stop
+}
+
+# 1. Setup environment
+################################################################################
+if ($install_reqs)
+{
+    # No additional dependencies
+}
+
+# 2. Compile & install aidge_core
+################################################################################
+$env:CMAKE_PREFIX_PATH=$env:AIDGE_INSTALL_PATH
+mkdir -Force build_cpp
+mkdir -Force $env:AIDGE_INSTALL_PATH
+Set-Location build_cpp
+cmake -DCMAKE_INSTALL_PREFIX:PATH=$env:AIDGE_INSTALL_PATH -DCMAKE_BUILD_TYPE=Debug ..
+if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError }
+cmake --build . -j2
+if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError }
+cmake --install . --config Debug
+if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError }
+# Optional: run the unit tests
+ctest --output-on-failure
+if(!$?) { $lastError = $LASTEXITCODE; Set-Location $PSScriptRoot; Exit $lastError }
+Set-Location $PSScriptRoot
diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp
index f4e08ba540b814a81be9cbea74ebc7644f6f843a..d3da42185237a59146af17199e34a00dbebd6d96 100644
--- a/src/operator/AddImpl.cpp
+++ b/src/operator/AddImpl.cpp
@@ -16,11 +16,11 @@
 #include <vector>
 
 #include "aidge/operator/Conv.hpp"
-
-#include "aidge/operator/AddImpl.hpp"
-#include "aidge/operator/AddImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
 
+#include "aidge/backend/cpu/operator/AddImpl.hpp"
+#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp"
+
 //////////////////////////////////
 // AddImpl_cpu<1>
 //////////////////////////////////
@@ -48,7 +48,13 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inpu
 Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::AddImpl_cpu<1>::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
 
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::AddImpl_cpu<1>::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -63,11 +69,6 @@ void Aidge::AddImpl_cpu<1>::forward() {
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
 
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::AddImpl_cpu<1>::backward() {
@@ -95,7 +96,7 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOInd
     return 0;
 }
 
-Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t>& inputsSize) const {
+Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
 
@@ -112,7 +113,13 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputI
 Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::AddImpl_cpu<2>::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
 
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::AddImpl_cpu<2>::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -130,11 +137,6 @@ void Aidge::AddImpl_cpu<2>::forward() {
         mOp.mInputs[1]->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
 
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]+= getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::AddImpl_cpu<2>::backward() {
@@ -180,7 +182,13 @@ Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t output
     assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
+void Aidge::AddImpl_cpu<3>::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
 
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::AddImpl_cpu<3>::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -201,13 +209,8 @@ void Aidge::AddImpl_cpu<3>::forward() {
         mOp.mInputs[2]->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
 
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::AddImpl_cpu<3>::backward() {
     printf("Not implemented yet.\n");
-}
\ No newline at end of file
+}
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 2e1e901d35f2ac8620f1c4be53413ce58e9260f9..6c434a5c38853a1dee66db5be95b6b1bfdde8162 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -9,16 +9,16 @@
  *
  ********************************************************************************/
 
-#include "aidge/operator/AvgPoolingImpl.hpp"
-
 #include <cassert>
 #include <numeric>
 #include <thread>
 #include <vector>
 
-#include "aidge/operator/AvgPoolingImpl_forward_kernels.hpp"
-#include "aidge/operator/AvgPooling.hpp"
 #include "aidge/utils/Types.h"
+#include "aidge/operator/AvgPooling.hpp"
+
+#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+#include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp"
 
 Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
     assert(mOp.getInput(inputIdx) && "requires valid input");
@@ -39,6 +39,7 @@ Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getRequiredMemory(const Aidge::IOIn
                                                            const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
@@ -53,7 +54,13 @@ Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t
     assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
-
+void Aidge::AvgPoolingImpl2D_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
+                                                                                           // amount for a forward pass
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
 void Aidge::AvgPoolingImpl2D_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -68,13 +75,6 @@ void Aidge::AvgPoolingImpl2D_cpu::forward() {
                mOp.getInput(0)->getImpl()->rawPtr(),
                mOp.getOutput(0)->getImpl()->rawPtr());
 
-
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::AvgPoolingImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index 5bb7d0a9d36e3f7918ce1a5aa6ae4e9dbb96e9a1..a0d4d032ded9ede1b2dba307aa967af330167d25 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -9,15 +9,15 @@
  *
  ********************************************************************************/
 
-#include "aidge/operator/BatchNormImpl.hpp"
-
 #include <cassert>
 #include <numeric> // std::accumulate
 #include <vector>
 
-#include "aidge/operator/BatchNormImpl_forward_kernels.hpp"
-#include "aidge/operator/BatchNorm.hpp"
 #include "aidge/utils/Types.h"
+#include "aidge/operator/BatchNorm.hpp"
+
+#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
+#include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp"
 
 Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
     assert(mOp.getInput(inputIdx) && "requires valid input");
@@ -34,10 +34,11 @@ Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*i
     return 0;
 }
 
-Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx,
-                                                              __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
+                                                              const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
@@ -52,7 +53,14 @@ Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t o
     assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
+void Aidge::BatchNormImpl2D_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
+                                                                   // amount for a forward pass
 
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
 void Aidge::BatchNormImpl2D_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -78,12 +86,7 @@ void Aidge::BatchNormImpl2D_cpu::forward() {
                mOp.getOutput(0)->getImpl()->rawPtr(),
                true);
 
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
 
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::BatchNormImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 178d602ffa73c00efef596dc2d31f51619b6600d..3e920cf68366b82bce8df29c8aea0c838e6a1364 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -9,17 +9,17 @@
  *
  ********************************************************************************/
 
-#include "aidge/operator/ConvDepthWiseImpl.hpp"
-
 #include <cassert>
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp"
-#include "aidge/operator/ConvDepthWise.hpp"
 #include "aidge/utils/Types.h"
+#include "aidge/operator/ConvDepthWise.hpp"
+
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
 
 Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
     assert(mOp.getInput(inputIdx) && "requires valid input");
@@ -36,10 +36,11 @@ Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx,
-                                                           __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
+                                                           const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
@@ -54,7 +55,14 @@ Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbProducedData(Aidge::IOIndex
     assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
+void Aidge::ConvDepthWiseImpl2D_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
+                                                                   // amount for a forward pass
 
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
 void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -72,14 +80,6 @@ void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
     kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims<4>(),
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
-
-
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::ConvDepthWiseImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 58c83da14aeca4ae9104dea525a4ad236243775f..b4ddf80929923a9c2c5998ac8614ebb0d3afe000 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -9,17 +9,17 @@
  *
  ********************************************************************************/
 
-#include "aidge/operator/ConvImpl.hpp"
-
 #include <cassert>
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/operator/ConvImpl_forward_kernels.hpp"
-#include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Types.h"
+#include "aidge/operator/Conv.hpp"
+
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
 
 Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
     assert(mOp.getInput(inputIdx) && "requires valid input");
@@ -36,10 +36,11 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputI
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx,
-                                                         __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
+                                                         const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
@@ -54,7 +55,14 @@ Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbProducedData(Aidge::IOIndex_t output
     assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
+void Aidge::ConvImpl2D_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
+                                                                   // amount for a forward pass
 
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
 void Aidge::ConvImpl2D_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -71,15 +79,7 @@ void Aidge::ConvImpl2D_cpu::forward() {
                mOp.getInput(0)->getImpl()->rawPtr(), mOp.getInput(1)->getImpl()->rawPtr(),
                mOp.getInput(2)->getImpl()->rawPtr(), mOp.getOutput(0)->getImpl()->rawPtr());
 
-    // FIXME: Dummy wait for some earlier scheduler tests
-    std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<ConvParam::OutChannels>()));
-
-    // Update producer-consumer data
-    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
-                                                                   // amount for a forward pass
 
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::ConvImpl2D_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index aa47296931302cff379f8e296a5ab527b0f2477b..086902be0ab1c2027a8c62c143bc27921e5e9e1b 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -16,10 +16,11 @@
 #include <vector>
 
 #include "aidge/operator/FC.hpp"
-#include "aidge/operator/FCImpl.hpp"
-#include "aidge/operator/FCImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
 
+#include "aidge/backend/cpu/operator/FCImpl.hpp"
+#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp"
+
 Aidge::NbElts_t Aidge::FCImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const
 {
     assert(mOp.getInput(inputIdx) && "requires valid input");
@@ -44,10 +45,11 @@ Aidge::NbElts_t
 }
 
 Aidge::NbElts_t Aidge::FCImpl_cpu::getRequiredMemory(
-    __attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const
+    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
 {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(
@@ -69,6 +71,16 @@ Aidge::NbElts_t Aidge::FCImpl_cpu::getNbProducedData(Aidge::IOIndex_t outputIdx)
     return mNbProducedData[static_cast<std::size_t>(outputIdx)];
 }
 
+void Aidge::FCImpl_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]
+            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
+                                              // amount for a forward pass
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
+
 void Aidge::FCImpl_cpu::forward()
 {
     // FIXME: uncomment the following code once memory handling will work
@@ -93,7 +105,7 @@ void Aidge::FCImpl_cpu::forward()
     //         mOp.mInputs[2]->getImpl()->rawPtr(),
     //         mOp.getOutput(0)->getImpl()->rawPtr());
     // }
-    // else 
+    // else
     kernelFunc(
         mOp.getParams(),
         mOp.getInput(0)->dims()[0],
@@ -102,19 +114,8 @@ void Aidge::FCImpl_cpu::forward()
         mOp.mInputs[1]->getImpl()->rawPtr(),
         mOp.mInputs[2]->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-    
-    
-
-    // FIXME: Dummy wait for some earlier scheduler tests
-    std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<FCParam::OutChannels>()));
 
-    // Update producer-consumer data
-    for (IOIndex_t inputIdx = 0; static_cast<std::size_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx)
-        mNbConsumedData[inputIdx]
-            += getNbRequiredData(static_cast<std::size_t>(inputIdx)); // each input is consumed by the minimum
-                                              // amount for a forward pass
 
-    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::FCImpl_cpu::backward()
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index 8c88e1f7a507cc426416d53dc895dce077ead415..f6a44d381081c7c7f1dcbbf02d91212168cc07aa 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -13,14 +13,13 @@
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
+#include <vector>
 
 #include "aidge/operator/LeakyReLU.hpp"
-
-#include "aidge/operator/LeakyReLUImpl.hpp"
-#include "aidge/operator/LeakyReLUImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
-#include <vector>
+
+#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp"
 
 // FIXME: replace whole Tensor with minimum needed data quantity
 Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
@@ -38,7 +37,7 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IO
     return 0;
 }
 
-Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     const auto& outputDims = mOp.getOutput(0)->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -51,7 +50,11 @@ Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*i
 Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::LeakyReLUImpl_cpu::updateConsummerProducer(){
+    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
 
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::LeakyReLUImpl_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -66,11 +69,6 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
         std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
-
-
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::LeakyReLUImpl_cpu::backward() {
diff --git a/src/operator/ProducerImpl.cpp b/src/operator/ProducerImpl.cpp
index 69e4ba281cf8ddf7fb273da6508a42f03b5074c8..664f3745414380fbaf5654ab035ba2ab957da87b 100644
--- a/src/operator/ProducerImpl.cpp
+++ b/src/operator/ProducerImpl.cpp
@@ -17,7 +17,7 @@
 #include "aidge/operator/Producer.hpp"
 #include "aidge/utils/Types.h"
 
-#include "aidge/operator/ProducerImpl.hpp"
+#include "aidge/backend/cpu/operator/ProducerImpl.hpp"
 
 
 std::size_t Aidge::ProducerImpl_cpu::getNbRequiredData(
@@ -42,10 +42,11 @@ std::size_t Aidge::ProducerImpl_cpu::getNbRequiredProtected(
 
 
 std::size_t Aidge::ProducerImpl_cpu::getRequiredMemory(
-    __attribute__((unused)) const IOIndex_t outputIdx, __attribute__((unused)) const std::vector<DimSize_t> &inputsSize) const
+    const IOIndex_t outputIdx, const std::vector<DimSize_t> &/*inputsSize*/) const
 {
     // Requires the whole tensors, regardless of available data on inputs
     assert(outputIdx == 0 && "operator has only one output");
+    (void) outputIdx;
 
     const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(
@@ -60,7 +61,8 @@ Aidge::DimSize_t Aidge::ProducerImpl_cpu::getNbProducedData(
 {
     return getRequiredMemory(0, {});
 }
-
+void Aidge::ProducerImpl_cpu::updateConsummerProducer(){
+}
 
 void Aidge::ProducerImpl_cpu::forward()
 {
@@ -70,4 +72,4 @@ void Aidge::ProducerImpl_cpu::forward()
 void Aidge::ProducerImpl_cpu::backward()
 {
     printf("Not implemented yet.\n");
-}
\ No newline at end of file
+}
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index 9cbf0efe07bc15791e3bfcc28d86e1463d908def..cea50bc1e72cfa8e60cdd0f1839c03bcd568e052 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -13,14 +13,13 @@
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
+#include <vector>
 
 #include "aidge/operator/ReLU.hpp"
-
-#include "aidge/operator/ReLUImpl.hpp"
-#include "aidge/operator/ReLUImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
-#include <vector>
+
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp"
 
 // FIXME: replace whole Tensor with minimum needed data quantity
 Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
@@ -38,7 +37,7 @@ Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex
     return 0;
 }
 
-Aidge::NbElts_t Aidge::ReLUImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::ReLUImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -51,7 +50,11 @@ Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inputI
 Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::ReLUImpl_cpu::updateConsummerProducer(){
+    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
 
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
 void Aidge::ReLUImpl_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
@@ -66,10 +69,6 @@ void Aidge::ReLUImpl_cpu::forward() {
         mOp.getInput(0)->getImpl()->rawPtr(),
         mOp.getOutput(0)->getImpl()->rawPtr());
 
-
-    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
-
-    mNbProducedData[0]+= getRequiredMemory(0, {});
 }
 
 void Aidge::ReLUImpl_cpu::backward() {
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index b0f978c9e928209d843aec6795f77e92ff57b263..03e8f9305617f6a7ced878470e3c94ba625f5b22 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -13,14 +13,13 @@
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
+#include <vector>
 
 #include "aidge/operator/Softmax.hpp"
-
-#include "aidge/operator/SoftmaxImpl.hpp"
-#include "aidge/operator/SoftmaxImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-#include <numeric>
-#include <vector>
+
+#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
+#include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp"
 
 // FIXME: replace whole Tensor with minimum needed data quantity
 Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
@@ -38,7 +37,7 @@ Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIn
     return 0;
 }
 
-Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getRequiredMemory(__attribute__((unused)) const Aidge::IOIndex_t outputIdx, __attribute__((unused)) const std::vector<Aidge::DimSize_t> &inputsSize) const {
+Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
     const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
     return std::accumulate(outputDims.begin(), outputDims.end(),
                         static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
@@ -51,7 +50,14 @@ Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbConsumedData(Aidge::IOIndex_t /*inp
 Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
     return mNbProducedData[0];
 }
+void Aidge::SoftmaxImpl_cpu::updateConsummerProducer(){
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
+                                                                   // amount for a forward pass
 
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
 void Aidge::SoftmaxImpl_cpu::forward() {
     // FIXME: uncomment the following code once memory handling will work
     assert(mOp.getInput(0) && "missing input #0");
diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt
index 62f99c1c368736fac64e85818034f3db8ce88a2c..671cdd5ac1262ab61b35a70a234236aff4a3cc15 100644
--- a/unit_tests/CMakeLists.txt
+++ b/unit_tests/CMakeLists.txt
@@ -1,4 +1,3 @@
-
 Include(FetchContent)
 
 FetchContent_Declare(
@@ -10,7 +9,7 @@ FetchContent_Declare(
 FetchContent_MakeAvailable(Catch2)
 
 file(GLOB_RECURSE src_files "*.cpp")
-message(STATUS "TEST FILES : ${src_files}")
+
 add_executable(tests${module_name} ${src_files})
 
 target_link_libraries(tests${module_name} PUBLIC ${module_name})
diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/Test_Scheduler.cpp
index 055f4efef8985bc5c0def2f5d397e3e5f3ce96d8..78ab8d5b149e8f702558658fef0442f225de3813 100644
--- a/unit_tests/Test_Scheduler.cpp
+++ b/unit_tests/Test_Scheduler.cpp
@@ -18,7 +18,8 @@
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
 #include "aidge/scheduler/Scheduler.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
+
+#include "aidge/backend/cpu.hpp"
 
 using namespace Aidge;
 
diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/Test_TensorImpl.cpp
index d5aa94d0ea2053e8e977de86cab4d3e87337a8fb..f55e25f55359cbcbcb9a53e916b345d1fb5a6b22 100644
--- a/unit_tests/Test_TensorImpl.cpp
+++ b/unit_tests/Test_TensorImpl.cpp
@@ -14,7 +14,7 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
+#include "aidge/backend/cpu/data/TensorImpl.hpp"
 
 using namespace Aidge;
 
@@ -57,4 +57,30 @@ TEST_CASE("Tensor creation") {
     }
   }
 }
+TEST_CASE("Tensor fill") {
+  SECTION("Instantiate batches independently") {
+    // initialization with 0s
+    std::shared_ptr<Tensor> concatenatedTensor= std::make_shared<Tensor>(Array2D<int, 3, 5>{});
+    //concatenatedTensor->print();
+
+    std::shared_ptr<Tensor> myTensor1 = std::make_shared<Tensor>(Array1D<int, 5>{{1,2,3,4,5}});
+    std::shared_ptr<Tensor> myTensor2 = std::make_shared<Tensor>(Array1D<int, 5>{{6,7,8,9,10}});
+    std::shared_ptr<Tensor> myTensor3 = std::make_shared<Tensor>(Array1D<int, 5>{{11,12,13,14,15}});
+
+    // use copy function from implementation
+    concatenatedTensor->getImpl()->copy(myTensor1->getImpl()->rawPtr(), 5, 0);
+    concatenatedTensor->getImpl()->copy(myTensor2->getImpl()->rawPtr(), 5, 5);
+    concatenatedTensor->getImpl()->copy(myTensor3->getImpl()->rawPtr(), 5, 10);
+    // concatenatedTensor->print();
+
+    std::shared_ptr<Tensor> expectedTensor= std::make_shared<Tensor>(Array2D<int, 3, 5>{
+      {{1,2,3,4,5},
+      {6,7,8,9,10},
+      {11,12,13,14,15}}
+    });
+    // expectedTensor->print();
+
+    REQUIRE(*concatenatedTensor == *expectedTensor);
+  }
+}
 
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index 3443ac6ba936106eebc9a1a26299d31fc457f32f..e24d7ac6bd97586ebdeddce5ccb75807ddf530f0 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/Add.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Add(forward)") {
diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp
index 178f1ba2f5e54d3df6dba2ec4d58f7bce718e7d1..10d4c09b32528e2cdcdbf2c56204e6911fca0187 100644
--- a/unit_tests/operator/Test_AvgPoolingImpl.cpp
+++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp
@@ -14,10 +14,10 @@
 #include <cstdlib>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/AvgPooling.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] AvgPooling(forward)") {
diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp
index 9436ceb3dd49b1984cf2ba67cd91b08143d59fc8..e6107a028e0c3d62f69821ff2650b45f34da103f 100644
--- a/unit_tests/operator/Test_BatchNormImpl.cpp
+++ b/unit_tests/operator/Test_BatchNormImpl.cpp
@@ -13,10 +13,10 @@
 #include <memory>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/BatchNorm.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] BatchNorm(forward)") {
diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
index 48a6cc88103d88cd53c16ef0bb81cdd32f2f8e73..0d0ed4b928d64cafc96907fedf3ee0d642a255d0 100644
--- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
+++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
@@ -13,10 +13,10 @@
 #include <memory>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/ConvDepthWise.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] ConvDepthWise(forward)") {
diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp
index 2c314af411e8578d83d319507ac179db46c19e79..23ff1aaebcfb79a4d4b1abc4f1a77f1c6de63b21 100644
--- a/unit_tests/operator/Test_ConvImpl.cpp
+++ b/unit_tests/operator/Test_ConvImpl.cpp
@@ -14,10 +14,10 @@
 #include <memory>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/Conv.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] Conv(forward)") {
diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp
index be672eccfb175b6996180148299ca34a28ec7db1..e3494e20205f1a295eb537100b59fb7bbc26116a 100644
--- a/unit_tests/operator/Test_FCImpl.cpp
+++ b/unit_tests/operator/Test_FCImpl.cpp
@@ -12,11 +12,11 @@
 #include <catch2/catch_test_macros.hpp>
 #include <memory>
 
-#include "aidge/aidge_backend_cpu.hpp"
-#include "aidge/data/TensorImpl.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/FC.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/oeprator] FC(forward)") {
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index b6686d8987ad26d13f1bab1b9e12be7f060d610d..7096962e196c2ace4abf2b0b14aca8dfa37d3441 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/LeakyReLU.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 using namespace Aidge;
 
 TEST_CASE("[cpu/operator] LeakyReLU(forward)") {
diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp
index 8d3a2b91980a58eba1d6d48bc259413e98da649c..9752a4914b5cb3cd06f2654cf64e0c193c5dd65b 100644
--- a/unit_tests/operator/Test_ReLUImpl.cpp
+++ b/unit_tests/operator/Test_ReLUImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/ReLU.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 #include <memory>
 
 
diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp
index 104062124fbab91f6519e37b80d7bfe0b4ddd281..bad34102b589e3d73956f43593456d885373b3de 100644
--- a/unit_tests/operator/Test_SoftmaxImpl.cpp
+++ b/unit_tests/operator/Test_SoftmaxImpl.cpp
@@ -12,10 +12,10 @@
 #include <catch2/catch_test_macros.hpp>
 
 #include "aidge/data/Tensor.hpp"
-#include "aidge/data/TensorImpl.hpp"
-#include "aidge/aidge_backend_cpu.hpp"
 #include "aidge/operator/Softmax.hpp"
 
+#include "aidge/backend/cpu.hpp"
+
 #include <memory>
 
 using namespace Aidge;