diff --git a/CHANGELOG b/CHANGELOG
index d6c26bd6de9121689a86043838c711f6d3b04cad..9a76d7b11556b434cf9749d625cedea85dc6c5ac 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,16 @@
+# Version 0.2.2 (May 14, 2024)
+
+* Remove implementation for Operators solely handling memory and format
+ - Concat
+ - Gather
+ - Memorize
+ - Pop
+ - Reshape
+ - Slice
+ - Transpose
+* Fix ReLU backward kernel
+* Add `showCpuVersion()` function to show which compiler was used
+
 # Version 0.2.1 (April 11, 2024)
 
 Fix: explicit linkage with fmt
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5937d6e5ec88011c9456fa684a9af154e16a01cd..a9603c550f89f106fcc9da818a7bd67492ec863f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,12 +1,24 @@
 cmake_minimum_required(VERSION 3.15)
 
 
-file(READ "${CMAKE_SOURCE_DIR}/version.txt" version)
-file(READ "${CMAKE_SOURCE_DIR}/project_name.txt" project)
+file(STRINGS "${CMAKE_SOURCE_DIR}/version.txt" version)
+add_definitions(-DPROJECT_VERSION="${version}")
+file(STRINGS "${CMAKE_SOURCE_DIR}/project_name.txt" project)
 
 message(STATUS "Project name: ${project}")
 message(STATUS "Project version: ${version}")
 
+execute_process(
+    COMMAND git rev-parse --short HEAD
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    OUTPUT_VARIABLE GIT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+message(STATUS "Latest git commit: ${GIT_COMMIT_HASH}")
+
+# Define a preprocessor macro with the Git commit version
+add_definitions(-DGIT_COMMIT_HASH="${GIT_COMMIT_HASH}")
+
 # Note : project name is {project} and python module name is also {project}
 set(module_name _${project}) # target name
 
diff --git a/aidge_backend_cpu/unit_tests/test_recipes.py b/aidge_backend_cpu/unit_tests/test_recipes.py
index 5586ab246e61d04b5754421b90ef3cd30629c1c3..12d8774369af5a46cfbd30d44fc90f4f97ca9821 100644
--- a/aidge_backend_cpu/unit_tests/test_recipes.py
+++ b/aidge_backend_cpu/unit_tests/test_recipes.py
@@ -40,7 +40,7 @@ class test_recipes(unittest.TestCase):
         graph_view.set_backend("cpu")
 
         np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32)
-        np_bias = np.arange(1).reshape([1, 1]).astype(np.float32)
+        np_bias = np.arange(1).reshape([1]).astype(np.float32)
 
         np_scale = np.array([0.05]).astype(np.float32)
         np_shift = np.array([0.05]).astype(np.float32)
diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 6b8b7b9208abd95f312ee53e5909f7de2b163624..a1417de1517a8212b4b4308e5128a5ee3fce1e39 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -16,32 +16,25 @@
 #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
-#include "aidge/backend/cpu/operator/ConcatImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
-#include "aidge/backend/cpu/operator/GatherImpl.hpp"
 #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
 #include "aidge/backend/cpu/operator/MatMulImpl.hpp"
-#include "aidge/backend/cpu/operator/MemorizeImpl.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
-#include "aidge/backend/cpu/operator/PopImpl.hpp"
 #include "aidge/backend/cpu/operator/PowImpl.hpp"
 #include "aidge/backend/cpu/operator/ReduceMeanImpl.hpp"
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
-#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
-#include "aidge/backend/cpu/operator/SliceImpl.hpp"
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
-#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
 
 #include "aidge/backend/cpu/data/TensorImpl.hpp"
 
diff --git a/include/aidge/backend/cpu/operator/ConcatImpl.hpp b/include/aidge/backend/cpu/operator/ConcatImpl.hpp
deleted file mode 100644
index a997ffa9860f87fe0d9bc4e64239a656053416a6..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/ConcatImpl.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_CONCATIMPL_H_
-#define AIDGE_CPU_OPERATOR_CONCATIMPL_H_
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Concat.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <memory>
-#include <vector>
-
-namespace Aidge {
-// class Concat_Op<2>;
-
-// compute kernel registry for forward and backward
-class ConcatImplForward_cpu
-    : public Registrable<ConcatImplForward_cpu, std::tuple<DataType, DataType>, void(const Concat_Op::Attrs&,
-                                                                                     const std::vector<DimSize_t>,
-                                                                                     const std::vector<DimSize_t>&,
-                                                                                     const std::vector<const void*>,
-                                                                                     void*)> {};
-
-class ConcatImplBackward_cpu
-    : public Registrable<ConcatImplBackward_cpu, std::tuple<DataType, DataType>, void(const Concat_Op::Attrs&,
-                                                                                     const std::vector<DimSize_t>,
-                                                                                     const std::vector<DimSize_t>&,
-                                                                                     const std::vector<const void*>,
-                                                                                     void*)> {};
-
-
-class ConcatImpl_cpu : public OperatorImpl {
-public:
-    ConcatImpl_cpu(const Concat_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ConcatImpl_cpu> create(const Concat_Op& op) {
-        return std::make_unique<ConcatImpl_cpu>(op);
-    }
-
-public:
-    void forward() override;
-
-    void backward() override;
-};
-
-namespace {
-static Registrar<Concat_Op> registrarConcatImpl_cpu("cpu", Aidge::ConcatImpl_cpu::create);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp
deleted file mode 100644
index ed849b0e1cdb5089275784dea418c832a38dfe66..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_CONCATIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_CONCATIMPL_FORWARD_KERNEL_H_
-
-#include <algorithm>
-#include <numeric>
-#include <cstddef>
-#include <vector>
-
-#include "aidge/backend/cpu/operator/ConcatImpl.hpp"
-#include "aidge/data/Data.hpp"
-#include "aidge/operator/Concat.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-
-namespace Aidge {
-
-template <class I, class O>
-void ConcatImpl_cpu_forward_kernel(const Concat_Op::Attrs& attrs,
-                                   const std::vector<DimSize_t>& dimsFirstInput,
-                                   const std::vector<DimSize_t>& concatAxisValues,
-                                   const std::vector<const void*>& inputs_,
-                                   void* output_)
-{
-    // FIXME: missing Concat attributes as arguments
-    std::vector<const I*> inputs;
-    for (const auto& input_ : inputs_) {
-        inputs.push_back(static_cast<const I*>(input_));
-    }
-    O* output = static_cast<O*>(output_);
-
-    DimSize_t outputAxisValue = std::accumulate(concatAxisValues.begin(), concatAxisValues.end(), 0);
-
-    DimSize_t prodDimLower = 1;
-    for (DimIdx_t i = 0; i < std::get<0>(attrs); ++i) {
-        prodDimLower *= dimsFirstInput[i];
-    }
-    DimSize_t prodDimHigher = 1;
-    for (DimIdx_t i = std::get<0>(attrs) + 1; static_cast<std::size_t>(i) < dimsFirstInput.size();
-         ++i) {
-        prodDimHigher *= dimsFirstInput[i];
-    }
-
-    std::size_t oIndexStart = 0;
-    std::size_t oIndex = 0;
-    for (std::size_t inputId = 0; inputId < inputs.size(); ++inputId) {
-        oIndex = oIndexStart;
-        const DimSize_t iOffset = prodDimHigher*concatAxisValues[inputId];
-        for (std::size_t iIndex = 0; iIndex < prodDimLower; ++iIndex) {
-            std::copy(inputs[inputId] + iIndex*iOffset, inputs[inputId] + (iIndex+1)*iOffset, output + oIndex);
-            oIndex += prodDimHigher*outputAxisValue;
-        }
-        oIndexStart += concatAxisValues[inputId]*prodDimHigher;
-    }
-}
-
-namespace {
-static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ConcatImpl_cpu_forward_kernel<float, float>);
-static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ConcatImpl_cpu_forward_kernel<int, int>);
-static Registrar<ConcatImplForward_cpu> registrarConcatImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64},
-        Aidge::ConcatImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_CONCATIMPL_CPU_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/GatherImpl.hpp b/include/aidge/backend/cpu/operator/GatherImpl.hpp
deleted file mode 100644
index 2164f6c4f26dca64c672f62bc8fdc0895c642ae4..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/GatherImpl.hpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_H_
-#define AIDGE_CPU_OPERATOR_GATHERIMPL_H_
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Gather.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include <memory>
-#include <vector>
-
-namespace Aidge {
-// class Gather_Op;
-
-// compute kernel registry for forward and backward
-class GatherImplForward_cpu
-    : public Registrable<GatherImplForward_cpu, std::tuple<DataType, DataType>, void(const typename Gather_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class GatherImplBackward_cpu
-    : public Registrable<GatherImplBackward_cpu, std::tuple<DataType, DataType>, void(const typename Gather_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-
-class GatherImpl_cpu : public OperatorImpl {
-public:
-    GatherImpl_cpu(const Gather_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<GatherImpl_cpu> create(const Gather_Op& op) {
-        return std::make_unique<GatherImpl_cpu>(op);
-    }
-
-    void forward() override;
-};
-
-namespace {
-static Registrar<Gather_Op> registrarGatherImpl_cpu("cpu", Aidge::GatherImpl_cpu::create);
-}
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp
deleted file mode 100644
index 0d312e3c143720c7d920128c8d484d4c68439a24..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_
-
-#include "aidge/utils/Registrar.hpp"
-#include <cstddef>
-#include <cmath>
-#include "aidge/data/Data.hpp"
-#include "aidge/utils/Types.h"
-
-#include "aidge/backend/cpu/operator/GatherImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void GatherImpl_cpu_forward_kernel(const typename Gather_Op::Attrs& attrs, const std::vector<DimSize_t>& inputDims, const void* input_, void* output_)
-{
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-    const std::size_t axisIdx = std::get<2>(attrs)>=0 ?
-                                std::get<2>(attrs) :
-                                static_cast<std::size_t>(std::get<2>(attrs)) + inputDims.size();
-
-    std::size_t postAxisElems = 1;
-    for (std::size_t i = axisIdx + 1; i < inputDims.size(); ++i) {
-        postAxisElems *= inputDims[i];
-    }
-    std::size_t preAxisElems = 1;
-    for (std::size_t i = 0; i < axisIdx; ++i) {
-        preAxisElems *= inputDims[i];
-    }
-
-    const std::vector<std::int64_t> indices = std::get<0>(attrs);
-    for (std::size_t i=0; i<preAxisElems; ++i)
-    {
-        for(std::size_t j=0; j<indices.size(); ++j)
-        {
-            const std::size_t idx = indices[j] >= 0 ? indices[j] : static_cast<std::size_t>(indices[j]) + inputDims[axisIdx];
-            const I* startPtr = std::next(input, i * postAxisElems * inputDims[axisIdx] + idx * postAxisElems);
-            std::copy_n(startPtr, postAxisElems, output);
-            output += postAxisElems;
-        }
-    }
-}
-
-namespace {
-static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::GatherImpl_cpu_forward_kernel<float, float>);
-static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::GatherImpl_cpu_forward_kernel<int, int>);
-static Registrar<GatherImplForward_cpu> registrarGatherImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::GatherImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_GATHERIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/MemorizeImpl.hpp b/include/aidge/backend/cpu/operator/MemorizeImpl.hpp
deleted file mode 100644
index 5ea0c9d4f3802490e5b41b5ea1c8454c87c65b28..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/MemorizeImpl.hpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_MEMORIZEIMPL_H_
-#define AIDGE_CPU_OPERATOR_MEMORIZEIMPL_H_
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Memorize.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <memory>
-#include <vector>
-
-namespace Aidge {
-class MemorizeImpl_cpu : public OperatorImpl {
-public:
-    MemorizeImpl_cpu(const Memorize_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<MemorizeImpl_cpu> create(const Memorize_Op& op) {
-        return std::make_unique<MemorizeImpl_cpu>(op);
-    }
-
-    Elts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-    Elts_t getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                               const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const override final;
-    void updateConsummerProducer() override final;
-    void forward() override;
-};
-
-namespace {
-static Registrar<Memorize_Op> registrarMemorizeImpl_cpu("cpu", Aidge::MemorizeImpl_cpu::create);
-}
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_MEMORIZEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PopImpl.hpp b/include/aidge/backend/cpu/operator/PopImpl.hpp
deleted file mode 100644
index 19d5903973da378ce003daf4de9e1ae54d7b1b0e..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/PopImpl.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_POPIMPL_H_
-#define AIDGE_CPU_OPERATOR_POPIMPL_H_
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Pop.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <memory>
-#include <vector>
-
-namespace Aidge {
-// class Pop_Op;
-
-// compute kernel registry for forward and backward
-class PopImplForward_cpu
-    : public Registrable<PopImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
-};
-class PopImplBackward_cpu
-    : public Registrable<PopImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
-};
-
-class PopImpl_cpu : public OperatorImpl {
-public:
-    PopImpl_cpu(const Pop_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<PopImpl_cpu> create(const Pop_Op& op) {
-        return std::make_unique<PopImpl_cpu>(op);
-    }
-
-    Elts_t getNbRequiredData(const IOIndex_t inputIdx) const override final;
-    void forward() override;
-};
-
-namespace {
-static Registrar<Pop_Op> registrarPopImpl_cpu("cpu", Aidge::PopImpl_cpu::create);
-}
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_POPIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
index cef82482813757312c638aebac9f2afd738493db..e2ebf44616db876b462157db650ff48362dd7bac 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
@@ -30,7 +30,7 @@ class ReLUImplForward_cpu
     : public Registrable<ReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
 };
 class ReLUImplBackward_cpu
-    : public Registrable<ReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
+    : public Registrable<ReLUImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> {
 };
 
 class ReLUImpl_cpu : public OperatorImpl {
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp
index b68ea076cb94eb9550b4a7af89ef58162ee15aea..43a9714ad2d32228fac9bf9c526191f0cec5bfa0 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp
@@ -14,31 +14,32 @@
 
 #include <cstddef>  // std::size_t
 
-#include "aidge/utils/Registrar.hpp"
-
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
-template <class I, class O>
+template <class O, class GI, class GO>
 void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
+                                  const void* output_, const void* grad_output_,
+                                  void* grad_input_) {
+    const O* output = static_cast<const O*>(output_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = (input[i] > I(0)) ? static_cast<O>(input[i]) : O(0);
+        grad_input[i] = (output[i] > GO(0)) ? GI(grad_output[i]) : GI(0);
     }
 }
 
 namespace {
 static Registrar<ReLUImplBackward_cpu> registrarReLUImplBackward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_backward_kernel<float, float>);
+    {DataType::Float32, DataType::Float32, DataType::Float32},
+    Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>);
 static Registrar<ReLUImplBackward_cpu> registrarReLUImplBackward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_backward_kernel<int, int>);
+    {DataType::Int32, DataType::Int32, DataType::Int32},
+    Aidge::ReLUImpl_cpu_backward_kernel<int, int, int>);
 static Registrar<ReLUImplBackward_cpu> registrarReLUImplBackward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_backward_kernel<double, double>);
+    {DataType::Float64, DataType::Float64, DataType::Float64},
+    Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>);
 }  // namespace
 }  // namespace Aidge
 
diff --git a/include/aidge/backend/cpu/operator/ReshapeImpl.hpp b/include/aidge/backend/cpu/operator/ReshapeImpl.hpp
deleted file mode 100644
index 1dc5fa2a09533494568ffea78153887d01368a7d..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/ReshapeImpl.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
-#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Reshape.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include <memory>
-#include <vector>
-
-namespace Aidge {
-// class Reshape_Op;
-
-// compute kernel registry for forward and backward
-class ReshapeImplForward_cpu
-    : public Registrable<ReshapeImplForward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
-};
-class ReshapeImplBackward_cpu
-    : public Registrable<ReshapeImplBackward_cpu, std::tuple<DataType, DataType>, void(std::size_t, const void*, void*)> {
-};
-
-class ReshapeImpl_cpu : public OperatorImpl {
-public:
-    ReshapeImpl_cpu(const Reshape_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ReshapeImpl_cpu> create(const Reshape_Op& op) {
-        return std::make_unique<ReshapeImpl_cpu>(op);
-    }
-
-    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    void forward() override;
-};
-
-namespace {
-static Registrar<Reshape_Op> registrarReshapeImpl_cpu("cpu", Aidge::ReshapeImpl_cpu::create);
-}
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReshapeImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReshapeImpl_forward_kernels.hpp
deleted file mode 100644
index cefdab57ee41ffab0b98a87698d95f5d89a0206d..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/ReshapeImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_
-
-#include "aidge/utils/Registrar.hpp"
-#include <cmath>
-
-#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void ReshapeImpl_cpu_forward_kernel(std::size_t inputLength,
-                                    const void* input_,
-                                    void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-    std::copy_n(input, inputLength, output);
-}
-
-namespace {
-static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32},
-        Aidge::ReshapeImpl_cpu_forward_kernel<float, float>);
-static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32},
-        Aidge::ReshapeImpl_cpu_forward_kernel<int, int>);
-static Registrar<ReshapeImplForward_cpu> registrarReshapeImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64},
-        Aidge::ReshapeImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_RESHAPEIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp
deleted file mode 100644
index 1583435c12a243ef5861299434a7fc1409307538..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/SliceImpl.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H_
-#define AIDGE_CPU_OPERATOR_SLICEIMPL_H_
-
-#include <memory>
-#include <vector>
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Slice.hpp"
-
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-
-namespace Aidge {
-// class Slice_Op;
-
-// compute kernel registry for forward and backward
-class SliceImplForward_cpu
-    : public Registrable<SliceImplForward_cpu, std::tuple<DataType>,
-                         void(const typename Slice_Op::Attrs&,
-                              const std::vector<std::size_t>,
-                              const void*,
-                              void*)> {};
-class SliceImplBackward_cpu
-    : public Registrable<SliceImplBackward_cpu, std::tuple<DataType>,
-                         void(const typename Slice_Op::Attrs&,
-                              const std::vector<std::size_t>,
-                              const void*,
-                              void*)> {};
-
-class SliceImpl_cpu : public OperatorImpl {
-public:
-    SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) {
-        return std::make_unique<SliceImpl_cpu>(op);
-    }
-
-    void forward() override;
-    void backward() override;
-};
-
-namespace {
-static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create);
-}
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
deleted file mode 100644
index d92e9008aff2a4e3c9e392fcc51871001020ce5a..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
-
-#include <cstddef>
-#include <vector>
-
-#include "aidge/backend/cpu/operator/SliceImpl.hpp"
-#include "aidge/data/Data.hpp"
-#include "aidge/operator/Slice.hpp"
-#include "aidge/utils/Registrar.hpp"
-
-namespace Aidge {
-template <class I>
-void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
-                                  const std::vector<std::size_t> inputDims,
-                                  const void* input_,
-                                  void* output_) {
-    std::vector<std::size_t> slicedDims = inputDims;
-
-    std::size_t beginning = 0;
-    DimSize_t nbAxes = std::get<2>(attrs).size();
-    for (std::size_t i = 0; i < nbAxes; ++i) {
-        // For each slice operation get the params and cast them to size_t
-        const std::int64_t axis_ = std::get<2>(attrs)[i];
-        const std::int64_t start_ = std::get<0>(attrs)[i];
-        const std::int64_t end_ = std::get<1>(attrs)[i];
-        const std::size_t axis = axis_ >= 0 ? axis_ : static_cast<std::size_t>(axis_) + inputDims.size();
-        const std::size_t start = start_ >= 0 ? start_ : start_ + inputDims[axis];
-        const std::size_t end = end_ >= 0 ? end_ : end_ + inputDims[axis];
-        std::size_t stride = 1;
-        for (std::size_t j = inputDims.size() - 1; j > axis; --j) stride *= inputDims[j];
-        beginning += start * stride;
-        const std::size_t sliceLength = end - start + 1;
-        slicedDims[axis] = sliceLength;
-    }
-
-    const I* input = static_cast<const I*>(input_) + beginning;
-    I* output = static_cast<I*>(output_);
-    const std::size_t nbDims = slicedDims.size();
-
-    // for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractDims = {1,5,5,3}
-    std::vector<std::size_t> substractedDims = std::vector<std::size_t>(nbDims);
-    for (std::size_t i = 0; i < nbDims; ++i) {
-        substractedDims[i] = inputDims[i] - slicedDims[i];
-    }
-
-    // for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1}
-    std::vector<std::size_t> prodSlicedDims = std::vector<std::size_t>(nbDims);
-    std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims + 1);
-    prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
-    prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
-    prodInputDims[nbDims] = 1;
-    for (std::size_t i = 2; i <= nbDims; ++i) {
-        prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1] * slicedDims[nbDims - i];
-        prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1] * inputDims[nbDims - i];
-    }
-
-    std::size_t j = 0;
-    std::size_t i = 0;
-    for (; j < prodSlicedDims[0];) {
-        output[j] = input[i++];
-        ++j;
-        for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
-            i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx] * prodInputDims[idx + 1] : 0;
-        }
-    }
-}
-
-namespace {
-
-static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32(
-        {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float>);
-static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32(
-        {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int>);
-static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
-        {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/TransposeImpl.hpp b/include/aidge/backend/cpu/operator/TransposeImpl.hpp
deleted file mode 100644
index 8bdcc612ea434e266a97724d45aaeefc8e033bf0..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/TransposeImpl.hpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_TransposeIMPL_H_
-#define AIDGE_CPU_OPERATOR_TransposeIMPL_H_
-
-#include "aidge/backend/OperatorImpl.hpp"
-#include "aidge/operator/Transpose.hpp"
-#include "aidge/utils/Registrar.hpp"
-#include "aidge/utils/Types.h"
-#include <memory>
-#include <vector>
-
-namespace Aidge {
-// class Transpose_Op;
-
-// compute kernel registry for forward and backward
-class TransposeImpl2DForward_cpu
-    : public Registrable<TransposeImpl2DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<2>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl3DForward_cpu
-    : public Registrable<TransposeImpl3DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<3>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl4DForward_cpu
-    : public Registrable<TransposeImpl4DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<4>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl5DForward_cpu
-    : public Registrable<TransposeImpl5DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<5>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl6DForward_cpu
-    : public Registrable<TransposeImpl6DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<6>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl2DBackward_cpu
-    : public Registrable<TransposeImpl2DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<2>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl3DBackward_cpu
-    : public Registrable<TransposeImpl3DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<3>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl4DBackward_cpu
-    : public Registrable<TransposeImpl4DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<4>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl5DBackward_cpu
-    : public Registrable<TransposeImpl5DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<5>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-class TransposeImpl6DBackward_cpu
-    : public Registrable<TransposeImpl6DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<6>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
-};
-
-
-class TransposeImpl2D_cpu : public OperatorImpl {
-public:
-    TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<TransposeImpl2D_cpu> create(const Transpose_Op<2>& op) {
-        return std::make_unique<TransposeImpl2D_cpu>(op);
-    }
-
-    void forward() override;
-};
-class TransposeImpl3D_cpu : public OperatorImpl {
-public:
-    TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<TransposeImpl3D_cpu> create(const Transpose_Op<3>& op) {
-        return std::make_unique<TransposeImpl3D_cpu>(op);
-    }
-
-    void forward() override;
-};
-class TransposeImpl4D_cpu : public OperatorImpl {
-public:
-    TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<TransposeImpl4D_cpu> create(const Transpose_Op<4>& op) {
-        return std::make_unique<TransposeImpl4D_cpu>(op);
-    }
-
-    void forward() override;
-};
-class TransposeImpl5D_cpu : public OperatorImpl {
-public:
-    TransposeImpl5D_cpu(const Transpose_Op<5>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<TransposeImpl5D_cpu> create(const Transpose_Op<5>& op) {
-        return std::make_unique<TransposeImpl5D_cpu>(op);
-    }
-
-    void forward() override;
-};
-class TransposeImpl6D_cpu : public OperatorImpl {
-public:
-    TransposeImpl6D_cpu(const Transpose_Op<6>& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<TransposeImpl6D_cpu> create(const Transpose_Op<6>& op) {
-        return std::make_unique<TransposeImpl6D_cpu>(op);
-    }
-
-    void forward() override;
-};
-
-namespace {
-static Registrar<Transpose_Op<2>> registrarTransposeImpl2D_cpu("cpu", Aidge::TransposeImpl2D_cpu::create);
-static Registrar<Transpose_Op<3>> registrarTransposeImpl3D_cpu("cpu", Aidge::TransposeImpl3D_cpu::create);
-static Registrar<Transpose_Op<4>> registrarTransposeImpl4D_cpu("cpu", Aidge::TransposeImpl4D_cpu::create);
-static Registrar<Transpose_Op<5>> registrarTransposeImpl5D_cpu("cpu", Aidge::TransposeImpl5D_cpu::create);
-static Registrar<Transpose_Op<6>> registrarTransposeImpl6D_cpu("cpu", Aidge::TransposeImpl6D_cpu::create);
-}
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_TransposeIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp
deleted file mode 100644
index 9fd5e5b58ed8e850c0a902e2de93b65cc75d274a..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_
-
-#include "aidge/utils/Registrar.hpp"
-#include <cstddef>
-#include <cmath>
-#include "aidge/data/Data.hpp"
-#include "aidge/utils/Types.h"
-
-#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
-
-namespace Aidge {
-template <class I, class O, DimSize_t DIM>
-void TransposeImpl_cpu_forward_kernel( const typename Transpose_Op<DIM>::Attrs& attrs, const std::vector<DimSize_t>& inputDims, const std::vector<DimSize_t>& outputDims, const void* input_, void* output_)
-{
-    O* output = static_cast<O*>(output_);
-    const I* input = static_cast<const I*>(input_);
-    
-    // Compute total number of elements in the input array
-    size_t totalElements = 1;
-    for (size_t dimSize : inputDims) {
-        totalElements *= dimSize;
-    }
-
-	std::vector<std::size_t> outStrides(DIM, 1);
-	for (size_t i = 0; i < DIM; ++i) {
-			for (size_t j = i+1; j < DIM; ++j)
-			{
-					outStrides[i] *= outputDims[j];
-			}
-	}
-
-    std::vector<size_t> indices(outputDims.size(), 0);
-    for (size_t i = 0; i < totalElements; ++i) {
-        size_t idx = 0;
-        // Permute indices based on OutputDimsOrder attr
-        std::vector<size_t> permutedIndices(DIM);
-        for (size_t j = 0; j < DIM; ++j) {
-            permutedIndices[j] = indices[std::get<0>(attrs)[j]];
-        }
-
-        for (int j = DIM -1; j >=0; --j) {
-            idx += permutedIndices[j] * outStrides[j];
-        }
-        // Copy the value in output
-        output[idx] = input[i];
-
-        // Update indices for the next iteration
-        for (int j = DIM - 1; j >= 0; --j) {
-            if (indices[j] < inputDims[j] - 1) {
-                indices[j]++;
-                break;
-            } else {
-                indices[j] = 0;
-            }
-        }
-    }
-
-}
-namespace {
-// DIM = 2
-static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 2>);
-static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 2>);
-static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 2>);
-// DIM = 3
-static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 3>);
-static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 3>);
-static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 3>);
-// DIM = 4
-static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 4>);
-static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 4>);
-static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 4>);
-// DIM = 5
-static Registrar<TransposeImpl5DForward_cpu> registrarTransposeImpl5DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 5>);
-static Registrar<TransposeImpl5DForward_cpu> registrarTransposeImpl5DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 5>);
-static Registrar<TransposeImpl5DForward_cpu> registrarTransposeImpl5DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 5>);
-// DIM = 6
-static Registrar<TransposeImpl6DForward_cpu> registrarTransposeImpl6DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 6>);
-static Registrar<TransposeImpl6DForward_cpu> registrarTransposeImpl6DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 6>);
-static Registrar<TransposeImpl6DForward_cpu> registrarTransposeImpl6DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 6>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/utils/sys_info/CpuVersionInfo.hpp b/include/aidge/utils/sys_info/CpuVersionInfo.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..887ce839e079349d9d64505f7184831ffc4cf1c2
--- /dev/null
+++ b/include/aidge/utils/sys_info/CpuVersionInfo.hpp
@@ -0,0 +1,35 @@
+#ifndef AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
+#define AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
+
+#include "aidge/utils/Log.hpp"
+
+namespace Aidge {
+
+#ifndef PROJECT_VERSION // Normally defined in CMakeLists.txt
+#define PROJECT_VERSION "Unknown version"
+#endif
+#ifndef GIT_COMMIT_HASH
+#define GIT_COMMIT_HASH ""
+#endif
+void showCpuVersion() {
+    Log::info("Aidge backend CPU: {} ({}), {} {}", PROJECT_VERSION, GIT_COMMIT_HASH, __DATE__, __TIME__);
+        // Compiler version
+    #if defined(__clang__)
+    /* Clang/LLVM. ---------------------------------------------- */
+        Log::info("Clang/LLVM compiler version: {}.{}.{}\n", __clang_major__ , __clang_minor__, __clang_patchlevel__);
+    #elif defined(__ICC) || defined(__INTEL_COMPILER)
+    /* Intel ICC/ICPC. ------------------------------------------ */
+        Log::info("Intel ICC/ICPC compiler version: {}\n", __INTEL_COMPILER);
+    #elif defined(__GNUC__) || defined(__GNUG__)
+    /* GNU GCC/G++. --------------------------------------------- */
+        Log::info("GNU GCC/G++ compiler version: {}.{}.{}", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+    #elif defined(_MSC_VER)
+    /* Microsoft Visual Studio. --------------------------------- */
+        Log::info("Microsoft Visual Studio compiler version: {}\n", _MSC_VER);
+    #else
+        Log::info("Unknown compiler\n");
+    #endif
+
+}
+}  // namespace Aidge
+#endif  // AIDGE_UTILS_SYS_INFO_CPU_VERSION_INFO_H
diff --git a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp
index 4a325bf51716ee6a920b3fcbde394b3e5b7c1d0f..d5022e1d469ae4171e796baed6c1aa061dd95765 100644
--- a/python_binding/pybind_cpu.cpp
+++ b/python_binding/pybind_cpu.cpp
@@ -6,10 +6,13 @@ namespace py = pybind11;
 
 namespace Aidge {
 
-void init_Aidge(py::module& /*m*/){
+void init_cpu_sys_info(py::module& m);
 
+void init_Aidge(py::module& m){
+    init_cpu_sys_info(m);
 }
 
+
 PYBIND11_MODULE(aidge_backend_cpu, m) {
     init_Aidge(m);
 }
diff --git a/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..573bee3659c65f90935e03c06eff5a2998bb9f5b
--- /dev/null
+++ b/python_binding/utils/sys_info/pybind_CpuVersionInfo.cpp
@@ -0,0 +1,9 @@
+#include <pybind11/pybind11.h>
+#include "aidge/utils/sys_info/CpuVersionInfo.hpp"
+
+namespace py = pybind11;
+namespace Aidge {
+void init_cpu_sys_info(py::module& m){
+    m.def("show_cpu_version", &showCpuVersion);
+}
+}
diff --git a/src/operator/ConcatImpl.cpp b/src/operator/ConcatImpl.cpp
deleted file mode 100644
index 605f4a19ff3856924593b0e6d7815d5de1579c01..0000000000000000000000000000000000000000
--- a/src/operator/ConcatImpl.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <cassert>
-#include <numeric> // std::accumulate
-#include <vector>
-
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/data/Data.hpp"
-#include "aidge/data/Tensor.hpp"
-
-#include "aidge/backend/cpu/operator/ConcatImpl.hpp"
-#include "aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp"
-
-void  Aidge::ConcatImpl_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input in Concat operator");
-    DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType();
-    for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) {
-        assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i)) && "missing input in Concat operator");
-        assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput);
-    }
-
-    auto kernelFunc = Registrar<ConcatImplForward_cpu>::create({
-        datatypeFirstInput,
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
-    std::vector<const void*> opInputs;
-    std::vector<DimSize_t> opInputAxis;
-    for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) {
-        opInputs.push_back(getCPUPtr(mOp.getRawInput(i)));
-        opInputAxis.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dims()[dynamic_cast<const Concat_Op&>(mOp).template getAttr<DimSize_t>("Axis")]);
-    }
-
-    kernelFunc(dynamic_cast<const Concat_Op&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               opInputAxis,
-               opInputs,
-               getCPUPtr(mOp.getRawOutput(0)));
-}
-
-void  Aidge::ConcatImpl_cpu::backward() { fmt::print("Not implemented yet.\n"); }
\ No newline at end of file
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index eecff38afd4d4487d51a070d6c0f4c2507a2b478..d9edf3a9959c1c80dbe85c93f7a1499260452c4c 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -72,7 +72,10 @@ void Aidge::FCImpl_cpu::backward()
 {
     const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
     const auto& fc_grad = op_.getOutput(0)->grad();
-    assert(fc_grad && "missing ouput #0 gradient");
+    AIDGE_ASSERT(fc_grad, "missing output #0 gradient");
+    AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
+    AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");
+    AIDGE_ASSERT(op_.getInput(2)->grad(), "missing input #2 gradient");
 
     // Find the correct kernel type
     const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
diff --git a/src/operator/GatherImpl.cpp b/src/operator/GatherImpl.cpp
deleted file mode 100644
index 5384f64536955b7cb2ed85af81e52697e9b84a2a..0000000000000000000000000000000000000000
--- a/src/operator/GatherImpl.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include "aidge/backend/cpu/operator/GatherImpl.hpp"
-
-#include <memory>
-#include <vector>
-
-#include "aidge/backend/cpu/operator/GatherImpl_forward_kernels.hpp"
-#include "aidge/data/Data.hpp"
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Gather.hpp"
-#include "aidge/utils/Types.h"
-
-void Aidge::GatherImpl_cpu::forward() {
-    const Gather_Op& op = static_cast<const Gather_Op&>(mOp);
-
-    auto kernelFunc = Registrar<GatherImplForward_cpu>::create({
-                            op.getInput(0)->dataType(),
-                            op.getOutput(0)->dataType()
-                        });
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Gather_Op&>(mOp).getStaticAttributes(),
-            op.getInput(0)->dims(),
-            op.getInput(0)->getImpl()->rawPtr(),
-            op.getOutput(0)->getImpl()->rawPtr()
-        );
-}
diff --git a/src/operator/MemorizeImpl.cpp b/src/operator/MemorizeImpl.cpp
deleted file mode 100644
index 8a23bd35585c03c91567c0da5b0727fe1323b754..0000000000000000000000000000000000000000
--- a/src/operator/MemorizeImpl.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
-#include <vector>
-
-#include "aidge/operator/Memorize.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-
-#include "aidge/backend/cpu/operator/MemorizeImpl.hpp"
-
-Aidge::Elts_t Aidge::MemorizeImpl_cpu::getNbRequiredData(
-    Aidge::IOIndex_t inputIdx) const
-{
-    const Memorize_Op& op = dynamic_cast<const Memorize_Op&>(mOp);
-    const unsigned int scheduleStep = op.template getAttr<MemorizeAttr::ScheduleStep>();
-
-    if (scheduleStep == 0 && inputIdx == 0) {
-        // No data input is required for the initial step.
-        // Initialization data is required however.
-        return Elts_t::NoneElts();
-    }
-    else if (scheduleStep > 0 && inputIdx == 1) {
-        // No initialization data is required after the initial step.
-        return Elts_t::NoneElts();
-    }
-    else {
-        return OperatorImpl::getNbRequiredData(inputIdx);
-    }
-}
-
-Aidge::Elts_t Aidge::MemorizeImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                                                         const std::vector<Aidge::DimSize_t> &/*inputsSize*/) const {
-    assert(mOp.getRawOutput(outputIdx) && "requires valid output");
-
-    const Memorize_Op& op = dynamic_cast<const Memorize_Op&>(mOp);
-    const unsigned int scheduleStep = op.template getAttr<MemorizeAttr::ScheduleStep>();
-    const unsigned int endStep = op.template getAttr<MemorizeAttr::EndStep>();
-
-    if (endStep > 0 && outputIdx == 1 && scheduleStep >= endStep) {
-        return Elts_t::NoneElts();
-    }
-    else {
-        return Elts_t::DataElts(std::static_pointer_cast<Tensor>(mOp.getRawOutput(outputIdx))->size());
-    }
-}
-
-void Aidge::MemorizeImpl_cpu::updateConsummerProducer() {
-    OperatorImpl::updateConsummerProducer();
-
-    const Memorize_Op& op = dynamic_cast<const Memorize_Op&>(mOp);
-    const unsigned int scheduleStep = op.template getAttr<MemorizeAttr::ScheduleStep>();
-    const unsigned int endStep = op.template getAttr<MemorizeAttr::EndStep>();
-    AIDGE_ASSERT(endStep == 0 || scheduleStep <= endStep, "cannot update consumer producer anymore, number of cycles exceeded");
-}
-
-void Aidge::MemorizeImpl_cpu::forward() {
-    const Memorize_Op& op = dynamic_cast<const Memorize_Op&>(mOp);
-    const unsigned int forwardStep = op.template getAttr<MemorizeAttr::ForwardStep>();
-    const unsigned int endStep = op.template getAttr<MemorizeAttr::EndStep>();
-    AIDGE_ASSERT(endStep == 0 || forwardStep <= endStep, "cannot forward anymore, number of cycles exceeded");
-
-    if (forwardStep == 0) {
-        op.getOutput(0)->getImpl()->copy(op.getInput(1)->getImpl()->rawPtr(), op.getInput(1)->size());
-    }
-    else {
-        op.getOutput(0)->getImpl()->copy(op.getInput(0)->getImpl()->rawPtr(), op.getInput(0)->size());
-    }
-}
diff --git a/src/operator/PopImpl.cpp b/src/operator/PopImpl.cpp
deleted file mode 100644
index 02bbddbaed6d9d89e729d6c778a1765fcbab4b4f..0000000000000000000000000000000000000000
--- a/src/operator/PopImpl.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
-#include <vector>
-
-#include "aidge/operator/Pop.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-
-#include "aidge/backend/cpu/operator/PopImpl.hpp"
-
-Aidge::Elts_t Aidge::PopImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const {
-    assert(mOp.getRawInput(inputIdx) && "requires valid input");
-
-    return Elts_t::DataElts(std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->size()
-        / std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims()[0]);
-}
-
-void Aidge::PopImpl_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
-
-    const Pop_Op& op = dynamic_cast<const Pop_Op&>(mOp);
-    const unsigned int forwardStep = op.template getAttr<PopAttr::ForwardStep>();
-
-    *std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))
-        = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->extract({forwardStep});
-}
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index 4bba09b6fbeea1552bf5b7cc7e491291345fca45..06859f09db169946175a93140e04f2e2a99e3362 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -45,16 +45,18 @@ void Aidge::ReLUImpl_cpu::forward() {
 void Aidge::ReLUImpl_cpu::backward() {
     // reversing in and out Tensors
         const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
-    std::shared_ptr<Tensor> in0  = op_.getOutput(0)->grad();
-    std::shared_ptr<Tensor> out0 = op_.getInput(0)->grad();
+    std::shared_ptr<Tensor> out0  = op_.getOutput(0);
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
     AIDGE_ASSERT(out0, "current {} operator output#0 has not gradient Tensor.", op_.type());
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({
-        in0->dataType(),
-        out0->dataType()
+        out0->dataType(),
+        gra_out0->dataType(),
+        gra_int0->dataType()
     });
 
     // Call kernel
-    kernelFunc(in0->size(), getCPUPtr(in0), getCPUPtr(out0));
+    kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
 }
diff --git a/src/operator/ReshapeImpl.cpp b/src/operator/ReshapeImpl.cpp
deleted file mode 100644
index 69c1c3135ce9f32d536bfd2c41b90eb55f7d8986..0000000000000000000000000000000000000000
--- a/src/operator/ReshapeImpl.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include "aidge/backend/cpu/operator/ReshapeImpl.hpp"
-
-#include "aidge/backend/cpu/operator/ReshapeImpl_forward_kernels.hpp"
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Reshape.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/utils/ErrorHandling.hpp"
-
-Aidge::Elts_t Aidge::ReshapeImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
-    // this implementation can be in-place
-    return Elts_t::DataElts(0);
-}
-
-void Aidge::ReshapeImpl_cpu::forward() {
-    const Reshape_Op& op_ = static_cast<const Reshape_Op&>(mOp);
-    AIDGE_ASSERT(op_.getInput(0)->size() == op_.getOutput(0)->size(),
-                    "input must have the same overall size as shape");
-
-    // Find the correct kernel type
-    auto kernelFunc = Registrar<ReshapeImplForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
-
-    // Call kernel
-    kernelFunc(op_.getInput(0)->size(),
-               op_.getInput(0)->getImpl()->rawPtr(),
-               op_.getOutput(0)->getImpl()->rawPtr());
-}
diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp
deleted file mode 100644
index 47b13c4694cea22421811c889b5627e9f1362ac0..0000000000000000000000000000000000000000
--- a/src/operator/SliceImpl.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <cassert>
-#include <numeric>    // std::accumulate
-#include <functional> // std::multiplies
-
-#include "aidge/operator/Slice.hpp"
-
-#include "aidge/backend/cpu/operator/SliceImpl.hpp"
-#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp"
-#include "aidge/utils/Types.h"
-#include <vector>
-#include <cassert>
-#include <tuple>
-
-void Aidge::SliceImpl_cpu::forward() {
-    // FIXME: uncomment the following code once memory handling will work
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
-
-    // Find the correct kernel type
-    auto kernelFunc = Registrar<SliceImplForward_cpu>::create(
-            {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Slice_Op&>(mOp).getStaticAttributes(),
-            std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-            std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-            std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
-            );
-
-    // each input is consumed by the minimum amount for a forward pass
-    mNbConsumedData[0] += getNbRequiredData(0);
-
-    mNbProducedData[0] += getRequiredMemory(0, {});
-}
-
-void Aidge::SliceImpl_cpu::backward() { fmt::print("Not implemented yet.\n"); }
diff --git a/src/operator/TransposeImpl.cpp b/src/operator/TransposeImpl.cpp
deleted file mode 100644
index 710e67b4f5aaa5261a111a8e131a0dd740694a4b..0000000000000000000000000000000000000000
--- a/src/operator/TransposeImpl.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
-#include <vector>
-
-#include "aidge/utils/Types.h"
-#include "aidge/operator/Transpose.hpp"
-
-#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
-#include "aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp"
-
-void Aidge::TransposeImpl2D_cpu::forward() {
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<TransposeImpl2DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
-    // auto attr = dynamic_cast<const Transpose_Op<2>&>(mOp).getStaticAttributes();
-    // std::vector<DimIdx_t> outDimsOrder;
-    // outDimsOrder.reserve(std::get<0>(attr).size()); // Reserve space for the new vector
-
-    // std::transform(std::get<0>(attr).begin(), std::get<0>(attr).end(), std::back_inserter(outDimsOrder), 
-    //                [](int intValue) { return static_cast<DimIdx_t>(intValue); });
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Transpose_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
-
-void Aidge::TransposeImpl3D_cpu::forward() {
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<TransposeImpl3DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Transpose_Op<3>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
-
-void Aidge::TransposeImpl4D_cpu::forward() {
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<TransposeImpl4DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Transpose_Op<4>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
-void Aidge::TransposeImpl5D_cpu::forward() {
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<TransposeImpl5DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Transpose_Op<5>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
-void Aidge::TransposeImpl6D_cpu::forward() {
-    // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<TransposeImpl6DForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
-    // Call kernel
-    kernelFunc(dynamic_cast<const Transpose_Op<6>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
-}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp
index e2e7051afda5e7f72c3142987587179bc759f1e8..95a0e96fe6cf8c19beeef2bdbae3c07873996dcf 100644
--- a/unit_tests/operator/Test_AddImpl.cpp
+++ b/unit_tests/operator/Test_AddImpl.cpp
@@ -45,7 +45,6 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
         op->associateInput(0, input1);
         op->setBackend("cpu");
         op->setDataType(DataType::Int32);
-        op->computeOutputDims();
         myAdd->forward();
 
         REQUIRE(*(op->getOutput(0)) == *input1);
@@ -78,7 +77,6 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
         op->associateInput(1, input1);
         op->setBackend("cpu");
         op->setDataType(DataType::Int32);
-        op->computeOutputDims();
         myAdd->forward();
 
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
@@ -112,7 +110,6 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
         op->associateInput(2, input1);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myAdd->forward();
 
         REQUIRE(*op->getOutput(0) == *expectedOutput);
@@ -170,7 +167,6 @@ TEST_CASE("[cpu/operator] Add(forward)", "[Add][CPU]") {
         op->associateInput(2, input_2);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myAdd->forward();
         op->getOutput(0)->print();
         expectedOutput->print();
diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp
index c4abf0201771c3f39a429e0f935b8216a04514e1..aaa2757830c245275d02792a7a5a2eb1db32d7b8 100644
--- a/unit_tests/operator/Test_AvgPoolingImpl.cpp
+++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp
@@ -74,7 +74,6 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
         op->associateInput(0,myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myAvgPool->forward();
         op->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
@@ -99,7 +98,6 @@ TEST_CASE("[cpu/operator] AvgPooling(forward)", "[AvgPooling][CPU]") {
         op->associateInput(0,myInput2);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myAvgPool->forward();
         op->getOutput(0)->print();
         float* outPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp
index 8c8c1dff3d74c2fce97abd8c3d88bf9840706ee4..1b42c90dd09d63cd319f19bd29751da816db06c0 100644
--- a/unit_tests/operator/Test_BatchNormImpl.cpp
+++ b/unit_tests/operator/Test_BatchNormImpl.cpp
@@ -86,7 +86,6 @@ TEST_CASE("[cpu/operator] BatchNorm(forward)", "[BatchNorm][CPU]") {
     op->associateInput(4,myVar);
     op->setDataType(DataType::Float32);
     op->setBackend("cpu");
-    op->computeOutputDims();
     myBatchNorm->forward();
 
     float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
diff --git a/unit_tests/operator/Test_ConcatImpl.cpp b/unit_tests/operator/Test_ConcatImpl.cpp
deleted file mode 100644
index 7f616fcb30cd51efb790fe725d423600901f2976..0000000000000000000000000000000000000000
--- a/unit_tests/operator/Test_ConcatImpl.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <catch2/catch_test_macros.hpp>
-
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Add.hpp"
-
-#include "aidge/backend/cpu.hpp"
-
-using namespace Aidge;
-
-TEST_CASE("[cpu/operator] Concat(forward)", "[Concat][CPU]") {
-    SECTION("Concat 1D inputs") {
-        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array1D<int,2>{{ 2, 3 }});
-        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array1D<int,3>{{ 4, 5, 6 }});
-        std::shared_ptr<Tensor> input3 = std::make_shared<Tensor>(Array1D<int,4>{{ 7, 8, 9, 10 }});
-        std::shared_ptr<Tensor> input4 = std::make_shared<Tensor>(Array1D<int,5>{{ 11, 12, 13, 14, 15 }});
-        std::shared_ptr<Tensor> input5 = std::make_shared<Tensor>(Array1D<int,6>{{ 16, 17, 18, 19, 20, 21 }});
-
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,20>{
-            { 2, 3, 4, 5, 6, 7, 8, 9, 10,11,12,13,14,15,16,17,18,19,20,21 }});
-
-        auto myConcat = Concat(5, 0);
-        myConcat->getOperator()->associateInput(0, input1);
-        myConcat->getOperator()->associateInput(1, input2);
-        myConcat->getOperator()->associateInput(2, input3);
-        myConcat->getOperator()->associateInput(3, input4);
-        myConcat->getOperator()->associateInput(4, input5);
-        myConcat->getOperator()->setBackend("cpu");
-        myConcat->getOperator()->setDataType(DataType::Int32);
-        std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->computeOutputDims();
-        myConcat->forward();
-
-        std::static_pointer_cast<Tensor>(myConcat->getOperator()->getRawOutput(0))->print();
-
-        REQUIRE(*std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->getOutput(0) == *expectedOutput);
-    }
-    SECTION("Concat 4D inputs on 1st axis") {
-        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-            {                                       //
-                {                                   //
-                    {{20, 47},{21, 48},{22, 49}},   //
-                    {{23, 50},{24, 51},{25, 52}},   //
-                    {{26, 53},{27, 54},{28, 55}}    //
-                },                                  //
-            }                                       //
-        });                                         //
-        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,2,3,3,2> {
-            {
-                {                                   //
-                    {{29, 56},{30, 57},{31, 58}},   //
-                    {{32, 59},{33, 60},{34, 61}},   //
-                    {{35, 62},{36, 63},{37, 64}}    //
-                },                                  //
-                {                                   //
-                    {{38, 65},{39, 66},{40, 67}},   //
-                    {{41, 68},{42, 69},{43, 70}},   //
-                    {{44, 71},{45, 72},{46, 73}}    //
-                }                                   //
-            }                                       //
-        });                                         //
-
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,3,3,3,2> {
-            {                                       //
-                {                                   //
-                    {{20, 47},{21, 48},{22, 49}},   //
-                    {{23, 50},{24, 51},{25, 52}},   //
-                    {{26, 53},{27, 54},{28, 55}}    //
-                },                                  //
-                {                                   //
-                    {{29, 56},{30, 57},{31, 58}},   //
-                    {{32, 59},{33, 60},{34, 61}},   //
-                    {{35, 62},{36, 63},{37, 64}}    //
-                },                                  //
-                {                                   //
-                    {{38, 65},{39, 66},{40, 67}},   //
-                    {{41, 68},{42, 69},{43, 70}},   //
-                    {{44, 71},{45, 72},{46, 73}}    //
-                }                                   //
-            }                                       //
-        });                                         //
-
-        auto myConcat = Concat(2, 0);
-        myConcat->getOperator()->associateInput(0, input1);
-        myConcat->getOperator()->associateInput(1, input2);
-        myConcat->getOperator()->setBackend("cpu");
-        myConcat->getOperator()->setDataType(DataType::Int32);
-        std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->computeOutputDims();
-        myConcat->forward();
-
-        std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->getOutput(0)->print();
-
-        REQUIRE(*std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->getOutput(0) == *expectedOutput);
-    }
-
-    SECTION("Concat 4D inputs on 3rd axis") {
-        std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,1,3,3,2> {
-            {                                       //
-                {                                   //
-                    {{20, 47},{21, 48},{22, 49}},   //
-                    {{23, 50},{24, 51},{25, 52}},   //
-                    {{26, 53},{27, 54},{28, 55}}    //
-                },                                  //
-            }                                       //
-        });                                         //
-        std::shared_ptr<Tensor> input2 = std::make_shared<Tensor>(Array4D<int,1,3,6,2> {
-            {
-                {                                   //
-                    {{29, 56},{30, 57},{31, 58},{38, 65},{39, 66},{40, 67}},   //
-                    {{32, 59},{33, 60},{34, 61},{41, 68},{42, 69},{43, 70}},   //
-                    {{35, 62},{36, 63},{37, 64},{44, 71},{45, 72},{46, 73}}    //
-                },
-            }
-        });
-
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,3,9,2> {
-            {                                                                                             //
-                {                                                                                         //
-                    {{20, 47},{21, 48},{22, 49},{29, 56},{30, 57},{31, 58},{38, 65},{39, 66},{40, 67}},   //
-                    {{23, 50},{24, 51},{25, 52},{32, 59},{33, 60},{34, 61},{41, 68},{42, 69},{43, 70}},   //
-                    {{26, 53},{27, 54},{28, 55},{35, 62},{36, 63},{37, 64},{44, 71},{45, 72},{46, 73}}    //
-                },                                                                                        //
-            }                                                                                             //
-        });                                                                                               //
-
-        auto myConcat = Concat(2, 2);
-        myConcat->getOperator()->associateInput(0, input1);
-        myConcat->getOperator()->associateInput(1, input2);
-        myConcat->getOperator()->setBackend("cpu");
-        myConcat->getOperator()->setDataType(DataType::Int32);
-        std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->computeOutputDims();
-        myConcat->forward();
-
-        std::static_pointer_cast<Tensor>(myConcat->getOperator()->getRawOutput(0))->print();
-
-        REQUIRE(*std::static_pointer_cast<OperatorTensor>(myConcat->getOperator())->getOutput(0) == *expectedOutput);
-    }
-}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
index 112703b64162004ab708f143d6e12b0c8bb9c6b6..e4e46de91bfbc38f41520f1edfc7e99d197e5c83 100644
--- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
+++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp
@@ -146,7 +146,6 @@ TEST_CASE("[cpu/operator] ConvDepthWise(forward)", "[ConvDepthWise][CPU]") {
     op -> associateInput(2, myBias);
     op->setDataType(DataType::Int32);
     op->setBackend("cpu");
-    op -> computeOutputDims();
     myCDW -> forward();
     op -> getOutput(0) -> print();
     REQUIRE(*(op -> getOutput(0)) == *myOutput);
diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp
index 0f46e8f6405366a32f45ce61d61fc94afabdd4a8..b52085139294021de2fe9d72e173ad74db028ea3 100644
--- a/unit_tests/operator/Test_ConvImpl.cpp
+++ b/unit_tests/operator/Test_ConvImpl.cpp
@@ -152,7 +152,6 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") {
         op->associateInput(2,myBias);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myConv->forward();
         // op->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
@@ -244,7 +243,6 @@ TEST_CASE("[cpu/operator] Conv(forward)", "[Conv][CPU]") {
         };
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myConv->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
diff --git a/unit_tests/operator/Test_DivImpl.cpp b/unit_tests/operator/Test_DivImpl.cpp
index a0ed261fe9622f36a9bb2e46c4796ae7f6f8f5e6..5d7dfdf12032d4c444e38cda6d2a4298fc552b14 100644
--- a/unit_tests/operator/Test_DivImpl.cpp
+++ b/unit_tests/operator/Test_DivImpl.cpp
@@ -103,7 +103,7 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 Tres->resize(dims);
                 Tres -> getImpl() -> setRawPtr(result, nb_elements);
 
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
@@ -196,7 +196,7 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
@@ -291,7 +291,7 @@ TEST_CASE("[cpu/operator] Div", "[Div][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myDiv->forward();
                 end = std::chrono::system_clock::now();
diff --git a/unit_tests/operator/Test_ErfImpl.cpp b/unit_tests/operator/Test_ErfImpl.cpp
index db2ae0437742d1cd1b298d62f5bdd7241b755ec4..2826b5b57d431cf8296a9869f88f7d642c59c963 100644
--- a/unit_tests/operator/Test_ErfImpl.cpp
+++ b/unit_tests/operator/Test_ErfImpl.cpp
@@ -37,7 +37,6 @@ TEST_CASE("[cpu/operator] Erf(forward)") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myErf->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
@@ -78,7 +77,6 @@ TEST_CASE("[cpu/operator] Erf(forward)") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myErf->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp
index 4309ce1a54f14b1da0c8b173cb46992109ee034b..b2566f26d984fb1d89052745ec35870c6b935d48 100644
--- a/unit_tests/operator/Test_FCImpl.cpp
+++ b/unit_tests/operator/Test_FCImpl.cpp
@@ -64,7 +64,6 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") {
         op->associateInput(0, myInput);
         op -> setDataType(DataType::Int32);
         op -> setBackend("cpu");
-        op->computeOutputDims();
         myFC->forward();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
@@ -103,7 +102,6 @@ TEST_CASE("[cpu/oeprator] FC(forward)", "[FC][CPU]") {
         op->associateInput(0, myInput);
         op -> setDataType(DataType::Int32);
         op -> setBackend("cpu");
-        op->computeOutputDims();
         myFC->forward();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
     }
diff --git a/unit_tests/operator/Test_GatherImpl.cpp b/unit_tests/operator/Test_GatherImpl.cpp
deleted file mode 100644
index a8345917ab0a141065e86638c09b2689902679ec..0000000000000000000000000000000000000000
--- a/unit_tests/operator/Test_GatherImpl.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <catch2/catch_test_macros.hpp>
-
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Gather.hpp"
-
-#include "aidge/backend/cpu.hpp"
-
-#include <memory>
-
-
-using namespace Aidge;
-
-TEST_CASE("[cpu/operator] Gather(forward)") {
-    SECTION("2D Tensor axis 0") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<int,3,3> {
-            {
-                {1, 2, 3},
-                {4, 5, 6},
-                {7, 8, 9}
-            }
-        });
-        std::shared_ptr<Tensor> indexes = std::make_shared<Tensor>(Array2D<int,1,2> {
-            {
-                {1, 2}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,2,3> {
-            {
-                {
-                    {4, 5, 6},
-                    {7, 8, 9}
-                }
-            }
-        });
-
-        std::shared_ptr<Node> myGather = Gather({1, 2}, {1, 2}, 0);
-        auto op = std::static_pointer_cast<OperatorTensor>(myGather -> getOperator());
-        op->associateInput(0,input);
-        // op->associateInput(1,indexes);
-        op->setDataType(DataType::Int32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myGather->forward();
-        op->getOutput(0)->print();
-        expectedOutput->print();
-
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-
-    }
-    SECTION("2D Tensor axis 1") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<int,3,3> {
-            {
-                {1, 2, 3},
-                {4, 5, 6},
-                {7, 8, 9}
-            }
-        });
-        std::shared_ptr<Tensor> indexes = std::make_shared<Tensor>(Array2D<int,1,2> {
-            {
-                {0, 2}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,3,1,2> {
-            {
-                {
-                    {1, 3}
-                },
-                {
-                    {4, 6}
-                },
-                {
-                    {7, 9}
-                }
-            }
-        });
-
-        std::shared_ptr<Node> myGather = Gather({0, 2}, {1, 2}, 1);
-        auto op = std::static_pointer_cast<OperatorTensor>(myGather -> getOperator());
-        op->associateInput(0,input);
-        // op->associateInput(1,indexes);
-        op->setDataType(DataType::Int32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myGather->forward();
-
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-
-    }
-}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
index c1db6c5eebcef13df970ec7e9fc415b5cba187a2..43903100a163b4499ed96c44d77ad119534d2eaa 100644
--- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
@@ -154,7 +154,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
         Tres->resize(dims_out);
         Tres->getImpl()->setRawPtr(result, out_nb_elems);
 
-        op->computeOutputDims();
+        op->forwardDims();
         start = std::chrono::system_clock::now();
         REQUIRE_NOTHROW(globAvgPool->forward());
         end = std::chrono::system_clock::now();
@@ -225,7 +225,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
           Tres->resize(dims_out);
           Tres->getImpl()->setRawPtr(result, out_nb_elems);
 
-          op->computeOutputDims();
+          op->forwardDims();
           start = std::chrono::system_clock::now();
           REQUIRE_NOTHROW(globAvgPool->forward());
           end = std::chrono::system_clock::now();
@@ -350,7 +350,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
           // results
           Tres->resize(out_dims);
           Tres->getImpl()->setRawPtr(result, out_nb_elems);
-          op->computeOutputDims();
+          op->forwardDims();
           start = std::chrono::system_clock::now();
           REQUIRE_NOTHROW(globAvgPool->forward());
           end = std::chrono::system_clock::now();
@@ -537,7 +537,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
           // results
           Tres->resize(out_dims);
           Tres->getImpl()->setRawPtr(result, out_nb_elems);
-          op->computeOutputDims();
+          op->forwardDims();
           start = std::chrono::system_clock::now();
           REQUIRE_NOTHROW(globAvgPool->forward());
           end = std::chrono::system_clock::now();
diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp b/unit_tests/operator/Test_LeakyReLUImpl.cpp
index cad2a6f97a31e4e2200a8c8ceb1d9dde7b118362..85dd9f99ee425216f8495e7813b35ce69be9c806 100644
--- a/unit_tests/operator/Test_LeakyReLUImpl.cpp
+++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp
@@ -32,7 +32,6 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myLeakyReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
@@ -56,7 +55,6 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myLeakyReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
@@ -92,7 +90,6 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myLeakyReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
@@ -152,7 +149,6 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myLeakyReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
@@ -170,7 +166,6 @@ TEST_CASE("[cpu/operator] LeakyReLU(forward)", "[LeakyReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myLeakyReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
diff --git a/unit_tests/operator/Test_MatMulImpl.cpp b/unit_tests/operator/Test_MatMulImpl.cpp
index 168418372d94a7de2aee7ed2e6a41d90c68531af..8a1e589fa0e9a57d712c77a12501d35f5f995bcc 100644
--- a/unit_tests/operator/Test_MatMulImpl.cpp
+++ b/unit_tests/operator/Test_MatMulImpl.cpp
@@ -94,7 +94,7 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             op->associateInput(1, T2);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
+            op->forwardDims();
             start = std::chrono::system_clock::now();
             myMatMul->forward();
             end = std::chrono::system_clock::now();
@@ -158,7 +158,7 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             op->associateInput(1, T2);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
+            op->forwardDims();
             start = std::chrono::system_clock::now();
             myMatMul->forward();
             end = std::chrono::system_clock::now();
@@ -225,7 +225,7 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
             op->associateInput(1, T2);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
+            op->forwardDims();
             start = std::chrono::system_clock::now();
             myMatMul->forward();
             end = std::chrono::system_clock::now();
@@ -258,7 +258,7 @@ TEST_CASE("[cpu/operator] MatMul(forward)", "[MatMul][CPU]") {
 
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
+        op->forwardDims();
         myMatMul->forward();
 
     }
diff --git a/unit_tests/operator/Test_MaxPoolingImpl.cpp b/unit_tests/operator/Test_MaxPoolingImpl.cpp
index 9f528f2d044cf43133f3729a7f0e4f1bd95b8889..af04ede4e33c32ce785804e2484b6ba9ac5edc36 100644
--- a/unit_tests/operator/Test_MaxPoolingImpl.cpp
+++ b/unit_tests/operator/Test_MaxPoolingImpl.cpp
@@ -75,7 +75,6 @@ TEST_CASE("[cpu/operator] MaxPooling(forward)", "[MaxPooling][CPU]") {
         myMaxPool->getOperator()->associateInput(0,myInput);
         myMaxPool->getOperator()->setDataType(DataType::Float32);
         myMaxPool->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         myMaxPool->forward();
         op->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index 63a11d19a025b5560075c4b85123d645522da09e..aa9a3909619aac2bcd2718ab7aaa0f8f6699ed34 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -175,10 +175,8 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
 
     padOp->setDataType(DataType::Float64);
     padOp->setBackend("cpu");
-    padOp->computeOutputDims();
     convOp->setDataType(DataType::Float64);
     convOp->setBackend("cpu");
-    convOp->computeOutputDims();
 
     myPad->forward();
     myConv->forward();
@@ -240,7 +238,7 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         g->save("lstm_outside_dims", true, true);
 
         microGraph->save("lstm_dims", true, true);
-        REQUIRE(op->outputDimsForwarded());
+        REQUIRE(op->dimsForwarded());
 
         auto microGraphScheduler = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraphScheduler();
         microGraphScheduler->saveSchedulingDiagram("lstm_scheduling");
diff --git a/unit_tests/operator/Test_MulImpl.cpp b/unit_tests/operator/Test_MulImpl.cpp
index 5b5a05764ecb0298a08c3e9ceece448d46e63044..9d592d31e1999f63fb0ebe3f5ad9d19e85c8645c 100644
--- a/unit_tests/operator/Test_MulImpl.cpp
+++ b/unit_tests/operator/Test_MulImpl.cpp
@@ -103,7 +103,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 Tres->resize(dims);
                 Tres -> getImpl() -> setRawPtr(result, nb_elements);
 
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myMul->forward();
                 end = std::chrono::system_clock::now();
@@ -196,7 +196,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myMul->forward();
                 end = std::chrono::system_clock::now();
@@ -291,7 +291,7 @@ TEST_CASE("[cpu/operator] Mul", "[Mul][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myMul->forward();
                 end = std::chrono::system_clock::now();
diff --git a/unit_tests/operator/Test_PadImpl.cpp b/unit_tests/operator/Test_PadImpl.cpp
index edcdaa9623e4a788f515ee99491accffcef576af..cdd3a5f979085f3782776ce69ddd92c0d53150c4 100644
--- a/unit_tests/operator/Test_PadImpl.cpp
+++ b/unit_tests/operator/Test_PadImpl.cpp
@@ -126,7 +126,6 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
         myPad->getOperator()->associateInput(0,myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         myPad->forward();
         // myPad->getOperator()->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
@@ -231,7 +230,6 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
         myPad->getOperator()->associateInput(0,myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         myPad->forward();
         // myPad->getOperator()->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
@@ -340,7 +338,6 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
         myPad->getOperator()->associateInput(0,myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         myPad->forward();
         // myPad->getOperator()->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
@@ -457,7 +454,6 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
         myPad->getOperator()->associateInput(0,myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         myPad->forward();
         op->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
@@ -566,7 +562,6 @@ TEST_CASE("[cpu/operator] Pad(forward)", "[Pad][CPU]") {
         myPad->getOperator()->associateInput(0,myInput);
         myPad->getOperator()->setDataType(DataType::Int32);
         myPad->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         myPad->forward();
         // myPad->getOperator()->getOutput(0)->print();
         REQUIRE(*(op->getOutput(0)) == *myOutput);
diff --git a/unit_tests/operator/Test_PowImpl.cpp b/unit_tests/operator/Test_PowImpl.cpp
index 01f9760275923b2249e5b6098b83b4ae27d5fb30..3b85defb37ff76439b658faa84c3c7457a152d2f 100644
--- a/unit_tests/operator/Test_PowImpl.cpp
+++ b/unit_tests/operator/Test_PowImpl.cpp
@@ -104,7 +104,7 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 Tres->resize(dims);
                 Tres -> getImpl() -> setRawPtr(result, nb_elements);
 
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myPow->forward();
                 end = std::chrono::system_clock::now();
@@ -197,7 +197,7 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myPow->forward();
                 end = std::chrono::system_clock::now();
@@ -292,7 +292,7 @@ TEST_CASE("[cpu/operator] Pow", "[Pow][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 myPow->forward();
                 end = std::chrono::system_clock::now();
diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp
index c4166ac4dba75d6719fc2f38f980065126948e1f..106d29ecfbf8ba785b4f9e5dba75daa272a86b26 100644
--- a/unit_tests/operator/Test_ReLUImpl.cpp
+++ b/unit_tests/operator/Test_ReLUImpl.cpp
@@ -35,7 +35,6 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
@@ -59,7 +58,6 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myReLU->forward();
         REQUIRE(*op->getOutput(0) == *expectedOutput);
     }
@@ -95,7 +93,6 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myReLU->forward();
         REQUIRE(*(op->getOutput(0)) == *expectedOutput);
     }
@@ -155,7 +152,6 @@ TEST_CASE("[cpu/operator] ReLU(forward)", "[ReLU][CPU]") {
         op->associateInput(0,input0);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myReLU->forward();
         REQUIRE(*op->getOutput(0) == *expectedOutput);
     }
diff --git a/unit_tests/operator/Test_ReduceMeanImpl.cpp b/unit_tests/operator/Test_ReduceMeanImpl.cpp
index d9bf68b78d1ece371cbfb5cda3c502f82eaf97de..0269622740b5a0282a093d509d4b565f7acc3e76 100644
--- a/unit_tests/operator/Test_ReduceMeanImpl.cpp
+++ b/unit_tests/operator/Test_ReduceMeanImpl.cpp
@@ -54,7 +54,6 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             op->associateInput(0,myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
             myReduceMean->forward();
             op->getOutput(0)->print();
 
@@ -94,7 +93,6 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             op->associateInput(0,myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
             myReduceMean->forward();
             myOutput.print();
             op->getOutput(0)->print();
@@ -131,7 +129,6 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
         op->associateInput(0,myInput);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         myReduceMean->forward();
         op->getOutput(0)->print();
 
@@ -165,7 +162,6 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             op->associateInput(0,myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
             myReduceMean->forward();
             op->getOutput(0)->print();
 
@@ -188,7 +184,6 @@ TEST_CASE("[cpu/operator] ReduceMean(forward)", "[ReduceMean][CPU]") {
             op->associateInput(0,myInput);
             op->setDataType(DataType::Float32);
             op->setBackend("cpu");
-            op->computeOutputDims();
             myReduceMean->forward();
             op->getOutput(0)->print();
             // approxEq<float>(*(op->getOutput(0)), *myOutput);
diff --git a/unit_tests/operator/Test_ReshapeImpl.cpp b/unit_tests/operator/Test_ReshapeImpl.cpp
deleted file mode 100644
index 1fee1f4cd132acf9ee39a86759f2e628317fce19..0000000000000000000000000000000000000000
--- a/unit_tests/operator/Test_ReshapeImpl.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <catch2/catch_test_macros.hpp>
-
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Reshape.hpp"
-
-#include "aidge/backend/cpu.hpp"
-
-#include <memory>
-
-using namespace Aidge;
-
-TEST_CASE("[cpu/operator] Reshape(forward)") {
-    SECTION("1D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array1D<float,6> {
-            {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,2,3> {
-            {
-                {1.0, 2.0, 3.0},
-                {4.0, 5.0, 6.0}
-            }
-        });
-
-        std::shared_ptr<Node> myReshape = Reshape({2, 3});
-        auto op = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator());
-        op->associateInput(0, input);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myReshape->forward();
-
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-    }
-    SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array2D<float,2,3> {
-            {
-                {1.0, 2.0, 3.0},
-                {4.0, 5.0, 6.0}
-            }
-
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<float,3,2> {
-            {
-                {1.0, 2.0},
-                {3.0, 4.0},
-                {5.0, 6.0}
-            }
-        });
-
-        std::shared_ptr<Node> myReshape = Reshape({3, 2});
-        auto op = std::static_pointer_cast<OperatorTensor>(myReshape -> getOperator());
-        op->associateInput(0, input);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myReshape->forward();
-
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-    }
-}
\ No newline at end of file
diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp
deleted file mode 100644
index 0b5ae682c659bf5a0f8d50448733b9ec18a4c36e..0000000000000000000000000000000000000000
--- a/unit_tests/operator/Test_SliceImpl.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <catch2/catch_test_macros.hpp>
-
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Slice.hpp"
-
-#include "aidge/backend/cpu.hpp"
-
-using namespace Aidge;
-
-TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
-    SECTION("1D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
-            {0, 1, 2,-3, 4,-5,-6, 7, 8, 9}
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,4> {
-            {0, 1, 2,-3}
-        });
-
-        std::shared_ptr<Node> mySlice = Slice({0}, {3}, {0});
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->setDataType(DataType::Int32);
-        mySlice->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
-        mySlice->forward();
-
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
-        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
-    }
-
-    SECTION("2D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
-            {
-                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> {
-            {
-                {-5,-6, 7},
-                {-5,-6, 7}
-            }
-        });
-
-        std::shared_ptr<Node> mySlice = Slice({0,5}, {1,7}, {0,1});
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->setDataType(DataType::Int32);
-        mySlice->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
-        mySlice->forward();
-        // mySlice->getOperator()->output(0).print();
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
-        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
-    }
-
-    SECTION("3D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
-            {
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                },
-                {
-                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> {
-            {
-                {
-                    { 4,-5,-6}
-                }
-            }
-        });
-
-        std::shared_ptr<Node> mySlice = Slice({0,1,4}, {0,1,6}, {0,1,2});
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->setDataType(DataType::Int32);
-        mySlice->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
-        mySlice->forward();
-        // mySlice->getOperator()->output(0).print();
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
-        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
-    }
-
-    SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
-            {
-                {
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    }
-                },
-                {
-                    {
-                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
-                    },
-                    {
-                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
-                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
-                    }
-                }
-            }
-        });
-
-        std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,9}, {0,1,2,3});
-        auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
-        mySlice->getOperator()->associateInput(0,input0);
-        mySlice->getOperator()->setDataType(DataType::Int32);
-        mySlice->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
-        mySlice->forward();
-        // mySlice->getOperator()->output(0).print();
-        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
-        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
-        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
-    }
-}
diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp
index 7459a45e48cad74e722dc881e4653d34b7f549d0..da6c6f0d35a1db9ad9099a40b7e83459e14a20f5 100644
--- a/unit_tests/operator/Test_SoftmaxImpl.cpp
+++ b/unit_tests/operator/Test_SoftmaxImpl.cpp
@@ -44,7 +44,6 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
         op->associateInput(0,input);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         mySoftmax->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
@@ -113,7 +112,6 @@ TEST_CASE("[cpu/operator] Softmax(forward)", "[Softmax][CPU]") {
         op->associateInput(0,input);
         op->setDataType(DataType::Float32);
         op->setBackend("cpu");
-        op->computeOutputDims();
         mySoftmax->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
diff --git a/unit_tests/operator/Test_SqrtImpl.cpp b/unit_tests/operator/Test_SqrtImpl.cpp
index 653ecf0d04907ad8f7887e79cf149d79b37a9bbc..d630c66c8b8085e6d382841da6b7cac2c88b1dd0 100644
--- a/unit_tests/operator/Test_SqrtImpl.cpp
+++ b/unit_tests/operator/Test_SqrtImpl.cpp
@@ -40,7 +40,6 @@ TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") {
         mySqrt->getOperator()->associateInput(0,input);
         mySqrt->getOperator()->setDataType(DataType::Float32);
         mySqrt->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         mySqrt->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
@@ -111,7 +110,6 @@ TEST_CASE("[cpu/operator] Sqrt(forward)", "[Sqrt][CPU]") {
         mySqrt->getOperator()->associateInput(0,input);
         mySqrt->getOperator()->setDataType(DataType::Float32);
         mySqrt->getOperator()->setBackend("cpu");
-        op->computeOutputDims();
         mySqrt->forward();
 
         float* resPtr = static_cast<float*>(op->getOutput(0)->getImpl()->rawPtr());
diff --git a/unit_tests/operator/Test_SubImpl.cpp b/unit_tests/operator/Test_SubImpl.cpp
index f9ba894f081b76b3abd0f0909636a38eaee3601a..44666ae631152c8898e24f7003b0c2ede8c67b84 100644
--- a/unit_tests/operator/Test_SubImpl.cpp
+++ b/unit_tests/operator/Test_SubImpl.cpp
@@ -103,7 +103,7 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 Tres->resize(dims);
                 Tres -> getImpl() -> setRawPtr(result, nb_elements);
 
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 mySub->forward();
                 end = std::chrono::system_clock::now();
@@ -196,7 +196,7 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 mySub->forward();
                 end = std::chrono::system_clock::now();
@@ -291,7 +291,7 @@ TEST_CASE("[cpu/operator] Sub", "[Sub][CPU]") {
                 Tres -> getImpl() -> setRawPtr(result, dimsOut[0]*dimsOut[1]*dimsOut[2]*dimsOut[3]);
 
                 // compute result
-                op->computeOutputDims();
+                op->forwardDims();
                 start = std::chrono::system_clock::now();
                 mySub->forward();
                 end = std::chrono::system_clock::now();
diff --git a/unit_tests/operator/Test_TransposeImpl.cpp b/unit_tests/operator/Test_TransposeImpl.cpp
deleted file mode 100644
index d381faadd7750f6a9a48fe9371f98e813b94a310..0000000000000000000000000000000000000000
--- a/unit_tests/operator/Test_TransposeImpl.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#include <catch2/catch_test_macros.hpp>
-#include <memory>
-
-#include "aidge/data/Tensor.hpp"
-#include "aidge/operator/Transpose.hpp"
-
-#include "aidge/backend/cpu.hpp"
-
-using namespace Aidge;
-
-TEST_CASE("[cpu/operator] Transpose(forward)") {
-    SECTION("3D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array3D<float,2,3,4> {
-            {
-                {{0.42507452, 0.11244237, 0.43243718, 0.62354952},
-                {0.90250170, 0.48719984, 0.45781207, 0.92536664},
-                {0.06348717, 0.91678733, 0.64452291, 0.00484818}},
-
-                {{0.66873497, 0.99508536, 0.55714869, 0.84887981},
-                {0.41666120, 0.92365038, 0.80034822, 0.38721532},
-                {0.52037925, 0.53937608, 0.66380072, 0.36330253}}
-            }
-        });
-        std::shared_ptr<Tensor> output = std::make_shared<Tensor>(Array3D<float,2,4,3> { 
-            {
-                {{0.42507452, 0.90250170, 0.06348717},
-                {0.11244237, 0.48719984, 0.91678733},
-                {0.43243718, 0.45781207, 0.64452291},
-                {0.62354952, 0.92536664, 0.00484818}},
-
-                {{0.66873497, 0.41666120, 0.52037925},
-                {0.99508536, 0.92365038, 0.53937608},
-                {0.55714869, 0.80034822, 0.66380072},
-                {0.84887981, 0.38721532, 0.36330253}}
-            }
-        });
-        std::shared_ptr<Node> myTranspose = Transpose<3>(std::array<DimSize_t,3>{{0,2,1}});
-        auto op = std::static_pointer_cast<OperatorTensor>(myTranspose -> getOperator());
-        op->associateInput(0,input);
-        op->setDataType(DataType::Float32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myTranspose->forward();
-
-        REQUIRE(*(op->getOutput(0)) == *output);
-    }
-    SECTION("4D Tensor") {
-        std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array4D<int,2,3,1,4> {
-            {
-                {
-                    {
-                        {1, 2, 3, 4}
-                    },
-                    {
-                        {5, 6, 7, 8}
-                    },
-                    {
-                        {9, 10, 11, 12}
-                    }
-                },
-                {
-                    {
-                        {13, 14, 15, 16}
-                    },
-                    {
-                        {17, 18, 19, 20}
-                    },
-                    {
-                        {21, 22, 23, 24}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Tensor> output = std::make_shared<Tensor>(Array4D<int,2,4,1,3> { 
-            {
-                {
-                    {
-                        {1, 5, 9}
-                    },
-                    {
-                        {2, 6, 10}
-                    },
-                    {
-                        {3, 7, 11}
-                    },
-                    {
-                        {4, 8, 12}
-                    }
-                },
-                {
-                    {
-                        {13, 17, 21}
-                    },
-                    {
-                        {14, 18, 22}
-                    },
-                    {
-                        {15, 19, 23}
-                    },
-                    {
-                        {16, 20, 24}
-                    }
-                }
-            }
-        });
-        std::shared_ptr<Node> myTranspose = Transpose<4>(std::array<DimSize_t,4>{{0,3,2,1}});
-        auto op = std::static_pointer_cast<OperatorTensor>(myTranspose -> getOperator());
-        op->associateInput(0,input);
-        op->setDataType(DataType::Int32);
-        op->setBackend("cpu");
-        op->computeOutputDims();
-        myTranspose->forward();
-
-        REQUIRE(*(op->getOutput(0)) == *output);
-    }
-}
\ No newline at end of file
diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp
index a8a384f611a8cf99a0aa94c58e9bcd5955f698c4..2c10cdf369d7d37ea67b70b9dfe3e76018da2a32 100644
--- a/unit_tests/recipies/Test_HorizontalTiling.cpp
+++ b/unit_tests/recipies/Test_HorizontalTiling.cpp
@@ -163,7 +163,6 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") {
             myReLU->addChild(myConv, 0, 0);
             myConv->getOperator()->setInput(1, myWeights);
             myConv->getOperator()->setInput(2, myBias);
-            std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->computeOutputDims();
 
             std::shared_ptr<GraphView> g = std::make_shared<GraphView>();
             g->add({myReLU, myConv});
diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index 953f291d107e8ea99c25b9aa1f06def6b3e381b2..01ccd37c319ee64deb15240b30cc369b37c9e47d 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -17,6 +17,7 @@
 #include "aidge/graph/Node.hpp"
 #include "aidge/graph/GraphView.hpp"
 #include "aidge/graph/OpArgs.hpp"
+#include "aidge/operator/Memorize.hpp"
 #include "aidge/scheduler/SequentialScheduler.hpp"
 #include "aidge/scheduler/ParallelScheduler.hpp"
 
@@ -418,8 +419,8 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward
     compile_gradient(gv);
     SequentialScheduler scheduler(gv);
     scheduler.forward();
-    auto predictedOutput = gv->getOrderedOutputs()[0].first;
-
+    auto outNode = gv->getOrderedOutputs()[0].first;
+    std::shared_ptr<Tensor> predictedOutput = std::dynamic_pointer_cast<OperatorTensor>(outNode->getOperator())->getOutput(0);
     std::shared_ptr<Tensor> targetOutput =
           std::make_shared<Tensor>(Array4D<float, 2, 1, 5, 5>{{{{{0.0f, 1.0f, 1.0f, 2.0f, 2.0f},
                                                                  {2.0f, 2.0f, 3.0f, 3.0f, 3.0f},
@@ -431,7 +432,8 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward
                                                                  {6.0f, 6.0f, 6.0f, 6.0f, 6.0f},
                                                                  {6.0f, 6.0f, 6.0f, 7.0f, 7.0f},
                                                                  {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}});
-
-    REQUIRE_NOTHROW(scheduler.backward({targetOutput}));
+    predictedOutput->initGrad();
+    predictedOutput->setGrad(targetOutput);
+    REQUIRE_NOTHROW(scheduler.backward());
 }
 } // namespace Aidge
diff --git a/version.txt b/version.txt
index 0c62199f16ac1e2d7f7ae75b420c1231325dff4e..ee1372d33a29e27945406f0527f8af8e6ee119c9 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.2.1
+0.2.2