From f19edbda4bf688073e671e4f3f011d6eb85d5243 Mon Sep 17 00:00:00 2001 From: Olivier BICHLER <olivier.bichler@cea.fr> Date: Thu, 14 Sep 2023 11:02:18 +0200 Subject: [PATCH] Changed header files structure to work with multiple backend modules --- include/aidge/aidge_backend_cpu.hpp | 27 -- include/aidge/backend/cpu.hpp | 27 ++ .../{ => backend/cpu}/data/TensorImpl.hpp | 150 ++++---- .../{ => backend/cpu}/operator/AddImpl.hpp | 0 .../cpu}/operator/AddImpl_forward_kernels.hpp | 174 +++++----- .../cpu}/operator/AvgPoolingImpl.hpp | 0 .../AvgPoolingImpl_forward_kernels.hpp | 228 ++++++------ .../cpu}/operator/BatchNormImpl.hpp | 0 .../BatchNormImpl_forward_kernels.hpp | 218 ++++++------ .../cpu}/operator/ConvDepthWiseImpl.hpp | 0 .../ConvDepthWiseImpl_forward_kernels.hpp | 236 ++++++------- .../{ => backend/cpu}/operator/ConvImpl.hpp | 0 .../operator/ConvImpl_forward_kernels.hpp | 324 +++++++++--------- .../{ => backend/cpu}/operator/FCImpl.hpp | 0 .../cpu}/operator/FCImpl_forward_kernels.hpp | 256 +++++++------- .../cpu}/operator/LeakyReLUImpl.hpp | 0 .../LeakyReLUImpl_forward_kernels.hpp | 90 ++--- .../cpu}/operator/ProducerImpl.hpp | 0 .../{ => backend/cpu}/operator/ReLUImpl.hpp | 0 .../operator/ReLUImpl_forward_kernels.hpp | 86 ++--- .../cpu}/operator/SoftmaxImpl.hpp | 0 .../operator/SoftmaxImpl_forward_kernels.hpp | 128 +++---- python_binding/pybind_cpu.cpp | 2 +- src/operator/AddImpl.cpp | 6 +- src/operator/AvgPoolingImpl.cpp | 8 +- src/operator/BatchNormImpl.cpp | 8 +- src/operator/ConvDepthWiseImpl.cpp | 8 +- src/operator/ConvImpl.cpp | 8 +- src/operator/FCImpl.cpp | 5 +- src/operator/LeakyReLUImpl.cpp | 9 +- src/operator/ProducerImpl.cpp | 2 +- src/operator/ReLUImpl.cpp | 9 +- src/operator/SoftmaxImpl.cpp | 9 +- unit_tests/Test_Scheduler.cpp | 3 +- unit_tests/Test_TensorImpl.cpp | 2 +- unit_tests/operator/Test_AddImpl.cpp | 4 +- unit_tests/operator/Test_AvgPoolingImpl.cpp | 4 +- unit_tests/operator/Test_BatchNormImpl.cpp | 4 +- .../operator/Test_ConvDepthWiseImpl.cpp | 4 +- unit_tests/operator/Test_ConvImpl.cpp | 4 +- unit_tests/operator/Test_FCImpl.cpp | 4 +- unit_tests/operator/Test_LeakyReLUImpl.cpp | 4 +- unit_tests/operator/Test_ReLUImpl.cpp | 4 +- unit_tests/operator/Test_SoftmaxImpl.cpp | 4 +- 44 files changed, 1029 insertions(+), 1030 deletions(-) delete mode 100644 include/aidge/aidge_backend_cpu.hpp create mode 100644 include/aidge/backend/cpu.hpp rename include/aidge/{ => backend/cpu}/data/TensorImpl.hpp (96%) rename include/aidge/{ => backend/cpu}/operator/AddImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/AddImpl_forward_kernels.hpp (97%) rename include/aidge/{ => backend/cpu}/operator/AvgPoolingImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/AvgPoolingImpl_forward_kernels.hpp (97%) rename include/aidge/{ => backend/cpu}/operator/BatchNormImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/BatchNormImpl_forward_kernels.hpp (96%) rename include/aidge/{ => backend/cpu}/operator/ConvDepthWiseImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/ConvDepthWiseImpl_forward_kernels.hpp (97%) rename include/aidge/{ => backend/cpu}/operator/ConvImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/ConvImpl_forward_kernels.hpp (98%) rename include/aidge/{ => backend/cpu}/operator/FCImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/FCImpl_forward_kernels.hpp (97%) rename include/aidge/{ => backend/cpu}/operator/LeakyReLUImpl.hpp (100%) rename include/aidge/{ => 
backend/cpu}/operator/LeakyReLUImpl_forward_kernels.hpp (95%) rename include/aidge/{ => backend/cpu}/operator/ProducerImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/ReLUImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/ReLUImpl_forward_kernels.hpp (95%) rename include/aidge/{ => backend/cpu}/operator/SoftmaxImpl.hpp (100%) rename include/aidge/{ => backend/cpu}/operator/SoftmaxImpl_forward_kernels.hpp (95%) diff --git a/include/aidge/aidge_backend_cpu.hpp b/include/aidge/aidge_backend_cpu.hpp deleted file mode 100644 index e887a229..00000000 --- a/include/aidge/aidge_backend_cpu.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_IMPORTS_H_ -#define AIDGE_CPU_IMPORTS_H_ - -#include "aidge/data/TensorImpl.hpp" -#include "aidge/operator/AddImpl.hpp" -#include "aidge/operator/AvgPoolingImpl.hpp" -#include "aidge/operator/BatchNormImpl.hpp" -#include "aidge/operator/ConvDepthWiseImpl.hpp" -#include "aidge/operator/ConvImpl.hpp" -#include "aidge/operator/FCImpl.hpp" -#include "aidge/operator/LeakyReLUImpl.hpp" -#include "aidge/operator/ProducerImpl.hpp" -#include "aidge/operator/ReLUImpl.hpp" -#include "aidge/operator/SoftmaxImpl.hpp" - -#endif /* AIDGE_CPU_IMPORTS_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp new file mode 100644 index 00000000..95b2f7b8 --- /dev/null +++ b/include/aidge/backend/cpu.hpp @@ -0,0 +1,27 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_IMPORTS_H_ +#define AIDGE_CPU_IMPORTS_H_ + +#include "aidge/backend/cpu/data/TensorImpl.hpp" +#include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/FCImpl.hpp" +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/ProducerImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" + +#endif /* AIDGE_CPU_IMPORTS_H_ */ \ No newline at end of file diff --git a/include/aidge/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp similarity index 96% rename from include/aidge/data/TensorImpl.hpp rename to include/aidge/backend/cpu/data/TensorImpl.hpp index e2c9f828..dfcb8afa 100644 --- a/include/aidge/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -1,75 +1,75 @@ -#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_ -#define AIDGE_CPU_DATA_TENSORIMPL_H_ - -#include "aidge/backend/TensorImpl.hpp" -#include "aidge/data/Tensor.hpp" -#include "aidge/utils/Registrar.hpp" -#include "aidge/utils/Types.h" - -namespace Aidge { -template <class T> -class TensorImpl_cpu : public TensorImpl { - private: - const Tensor &mTensor; // Impl needs to access Tensor information, but is not - // supposed to change it! - std::vector<T> mData; - - public: - static constexpr const char *Backend = "cpu"; - - TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} - - bool operator==(const TensorImpl &otherImpl) const override final { - std::size_t i = 0; - for (; i < mTensor.size() && - mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; - ++i) { - } - return i == mTensor.size(); - } - - static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) { - return std::make_unique<TensorImpl_cpu<T>>(tensor); - } - - // native interface - const std::vector<T> &data() const { return mData; } - - std::size_t scalarSize() const override { return sizeof(T); } - - void copy(const void *src, NbElts_t length) override { - std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, - static_cast<T *>(rawPtr())); - } - - void *rawPtr() override { - lazyInit(mData); - return mData.data(); - }; - - virtual ~TensorImpl_cpu() = default; - - void setRawPtr(void *ptr) override final { - T *newPtr = static_cast<T *>(ptr); - mData = std::vector<T>(newPtr, newPtr + mTensor.size()); - }; - - private: - void lazyInit(std::vector<T> &data) { - assert(mTensor.dataType() == NativeType<T>::type); - - if (data.size() != mTensor.size()) data.resize(mTensor.size()); - } -}; - -namespace { -static Registrar<Tensor> registrarTensorImpl_cpu_Float64( - {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Float32( - {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int32( - {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ +#ifndef AIDGE_CPU_DATA_TENSORIMPL_H_ +#define AIDGE_CPU_DATA_TENSORIMPL_H_ + +#include 
"aidge/backend/TensorImpl.hpp" +#include "aidge/data/Tensor.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" + +namespace Aidge { +template <class T> +class TensorImpl_cpu : public TensorImpl { + private: + const Tensor &mTensor; // Impl needs to access Tensor information, but is not + // supposed to change it! + std::vector<T> mData; + + public: + static constexpr const char *Backend = "cpu"; + + TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} + + bool operator==(const TensorImpl &otherImpl) const override final { + std::size_t i = 0; + for (; i < mTensor.size() && + mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; + ++i) { + } + return i == mTensor.size(); + } + + static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) { + return std::make_unique<TensorImpl_cpu<T>>(tensor); + } + + // native interface + const std::vector<T> &data() const { return mData; } + + std::size_t scalarSize() const override { return sizeof(T); } + + void copy(const void *src, NbElts_t length) override { + std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, + static_cast<T *>(rawPtr())); + } + + void *rawPtr() override { + lazyInit(mData); + return mData.data(); + }; + + virtual ~TensorImpl_cpu() = default; + + void setRawPtr(void *ptr) override final { + T *newPtr = static_cast<T *>(ptr); + mData = std::vector<T>(newPtr, newPtr + mTensor.size()); + }; + + private: + void lazyInit(std::vector<T> &data) { + assert(mTensor.dataType() == NativeType<T>::type); + + if (data.size() != mTensor.size()) data.resize(mTensor.size()); + } +}; + +namespace { +static Registrar<Tensor> registrarTensorImpl_cpu_Float64( + {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); +static Registrar<Tensor> registrarTensorImpl_cpu_Float32( + {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); +static Registrar<Tensor> registrarTensorImpl_cpu_Int32( + {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ diff --git a/include/aidge/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp similarity index 100% rename from include/aidge/operator/AddImpl.hpp rename to include/aidge/backend/cpu/operator/AddImpl.hpp diff --git a/include/aidge/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp similarity index 97% rename from include/aidge/operator/AddImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index 73ec0ddd..49059859 100644 --- a/include/aidge/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -1,87 +1,87 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/AddImpl.hpp" - -namespace Aidge { - -template <class I1, class O> -void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { - // FIXME: missing Add parameters as arguments - const I1* input1 = static_cast<const I1*>(input1_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex]; - } -} - -template <class I1, class I2, class O> -void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, - void* output_) { - // FIXME: missing Add parameters as arguments - const I1* input1 = static_cast<const I1*>(input1_); - const I2* input2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex] + input2[oIndex]; - } -} - -template <class I1, class I2, class I3, class O> -void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, - const void* input3_, void* output_) { - // FIXME: missing Add parameters as arguments - const I1* input1 = static_cast<const I1*>(input1_); - const I2* input2 = static_cast<const I2*>(input2_); - const I3* input3 = static_cast<const I3*>(input3_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex]; - } -} - -namespace { -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>); -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>); -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>); - -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>); -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>); -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>); - -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>); -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>); -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - 
Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_ADDIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/AddImpl.hpp" + +namespace Aidge { + +template <class I1, class O> +void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex]; + } +} + +template <class I1, class I2, class O> +void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, + void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + const I2* input2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex] + input2[oIndex]; + } +} + +template <class I1, class I2, class I3, class O> +void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, + const void* input3_, void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + const I2* input2 = static_cast<const I2*>(input2_); + const I3* input3 = static_cast<const I3*>(input3_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex]; + } +} + +namespace { +static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>); +static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>); +static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>); + +static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>); +static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>); +static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>); + +static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32( + {DataType::Float32, DataType::Float32, 
DataType::Float32, DataType::Float32}, + Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>); +static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>); +static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp similarity index 100% rename from include/aidge/operator/AvgPoolingImpl.hpp rename to include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp diff --git a/include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp similarity index 97% rename from include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp index 7ead482c..776e020f 100644 --- a/include/aidge/operator/AvgPoolingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp @@ -1,114 +1,114 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/AvgPoolingImpl.hpp" -#include "aidge/utils/Types.h" -#include "aidge/data/Data.hpp" -#include <array> -#include <tuple> -#include <cmath> - -namespace Aidge { -/** - * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. - * @tparam I Input data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param output_ Output Tensor. 
- */ -template <class I, class O> -void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters ¶ms, - const std::array<DimSize_t, 4> &dims, - const void *input_, - void *output_) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - O *output = static_cast<O *>(output_); - - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < dims[1]; ++ch) { - const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? 
std::get<1>(params)[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += static_cast<O>( - input[iIndex + (ix+0)*dims[3] + (iy+0)] + - input[iIndex + (ix+0)*dims[3] + (iy+1)] + - input[iIndex + (ix+0)*dims[3] + (iy+2)] + - input[iIndex + (ix+1)*dims[3] + (iy+0)] + - input[iIndex + (ix+1)*dims[3] + (iy+1)] + - input[iIndex + (ix+1)*dims[3] + (iy+2)] + - input[iIndex + (ix+2)*dims[3] + (iy+0)] + - input[iIndex + (ix+2)*dims[3] + (iy+1)] + - input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; - } - } - // padding not used - output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); - } - } - } - } - } -} - -namespace { -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32( - std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>); -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>); -static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, - Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include "aidge/utils/Types.h" +#include "aidge/data/Data.hpp" +#include <array> +#include <tuple> +#include <cmath> + +namespace Aidge { +/** + * @brief Forward kernel for 2D AvgPoolingolution on CPU backend. + * @tparam I Input data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param output_ Output Tensor. 
+ */ +template <class I, class O> +void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Parameters ¶ms, + const std::array<DimSize_t, 4> &dims, + const void *input_, + void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + O *output = static_cast<O *>(output_); + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<2>(params)[0] + std::get<2>(params)[2] - std::get<1>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<2>(params)[1] + std::get<2>(params)[3] - std::get<1>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, ch, Xin, Yin) + // weight (outCh, ch, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < dims[1]; ++ch) { + const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize; + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<2>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(params)[0] ? std::get<1>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<2>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(params)[1] ? 
std::get<1>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const std::size_t ix = ox * std::get<0>(params)[0]; + const std::size_t iy = oy * std::get<0>(params)[1]; + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += static_cast<O>( + input[iIndex + (ix+0)*dims[3] + (iy+0)] + + input[iIndex + (ix+0)*dims[3] + (iy+1)] + + input[iIndex + (ix+0)*dims[3] + (iy+2)] + + input[iIndex + (ix+1)*dims[3] + (iy+0)] + + input[iIndex + (ix+1)*dims[3] + (iy+1)] + + input[iIndex + (ix+1)*dims[3] + (iy+2)] + + input[iIndex + (ix+2)*dims[3] + (iy+0)] + + input[iIndex + (ix+2)*dims[3] + (iy+1)] + + input[iIndex + (ix+2)*dims[3] + (iy+2)]) / O(9); + } else { + for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sy = syMin; sy < syMax; ++sy) { + output[oIndexFull] += input[iIndex + (ix+sx)*dims[3] + (iy+sy)]; + } + } + // padding not used + output[oIndexFull] /= (sxMax - sxMin) * (syMax - syMin); + } + } + } + } + } +} + +namespace { +static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float32( + std::tuple<DataType, DataType>({DataType::Float32, DataType::Float32}), + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<float, float>); +static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<int, int>); +static Registrar<AvgPoolingImpl2DForward_cpu> registrarAvgPoolingImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, + Aidge::AvgPoolingImpl2D_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp similarity index 100% rename from include/aidge/operator/BatchNormImpl.hpp rename to include/aidge/backend/cpu/operator/BatchNormImpl.hpp diff --git a/include/aidge/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp similarity index 96% rename from include/aidge/operator/BatchNormImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp index 6d05c40c..eedb80bd 100644 --- a/include/aidge/operator/BatchNormImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp @@ -1,109 +1,109 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/BatchNormImpl.hpp" -#include "aidge/utils/Types.h" -#include <array> -#include <cmath> -#include <algorithm> - -namespace Aidge { -/** - * @brief Forward kernel for 2D BatchNormolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. 
- * @param input_ const input Tensor. - * @param scale_ const scale Tensor. - * @param shift_ const shift Tensor. - * @param batchMean_ const mean Tensor. - * @param batchVar_ const variance Tensor. - * @param output_ Output Tensor. - */ -template <class I, class P, class O> -void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - const P *scale = static_cast<const P *>(scale_); - const P *shift = static_cast<const P *>(shift_); - P *batchMean = static_cast<P *>(batchMean_); - P *batchVar = static_cast<P *>(batchVar_); - O *output = static_cast<O *>(output_); - - const DimSize_t nbBatch = dims[0]; - const DimSize_t nbChannels = dims[1]; - const DimSize_t featureMapSize = dims[2]*dims[3]; - - - if ((freeze == true) || (std::get<1>(params) == 0.0f)) { - for (std::size_t batch = 0; batch < nbBatch; ++batch) { - for (std::size_t ch = 0; ch < nbChannels; ++ch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); - - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; - } - } - } - } else { - const std::size_t nbDataPerChannel = nbBatch * featureMapSize; - for (std::size_t ch = 0; ch < nbChannels; ++ch) { - I sum = I(0); - I sumSquare = I(0); - for (std::size_t batch = 0; batch < nbBatch; ++batch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); - - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - sum += input[ioIndex + feature]; - sumSquare += input[ioIndex + feature] * input[ioIndex + feature]; - } - } - const I inputMean = sum / static_cast<I>(nbDataPerChannel); - const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; - - batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); - batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); - - const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); - for (std::size_t batch = 0; batch < nbBatch; ++batch) { - const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; - for (std::size_t feature = 0; feature<featureMapSize; ++feature) { - output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var; - } - } - } - } -} - - - - - -namespace { -static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * 
http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/utils/Types.h" +#include <array> +#include <cmath> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D BatchNormolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param scale_ const scale Tensor. + * @param shift_ const shift Tensor. + * @param batchMean_ const mean Tensor. + * @param batchVar_ const variance Tensor. + * @param output_ Output Tensor. + */ +template <class I, class P, class O> +void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, + const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + const P *scale = static_cast<const P *>(scale_); + const P *shift = static_cast<const P *>(shift_); + P *batchMean = static_cast<P *>(batchMean_); + P *batchVar = static_cast<P *>(batchVar_); + O *output = static_cast<O *>(output_); + + const DimSize_t nbBatch = dims[0]; + const DimSize_t nbChannels = dims[1]; + const DimSize_t featureMapSize = dims[2]*dims[3]; + + + if ((freeze == true) || (std::get<1>(params) == 0.0f)) { + for (std::size_t batch = 0; batch < nbBatch; ++batch) { + for (std::size_t ch = 0; ch < nbChannels; ++ch) { + const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; + std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); + const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(params))); + + for (std::size_t feature = 0; feature<featureMapSize; ++feature) { + output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var; + } + } + } + } else { + const std::size_t nbDataPerChannel = nbBatch * featureMapSize; + for (std::size_t ch = 0; ch < nbChannels; ++ch) { + I sum = I(0); + I sumSquare = I(0); + for (std::size_t batch = 0; batch < nbBatch; ++batch) { + const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; + std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]); + + for (std::size_t feature = 0; feature<featureMapSize; ++feature) { + sum += input[ioIndex + feature]; + sumSquare += input[ioIndex + feature] * input[ioIndex + feature]; + } + } + const I inputMean = sum / static_cast<I>(nbDataPerChannel); + const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel) - inputMean*inputMean; + + batchMean[ch] = batchMean[ch]*(1-std::get<1>(params)) + inputMean*std::get<1>(params); + batchVar[ch] = batchVar[ch]*(1-std::get<1>(params)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(params); + + const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(params))); + for (std::size_t batch = 0; batch < nbBatch; ++batch) { + const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize; + for (std::size_t 
feature = 0; feature<featureMapSize; ++feature) { + output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-inputMean) / var; + } + } + } + } +} + + + + + +namespace { +static Registrar<BatchNormImpl2DForward_cpu> registrarBatchNormImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::BatchNormImpl2D_cpu_forward_kernel<float, float, float>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_BATCHNORMIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp similarity index 100% rename from include/aidge/operator/ConvDepthWiseImpl.hpp rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp diff --git a/include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp similarity index 97% rename from include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp index da9c8daf..ee2d82e0 100644 --- a/include/aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp @@ -1,118 +1,118 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/ConvDepthWiseImpl.hpp" -#include "aidge/utils/Types.h" -#include <cmath> -#include <array> -#include <algorithm> - -namespace Aidge { -/** - * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param weights_ const weight Tensor. - * @param biases_ const Biais Tensor. - * @param output_ Output Tensor. 
- */ -template <class I, class W, class B, class O> -void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - const W *weights = static_cast<const W *>(weights_); - const B *biases = static_cast<const B *>(biases_); - O *output = static_cast<O *>(output_); - - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, ch, Xin, Yin) - // weight (outCh, ch, kernelX, kernelY) - // does not take Dilation parameter into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { - const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; - B biasVal = (biases != nullptr) ? biases[ch] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? 
std::get<3>(params)[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; - } - } - } - } - } - } - } -} - -namespace { -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>); -static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMP_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/utils/Types.h" +#include <cmath> +#include <array> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Biais Tensor. + * @param output_ Output Tensor. + */ +template <class I, class W, class B, class O> +void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, + const void *input_, const void *weights_, const void *biases_, void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<4>(params)[0] + std::get<4>(params)[2] - std::get<3>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<4>(params)[1] + std::get<4>(params)[3] - std::get<3>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, ch, Xin, Yin) + // weight (outCh, ch, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t ch = 0; ch < std::get<2>(params); ++ch) { + const std::size_t oIndex = (ch + batch*std::get<2>(params)) * oxSize * oySize; + B biasVal = (biases != nullptr) ? biases[ch] : B(0); + std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); + const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3]; + const std::size_t wIndex = ch * std::get<3>(params)[0] * std::get<3>(params)[1]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<4>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(params)[0] ? std::get<3>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<4>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(params)[1] ? 
std::get<3>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<4>(params)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<4>(params)[1]; + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += (weights[wIndex + 0*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<3>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<3>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<3>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + } else { + for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sy = syMin; sy < syMax; ++sy) { + output[oIndexFull] += weights[wIndex + sx*std::get<3>(params)[1] + sy] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + } + } + } + } + } + } + } +} + +namespace { +static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>); +static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>); +static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp similarity index 100% rename from include/aidge/operator/ConvImpl.hpp rename to include/aidge/backend/cpu/operator/ConvImpl.hpp diff --git a/include/aidge/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp similarity index 98% rename from include/aidge/operator/ConvImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp index 93cf523b..bc2f1009 100644 --- a/include/aidge/operator/ConvImpl_forward_kernels.hpp +++ 
b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp @@ -1,162 +1,162 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/ConvImpl.hpp" -#include "aidge/utils/Types.h" -#include <cmath> -#include <array> -#include <algorithm> - -namespace Aidge { -/** - * @brief Forward kernel for 2D Convolution on CPU backend. - * @tparam I Input data type. - * @tparam W Weight data type. - * @tparam B Bias data type. - * @tparam O Output data type. - * @param params tuple of Parameters from the Operator - * @param dims Array of input dimensions. - * @param input_ const input Tensor. - * @param weights_ const weight Tensor. - * @param biases_ const Biais Tensor. - * @param output_ Output Tensor. - */ -template <class I, class W, class B, class O> -void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters ¶ms, const std::array<DimSize_t, 4> &dims, - const void *input_, const void *weights_, const void *biases_, void *output_) { - // FIXME: missing convolution parameters as arguments - const I *input = static_cast<const I *>(input_); - const W *weights = static_cast<const W *>(weights_); - const B *biases = static_cast<const B *>(biases_); - O *output = static_cast<O *>(output_); -/* - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0])); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1])); - - // TODO: kernel computation - // output (Xout, Yout, outCh, batch) - // input (Xin, Yin, inCh, batch) - // weight (kernelX, kernelY, inCh, outCh) - // does not take Dilation parameter into account - for (std::size_t ox = 0; ox < oxSize; ++ox) { - for (std::size_t oy = 0; oy < oySize; ++oy) { - const std::size_t ix = ox * std::get<0>(params)[0]; - const std::size_t iy = oy * std::get<0>(params)[1]; - - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); - B biasVal = (biases != nullptr) ? 
biases[outCh] : B(0); - for (std::size_t batch = 0; batch < dims[3]; ++batch) { - output[oIndex + batch] = biasVal; - } - for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { - for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { - for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { - const std::size_t wIndex = - outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); - std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); - for (std::size_t batch = 0; batch < dims[3]; ++batch) { - output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; - } - } - } - } - } - } - } -*/ - - - // output H size - const std::size_t oxSize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / - static_cast<float>(std::get<0>(params)[0]))); - // output W size - const std::size_t oySize = - static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / - static_cast<float>(std::get<0>(params)[1]))); - - // TODO: kernel computation - // output (batch, outCh, Xout, Yout) - // input (batch, inCh, Xin, Yin) - // weight (outCh, inCh, kernelX, kernelY) - // does not take Dilation parameter into account - using signedsize = std::make_signed<std::size_t>::type; - for (std::size_t batch = 0; batch < dims[0]; ++batch) { - for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { - const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; - B biasVal = (biases != nullptr) ? biases[outCh] : B(0); - std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); - for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { - const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; - const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; - for (std::size_t ox = 0; ox < oxSize; ++ox) { - const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); - const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); - const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx); - for (std::size_t oy = 0; oy < oySize; ++oy) { - const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); - const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); - const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? 
std::get<4>(params)[1] : dims[3] + dify); - const std::size_t oIndexFull = oIndex + ox*oySize + oy; - const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; - const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; - - if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { - output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + - weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + - weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + - weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); - } else { - for (std::size_t sx = sxMin; sx < sxMax; ++sx) { - for (std::size_t sy = syMin; sy < syMax; ++sy) { - output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * - input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; - } - } - } - } - } - } - } - } -} - -namespace { -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>); -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>); -static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/utils/Types.h" +#include <cmath> +#include <array> +#include <algorithm> + +namespace Aidge { +/** + * @brief Forward kernel for 2D Convolution on CPU backend. + * @tparam I Input data type. + * @tparam W Weight data type. + * @tparam B Bias data type. + * @tparam O Output data type. + * @param params tuple of Parameters from the Operator + * @param dims Array of input dimensions. + * @param input_ const input Tensor. + * @param weights_ const weight Tensor. + * @param biases_ const Bias Tensor. + * @param output_ Output Tensor. + */ +template <class I, class W, class B, class O> +void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims, + const void *input_, const void *weights_, const void *biases_, void *output_) { + // FIXME: missing convolution parameters as arguments + const I *input = static_cast<const I *>(input_); + const W *weights = static_cast<const W *>(weights_); + const B *biases = static_cast<const B *>(biases_); + O *output = static_cast<O *>(output_); +/* + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0])); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1])); + + // TODO: kernel computation + // output (Xout, Yout, outCh, batch) + // input (Xin, Yin, inCh, batch) + // weight (kernelX, kernelY, inCh, outCh) + // does not take Dilation parameter into account + for (std::size_t ox = 0; ox < oxSize; ++ox) { + for (std::size_t oy = 0; oy < oySize; ++oy) { + const std::size_t ix = ox * std::get<0>(params)[0]; + const std::size_t iy = oy * std::get<0>(params)[1]; + + for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { + const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox)); + B biasVal = (biases != nullptr) ?
biases[outCh] : B(0); + for (std::size_t batch = 0; batch < dims[3]; ++batch) { + output[oIndex + batch] = biasVal; + } + for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { + for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) { + for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) { + const std::size_t wIndex = + outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx)); + std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx))); + for (std::size_t batch = 0; batch < dims[3]; ++batch) { + output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; + } + } + } + } + } + } + } +*/ + + + // output H size + const std::size_t oxSize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] + std::get<5>(params)[0] + std::get<5>(params)[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) / + static_cast<float>(std::get<0>(params)[0]))); + // output W size + const std::size_t oySize = + static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] + std::get<5>(params)[1] + std::get<5>(params)[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) / + static_cast<float>(std::get<0>(params)[1]))); + + // TODO: kernel computation + // output (batch, outCh, Xout, Yout) + // input (batch, inCh, Xin, Yin) + // weight (outCh, inCh, kernelX, kernelY) + // does not take Dilation parameter into account + using signedsize = std::make_signed<std::size_t>::type; + for (std::size_t batch = 0; batch < dims[0]; ++batch) { + for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) { + const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize; + B biasVal = (biases != nullptr) ? biases[outCh] : B(0); + std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal); + for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) { + const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3]; + const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1]; + for (std::size_t ox = 0; ox < oxSize; ++ox) { + const signedsize difx = static_cast<signedsize>(std::get<5>(params)[0] - ox * std::get<0>(params)[0]); + const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0))); + const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(params)[0] ? std::get<4>(params)[0] : dims[2] + difx); + for (std::size_t oy = 0; oy < oySize; ++oy) { + const signedsize dify = static_cast<signedsize>(std::get<5>(params)[1] - oy * std::get<0>(params)[1]); + const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0))); + const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(params)[1] ? 
std::get<4>(params)[1] : dims[3] + dify); + const std::size_t oIndexFull = oIndex + ox*oySize + oy; + const signedsize ix = static_cast<signedsize>(ox * std::get<0>(params)[0]) - std::get<5>(params)[0]; + const signedsize iy = static_cast<signedsize>(oy * std::get<0>(params)[1]) - std::get<5>(params)[1]; + + if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) { + output[oIndexFull] += (weights[wIndex + 0*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 0*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 0*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 1*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 1*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 1*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] + + weights[wIndex + 2*std::get<4>(params)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] + + weights[wIndex + 2*std::get<4>(params)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] + + weights[wIndex + 2*std::get<4>(params)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]); + } else { + for (std::size_t sx = sxMin; sx < sxMax; ++sx) { + for (std::size_t sy = syMin; sy < syMax; ++sy) { + output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] * + input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))]; + } + } + } + } + } + } + } + } +} + +namespace { +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>); +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>); +static Registrar<ConvImpl2DForward_cpu> registrarConvImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp similarity index 100% rename from include/aidge/operator/FCImpl.hpp rename to include/aidge/backend/cpu/operator/FCImpl.hpp diff --git a/include/aidge/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp similarity index 97% rename from include/aidge/operator/FCImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp index 7bd1f366..d6acb7df 100644 --- a/include/aidge/operator/FCImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp @@ -1,128 +1,128 @@ 
-/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" -#include <algorithm> - -#include "aidge/operator/FCImpl.hpp" - -namespace Aidge { -// template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments -// const I* input = static_cast<const I*>(input_); -// const W* weights = static_cast<const W*>(weights_); -// const B* biases = static_cast<const B*>(biases_); -// O* output = static_cast<O*>(output_); - -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { -// std::size_t oIndex = outIdx * dims[3]; -// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; -// for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] = bias; -// } -// } - -// for (std::size_t ix = 0; ix < dims[0]; ++ix) { -// for (std::size_t iy = 0; iy < dims[1]; ++iy) { -// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { -// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// const std::size_t oIndex = dims[3] * outCh; -// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + -// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; -// for (std::size_t batch = 0; batch < dims[3]; ++batch) { -// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; -// } -// } -// } -// } -// } -// } - -// template <class I, class W, class B, class O> -// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, -// const void* input_, const void* weights_, const void* biases_, void* output_) { -// // FIXME: missing FC parameters as arguments -// const I* input = static_cast<const I*>(input_); -// const W* weights = static_cast<const W*>(weights_); -// const B* biases = static_cast<const B*>(biases_); -// O* output = static_cast<O*>(output_); - -// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] - -// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { -// std::size_t oIndex = outIdx * dims[0]; -// const B bias = std::get<1>(params) ? 
B(0) : biases[outIdx]; -// for (std::size_t batch = 0; batch < dims[0]; ++batch) { -// output[oIndex + batch] = bias; -// } -// } - -// for (std::size_t batch = 0; batch < dims[0]; ++batch) { -// const std::size_t oIndex = dims[1] * batch; -// for (std::size_t i = 0; i < dims[1]; ++i) { -// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { -// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; -// output[oIndex + outCh] += weights[wIndex] * input[i + batch]; -// } -// } -// } -// } - -template <class I, class W, class B, class O> -void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, - const void* input_, const void* weights_, const void* biases_, void* output_) { - // FIXME: missing FC parameters as arguments - const I* input = static_cast<const I*>(input_); - const W* weights = static_cast<const W*>(weights_); - const B* biases = static_cast<const B*>(biases_); - O* output = static_cast<O*>(output_); - - if (std::get<1>(params)) { - std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); - } - else { - for (std::size_t batch = 0; batch < batchSize; ++batch) { - std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); - } - } - - for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t out = 0; out < std::get<0>(params); ++out) { - output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, - input + (batch + 1)*oneInputSize, - weights + out*oneInputSize, - output[out + batch*std::get<0>(params)]); - } - } -} - - -namespace { -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>); -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>); -static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>); -} // namespace - -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <algorithm> + +#include "aidge/backend/cpu/operator/FCImpl.hpp" + +namespace Aidge { +// template <class I, class W, class B, class O> +// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims, +// const void* input_, const void* weights_, const void* biases_, void* output_) { +// // FIXME: missing FC parameters as arguments +// const I* input = static_cast<const I*>(input_); +// const W* weights = static_cast<const W*>(weights_); +// const B* biases = static_cast<const B*>(biases_); +// O* output = static_cast<O*>(output_); + +// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// std::size_t oIndex = outIdx * dims[3]; +// const B bias = std::get<1>(params) ? B(0) : biases[outIdx]; +// for (std::size_t batch = 0; batch < dims[3]; ++batch) { +// output[oIndex + batch] = bias; +// } +// } + +// for (std::size_t ix = 0; ix < dims[0]; ++ix) { +// for (std::size_t iy = 0; iy < dims[1]; ++iy) { +// for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) { +// const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix)); +// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// const std::size_t oIndex = dims[3] * outCh; +// const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) + +// outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// for (std::size_t batch = 0; batch < dims[3]; ++batch) { +// output[oIndex + batch] += weights[wIndex] * input[iIndex + batch]; +// } +// } +// } +// } +// } +// } + +// template <class I, class W, class B, class O> +// void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims, +// const void* input_, const void* weights_, const void* biases_, void* output_) { +// // FIXME: missing FC parameters as arguments +// const I* input = static_cast<const I*>(input_); +// const W* weights = static_cast<const W*>(weights_); +// const B* biases = static_cast<const B*>(biases_); +// O* output = static_cast<O*>(output_); + +// // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N] + +// for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) { +// std::size_t oIndex = outIdx * dims[0]; +// const B bias = std::get<1>(params) ? 
B(0) : biases[outIdx]; +// for (std::size_t batch = 0; batch < dims[0]; ++batch) { +// output[oIndex + batch] = bias; +// } +// } + +// for (std::size_t batch = 0; batch < dims[0]; ++batch) { +// const std::size_t oIndex = dims[1] * batch; +// for (std::size_t i = 0; i < dims[1]; ++i) { +// for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) { +// std::size_t wIndex = i * std::get<0>(params) + outCh; // (iIndex*std::get<0>(params) + oIndex)/dims[3]; +// output[oIndex + outCh] += weights[wIndex] * input[i + batch]; +// } +// } +// } +// } + +template <class I, class W, class B, class O> +void FCImpl_cpu_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize, + const void* input_, const void* weights_, const void* biases_, void* output_) { + // FIXME: missing FC parameters as arguments + const I* input = static_cast<const I*>(input_); + const W* weights = static_cast<const W*>(weights_); + const B* biases = static_cast<const B*>(biases_); + O* output = static_cast<O*>(output_); + + if (std::get<1>(params)) { + std::fill(output, output+(batchSize*std::get<0>(params)), B(0)); + } + else { + for (std::size_t batch = 0; batch < batchSize; ++batch) { + std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params))); + } + } + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t out = 0; out < std::get<0>(params); ++out) { + output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize, + input + (batch + 1)*oneInputSize, + weights + out*oneInputSize, + output[out + batch*std::get<0>(params)]); + } + } +} + + +namespace { +static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::FCImpl_cpu_forward_kernel<float, float, float, float>); +static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::FCImpl_cpu_forward_kernel<int, int, int, int>); +static Registrar<FCImplForward_cpu> registrarFCImpl2DForward_cpu_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::FCImpl_cpu_forward_kernel<double, double, double, double>); +} // namespace + +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_FCIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp similarity index 100% rename from include/aidge/operator/LeakyReLUImpl.hpp rename to include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp diff --git a/include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp similarity index 95% rename from include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp index 43e43a06..ff9a8ac6 100644 --- a/include/aidge/operator/LeakyReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp @@ -1,45 +1,45 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/LeakyReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, - std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - I negativeSlope = static_cast<I>(std::get<0>(params)); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope; - } -} - -namespace { -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>); -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>); -static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" + +namespace Aidge { +template <class I, class O> +void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Parameters& params, + std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + I negativeSlope = static_cast<I>(std::get<0>(params)); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = input[i] >= 0 ? 
input[i] : input[i] * negativeSlope; + } +} + +namespace { +static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<float, float>); +static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::LeakyReLUImpl_cpu_forward_kernel<int, int>); +static Registrar<LeakyReLUImplForward_cpu> registrarLeakyReLUImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::LeakyReLUImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/ProducerImpl.hpp b/include/aidge/backend/cpu/operator/ProducerImpl.hpp similarity index 100% rename from include/aidge/operator/ProducerImpl.hpp rename to include/aidge/backend/cpu/operator/ProducerImpl.hpp diff --git a/include/aidge/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp similarity index 100% rename from include/aidge/operator/ReLUImpl.hpp rename to include/aidge/backend/cpu/operator/ReLUImpl.hpp diff --git a/include/aidge/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp similarity index 95% rename from include/aidge/operator/ReLUImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp index 47550cf3..955099a6 100644 --- a/include/aidge/operator/ReLUImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp @@ -1,43 +1,43 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. - * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" - -#include "aidge/operator/ReLUImpl.hpp" - -namespace Aidge { -template <class I, class O> -void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t i = 0; i < inputLenght; ++i) { - output[i] = input[i] > 0 ? input[i] : 0; - } -} - -namespace { -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>); -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>); -static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" + +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" + +namespace Aidge { +template <class I, class O> +void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t i = 0; i < inputLenght; ++i) { + output[i] = input[i] > 0 ? input[i] : 0; + } +} + +namespace { +static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_cpu_forward_kernel<float, float>); +static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_cpu_forward_kernel<int, int>); +static Registrar<ReLUImplForward_cpu> registrarReLUImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_FORWARD_KERNEL_H_ */ diff --git a/include/aidge/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp similarity index 100% rename from include/aidge/operator/SoftmaxImpl.hpp rename to include/aidge/backend/cpu/operator/SoftmaxImpl.hpp diff --git a/include/aidge/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp similarity index 95% rename from include/aidge/operator/SoftmaxImpl_forward_kernels.hpp rename to include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp index c10c1e08..297a3a32 100644 --- a/include/aidge/operator/SoftmaxImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp @@ -1,64 +1,64 @@ -/******************************************************************************** - * Copyright (c) 2023 CEA-List - * - * This program and the accompanying materials are made available under the - * terms of the Eclipse Public License 2.0 which is available at - * http://www.eclipse.org/legal/epl-2.0. 
- * - * SPDX-License-Identifier: EPL-2.0 - * - ********************************************************************************/ - -#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ -#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ - -#include "aidge/utils/Registrar.hpp" -#include <cstddef> -#include <cmath> -#include "aidge/data/Data.hpp" -#include "aidge/utils/Types.h" - -#include "aidge/operator/SoftmaxImpl.hpp" - -namespace Aidge { -template <class I, class O> -void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize, - const DimSize_t channelSize, - const DimSize_t featureSize, - const void* input_, - void* output_) { - - const I* input = static_cast<const I*>(input_); - O* output = static_cast<O*>(output_); - - for (std::size_t batch = 0; batch < batchSize; ++batch) { - for (std::size_t feature = 0; feature < featureSize; ++feature) { - std::size_t ioIndex = batch*channelSize*featureSize + feature; - - I sum(0.0); - for (std::size_t ch = 0; ch < channelSize; ++ch) { - output[ioIndex] = std::exp(input[ioIndex]); - sum += output[ioIndex]; - ioIndex+=featureSize; - } - - ioIndex = batch*channelSize*featureSize + feature; - for (std::size_t ch = 0; ch < channelSize; ++ch) { - output[ioIndex] /= sum; - ioIndex += featureSize; - } - } - } -} - -namespace { -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>); -static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64( - {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>); -} // namespace -} // namespace Aidge - -#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. 
+ * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ + +#include "aidge/utils/Registrar.hpp" +#include <cstddef> +#include <cmath> +#include "aidge/data/Data.hpp" +#include "aidge/utils/Types.h" + +#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" + +namespace Aidge { +template <class I, class O> +void SoftmaxImpl_cpu_forward_kernel(const DimSize_t batchSize, + const DimSize_t channelSize, + const DimSize_t featureSize, + const void* input_, + void* output_) { + + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + for (std::size_t batch = 0; batch < batchSize; ++batch) { + for (std::size_t feature = 0; feature < featureSize; ++feature) { + std::size_t ioIndex = batch*channelSize*featureSize + feature; + + I sum(0.0); + for (std::size_t ch = 0; ch < channelSize; ++ch) { + output[ioIndex] = std::exp(input[ioIndex]); + sum += output[ioIndex]; + ioIndex+=featureSize; + } + + ioIndex = batch*channelSize*featureSize + feature; + for (std::size_t ch = 0; ch < channelSize; ++ch) { + output[ioIndex] /= sum; + ioIndex += featureSize; + } + } + } +} + +namespace { +static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>); +static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>); +static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */ diff --git a/python_binding/pybind_cpu.cpp b/python_binding/pybind_cpu.cpp index afe12515..4a325bf5 100644 --- a/python_binding/pybind_cpu.cpp +++ b/python_binding/pybind_cpu.cpp @@ -1,6 +1,6 @@ #include <pybind11/pybind11.h> // Need to call this header to register every impl -#include "aidge/aidge_backend_cpu.hpp" +#include "aidge/backend/cpu.hpp" namespace py = pybind11; diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index ece1967a..63e2bb47 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -16,11 +16,11 @@ #include <vector> #include "aidge/operator/Conv.hpp" - -#include "aidge/operator/AddImpl.hpp" -#include "aidge/operator/AddImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" +#include "aidge/backend/cpu/operator/AddImpl.hpp" +#include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" + ////////////////////////////////// // AddImpl_cpu<1> ////////////////////////////////// diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index eebaa5dd..d8e7e591 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -9,16 +9,16 @@ * ********************************************************************************/ -#include "aidge/operator/AvgPoolingImpl.hpp" - #include <cassert> #include <numeric> #include <thread> #include <vector> -#include "aidge/operator/AvgPoolingImpl_forward_kernels.hpp" -#include "aidge/operator/AvgPooling.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/AvgPooling.hpp" + +#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" +#include 
"aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index c8783b36..dc63396e 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -9,15 +9,15 @@ * ********************************************************************************/ -#include "aidge/operator/BatchNormImpl.hpp" - #include <cassert> #include <numeric> // std::accumulate #include <vector> -#include "aidge/operator/BatchNormImpl_forward_kernels.hpp" -#include "aidge/operator/BatchNorm.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/BatchNorm.hpp" + +#include "aidge/backend/cpu/operator/BatchNormImpl.hpp" +#include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index 75a35cff..9d73662d 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -9,17 +9,17 @@ * ********************************************************************************/ -#include "aidge/operator/ConvDepthWiseImpl.hpp" - #include <cassert> #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/ConvDepthWiseImpl_forward_kernels.hpp" -#include "aidge/operator/ConvDepthWise.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/ConvDepthWise.hpp" + +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" +#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index e75fab10..b5724790 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -9,17 +9,17 @@ * ********************************************************************************/ -#include "aidge/operator/ConvImpl.hpp" - #include <cassert> #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/ConvImpl_forward_kernels.hpp" -#include "aidge/operator/Conv.hpp" #include "aidge/utils/Types.h" +#include "aidge/operator/Conv.hpp" + +#include "aidge/backend/cpu/operator/ConvImpl.hpp" +#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp" Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index 970ce690..25c4955a 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -16,10 +16,11 @@ #include <vector> #include "aidge/operator/FC.hpp" -#include "aidge/operator/FCImpl.hpp" -#include "aidge/operator/FCImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" +#include "aidge/backend/cpu/operator/FCImpl.hpp" +#include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp" + Aidge::NbElts_t Aidge::FCImpl_cpu::getNbRequiredData(const 
Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index 1e86de4a..85234e5d 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -13,14 +13,13 @@ #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for +#include <vector> #include "aidge/operator/LeakyReLU.hpp" - -#include "aidge/operator/LeakyReLUImpl.hpp" -#include "aidge/operator/LeakyReLUImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" -#include <numeric> -#include <vector> + +#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" +#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" // FIXME: replace whole Tensor with minimum needed data quantity Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { diff --git a/src/operator/ProducerImpl.cpp b/src/operator/ProducerImpl.cpp index 6c1de523..0135b7a0 100644 --- a/src/operator/ProducerImpl.cpp +++ b/src/operator/ProducerImpl.cpp @@ -17,7 +17,7 @@ #include "aidge/operator/Producer.hpp" #include "aidge/utils/Types.h" -#include "aidge/operator/ProducerImpl.hpp" +#include "aidge/backend/cpu/operator/ProducerImpl.hpp" std::size_t Aidge::ProducerImpl_cpu::getNbRequiredData( diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 61c11937..1fe231d4 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -13,14 +13,13 @@ #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for +#include <vector> #include "aidge/operator/ReLU.hpp" - -#include "aidge/operator/ReLUImpl.hpp" -#include "aidge/operator/ReLUImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" -#include <numeric> -#include <vector> + +#include "aidge/backend/cpu/operator/ReLUImpl.hpp" +#include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp" // FIXME: replace whole Tensor with minimum needed data quantity Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 50673042..f564534e 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -13,14 +13,13 @@ #include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate #include <thread> // std::this_thread::sleep_for +#include <vector> #include "aidge/operator/Softmax.hpp" - -#include "aidge/operator/SoftmaxImpl.hpp" -#include "aidge/operator/SoftmaxImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" -#include <numeric> -#include <vector> + +#include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" +#include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp" // FIXME: replace whole Tensor with minimum needed data quantity Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { diff --git a/unit_tests/Test_Scheduler.cpp b/unit_tests/Test_Scheduler.cpp index 055f4efe..78ab8d5b 100644 --- a/unit_tests/Test_Scheduler.cpp +++ b/unit_tests/Test_Scheduler.cpp @@ -18,7 +18,8 @@ #include "aidge/graph/GraphView.hpp" #include "aidge/graph/OpArgs.hpp" #include "aidge/scheduler/Scheduler.hpp" -#include "aidge/aidge_backend_cpu.hpp" + +#include "aidge/backend/cpu.hpp" using namespace Aidge; diff --git a/unit_tests/Test_TensorImpl.cpp b/unit_tests/Test_TensorImpl.cpp index 
ca9e7df5..d28505f7 100644 --- a/unit_tests/Test_TensorImpl.cpp +++ b/unit_tests/Test_TensorImpl.cpp @@ -14,7 +14,7 @@ #include <catch2/catch_test_macros.hpp> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" +#include "aidge/backend/cpu/data/TensorImpl.hpp" using namespace Aidge; diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp index 3443ac6b..e24d7ac6 100644 --- a/unit_tests/operator/Test_AddImpl.cpp +++ b/unit_tests/operator/Test_AddImpl.cpp @@ -12,10 +12,10 @@ #include <catch2/catch_test_macros.hpp> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/Add.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/operator] Add(forward)") { diff --git a/unit_tests/operator/Test_AvgPoolingImpl.cpp b/unit_tests/operator/Test_AvgPoolingImpl.cpp index 178f1ba2..10d4c09b 100644 --- a/unit_tests/operator/Test_AvgPoolingImpl.cpp +++ b/unit_tests/operator/Test_AvgPoolingImpl.cpp @@ -14,10 +14,10 @@ #include <cstdlib> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/AvgPooling.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/operator] AvgPooling(forward)") { diff --git a/unit_tests/operator/Test_BatchNormImpl.cpp b/unit_tests/operator/Test_BatchNormImpl.cpp index 9436ceb3..e6107a02 100644 --- a/unit_tests/operator/Test_BatchNormImpl.cpp +++ b/unit_tests/operator/Test_BatchNormImpl.cpp @@ -13,10 +13,10 @@ #include <memory> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/BatchNorm.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/operator] BatchNorm(forward)") { diff --git a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp index 48a6cc88..0d0ed4b9 100644 --- a/unit_tests/operator/Test_ConvDepthWiseImpl.cpp +++ b/unit_tests/operator/Test_ConvDepthWiseImpl.cpp @@ -13,10 +13,10 @@ #include <memory> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/ConvDepthWise.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/operator] ConvDepthWise(forward)") { diff --git a/unit_tests/operator/Test_ConvImpl.cpp b/unit_tests/operator/Test_ConvImpl.cpp index 2c314af4..23ff1aae 100644 --- a/unit_tests/operator/Test_ConvImpl.cpp +++ b/unit_tests/operator/Test_ConvImpl.cpp @@ -14,10 +14,10 @@ #include <memory> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/Conv.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/operator] Conv(forward)") { diff --git a/unit_tests/operator/Test_FCImpl.cpp b/unit_tests/operator/Test_FCImpl.cpp index be672ecc..e3494e20 100644 --- a/unit_tests/operator/Test_FCImpl.cpp +++ b/unit_tests/operator/Test_FCImpl.cpp @@ -12,11 +12,11 @@ #include <catch2/catch_test_macros.hpp> #include <memory> -#include "aidge/aidge_backend_cpu.hpp" -#include "aidge/data/TensorImpl.hpp" #include "aidge/data/Tensor.hpp" #include "aidge/operator/FC.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/oeprator] FC(forward)") { diff --git a/unit_tests/operator/Test_LeakyReLUImpl.cpp 
b/unit_tests/operator/Test_LeakyReLUImpl.cpp index b6686d89..7096962e 100644 --- a/unit_tests/operator/Test_LeakyReLUImpl.cpp +++ b/unit_tests/operator/Test_LeakyReLUImpl.cpp @@ -12,10 +12,10 @@ #include <catch2/catch_test_macros.hpp> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/LeakyReLU.hpp" +#include "aidge/backend/cpu.hpp" + using namespace Aidge; TEST_CASE("[cpu/operator] LeakyReLU(forward)") { diff --git a/unit_tests/operator/Test_ReLUImpl.cpp b/unit_tests/operator/Test_ReLUImpl.cpp index 8d3a2b91..9752a491 100644 --- a/unit_tests/operator/Test_ReLUImpl.cpp +++ b/unit_tests/operator/Test_ReLUImpl.cpp @@ -12,10 +12,10 @@ #include <catch2/catch_test_macros.hpp> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/ReLU.hpp" +#include "aidge/backend/cpu.hpp" + #include <memory> diff --git a/unit_tests/operator/Test_SoftmaxImpl.cpp b/unit_tests/operator/Test_SoftmaxImpl.cpp index 10406212..bad34102 100644 --- a/unit_tests/operator/Test_SoftmaxImpl.cpp +++ b/unit_tests/operator/Test_SoftmaxImpl.cpp @@ -12,10 +12,10 @@ #include <catch2/catch_test_macros.hpp> #include "aidge/data/Tensor.hpp" -#include "aidge/data/TensorImpl.hpp" -#include "aidge/aidge_backend_cpu.hpp" #include "aidge/operator/Softmax.hpp" +#include "aidge/backend/cpu.hpp" + #include <memory> using namespace Aidge; -- GitLab
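
Editor's note (not part of the patch above): for downstream code, the visible effect of this commit is the include-path change. Per-operator CPU headers now resolve under aidge/backend/cpu/operator/, and the umbrella header that registers every CPU implementation is aidge/backend/cpu.hpp instead of aidge/aidge_backend_cpu.hpp, as the pybind_cpu.cpp and unit-test hunks show. The sketch below is illustrative only: the file name and test data are invented, and it assumes an Aidge build with these headers on the include path. It calls the CPU ReLU forward kernel with the signature shown verbatim in ReLUImpl_forward_kernels.hpp above.

    // example_relu_usage.cpp -- illustrative sketch, not part of this commit.
    // Demonstrates the post-patch include location of a per-operator kernel
    // header and a direct call to the templated forward kernel declared there.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    #include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp"  // was aidge/operator/ReLUImpl_forward_kernels.hpp

    int main() {
        std::vector<float> input  = {-1.0f, 0.5f, -0.25f, 2.0f};
        std::vector<float> output(input.size(), 0.0f);

        // Signature from the header above: (inputLenght, const void* input_, void* output_).
        Aidge::ReLUImpl_cpu_forward_kernel<float, float>(input.size(), input.data(), output.data());

        for (float v : output) { std::cout << v << ' '; }  // prints: 0 0.5 0 2
        std::cout << '\n';
        return 0;
    }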
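
Editor's note (not part of the patch above): the anonymous-namespace Registrar entries that close each *_forward_kernels.hpp header are the per-type extension point, and this commit only changes where those headers live. A minimal sketch of adding one more registration follows, assuming the key is simply the {input DataType, output DataType} pair used by the existing ReLU entries; the mixed float-in/double-out combination and the file name are hypothetical and only illustrate the pattern.

    // extra_relu_registration.cpp -- hypothetical, illustrative only.
    #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
    #include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp"
    #include "aidge/utils/Registrar.hpp"

    namespace {
    // Mirrors the Float32/Int32/Float64 registrations shown in the header above,
    // but instantiates the kernel template with a different <I, O> pair.
    static Aidge::Registrar<Aidge::ReLUImplForward_cpu> registrarReLUImplForward_cpu_F32toF64(
        {Aidge::DataType::Float32, Aidge::DataType::Float64},
        Aidge::ReLUImpl_cpu_forward_kernel<float, double>);
    } // namespace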