diff --git a/aidge_backend_cpu/unit_tests/test_recipes.py b/aidge_backend_cpu/unit_tests/test_recipes.py
index 12d8774369af5a46cfbd30d44fc90f4f97ca9821..7c11b92b93eaf04eb83518992c46bf4dec40dfca 100644
--- a/aidge_backend_cpu/unit_tests/test_recipes.py
+++ b/aidge_backend_cpu/unit_tests/test_recipes.py
@@ -36,7 +36,7 @@ class test_recipes(unittest.TestCase):
         graph_view = aidge_core.sequential([input_node, conv, bn])
 
         # Add random values to conv and BatchNorm parameters
-        graph_view.set_datatype(aidge_core.DataType.Float32)
+        graph_view.set_datatype(aidge_core.dtype.float32)
         graph_view.set_backend("cpu")
 
         np_weights = np.arange(9).reshape([1, 1, 3, 3]).astype(np.float32)
diff --git a/aidge_backend_cpu/unit_tests/test_scheduler.py b/aidge_backend_cpu/unit_tests/test_scheduler.py
index 0c41d59963c7633151745f2efe1f1fac3ee07815..0aeeb04b74a078f77c57500b959d6ef9fa9af4d0 100644
--- a/aidge_backend_cpu/unit_tests/test_scheduler.py
+++ b/aidge_backend_cpu/unit_tests/test_scheduler.py
@@ -24,7 +24,7 @@ class test_scheduler(unittest.TestCase):
 
         input_node.add_child(relu)
 
-        gv.set_datatype(aidge_core.DataType.Int32)
+        gv.set_datatype(aidge_core.dtype.int32)
         gv.set_backend("cpu")
 
         scheduler = aidge_core.SequentialScheduler(gv)
@@ -48,7 +48,7 @@ class test_scheduler(unittest.TestCase):
         ])
         EXPECTED_SCHEDULE = ['0', '1', '2']
 
-        graph_view.set_datatype(aidge_core.DataType.Float32)
+        graph_view.set_datatype(aidge_core.dtype.float32)
         graph_view.set_backend("cpu")
 
         graph_view.forward_dims()
@@ -74,7 +74,7 @@ class test_scheduler(unittest.TestCase):
 
         EXPECTED_SCHEDULE = [['0', '1', '3', '2'],  ['0', '3', '1', '2']] # Both scheduling are valid !
 
-        graph_view.set_datatype(aidge_core.DataType.Float32)
+        graph_view.set_datatype(aidge_core.dtype.float32)
         graph_view.set_backend("cpu")
 
         graph_view.forward_dims()
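
The hunks above (in test_recipes.py and test_scheduler.py) track the rename of the Python datatype enum from CamelCase `aidge_core.DataType.Float32` to lowercase `aidge_core.dtype.float32`. A minimal sketch of the updated test pattern, assuming an aidge_core build that ships the new `dtype` bindings (the `ReLU` factory call is illustrative only):

    import aidge_core

    relu = aidge_core.ReLU("relu")               # illustrative node factory
    gv = aidge_core.sequential([relu])
    gv.set_datatype(aidge_core.dtype.float32)    # was: aidge_core.DataType.Float32
    gv.set_backend("cpu")
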
diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index a1417de1517a8212b4b4308e5128a5ee3fce1e39..11f9c264098d5a238d0d1f8e6bc4fac0cc099549 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -32,6 +32,7 @@
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
+#include "aidge/backend/cpu/operator/SliceImpl.hpp"
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
index 478a0226f43ccbc64d567a56ab89a558179438c5..94b22dcc7fc8251f8ca907ab0b060b0275309c9d 100644
--- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp
@@ -14,6 +14,8 @@
 
 #include "aidge/utils/Registrar.hpp"
 
+#include <cstdint>     // std::int32_t, std::int64_t
+
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/AddImpl.hpp"
 
@@ -42,10 +44,12 @@ void AddImpl_cpu_forward_kernel(const std::vector<const void*> inputs_, const st
 namespace {
 static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float32(
         {DataType::Float32, DataType::Float32}, Aidge::AddImpl_cpu_forward_kernel<float, float>);
-static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<int, int>);
 static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float64(
         {DataType::Float64, DataType::Float64}, Aidge::AddImpl_cpu_forward_kernel<double, double>);
+static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<std::int32_t, std::int32_t>);
+static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int64(
+        {DataType::Int64, DataType::Int64}, Aidge::AddImpl_cpu_forward_kernel<std::int64_t, std::int64_t>);
 }  // namespace
 }  // namespace Aidge
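
The registrar entries are keyed on the tuple of input datatypes, and the kernels are now instantiated with fixed-width `std::int32_t`/`std::int64_t` instead of the platform-dependent `int`, with an `Int64` pairing added. A toy Python analog of this dispatch (not the actual Aidge API), showing how a missing pairing surfaces:

    import numpy as np

    # Kernels keyed by the tuple of input dtypes, mirroring the Registrar above.
    _add_kernels = {
        ("float32", "float32"): lambda a, b: a + b,
        ("float64", "float64"): lambda a, b: a + b,
        ("int32", "int32"): lambda a, b: a + b,
        ("int64", "int64"): lambda a, b: a + b,   # the newly registered pairing
    }

    def add_forward(a, b):
        # A lookup miss (e.g. mixed int32/int64 inputs) raises KeyError, just
        # as the registry has no kernel for an unregistered datatype tuple.
        return _add_kernels[(a.dtype.name, b.dtype.name)](a, b)

    a = np.ones(4, dtype=np.int64)
    assert add_forward(a, a).dtype == np.int64
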
 
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
index ce126dc2b870d6ac767c15bc6fbca2deb07e8772..12a5dc334619c16e6ad3a77f0cd76f4db7a87b77 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl.hpp
@@ -29,12 +29,20 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class AvgPoolingImpl2DForward_cpu
     : public Registrable<AvgPoolingImpl2DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 4>&,
+                            const void *,
+                            void *)> {};
 class AvgPoolingImpl2DBackward_cpu
     : public Registrable<AvgPoolingImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const AvgPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 4>&,
+                            const void *,
+                            void *)> {};
 
 class AvgPoolingImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
index d6950e11e935a3f6d5548148d1c393a5340af224..c7d9f86235c3bf1d7d01cf429cab29d156592fb5 100644
--- a/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp
@@ -12,16 +12,16 @@
 #ifndef AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_AVGPOOLINGIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
-#include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/data/Data.hpp"
 #include <array>
 #include <tuple>
 #include <cmath>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
 namespace Aidge {
 /**
  * @brief Forward kernel for 2D AvgPoolingolution on CPU backend.
@@ -33,10 +33,11 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
-                                             const std::array<DimSize_t, 4> &dims,
-                                             const void *input_,
-                                             void *output_) {
+void AvgPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                                        const std::array<DimSize_t, 2>& kernelDims,
+                                        const std::array<DimSize_t, 4> &dims,
+                                        const void *input_,
+                                        void *output_) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
@@ -44,12 +45,12 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<1>(attrs)[0] + std::get<0>(attrs)[0]) /
-                                static_cast<float>(std::get<0>(attrs)[0])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<1>(attrs)[1] + std::get<0>(attrs)[1]) /
-                                static_cast<float>(std::get<0>(attrs)[1])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -63,16 +64,16 @@ void AvgPoolingImpl2D_cpu_forward_kernel(const AvgPooling_Op<2>::Attrs &attrs,
             const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), 0);
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
+                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
                 const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<1>(attrs)[0] ? std::get<1>(attrs)[0] : dims[2] + difx);
+                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > kernelDims[0] ? kernelDims[0] : dims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
+                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
                     const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<1>(attrs)[1] ? std::get<1>(attrs)[1] : dims[3] + dify);
+                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > kernelDims[1] ? kernelDims[1] : dims[3] + dify);
                     const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const std::size_t ix = ox * std::get<0>(attrs)[0];
-                    const std::size_t iy = oy * std::get<0>(attrs)[1];
+                    const std::size_t ix = ox * strideDims[0];
+                    const std::size_t iy = oy * strideDims[1];
 
                     if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
                         output[oIndexFull] += static_cast<O>(
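
With the `Attrs` tuple replaced by explicit `strideDims`/`kernelDims` arguments, the output-size arithmetic reads directly: floor((in - kernel + stride) / stride), i.e. a valid (unpadded) sliding window. A small sketch checking that formula, assuming nothing beyond NumPy:

    import numpy as np

    def pool_out_size(in_size: int, kernel: int, stride: int) -> int:
        # Same arithmetic as oxSize/oySize in the kernel above.
        return int(np.floor((in_size - kernel + stride) / stride))

    assert pool_out_size(32, kernel=3, stride=2) == 15   # (32 - 3)//2 + 1
    assert pool_out_size(5, kernel=3, stride=1) == 3
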
diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
index 8bd567dab3d564ccdeffdc581585e404fc4697a4..93bdab2d3f37e3bd8dc1e68ab68a05de8c8015ed 100644
--- a/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl.hpp
@@ -30,26 +30,28 @@ namespace Aidge {
 class BatchNormImpl2DForward_cpu
     : public Registrable<BatchNormImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType>,
-                         void(const BatchNorm_Op<2>::Attrs &,
-                              const std::array<DimSize_t, 4> &,
-                              const void *,
-                              const void *,
-                              const void *,
-                              void *,
-                              void *,
-                              void *,
-                              const bool)> {};
+                         void(float,
+                            float,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *,
+                            void *,
+                            void *,
+                            const bool)> {};
 class BatchNormImpl2DBackward_cpu
     : public Registrable<BatchNormImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType>,
-                         void(const BatchNorm_Op<2>::Attrs &,
-                              const std::array<DimSize_t, 4> &,
-                              const void *,
-                              const void *,
-                              const void *,
-                              void *,
-                              void *,
-                              void *)> {};
+                         void(float,
+                            float,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *,
+                            void *,
+                            void *)> {};
 
 class BatchNormImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
index cfde6ebe7cab8cfe2f793723983c8552bd9747b8..19f232a783bccf0a800d41f2bc566ccf6e04f05e 100644
--- a/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp
@@ -38,7 +38,7 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class P, class O>
-void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
+void BatchNormImpl2D_cpu_forward_kernel(float epsilon, float momentum, const std::array<DimSize_t, 4> &dims,
                                        const void *input_, const void *scale_, const void *shift_, void *batchMean_, void *batchVar_, void *output_, const bool freeze) {
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
@@ -53,12 +53,12 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
     const DimSize_t featureMapSize = dims[2]*dims[3];
 
 
-    if ((freeze == true) || (std::get<1>(attrs) == 0.0f)) {
+    if ((freeze == true) || (momentum == 0.0f)) {
         for (std::size_t batch = 0; batch < nbBatch; ++batch) {
             for (std::size_t ch = 0; ch < nbChannels; ++ch) {
                 const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
                 std::fill(output + ioIndex, output + ioIndex + featureMapSize, shift[ch]);
-                const P var = std::sqrt(batchVar[ch] + static_cast<P>(std::get<0>(attrs)));
+                const P var = std::sqrt(batchVar[ch] + static_cast<P>(epsilon));
 
                 for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
                     output[ioIndex + feature] += scale[ch] * (input[ioIndex + feature]-batchMean[ch]) / var;
@@ -82,10 +82,10 @@ void BatchNormImpl2D_cpu_forward_kernel(const BatchNorm_Op<2>::Attrs &attrs, con
             const I inputMean = sum / static_cast<I>(nbDataPerChannel);
             const I inputVar = sumSquare / static_cast<I>(nbDataPerChannel)  - inputMean*inputMean;
 
-            batchMean[ch] = batchMean[ch]*(1-std::get<1>(attrs)) + inputMean*std::get<1>(attrs);
-            batchVar[ch] = batchVar[ch]*(1-std::get<1>(attrs)) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*std::get<1>(attrs);
+            batchMean[ch] = batchMean[ch]*(1-momentum) + inputMean*momentum;
+            batchVar[ch] = batchVar[ch]*(1-momentum) + inputVar*(static_cast<I>(nbDataPerChannel)/static_cast<I>(nbDataPerChannel-1))*momentum;
 
-            const P var = std::sqrt(inputVar + static_cast<P>(std::get<0>(attrs)));
+            const P var = std::sqrt(inputVar + static_cast<P>(epsilon));
             for (std::size_t batch = 0; batch < nbBatch; ++batch) {
                 const std::size_t ioIndex = (ch + batch*nbChannels) * featureMapSize;
                 for (std::size_t feature = 0; feature<featureMapSize; ++feature) {
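
With `epsilon` and `momentum` passed explicitly instead of fetched from the `Attrs` tuple, the two code paths above read directly: the frozen path normalizes with the stored running statistics, while the training path updates them as an exponential moving average with an n/(n-1) unbiased-variance correction. A NumPy sketch of the same math, assuming NCHW layout:

    import numpy as np

    def batchnorm_frozen(x, scale, shift, mean, var, epsilon):
        # Frozen path: per-channel y = shift + scale*(x - mean)/sqrt(var + eps).
        std = np.sqrt(var + epsilon)
        return (shift[:, None, None]
                + scale[:, None, None] * (x - mean[:, None, None]) / std[:, None, None])

    def update_running_stats(mean, var, batch_mean, batch_var, momentum, n):
        # Training path: EMA update, with the n/(n-1) correction the kernel
        # applies to the batch variance (n = values per channel).
        new_mean = mean * (1 - momentum) + batch_mean * momentum
        new_var = var * (1 - momentum) + batch_var * (n / (n - 1)) * momentum
        return new_mean, new_var
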
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
index a61a7299ed6bd5c5a3e41c09e9d5b5f1f7ae3326..ec886a310dd2edc616ced6ee447665eab3ce301a 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
@@ -25,18 +25,60 @@
 
 namespace Aidge {
 // class ConvDepthWise_Op;
+// compute kernel registry for forward and backward
+class ConvDepthWiseImpl1DForward_cpu
+    : public Registrable<ConvDepthWiseImpl1DForward_cpu,
+                         std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const std::array<DimSize_t, 1>&,
+                            const std::array<DimSize_t, 1>&,
+                            const std::array<DimSize_t, 1>&,
+                            const std::array<DimSize_t, 3>&,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *)> {};
+
+class ConvDepthWiseImpl1D_cpu : public OperatorImpl {
+public:
+    ConvDepthWiseImpl1D_cpu(const ConvDepthWise_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ConvDepthWiseImpl1D_cpu> create(const ConvDepthWise_Op<1> &op) {
+        return std::make_unique<ConvDepthWiseImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to ConvDepthWise_Op<1> implementation registry
+static Registrar<ConvDepthWise_Op<1>> registrarConvDepthWiseImpl1D_cpu("cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
+}  // namespace
 
 // compute kernel registry for forward and backward
 class ConvDepthWiseImpl2DForward_cpu
     : public Registrable<ConvDepthWiseImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              const void *, const void *, void *)> {};
+                         void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *)> {};
 class ConvDepthWiseImpl2DBackward_cpu
     : public Registrable<ConvDepthWiseImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const ConvDepthWise_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              const void *, const void *, void *)> {};
+                         void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            bool,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *)> {};
 
 class ConvDepthWiseImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index 801bd315f9e5058ffade574fc92179b1e3c513e4..a02aa672b92f089790ef1903af8b804f816f3baa 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -12,17 +12,93 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVDEPTHWISEIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>     // std::int32_t
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <cstddef>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D depthwise convolution (ConvDepthWise) on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Stride along the spatial dimension.
+ * @param dilationDims Dilation along the spatial dimension (currently ignored).
+ * @param kernelDims Kernel size along the spatial dimension.
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvDepthWiseImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                            const std::array<DimSize_t, 1>& /*dilationDims*/,
+                            const std::array<DimSize_t, 1>& kernelDims,
+                            const std::array<DimSize_t, 3>& inputDims,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_) {
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+
+    // output X size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, ch, Xout)
+    // input  (batch, ch, Xin)
+    // weight (ch, kernelX)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
+            const std::size_t wIndex = ch * kernelDims[0];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                const std::size_t oIndexFull = oIndex + ox;
+                const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                    output[oIndexFull] += weights[wIndex + sx] *
+                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>);
+}  // namespace
+
+
 /**
  * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
  * @tparam I Input data type.
@@ -30,15 +106,22 @@ namespace Aidge {
  * @tparam B Bias data type.
  * @tparam O Output data type.
  * @param params tuple of Attributes from the Operator
- * @param dims Array of input dimensions.
+ * @param inputDims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param weights_ const weight Tensor.
  * @param biases_ const Biais Tensor.
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
+void ConvDepthWiseImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                            const std::array<DimSize_t, 2>& /*dilationDims*/,
+                            const std::array<DimSize_t, 2>& kernelDims,
+                            const std::array<DimSize_t, 4>& inputDims,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
@@ -48,12 +131,12 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<3>(attrs)[0] + std::get<0>(attrs)[0]) /
-                                static_cast<float>(std::get<0>(attrs)[0])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<3>(attrs)[1] + std::get<0>(attrs)[1]) /
-                                static_cast<float>(std::get<0>(attrs)[1])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -61,40 +144,40 @@ void ConvDepthWiseImpl2D_cpu_forward_kernel(const ConvDepthWise_Op<2>::Attrs &at
     // weight (outCh, ch, kernelX, kernelY)
     // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t ch = 0; ch < std::get<2>(attrs); ++ch) {
-            const std::size_t oIndex = (ch + batch*std::get<2>(attrs)) * oxSize * oySize;
-            B biasVal = ((!std::get<4>(attrs)) && biases != nullptr) ? biases[ch] : B(0);
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize * oySize;
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
-            const std::size_t wIndex = ch * std::get<3>(attrs)[0] * std::get<3>(attrs)[1];
+            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+            const std::size_t wIndex = ch * kernelDims[0] * kernelDims[1];
             for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
+                const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
                 const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<3>(attrs)[0] ? std::get<3>(attrs)[0] : dims[2] + difx);
+                const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
                 for (std::size_t oy = 0; oy < oySize; ++oy) {
-                    const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
+                    const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
                     const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                    const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<3>(attrs)[1] ? std::get<3>(attrs)[1] : dims[3] + dify);
+                    const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
                     const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                    const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
-                    const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                    const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
 
                     if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                        output[oIndexFull] +=  (weights[wIndex + 0*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 0*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 0*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                weights[wIndex + 1*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 1*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 1*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                weights[wIndex + 2*std::get<3>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                weights[wIndex + 2*std::get<3>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                weights[wIndex + 2*std::get<3>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                        output[oIndexFull] +=  (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
                     } else {
                         for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                             for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                output[oIndexFull] += weights[wIndex + sx*std::get<3>(attrs)[1] + sy] *
-                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
+                                                        input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                             }
                         }
                     }
@@ -110,7 +193,7 @@ static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DFor
         Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<float, float, float, float>);
 static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Int32(
         {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<int, int, int, int>);
+        Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
 static Registrar<ConvDepthWiseImpl2DForward_cpu> registrarConvDepthWiseImpl2DForward_cpu_Float64(
         {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
         Aidge::ConvDepthWiseImpl2D_cpu_forward_kernel<double, double, double, double>);
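
The new 1D depthwise kernel registered above convolves each channel with its own kernel row (valid padding, stride supported, dilation not yet handled), with an optional bias. A NumPy reference of the same computation, assuming x of shape (N, C, Xin) and w of shape (C, K):

    import numpy as np

    def depthwise_conv1d(x, w, b, stride):
        n, c, xin = x.shape
        k = w.shape[1]
        ox = (xin - k + stride) // stride        # same size formula as the kernel
        out = np.zeros((n, c, ox), dtype=x.dtype)
        for i in range(ox):
            window = x[:, :, i * stride : i * stride + k]   # (N, C, K)
            out[:, :, i] = np.einsum("nck,ck->nc", window, w)
        if b is not None:                        # bias may be a null input
            out += b[None, :, None]
        return out

    x = np.arange(2 * 3 * 8, dtype=np.float32).reshape(2, 3, 8)
    w = np.ones((3, 3), dtype=np.float32)
    assert depthwise_conv1d(x, w, None, stride=2).shape == (2, 3, 3)
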
diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index e7ce0892a6241009a8e80821e341b3209a19faa4..d7be46c251a82d1b631f4ad50e7175fa2f896d03 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -27,16 +27,63 @@ namespace Aidge {
 // class Conv_Op;
 
 // compute kernel registry for forward and backward
+// Conv 1D
+class ConvImpl1DForward_cpu
+    : public Registrable<ConvImpl1DForward_cpu,
+                         std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const std::array<DimSize_t, 1>&,
+                            const std::array<DimSize_t, 1>&,
+                            const std::array<DimSize_t, 1>&,
+                            const std::array<DimSize_t, 3> &,
+                            DimSize_t,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *)> {};
+
+class ConvImpl1D_cpu : public OperatorImpl {
+   public:
+    ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) {
+        return std::make_unique<ConvImpl1D_cpu>(op);
+    }
+
+   public:
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Conv_Op<1> implementation registry
+static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create);
+}  // namespace
+
+// Conv 2D
 class ConvImpl2DForward_cpu
     : public Registrable<ConvImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              const void *, const void *, void *)> {};
+                         void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 4> &,
+                            DimSize_t,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *)> {};
 class ConvImpl2DBackward_cpu
     : public Registrable<ConvImpl2DBackward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
-                         void(const Conv_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              const void *, const void *, void *)> {};
+                         void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            bool,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            const void *,
+                            const void *,
+                            void *)> {};
 
 class ConvImpl2D_cpu : public OperatorImpl {
    public:
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index 00d34f6596780f42aa5864058ea543f046f8edb1..88a71c47244788f2da5e576c8ad5170a92561909 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -12,17 +12,100 @@
 #ifndef AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_CONVIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdint>     // std::int32_t
 
-#include "aidge/data/half.hpp"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/data/half.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param strideDims Stride along the spatial dimension.
+ * @param dilationDims Dilation along the spatial dimension (currently ignored).
+ * @param kernelDims Kernel size along the spatial dimension.
+ * @param inputDims Array of input dimensions.
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                            const std::array<DimSize_t, 1>& /*dilationDims*/,
+                            const std::array<DimSize_t, 1>& kernelDims,
+                            const std::array<DimSize_t, 3>& inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output X size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout)
+    // input  (batch, inCh, Xin)
+    // weight (outCh, inCh, kernelX)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                                input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16(
+        {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
+        Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>);
+}  // namespace
+
+
 /**
  * @brief Forward kernel for 2D Convolution on CPU backend.
  * @tparam I Input data type.
@@ -30,15 +113,23 @@ namespace Aidge {
  * @tparam B Bias data type.
  * @tparam O Output data type.
  * @param params tuple of Attributes from the Operator
- * @param dims Array of input dimensions.
+ * @param inputDims Array of input dimensions.
  * @param input_ const input Tensor.
  * @param weights_ const weight Tensor.
  * @param biases_ const Biais Tensor.
  * @param output_ Output Tensor.
  */
 template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
+void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                            const std::array<DimSize_t, 2>& /*dilationDims*/,
+                            const std::array<DimSize_t, 2>& kernelDims,
+                            const std::array<DimSize_t, 4> &inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
     // FIXME: missing convolution attributes as arguments
     const I *input = static_cast<const I *>(input_);
     const W *weights = static_cast<const W *>(weights_);
@@ -47,12 +138,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
 /*
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
-                                static_cast<float>(std::get<0>(attrs)[0]));
+            static_cast<std::size_t>(static_cast<float>(inputDims[0] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0]));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
-                                static_cast<float>(std::get<0>(attrs)[1]));
+            static_cast<std::size_t>(static_cast<float>(inputDims[1] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1]));
 
     // TODO: kernel computation
     // output (Xout, Yout, outCh, batch)
@@ -61,22 +152,22 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
     // does not take Dilation attribute into account
     for (std::size_t ox = 0; ox < oxSize; ++ox) {
         for (std::size_t oy = 0; oy < oySize; ++oy) {
-            const std::size_t ix = ox * std::get<0>(attrs)[0];
-            const std::size_t iy = oy * std::get<0>(attrs)[1];
+            const std::size_t ix = ox * strideDims[0];
+            const std::size_t iy = oy * strideDims[1];
 
-            for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
-                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(attrs) * (oy + oySize * ox));
+            for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+                const std::size_t oIndex = inputDims[3] * (outCh + outChannels * (oy + oySize * ox));
                 B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-                for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
                     output[oIndex + batch] = biasVal;
                 }
-                for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
-                    for (std::size_t sx = 0; sx < std::get<4>(attrs)[0]; ++sx) {
-                        for (std::size_t sy = 0; sy < std::get<4>(attrs)[1]; ++sy) {
+                for (std::size_t inCh = 0; inCh < inputDims[2]; ++inCh) {
+                    for (std::size_t sx = 0; sx < kernelDims[0]; ++sx) {
+                        for (std::size_t sy = 0; sy < kernelDims[1]; ++sy) {
                             const std::size_t wIndex =
-                                    outCh + std::get<3>(attrs) * (inCh + dims[2] * (sy + std::get<4>(attrs)[1] * sx));
-                            std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
-                            for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                                    outCh + outChannels * (inCh + inputDims[2] * (sy + kernelDims[1] * sx));
+                            std::size_t iIndex = inputDims[3] * (inCh + inputDims[2] * ((iy + sy) + inputDims[1] * (ix + sx)));
+                            for (std::size_t batch = 0; batch < inputDims[3]; ++batch) {
                                 output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
                             }
                         }
@@ -90,12 +181,12 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
 
     // output H size
     const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - std::get<4>(attrs)[0] + std::get<0>(attrs)[0]) /
-                                static_cast<float>(std::get<0>(attrs)[0])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
     // output W size
     const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(dims[3] - std::get<4>(attrs)[1] + std::get<0>(attrs)[1]) /
-                                static_cast<float>(std::get<0>(attrs)[1])));
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
 
     // TODO: kernel computation
     // output (batch, outCh, Xout, Yout)
@@ -103,42 +194,42 @@ void ConvImpl2D_cpu_forward_kernel(const Conv_Op<2>::Attrs &attrs, const std::ar
     // weight (outCh, inCh, kernelX, kernelY)
     // does not take Dilation attribute into account
     using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
-        for (std::size_t outCh = 0; outCh < std::get<3>(attrs); ++outCh) {
-            const std::size_t oIndex = (outCh + batch*std::get<3>(attrs)) * oxSize * oySize;
-            // If  NoBias or bias = nullptr, set B(0)
-            B biasVal = ((!std::get<5>(attrs)) && biases != nullptr) ? biases[outCh] : B(0);
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
             std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
-                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(attrs)[0] * std::get<4>(attrs)[1];
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
                 for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
                     const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    const std::size_t sxMax = (static_cast<signedsize>(dims[2]) + difx) < 0 ? 0 : ((dims[2] + difx) > std::get<4>(attrs)[0] ? std::get<4>(attrs)[0] : dims[2] + difx);
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
                     for (std::size_t oy = 0; oy < oySize; ++oy) {
-                        const signedsize dify = static_cast<signedsize>(- oy * std::get<0>(attrs)[1]);
+                        const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
                         const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                        const std::size_t syMax = (static_cast<signedsize>(dims[3]) + dify) < 0 ? 0 : ((dims[3] + dify) > std::get<4>(attrs)[1] ? std::get<4>(attrs)[1] : dims[3] + dify);
+                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
                         const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                        const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
-                        const signedsize iy = static_cast<signedsize>(oy * std::get<0>(attrs)[1]);
+                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
 
                         if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                            output[oIndexFull] += (weights[wIndex + 0*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 0*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 0*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 1*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*dims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 2*std::get<4>(attrs)[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*dims[3] + static_cast<std::size_t>(iy+2)]);
+                            output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
                         } else {
                             for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
                                 for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                    output[oIndexFull] += weights[wIndex + sx*std::get<4>(attrs)[1] + sy] *
-                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*dims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                    output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
+                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
                                 }
                             }
                         }
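
The pattern recurring throughout this patch: kernels now receive each attribute as a named parameter (strideDims, kernelDims, ...) instead of indexing an opaque Attrs tuple with std::get<N> inside the hot loop. A minimal self-contained sketch of the difference — the types and names below are illustrative, not the actual Aidge API:

    #include <array>
    #include <cstdio>
    #include <tuple>

    // Illustrative stand-in for an operator's attribute tuple: {strideDims, kernelDims}.
    using Attrs = std::tuple<std::array<int, 2>, std::array<int, 2>>;

    void kernelOld(const Attrs& attrs) { std::printf("stride x = %d\n", std::get<0>(attrs)[0]); }
    void kernelNew(const std::array<int, 2>& strideDims) { std::printf("stride x = %d\n", strideDims[0]); }

    int main() {
        Attrs attrs{std::array<int, 2>{2, 2}, std::array<int, 2>{3, 3}};
        kernelOld(attrs);               // the reader must know what std::get<0> holds
        kernelNew(std::get<0>(attrs));  // the signature itself documents the value
    }
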
diff --git a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp
index 3cdcefa9e1c865f66b64ed527605d46af31be8af..74db1128c111ae62bedb6fa61682abca62429cdb 100644
--- a/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp
@@ -14,6 +14,7 @@
 
 #include <numeric>     // std::accumulate
 #include <cstddef>     // std::size_t
+#include <cstdint>     // std::int32_t, std::int64_t
 #include <functional>  // std::multiplies
 
 #include "aidge/utils/Registrar.hpp"
@@ -76,7 +77,7 @@ static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float32(
         Aidge::DivImpl_cpu_forward_kernel<float, float, float>);
 static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Int32(
         {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::DivImpl_cpu_forward_kernel<int, int, int>);
+        Aidge::DivImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
 static Registrar<DivImplForward_cpu> registrarDivImplForward_cpu_Float64(
         {DataType::Float64, DataType::Float64, DataType::Float64},
         Aidge::DivImpl_cpu_forward_kernel<double, double, double>);
diff --git a/include/aidge/backend/cpu/operator/FCImpl.hpp b/include/aidge/backend/cpu/operator/FCImpl.hpp
index fedd8b38b2dbee9e5fd288a07d5cd722470723e5..f21cd0ff330f61b942eb55f036c7b23458a5959a 100644
--- a/include/aidge/backend/cpu/operator/FCImpl.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl.hpp
@@ -12,14 +12,14 @@
 #ifndef AIDGE_CPU_OPERATOR_FCIMPL_H_
 #define AIDGE_CPU_OPERATOR_FCIMPL_H_
 
+#include <array>
+#include <memory>
+#include <vector>
+
 #include "aidge/backend/OperatorImpl.hpp"
 #include "aidge/operator/FC.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <memory>
-#include <vector>
-#include <array>
 
 namespace Aidge {
 // class FC_Op;
@@ -30,27 +30,27 @@ class FCImplForward_cpu : public Registrable<FCImplForward_cpu,
                                                         DataType,
                                                         DataType,
                                                         DataType>,
-                                             void(const FC_Op::Attrs&,
-                                                  const DimSize_t,
-                                                  const DimSize_t,
-                                                  const void *,
-                                                  const void *,
-                                                  const void *,
-                                                  void *)> {};
+                                             void(const DimSize_t,
+                                                const DimSize_t,
+                                                const DimSize_t,
+                                                const void *,
+                                                const void *,
+                                                const void *,
+                                                void *)> {};
 class FCImplBackward_cpu : public Registrable<FCImplBackward_cpu,
                                               std::tuple<DataType,
                                                          DataType,
                                                          DataType,
                                                          DataType>,
-                                              void(const FC_Op::Attrs&,
-                                              const DimSize_t,
-                                              const DimSize_t,
-                                              const void *,
-                                              const void *,
-                                              const void *,
-                                              void *,
-                                              void *,
-                                              void *)> {};
+                                              void(const DimSize_t,
+                                                const DimSize_t,
+                                                const DimSize_t,
+                                                const void *,
+                                                const void *,
+                                                const void *,
+                                                void *,
+                                                void *,
+                                                void *)> {};
 
 class FCImpl_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
index 50fb5f49033cccd3c554d692bc336c7d5d677384..c93a44d922dce2dc18df94bf903134ddadf5256f 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_backward_kernels.hpp
@@ -19,8 +19,16 @@
 
 namespace Aidge {
 template <class I, class O, class W, class B>
-void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* originalInput_, const void* weight_, void* output_, void* weightGrad_, void* biasesGrad_) {
+void FCImpl_cpu_backward_kernel(const DimSize_t batchSize,
+                                const DimSize_t inputFeatureSize,
+                                const DimSize_t outputFeatureSize,
+                                const void* input_,
+                                const void* originalInput_,
+                                const void* weight_,
+                                void* output_,
+                                void* weightGrad_,
+                                void* biasesGrad_)
+{
     // FIXME: missing FC attributes as arguments
     const I* input  = static_cast<const I*>(input_);
     const I* originalInput  = static_cast<const I*>(originalInput_);
@@ -31,37 +39,37 @@ void FCImpl_cpu_backward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batch
 
 
     // bias grad
-    if (std::get<1>(attrs)) { // no bias
-        std::fill(biasesGrad, biasesGrad + std::get<0>(attrs), B(0));
+    if (biasesGrad == nullptr) { // no bias: there is no gradient buffer to write
+        // nothing to do here; std::fill on a null pointer would be undefined behaviour
     } else {
-        for (std::size_t o = 0; o < std::get<0>(attrs); ++o) { // nb outputs
+        for (std::size_t o = 0; o < outputFeatureSize; ++o) { // nb outputs
             B sum{0};
             for (std::size_t b = 0; b < batchSize; ++b) {
-                sum += input[b*std::get<0>(attrs) + o];
+                sum += input[b*outputFeatureSize + o];
             }
             biasesGrad[o] = sum;
         }
     }
 
     // weight grad
-    for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
-        for (std::size_t c = 0; c < oneInputSize; ++c) {
+    for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+        for (std::size_t c = 0; c < inputFeatureSize; ++c) {
             W sum{0};
             for (std::size_t b = 0; b < batchSize; ++b) {
-                sum += originalInput[b*oneInputSize + c]*input[b*std::get<0>(attrs) + o];
+                sum += originalInput[b*inputFeatureSize + c]*input[b*outputFeatureSize + o];
             }
-            weightGrad[o*oneInputSize + c] = sum;
+            weightGrad[o*inputFeatureSize + c] = sum;
         }
     }
 
     // input grad
     for (std::size_t b = 0; b < batchSize; ++b) {
-        for (std::size_t c = 0; c < oneInputSize; ++c) {
+        for (std::size_t c = 0; c < inputFeatureSize; ++c) {
             O sum{0};
-            for (std::size_t o = 0; o < std::get<0>(attrs); ++o) {
-                sum += weight[o*oneInputSize + c] * input[b*std::get<0>(attrs) + o];
+            for (std::size_t o = 0; o < outputFeatureSize; ++o) {
+                sum += weight[o*inputFeatureSize + c] * input[b*outputFeatureSize + o];
             }
-            output[b*oneInputSize + c] = sum;
+            output[b*inputFeatureSize + c] = sum;
         }
     }
 }
diff --git a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
index 64f3b3e18f7255b74decad5137cbb5ccd6966123..caeacd1bda2fde086fd649c50a733e790fc2c000 100644
--- a/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp
@@ -27,9 +27,9 @@ namespace Aidge {
 //     const B* biases = static_cast<const B*>(biases_);
 //     O* output = static_cast<O*>(output_);
 
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
+//     for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
 //         std::size_t oIndex = outIdx * dims[3];
-//         const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
+//         const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
 //         for (std::size_t batch = 0; batch < dims[3]; ++batch) {
 //             output[oIndex + batch] = bias;
 //         }
@@ -39,10 +39,10 @@ namespace Aidge {
 //         for (std::size_t iy = 0; iy < dims[1]; ++iy) {
 //             for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
 //                 const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
-//                 for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
+//                 for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
 //                     const std::size_t oIndex = dims[3] * outCh;
-//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(attrs) +
-//                                           outCh;  // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
+//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * outputFeatureSize +
+//                                           outCh;  // (iIndex*outputFeatureSize + oIndex)/dims[3];
 //                     for (std::size_t batch = 0; batch < dims[3]; ++batch) {
 //                         output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
 //                     }
@@ -63,9 +63,9 @@ namespace Aidge {
 
 //     // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
 
-//     for (std::size_t outIdx = 0; outIdx < std::get<0>(attrs); ++outIdx) {
+//     for (std::size_t outIdx = 0; outIdx < outputFeatureSize; ++outIdx) {
 //         std::size_t oIndex = outIdx * dims[0];
-//         const B bias = std::get<1>(attrs) ? B(0) : biases[outIdx];
+//         const B bias = std::get<0>(attrs) ? B(0) : biases[outIdx];
 //         for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //             output[oIndex + batch] = bias;
 //         }
@@ -74,8 +74,8 @@ namespace Aidge {
 //     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
 //         const std::size_t oIndex = dims[1] * batch;
 //         for (std::size_t i = 0; i < dims[1]; ++i) {
-//             for (std::size_t outCh = 0; outCh < std::get<0>(attrs); ++outCh) {
-//                 std::size_t wIndex = i * std::get<0>(attrs) + outCh;  // (iIndex*std::get<0>(attrs) + oIndex)/dims[3];
+//             for (std::size_t outCh = 0; outCh < outputFeatureSize; ++outCh) {
+//                 std::size_t wIndex = i * outputFeatureSize + outCh;  // (iIndex*outputFeatureSize + oIndex)/dims[3];
 //                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
 //             }
 //         }
@@ -83,29 +83,34 @@ namespace Aidge {
 // }
 
 template <class I, class W, class B, class O>
-void FCImpl_cpu_forward_kernel(const FC_Op::Attrs& attrs, const DimSize_t batchSize, const DimSize_t oneInputSize,
-                                   const void* input_, const void* weights_, const void* biases_, void* output_) {
+void FCImpl_cpu_forward_kernel(const DimSize_t batchSize,
+                            const DimSize_t inputFeatureSize,
+                            const DimSize_t outputFeatureSize,
+                            const void* input_,
+                            const void* weights_,
+                            const void* biases_,
+                            void* output_) {
     // FIXME: missing FC attributes as arguments
     const I* input = static_cast<const I*>(input_);
     const W* weights = static_cast<const W*>(weights_);
     const B* biases = static_cast<const B*>(biases_);
     O* output = static_cast<O*>(output_);
 
-    if (std::get<1>(attrs)) {
-        std::fill(output, output+(batchSize*std::get<0>(attrs)), B(0));
+    if (biases == nullptr) {
+        std::fill(output, output+(batchSize*outputFeatureSize), B(0));
     }
     else {
         for (std::size_t batch = 0; batch < batchSize; ++batch) {
-            std::copy(biases, biases+std::get<0>(attrs), output+(batch*std::get<0>(attrs)));
+            std::copy(biases, biases+outputFeatureSize, output+(batch*outputFeatureSize));
         }
     }
 
     for (std::size_t batch = 0; batch < batchSize; ++batch) {
-        for (std::size_t out = 0; out < std::get<0>(attrs); ++out) {
-            output[out + batch*std::get<0>(attrs)] = std::inner_product(input + batch*oneInputSize,
-                                                        input + (batch + 1)*oneInputSize,
-                                                        weights + out*oneInputSize,
-                                                        output[out + batch*std::get<0>(attrs)]);
+        for (std::size_t out = 0; out < outputFeatureSize; ++out) {
+            output[out + batch*outputFeatureSize] = std::inner_product(input + batch*inputFeatureSize,
+                                                        input + (batch + 1)*inputFeatureSize,
+                                                        weights + out*inputFeatureSize,
+                                                        output[out + batch*outputFeatureSize]);
         }
     }
 }
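
The new FC forward signature makes the data layout explicit: weights are row-major [outputFeatureSize x inputFeatureSize] and each output is an inner product seeded with the bias. A standalone sketch mirroring the kernel's arithmetic on a hand-picked 1x2 input with 3 output features (values are illustrative, not from the test suite):

    #include <cstddef>
    #include <cstdio>
    #include <numeric>

    int main() {
        const std::size_t batchSize = 1, inputFeatureSize = 2, outputFeatureSize = 3;
        const float input[2]   = {1.f, 2.f};
        const float weights[6] = {1.f, 0.f,   // output feature 0
                                  0.f, 1.f,   // output feature 1
                                  1.f, 1.f};  // output feature 2
        const float biases[3]  = {0.5f, 0.5f, 0.5f};
        float output[3];

        for (std::size_t b = 0; b < batchSize; ++b)
            for (std::size_t o = 0; o < outputFeatureSize; ++o)
                output[o + b*outputFeatureSize] = std::inner_product(
                    input + b*inputFeatureSize, input + (b + 1)*inputFeatureSize,
                    weights + o*inputFeatureSize, biases[o]);

        std::printf("%g %g %g\n", output[0], output[1], output[2]);  // 1.5 2.5 3.5
    }
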
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
index 880a59b3aeae2598f6b1ed5e287af18fd7bcfd6f..c9ad909eee631189a81067eda076c0b8cbb13377 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl.hpp
@@ -25,11 +25,19 @@
 namespace Aidge {
 // compute kernel registry for forward and backward
 class LeakyReLUImplForward_cpu
-    : public Registrable<LeakyReLUImplForward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
-};
+    : public Registrable<LeakyReLUImplForward_cpu,
+                        std::tuple<DataType, DataType>,
+                        void(const float,
+                            std::size_t,
+                            const void*,
+                            void*)> {};
 class LeakyReLUImplBackward_cpu
-    : public Registrable<LeakyReLUImplBackward_cpu, std::tuple<DataType, DataType>, void(const LeakyReLU_Op::Attrs&, std::size_t, const void*, void*)> {
-};
+    : public Registrable<LeakyReLUImplBackward_cpu,
+                        std::tuple<DataType, DataType>,
+                        void(const float,
+                            std::size_t,
+                            const void*,
+                            void*)> {};
 
 class LeakyReLUImpl_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp
index 949e6af66a476693b347f38a45edea10e21bc933..e308d940890101ad396c7ed20541bbc4f8b035cf 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp
@@ -18,17 +18,17 @@
 
 namespace Aidge {
 template <class I, class O>
-void LeakyReLUImpl_cpu_backward_kernel(const LeakyReLU_Op::Attrs& attrs,
+void LeakyReLUImpl_cpu_backward_kernel(const float negativeSlope_,
                                      std::size_t inputLenght,
                                      const void* input_,
                                      void* output_) {
 
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
-    I negativeSlope = static_cast<I>(std::get<0>(attrs));
+    const I negativeSlope = static_cast<const I>(negativeSlope_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] > 0 ? input[i] : negativeSlope*input[i];
+        output[i] = (input[i] > 0) ? input[i] : negativeSlope*input[i];
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
index d10b32e18ee983fc1270bc4a7cce35e18f601071..450d0bf4ace4879f90e0104e14b5bf61366e96c2 100644
--- a/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp
@@ -18,17 +18,17 @@
 
 namespace Aidge {
 template <class I, class O>
-void LeakyReLUImpl_cpu_forward_kernel(const LeakyReLU_Op::Attrs& attrs,
+void LeakyReLUImpl_cpu_forward_kernel(const float negativeSlope_,
                                      std::size_t inputLenght,
                                      const void* input_,
                                      void* output_) {
 
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
-    const I negativeSlope = static_cast<const I>(std::get<0>(attrs));
+    const I negativeSlope = static_cast<const I>(negativeSlope_);
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] >= 0 ? input[i] : input[i] * negativeSlope;
+        output[i] = (input[i] >= 0) ? input[i] : input[i] * negativeSlope;
     }
 }
 
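For reference, the element-wise rule both LeakyReLU kernels apply (through the void*-typed interface above), as a standalone sketch:

    #include <cstddef>
    #include <cstdio>

    int main() {
        const float negativeSlope = 0.1f;
        const float in[4] = {-2.f, -0.5f, 0.f, 3.f};
        float out[4];
        for (std::size_t i = 0; i < 4; ++i)
            out[i] = (in[i] >= 0.f) ? in[i] : negativeSlope * in[i];  // same rule as the kernel
        std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // -0.2 -0.05 0 3
    }
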
diff --git a/include/aidge/backend/cpu/operator/LnImpl.hpp b/include/aidge/backend/cpu/operator/LnImpl.hpp
new file mode 100755
index 0000000000000000000000000000000000000000..faa03855a4f881f2a644ebc4023871b7acd6275c
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/LnImpl.hpp
@@ -0,0 +1,54 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_LNIMPL_H_
+#define AIDGE_CPU_OPERATOR_LNIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Ln.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Ln_Op;
+
+// compute kernel registry for forward and backward
+class LnImplForward_cpu
+    : public Registrable<LnImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
+};
+class LnImplBackward_cpu
+    : public Registrable<LnImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> {
+};
+
+class LnImpl_cpu : public OperatorImpl {
+public:
+    LnImpl_cpu(const Ln_Op& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<LnImpl_cpu> create(const Ln_Op& op) {
+        return std::make_unique<LnImpl_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+
+    void forward() override final;
+
+    void backward() override final;
+};
+
+namespace {
+static Registrar<Ln_Op> registrarLnImpl_cpu("cpu", Aidge::LnImpl_cpu::create);
+}
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_LNIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp
new file mode 100755
index 0000000000000000000000000000000000000000..5fb82e35f8855d9d6e2eb85e9ab380c9f1fc9b90
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp
@@ -0,0 +1,50 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_
+
+#include <cstddef>  // std::size_t
+
+#include "aidge/backend/cpu/operator/LnImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class I, class GI, class GO>
+void LnImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                const void* input_, const void* grad_output_,
+                                void* grad_input_) {
+
+    const I* input = static_cast<const I*>(input_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    const float eps = 1.0e-20f;
+
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        if (input[i] > I(eps)) {
+            grad_input[i] = grad_output[i] / input[i];
+        } else {
+            grad_input[i] = GI(0);
+        }
+    }
+}
+
+namespace {
+static Registrar<LnImplBackward_cpu> registrarLnImplBackward_cpu_Float32(
+    {DataType::Float32, DataType::Float32, DataType::Float32},
+    Aidge::LnImpl_cpu_backward_kernel<float, float, float>);
+static Registrar<LnImplBackward_cpu> registrarLnImplBackward_cpu_Float64(
+    {DataType::Float64, DataType::Float64, DataType::Float64},
+    Aidge::LnImpl_cpu_backward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_LNIMPL_BACKWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp
new file mode 100755
index 0000000000000000000000000000000000000000..ebb975512a6e7c0f7225c305372f0ec6e7060786
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp
@@ -0,0 +1,47 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_
+
+#include <cmath>    // std::log
+#include "aidge/backend/cpu/operator/LnImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void LnImpl_cpu_forward_kernel(std::size_t inputLenght,
+                               const void* input_,
+                               void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+    const float eps = 1.0e-20f;
+
+//#pragma omp parallel for if (inputLenght > 1024)
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        if (input[i] > I(eps)) {
+            output[i] = std::log(input[i]);
+        } else {
+            output[i] = std::log(I(eps));
+        }
+    }
+}
+
+namespace {
+static Registrar<LnImplForward_cpu> registrarLnImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::LnImpl_cpu_forward_kernel<float, float>);
+static Registrar<LnImplForward_cpu> registrarLnImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::LnImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_LNIMPL_FORWARD_KERNEL_H_ */
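
The eps clamp keeps the forward pass finite at zero: any input at or below 1e-20 maps to log(1e-20) (about -46.05) instead of -inf, and the backward kernel above zeroes the gradient on the same region. A standalone sketch of the forward rule:

    #include <cmath>
    #include <cstdio>

    int main() {
        const float eps = 1.0e-20f;
        const float in[3] = {0.f, 1.f, 2.718281828f};
        for (float x : in)
            std::printf("%g -> %g\n", x, (x > eps) ? std::log(x) : std::log(eps));
        // 0 -> -46.0517 (clamped), 1 -> 0, e -> 1
    }
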
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
index d2d30aa7db5b1522712faa846ef33e1b21772d5e..4dd30e1fb939837f6861313eda04d7d05f3c8110 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl.hpp
@@ -29,12 +29,22 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class MaxPoolingImpl2DForward_cpu
     : public Registrable<MaxPoolingImpl2DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const bool,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            void *)> {};
 class MaxPoolingImpl2DBackward_cpu
     : public Registrable<MaxPoolingImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const MaxPooling_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::array<DimSize_t, 2>&,
+                            const std::array<DimSize_t, 2>&,
+                            const bool,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            void *)> {};
 
 class MaxPoolingImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
index c4baccdee5def0be93be42b5657d77d21240328c..79a7bd154f4d4e19a71d719597992466c37c6a9f 100644
--- a/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp
@@ -12,15 +12,15 @@
 #ifndef AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_MaxPOOLINGIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <array>
+#include <cmath>
+#include <tuple>
 
 #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/data/Data.hpp"
-#include <array>
-#include <tuple>
-#include <cmath>
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
 
 namespace Aidge {
 /**
@@ -33,17 +33,16 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void MaxPoolingImpl2D_cpu_forward_kernel(const MaxPooling_Op<2>::Attrs &attrs,
-                                             const std::array<DimSize_t, 4> &dims,
-                                             const void *input_,
-                                             void *output_) {
+void MaxPoolingImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                                        const std::array<DimSize_t, 2>& kernelDims,
+                                        const bool /*ceilMode*/,
+                                        const std::array<DimSize_t, 4> &dims,
+                                        const void *input_,
+                                        void *output_) {
     // FIXME: missing convolution parameters as arguments
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
-    std::array<DimSize_t, 2> strideDims  = std::get<0>(attrs);
-    std::array<DimSize_t, 2> kernelDims  = std::get<1>(attrs);
-
     // output H size
     const std::size_t oxSize =
             static_cast<std::size_t>(std::floor(static_cast<float>(dims[2] - kernelDims[0] + strideDims[0]) /
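
The expression above is the floor-mode pooling output size, oSize = floor((inSize - kernel + stride) / stride). A quick worked check with illustrative numbers:

    #include <cmath>
    #include <cstdio>

    int main() {
        const float inSize = 5.f, kernel = 3.f, stride = 2.f;
        // floor((5 - 3 + 2) / 2) = 2 output positions along this axis
        std::printf("%d\n", static_cast<int>(std::floor((inSize - kernel + stride) / stride)));
    }
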
diff --git a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
index e1387768ea02e2a9f35790c64c7674c321a1faa7..c44199ba4797682362f4a7cb223435d6d1585443 100644
--- a/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp
@@ -14,6 +14,8 @@
 
 #include "aidge/utils/Registrar.hpp"
 
+#include <cstdint>     // std::int32_t, std::int64_t
+
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/MulImpl.hpp"
 
@@ -35,13 +37,13 @@ void MulImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
         totalElements *= dimSize;
     }
 
-	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) 
+	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
 	{
 		std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
 
 		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
 		std::size_t idx2 = getFlattenedIndex(input2Dims, indexes);
-		
+
         output[oIndex] = input_1[idx1] * input_2[idx2];
     }
 }
@@ -50,12 +52,15 @@ namespace {
 static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float32(
         {DataType::Float32, DataType::Float32, DataType::Float32},
         Aidge::MulImpl_cpu_forward_kernel<float, float, float>);
-static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::MulImpl_cpu_forward_kernel<int, int, int>);
 static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Float64(
         {DataType::Float64, DataType::Float64, DataType::Float64},
         Aidge::MulImpl_cpu_forward_kernel<double, double, double>);
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::MulImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<MulImplForward_cpu> registrarMulImplForward_cpu_Int64(
+        {DataType::Int64, DataType::Int64, DataType::Int64},
+        Aidge::MulImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>);
 }  // namespace
 }  // namespace Aidge
 
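The Mul kernel delegates broadcasting to getMultiDimIndices/getFlattenedIndex. A minimal sketch of the intended semantics — a local re-implementation assuming NumPy-style rules (size-1 dimensions repeat), not the actual Aidge helpers:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Maps an output coordinate to a flat index, repeating size-1 (broadcast) dims.
    std::size_t flatten(const std::vector<std::size_t>& dims, const std::vector<std::size_t>& idx) {
        std::size_t flat = 0;
        for (std::size_t d = 0; d < dims.size(); ++d)
            flat = flat * dims[d] + (dims[d] == 1 ? 0 : idx[d]);
        return flat;
    }

    int main() {
        // {2,3} * {1,3}: the single row of b is reused for both rows of a.
        const std::vector<std::size_t> aDims{2, 3}, bDims{1, 3};
        const float a[6] = {1, 2, 3, 4, 5, 6};
        const float b[3] = {10, 20, 30};
        for (std::size_t r = 0; r < 2; ++r)
            for (std::size_t c = 0; c < 3; ++c)
                std::printf("%g ", a[flatten(aDims, {r, c})] * b[flatten(bDims, {r, c})]);
        std::printf("\n");  // 10 40 90 40 100 180
    }
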
diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
index b3c91a43419e9a5e9e1299f4a2118a51b6b64fc7..c6e41c29fd203fdd80b2acb9ad0dfcac91a0f66c 100644
--- a/include/aidge/backend/cpu/operator/PadImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -25,18 +25,54 @@
 
 namespace Aidge {
 // class Pad_Op;
+// compute kernel registry for forward and backward
+class PadImpl1DForward_cpu
+    : public Registrable<PadImpl1DForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const std::array<DimSize_t, 2>&,
+                            const PadBorderType,
+                            const double,
+                            const std::array<DimSize_t, 3> &,
+                            const void *,
+                            void *)> {};
+
+class PadImpl1D_cpu : public OperatorImpl {
+public:
+    PadImpl1D_cpu(const Pad_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<PadImpl1D_cpu> create(const Pad_Op<1> &op) {
+        return std::make_unique<PadImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Pad_Op<1> implementation registry
+static Registrar<Pad_Op<1>> registrarPadImpl1D_cpu("cpu", Aidge::PadImpl1D_cpu::create);
+}  // namespace
+
 
 // compute kernel registry for forward and backward
 class PadImpl2DForward_cpu
     : public Registrable<PadImpl2DForward_cpu,
                          std::tuple<DataType, DataType>,
-                         void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              void *)> {};
+                         void(const std::array<DimSize_t, 4>&,
+                            const PadBorderType,
+                            const double,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            void *)> {};
 class PadImpl2DBackward_cpu
     : public Registrable<PadImpl2DBackward_cpu,
                          std::tuple<DataType, DataType>,
-                         void(const Pad_Op<2>::Attrs &, const std::array<DimSize_t, 4> &, const void *,
-                              void *)> {};
+                         void(const std::array<DimSize_t, 4>&,
+                            const PadBorderType,
+                            const double,
+                            const std::array<DimSize_t, 4> &,
+                            const void *,
+                            void *)> {};
 
 class PadImpl2D_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
index f6f00bc4df661921708e605f44056a77bb8125f4..26c873c8fe7f140b09b31d0f1a9d4125acbcf50f 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
@@ -12,16 +12,95 @@
 #ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
 #define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
 
-#include "aidge/utils/Registrar.hpp"
+#include <algorithm>  // std::max, std::min
+#include <array>
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::int32_t
 
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <cmath>
-#include <array>
-#include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Padding on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param beginEndBorders Array of begin/end padding sizes (borderType and borderValue select the padding mode and fill value).
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class O>
+void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorders,
+                                const PadBorderType borderType,
+                                const double borderValue,
+                                const std::array<DimSize_t, 3>& dims,
+                                const void *input_,
+                                void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    const std::size_t oxSize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
+
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2];
+            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize;
+
+            for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
+                const std::size_t oIndexFull = oIndex + ox;
+
+                O outputValue = static_cast<O>(borderValue);
+
+                if (borderType == PadBorderType::Constant) {
+                    int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]);
+
+                    if (ix >= 0  && ix < static_cast<int>(dims[2])) {
+                        outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                    }
+                }
+                else if (borderType == PadBorderType::Edge) {
+                    int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])));
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+                else if (borderType == PadBorderType::Reflect) {
+                    int ix = static_cast<int>(ox) - static_cast<int>(beginEndBorders[1]);
+
+                    if (ix < 0)
+                        ix = 0 - ix;
+                    if (ix >= static_cast<int>(dims[2]))
+                        ix = static_cast<int>(dims[2]) - ix;
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+                else if (borderType == PadBorderType::Wrap) {
+                    int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(beginEndBorders[1])) % static_cast<int>(dims[2]);
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+
+                output[oIndexFull] = outputValue;
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32},
+        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>);
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>);
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>);
+}  // namespace
+
+
 /**
  * @brief Forward kernel for 2D Padding on CPU backend.
  * @tparam I Input data type.
@@ -32,58 +111,62 @@ namespace Aidge {
  * @param output_ Output Tensor.
  */
 template <class I, class O>
-void PadImpl2D_cpu_forward_kernel(const Pad_Op<2>::Attrs &attrs, const std::array<DimSize_t, 4> &dims,
-                                       const void *input_, void *output_)
+void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorders,
+                                const PadBorderType borderType,
+                                const double borderValue,
+                                const std::array<DimSize_t, 4> &dims,
+                                const void *input_,
+                                void *output_)
 {
     const I *input = static_cast<const I *>(input_);
     O *output = static_cast<O *>(output_);
 
-    const std::size_t oySize = dims[2] + std::get<0>(attrs)[0] + std::get<0>(attrs)[1];
-    const std::size_t oxSize = dims[3] + std::get<0>(attrs)[2] + std::get<0>(attrs)[3];
+    const std::size_t oySize = dims[2] + beginEndBorders[0] + beginEndBorders[1];
+    const std::size_t oxSize = dims[3] + beginEndBorders[2] + beginEndBorders[3];
 
     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
         for (std::size_t ch = 0; ch < dims[1]; ++ch) {
             const std::size_t iIndex = (ch + batch*dims[1]) * dims[2] * dims[3];
             const std::size_t oIndex = (ch + batch*dims[1]) * oxSize * oySize;
 
-            for (unsigned int oy = 0; oy < oySize; ++oy) {
-                for (unsigned int ox = 0; ox < oxSize; ++ox) {
+            for (std::uint32_t oy = 0; oy < oySize; ++oy) {
+                for (std::uint32_t ox = 0; ox < oxSize; ++ox) {
                     const std::size_t oIndexFull = oIndex + ox*oySize + oy;
 
-                    O outputValue = std::get<2>(attrs);
+                    O outputValue = static_cast<O>(borderValue);
 
-                    if (std::get<1>(attrs) == PadBorderType::Constant) {
-                        int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]);
-                        int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]);
+                    if (borderType == PadBorderType::Constant) {
+                        std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
+                        std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
 
-                        if (ix >= 0  && ix < static_cast<int>(dims[3]) && iy >= 0  && iy < static_cast<int>(dims[2])) {
+                        if (ix >= 0  && ix < static_cast<std::int32_t>(dims[3]) && iy >= 0  && iy < static_cast<std::int32_t>(dims[2])) {
                             outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
                         }
                     }
-                    else if (std::get<1>(attrs) == PadBorderType::Edge) {
-                        int ix = std::max(0, std::min(static_cast<int>(dims[3]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])));
-                        int iy = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])));
+                    else if (borderType == PadBorderType::Edge) {
+                        std::int32_t ix = std::max(0, std::min(static_cast<std::int32_t>(dims[3]) - 1, static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])));
+                        std::int32_t iy = std::max(0, std::min(static_cast<std::int32_t>(dims[2]) - 1, static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])));
 
                         outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
                     }
-                    else if (std::get<1>(attrs) == PadBorderType::Reflect) {
-                        int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3]);
-                        int iy = static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1]);
+                    else if (borderType == PadBorderType::Reflect) {
+                        std::int32_t ix = static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3]);
+                        std::int32_t iy = static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1]);
 
                         if (ix < 0)
                             ix = 0 - ix;
                         if (iy < 0)
                             iy = 0 - iy;
-                        if (ix >= static_cast<int>(dims[3]))
-                            ix = static_cast<int>(dims[3]) - ix;
-                        if (iy >= static_cast<int>(dims[2]))
-                            iy = static_cast<int>(dims[2]) - iy;
+                        if (ix >= static_cast<std::int32_t>(dims[3]))
+                            ix = static_cast<std::int32_t>(dims[3]) - ix;
+                        if (iy >= static_cast<std::int32_t>(dims[2]))
+                            iy = static_cast<std::int32_t>(dims[2]) - iy;
 
                         outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
                     }
-                    else if (std::get<1>(attrs) == PadBorderType::Wrap) {
-                        int ix = (static_cast<int>(dims[3]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[3])) % static_cast<int>(dims[3]);
-                        int iy = (static_cast<int>(dims[2]) + static_cast<int>(oy) - static_cast<int>(std::get<0>(attrs)[1])) % static_cast<int>(dims[2]);
+                    else if (borderType == PadBorderType::Wrap) {
+                        std::int32_t ix = (static_cast<std::int32_t>(dims[3]) + static_cast<std::int32_t>(ox) - static_cast<std::int32_t>(beginEndBorders[3])) % static_cast<std::int32_t>(dims[3]);
+                        std::int32_t iy = (static_cast<std::int32_t>(dims[2]) + static_cast<std::int32_t>(oy) - static_cast<std::int32_t>(beginEndBorders[1])) % static_cast<std::int32_t>(dims[2]);
 
                         outputValue = input[iIndex + static_cast<std::size_t>(ix)*dims[2] + static_cast<std::size_t>(iy)];
                     }
@@ -101,7 +184,7 @@ static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32(
         Aidge::PadImpl2D_cpu_forward_kernel<float, float>);
 static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32(
         {DataType::Int32, DataType::Int32},
-        Aidge::PadImpl2D_cpu_forward_kernel<int, int>);
+        Aidge::PadImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>);
 static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64(
         {DataType::Float64, DataType::Float64},
         Aidge::PadImpl2D_cpu_forward_kernel<double, double>);
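
For the Constant mode the index math reduces to a bounds check after shifting by the leading border. A standalone 1D sketch — note it uses a plain (begin, end) ordering for clarity, whereas the 1D kernel above reads its leading offset from beginEndBorders[1]:

    #include <cstdio>

    int main() {
        const int in[3] = {1, 2, 3};
        const int inSize = 3, begin = 2, end = 1;     // oxSize = inSize + begin + end
        for (int ox = 0; ox < inSize + begin + end; ++ox) {
            const int ix = ox - begin;                // shift back by the leading border
            std::printf("%d ", (ix >= 0 && ix < inSize) ? in[ix] : 0);  // 0 0 1 2 3 0
        }
        std::printf("\n");
    }
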
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp
index 43a9714ad2d32228fac9bf9c526191f0cec5bfa0..1bd932e43608d98f737cc9046aed74b2fec6abc6 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_backward_kernels.hpp
@@ -18,15 +18,15 @@
 #include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
-template <class O, class GI, class GO>
+template <class I, class GI, class GO>
 void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                  const void* output_, const void* grad_output_,
-                                  void* grad_input_) {
-    const O* output = static_cast<const O*>(output_);
+                                  const void* input_, const void* grad_output_,
+                                  void* grad_input_) {
+    const I* input = static_cast<const I*>(input_);
     const GO* grad_output = static_cast<const GO*>(grad_output_);
     GI* grad_input = static_cast<GI*>(grad_input_);
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        grad_input[i] = (output[i] > GO(0)) ? GI(grad_output[i]) : GI(0);
+        grad_input[i] = (input[i] > 0) ? grad_output[i] : 0;
     }
 }
 
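The backward rule now gates on the forward input rather than the output: the incoming gradient passes through exactly where input > 0 and is zeroed elsewhere. A standalone sketch:

    #include <cstdio>

    int main() {
        const float input[4]   = {-1.f, 0.f, 2.f, 5.f};
        const float gradOut[4] = {1.f, 1.f, 1.f, 1.f};
        float gradIn[4];
        for (int i = 0; i < 4; ++i)
            gradIn[i] = (input[i] > 0.f) ? gradOut[i] : 0.f;
        std::printf("%g %g %g %g\n", gradIn[0], gradIn[1], gradIn[2], gradIn[3]);  // 0 0 1 1
    }
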
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
index aa533786d3ce5b6f5cd501b6ba74b1be2823d407..af9c65590c7182185c9d79669dde49e592cbeb5d 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp
@@ -27,7 +27,7 @@ void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
 
 //#pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] > 0 ? input[i] : 0;
+        output[i] = (input[i] > 0) ? input[i] : 0;
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
index 7355a2bd46f45ab5019a31832001ae3335c1d8e8..8d784c38dc006ea82f040dfe83b4bef05908dd68 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
@@ -28,12 +28,20 @@ namespace Aidge {
 // Every DIM
 class ReduceMeanImplForward_cpu
     : public Registrable<ReduceMeanImplForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op::Attrs &, const std::vector<DimSize_t>&, const void *, void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::vector<std::int32_t>&,
+                            DimSize_t,
+                            const std::vector<DimSize_t>&,
+                            const void *,
+                            void *)> {};
 class ReduceMeanImpl1DBackward_cpu
     : public Registrable<ReduceMeanImpl1DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const ReduceMean_Op::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)> {};
+                        std::tuple<DataType, DataType>,
+                        void(const std::vector<std::int32_t>&,
+                            DimSize_t,
+                            const std::vector<DimSize_t>&,
+                            const void *,
+                            void *)> {};
 
 class ReduceMeanImpl_cpu : public OperatorImpl {
    public:
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
index 6533f7b19eac07d429cd8c5ed05ea082457b9e7b..bba355e16958bb1a22bde1d24304d992a658ade8 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
@@ -26,15 +26,15 @@
 
 namespace Aidge {
 template <class I, class O>
-void ReduceMeanImpl_cpu_forward_kernel(const typename ReduceMean_Op::Attrs& attrs,
-                                     const std::vector<DimSize_t>& inputDims,
-                                     const void* input_,
-                                     void* output_) {
+void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
+                                    DimSize_t /*keepDims*/,
+                                    const std::vector<DimSize_t>& inputDims,
+                                    const void* input_,
+                                    void* output_) {
 
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
-    const std::vector<std::int32_t>& axes = std::get<0>(attrs);
     const std::size_t nb_dims = inputDims.size();
     const std::size_t totalElements = std::accumulate(inputDims.cbegin(), inputDims.cend(), 1, std::multiplies<std::size_t>());
 
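With the attribute tuple gone, the axes come in as a plain std::vector<std::int32_t>. For intuition, ReduceMean with axes = {1} on a {2, 3} input yields one mean per remaining row; a worked sketch of that case:

    #include <cstdio>

    int main() {
        const float in[2][3] = {{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}};
        for (int r = 0; r < 2; ++r) {                     // axis 0 is kept
            float sum = 0.f;
            for (int c = 0; c < 3; ++c) sum += in[r][c];  // axis 1 is reduced
            std::printf("%g ", sum / 3.f);                // 2 5
        }
        std::printf("\n");
    }
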
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
index 66bb42f7fb909ee9b6c91a6321ee3fa32c977626..8590169272818a225fe4299150f873733cdd9cd9 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
@@ -26,11 +26,23 @@ namespace Aidge {
 
 // compute kernel registry for forward and backward
 class ScalingImplForward_cpu
-    : public Registrable<ScalingImplForward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> {
-};
+    : public Registrable<ScalingImplForward_cpu,
+                        std::tuple<DataType, DataType>,
+                        void(const float,
+                            const std::size_t,
+                            const bool,
+                            std::size_t,
+                            const void*,
+                            void*)> {};
 class ScalingImplBackward_cpu
-    : public Registrable<ScalingImplBackward_cpu, std::tuple<DataType, DataType>, void(const Scaling_Op::Attrs&, std::size_t, const void*, void*)> {
-};
+    : public Registrable<ScalingImplBackward_cpu,
+                        std::tuple<DataType, DataType>,
+                        void(const float,
+                            const std::size_t,
+                            const bool,
+                            std::size_t,
+                            const void*,
+                            void*)> {};
 
 class ScalingImpl_cpu : public OperatorImpl {
 public:
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
index df8e1a7e7b02a4ad032d6f09fae3ae2cd8a42eff..c654265dd6f650129201037976d89da4b0f39d96 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
@@ -73,22 +73,21 @@ O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutput
 }
 
 template <class I, class O>
-void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs,
-                                     std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+void ScalingImpl_cpu_forward_kernel(const float scalingFactor,
+                                    const std::size_t quantizedNbBits,
+                                    const bool isOutputUnsigned,
+                                    std::size_t inputLenght,
+                                    const void* input_,
+                                    void* output_) {
 
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
-    const I& scalingFactor = static_cast<const I&>(std::get<0>(attrs));
-    const std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs));
-    const bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs));
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = input[i] * scalingFactor;
+        output[i] = static_cast<O>(input[i] * static_cast<I>(scalingFactor));
 
         if(quantizedNbBits > 0) {
-                output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned);
+            output[i] = saturate(std::round(output[i]), quantizedNbBits, isOutputUnsigned);
         }
     }
 }
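
With quantizedNbBits > 0 the kernel scales, rounds, then saturates. A worked sketch for a signed 8-bit target — the exact bounds come from saturate() (truncated above); [-128, 127] is assumed here for illustration:

    #include <cmath>
    #include <cstdio>

    int main() {
        const float scalingFactor = 100.f;
        const float in[3] = {0.123f, 1.5f, -3.0f};
        for (float x : in) {
            float y = std::round(x * scalingFactor);     // 12, 150, -300
            y = std::fmin(127.f, std::fmax(-128.f, y));  // saturate: 12, 127, -128
            std::printf("%g ", y);
        }
        std::printf("\n");
    }
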
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
index 2e43023d678c8a4258c80fb91d82d2858fcdf188..34340e6166a48b465c7723e85d91c195bfb42277 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
@@ -28,7 +28,7 @@ class SigmoidImplForward_cpu
     : public Registrable<SigmoidImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
 };
 class SigmoidImplBackward_cpu
-    : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
+    : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> {
 };
 
 class SigmoidImpl_cpu : public OperatorImpl {
@@ -40,7 +40,10 @@ public:
     }
 
     Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    void forward() override;
+
+    void forward() override final;
+
+    void backward() override final;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4ceb3bd7ed9a3fb739591eee488f8035770fef18
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp
@@ -0,0 +1,43 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_
+
+#include <cstddef>  // std::size_t
+
+#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class O, class GI, class GO>
+void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                     const void* output_, const void* grad_output_,
+                                     void* grad_input_) {
+    const O* output = static_cast<const O*>(output_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
+    }
+}
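+// Since d(sigmoid(x))/dx = sigmoid(x) * (1 - sigmoid(x)), only the forward
+// output is needed here: e.g. output[i] = 0.25 and grad_output[i] = 2.0 give
+// grad_input[i] = 0.25 * (1 - 0.25) * 2.0 = 0.375.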
+
+namespace {
+static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float32(
+    {DataType::Float32, DataType::Float32, DataType::Float32},
+    Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>);
+static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float64(
+    {DataType::Float64, DataType::Float64, DataType::Float64},
+    Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
index a53650942540e6368855ffe19e2f7f651ab5b6bc..24ba11a0bca7f3fa15f9ac1e2c13e29f88eaf074 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
@@ -19,15 +19,19 @@
 namespace Aidge {
 template <class I, class O>
 void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
+                                    const void* input_,
+                                    void* output_) {
 
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
 
 //#pragma omp parallel for if (inputLenght > 1024)
     for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = static_cast<O>(1.0) / (static_cast<O>(1.0) + std::exp(-input[i]));
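+        // Split on the sign of the input for numerical stability: exp() is
+        // only ever evaluated on a non-positive argument, so it cannot
+        // overflow for large |input[i]|.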
+        if (input[i] > I(0)) {
+            output[i] = O(1) / (O(1) + std::exp(-input[i]));
+        } else {
+            output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
+        }
     }
 }
 
diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..61aed1553bfbd2e67fc837ec6ea8d80b26ef3558
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp
@@ -0,0 +1,67 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_H_
+#define AIDGE_CPU_OPERATOR_SLICEIMPL_H_
+
+#include <memory>
+#include <vector>
+#include <array>
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Slice.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+namespace Aidge {
+
+// compute kernel registry for forward and backward
+class SliceImplForward_cpu
+    : public Registrable<SliceImplForward_cpu,
+                        std::tuple<DataType, DataType>,
+                        void(const std::vector<std::int64_t>&,
+                            const std::vector<std::int64_t>&,
+                            const std::vector<std::int8_t>&,
+                            const std::vector<std::int64_t>&,
+                            const std::vector<DimSize_t>&,
+                            const void*,
+                            void*)> {};
+class SliceImplBackward_cpu
+    : public Registrable<SliceImplBackward_cpu,
+                        std::tuple<DataType, DataType>,
+                        void(const std::vector<std::int64_t>&,
+                            const std::vector<std::int64_t>&,
+                            const std::vector<std::int8_t>&,
+                            const std::vector<std::int64_t>&,
+                            const std::vector<DimSize_t>&,
+                            const void*,
+                            void*)> {};
+
+class SliceImpl_cpu : public OperatorImpl {
+public:
+    SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) {
+        return std::make_unique<SliceImpl_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..31e409369cc640bbda9f54c54652af7f72b509b6
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
@@ -0,0 +1,101 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>    // std::int8_t, std::int64_t
+#include <functional> // std::multiplies
+#include <iterator>
+#include <numeric>    // std::accumulate
+#include <vector>
+
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/backend/cpu/operator/SliceImpl.hpp"
+
+namespace Aidge {
+
+template<class I, class O>
+void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
+                                const std::vector<std::int64_t>& ends,
+                                const std::vector<std::int8_t>& axes,
+                                const std::vector<std::int64_t>& steps,
+                                const std::vector<DimSize_t>& inputDims,
+                                const void* input_,
+                                void* output_)
+{
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    const std::size_t nbDims = inputDims.size();
+    std::vector<DimSize_t> dims = inputDims;
+    DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
+    const I* inputAccumulation = input;
+    I* outputAccumulation = nullptr;
+    const std::size_t nbAxes = starts.size();
+    for (std::size_t i = 0; i < nbAxes; ++i) {
+        const DimIdx_t axis = axes[i] >= 0 ?
+                                    static_cast<DimIdx_t>(axes[i]) :
+                                    static_cast<DimIdx_t>(axes[i] + static_cast<DimIdx_t>(inputDims.size()));
+        const DimSize_t start = std::min(starts[i] >= 0 ?
+                                                static_cast<DimSize_t>(starts[i]) :
+                                                static_cast<DimSize_t>(starts[i] + static_cast<std::int64_t>(inputDims[axis])),
+                                         dims[axis]-1);
+        const DimSize_t end = ends[i] >= 0 ?
+                                        static_cast<DimSize_t>(ends[i]) :
+                                        static_cast<DimSize_t>(ends[i] + static_cast<std::int64_t>(inputDims[axis]));
+        const std::int64_t step = steps[i];
+
+        const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step)));
+
+        outputAccumulation = new I[totalSize];
+        const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, std::size_t(1), std::multiplies<std::size_t>());
+        const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims - 1 - axis, std::size_t(1), std::multiplies<std::size_t>());
+        for (std::size_t outer = 0; outer < stride_pre; ++outer)
+        {
+            const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post;
+            const std::size_t idx_out = outer * stride_post * sliceSize;
+            std::size_t addedSlices = 0;
+            for (std::size_t inner = 0; inner < sliceSize; ++inner)
+            {
+                std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post),
+                            stride_post,
+                            std::next(outputAccumulation, idx_out + addedSlices * stride_post));
+                addedSlices++;
+            }
+        }
+        totalSize /= dims[axis];
+        totalSize *= sliceSize;
+        dims[axis] = sliceSize;
+
+        if (inputAccumulation != input) {
+            delete[] inputAccumulation;
+        }
+        inputAccumulation = outputAccumulation;
+
+    }
+    // Copy the fully sliced result into the output buffer
+    std::copy_n(inputAccumulation, totalSize, output);
+    if (outputAccumulation) {
+        delete[] outputAccumulation;
+    }
+}
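+// For example, an input of dims {2, 4} holding {0,1,2,3,4,5,6,7} with
+// starts = {1}, ends = {4}, axes = {1} and steps = {2} gives
+// sliceSize = ceil((4 - 1) / 2) = 2: columns 1 and 3 of each row are kept,
+// producing {1, 3, 5, 7} with output dims {2, 2}.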
+
+namespace {
+static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, float>);
+static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, int>);
+static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
index 19b0bd21de129ed303151987323234364ce5f6f2..10e6f58bb44b63f2d8712dc0aa64e0660f3356b2 100644
--- a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
@@ -14,6 +14,10 @@
 
 #include "aidge/utils/Registrar.hpp"
 
+#include <cstddef>     // std::size_t
+#include <cstdint>     // std::int32_t, std::int64_t
+#include <vector>
+
 #include "aidge/backend/cpu/data/Broadcasting.hpp"
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
 
@@ -36,7 +40,7 @@ void SubImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
         totalElements *= dimSize;
     }
 
-	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex) 
+	for (std::size_t oIndex = 0; oIndex < totalElements; ++oIndex)
 	{
 		std::vector<size_t> indexes = getMultiDimIndices(outputDims, oIndex);
 		std::size_t idx1 = getFlattenedIndex(input1Dims, indexes);
@@ -49,12 +53,15 @@ namespace {
 static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32(
         {DataType::Float32, DataType::Float32, DataType::Float32},
         Aidge::SubImpl_cpu_forward_kernel<float, float, float>);
-static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::SubImpl_cpu_forward_kernel<int, int, int>);
 static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64(
         {DataType::Float64, DataType::Float64, DataType::Float64},
         Aidge::SubImpl_cpu_forward_kernel<double, double, double>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
+static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int64(
+        {DataType::Int64, DataType::Int64, DataType::Int64},
+        Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>);
 }  // namespace
 }  // namespace Aidge
 
diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp
index 9e44f7bcd2b2392c634421478a096258b3e39795..0bf851e77d94c160c0362301df33d682347daf0c 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp
@@ -28,7 +28,7 @@ class TanhImplForward_cpu
     : public Registrable<TanhImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
 };
 class TanhImplBackward_cpu
-    : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {
+    : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType, DataType>, void(const std::size_t, const void*, const void*, void*)> {
 };
 
 class TanhImpl_cpu : public OperatorImpl {
@@ -40,7 +40,10 @@ public:
     }
 
     Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
-    void forward() override;
+
+    void forward() override final;
+
+    void backward() override final;
 };
 
 namespace {
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a13c2cad21c35822fc6248590550e4716ee046d
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp
@@ -0,0 +1,43 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_
+
+#include <cstddef>  // std::size_t
+
+#include "aidge/backend/cpu/operator/TanhImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+namespace Aidge {
+template <class O, class GI, class GO>
+void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                  const void* output_, const void* grad_output_,
+                                  void* grad_input_) {
+    const O* output = static_cast<const O*>(output_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        grad_input[i] = (O(1) - output[i] * output[i]) * grad_output[i];
+    }
+}
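+// Using d(tanh(x))/dx = 1 - tanh(x)^2, only the forward output is needed:
+// e.g. output[i] = 0.5 and grad_output[i] = 2.0 give
+// grad_input[i] = (1 - 0.25) * 2.0 = 1.5.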
+
+namespace {
+static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float32(
+    {DataType::Float32, DataType::Float32, DataType::Float32},
+    Aidge::TanhImpl_cpu_backward_kernel<float, float, float>);
+static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float64(
+    {DataType::Float64, DataType::Float64, DataType::Float64},
+    Aidge::TanhImpl_cpu_backward_kernel<double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ */
diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp
index 8ba6751bf4068a69ed07e362924f59d0f4aca6c5..feaa7e67a8d0bc726462aed99e557493d3b8d0c6 100644
--- a/src/operator/AvgPoolingImpl.cpp
+++ b/src/operator/AvgPoolingImpl.cpp
@@ -9,17 +9,17 @@
  *
  ********************************************************************************/
 
-#include <cassert>
+#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
+
+#include <array>
 #include <numeric>
-#include <thread>
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/operator/AvgPooling.hpp"
-
-#include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp"
 #include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/AvgPooling.hpp"
+#include "aidge/utils/Types.h"
 
 Aidge::Elts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -27,15 +27,18 @@ Aidge::Elts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*in
 }
 
 void Aidge::AvgPoolingImpl2D_cpu::forward() {
-    assert(mOp.getRawInput(0) && "missing input #0");
+    const auto& op_ = dynamic_cast<const AvgPooling_Op<2>&>(mOp);
+    assert(op_.getInput(0) && "missing input #0");
 
     // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<AvgPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<AvgPoolingImpl2DForward_cpu>::create(
+        {op_.getInput(0)->dataType(),
+         op_.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+               op_.kernelDims(),
+               op_.getInput(0)->template dims<4>(),
+               getCPUPtr(op_.getInput(0)),
+               getCPUPtr(op_.getOutput(0)));
 }
diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp
index 96179d11850624f831333c9a4badaddf2221ecff..3046eea9bd241732daf39cce1783b5ee50de01c7 100644
--- a/src/operator/BatchNormImpl.cpp
+++ b/src/operator/BatchNormImpl.cpp
@@ -9,7 +9,9 @@
  *
  ********************************************************************************/
 
-#include <cassert>
+#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
+
 #include <numeric> // std::accumulate
 #include <vector>
 
@@ -17,7 +19,6 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 #include "aidge/operator/BatchNorm.hpp"
 
-#include "aidge/backend/cpu/operator/BatchNormImpl.hpp"
 #include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp"
 
 Aidge::Elts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
@@ -26,27 +27,29 @@ Aidge::Elts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inp
 }
 
 void Aidge::BatchNormImpl2D_cpu::forward() {
-    assert(mOp.getRawInput(0) && "missing input #0");
-    assert(mOp.getRawInput(1) && "missing input #1");
-    assert(mOp.getRawInput(2) && "missing input #2");
-    assert(mOp.getRawInput(3) && "missing input #3");
-    assert(mOp.getRawInput(4) && "missing input #4");
+    const auto& op_ = dynamic_cast<const BatchNorm_Op<2>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(2), "missing input #2 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(3), "missing input #3 for BatchNorm Operator");
+    AIDGE_ASSERT(op_.getInput(4), "missing input #4 for BatchNorm Operator");
 
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims() == 4);
+    AIDGE_ASSERT(op_.getOutput(0)->nbDims() == 4, "support for 4-dimensions tensors only");
     // Find the correct kernel type
     auto kernelFunc =
-            Registrar<BatchNormImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-                                                           std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-                                                           std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+            Registrar<BatchNormImpl2DForward_cpu>::create({op_.getInput(0)->dataType(),
+                                                           op_.getInput(1)->dataType(),
+                                                           op_.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const BatchNorm_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawInput(1)),
-               getCPUPtr(mOp.getRawInput(2)),
-               getCPUPtr(mOp.getRawInput(3)),
-               getCPUPtr(mOp.getRawInput(4)),
-               getCPUPtr(mOp.getRawOutput(0)),
-               true);
+    kernelFunc(op_.epsilon(),
+            op_.momentum(),
+            op_.getInput(0)->template dims<4>(),
+            getCPUPtr(op_.getRawInput(0)),
+            getCPUPtr(op_.getRawInput(1)),
+            getCPUPtr(op_.getRawInput(2)),
+            getCPUPtr(op_.getRawInput(3)),
+            getCPUPtr(op_.getRawInput(4)),
+            getCPUPtr(op_.getRawOutput(0)),
+            true);
 }
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 5c8d2fe307c70bd7ee3f64e14735417f7ffb0c67..591e8a0637d1e52c75193ac1750a210a08815ccc 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -9,18 +9,71 @@
  *
  ********************************************************************************/
 
-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
+
+#include <memory>
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
+#include "aidge/data/Tensor.hpp"
 #include "aidge/operator/ConvDepthWise.hpp"
+#include "aidge/utils/Log.hpp"
+#include "aidge/utils/Types.h"
 
-#include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
-#include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
+
+Aidge::Elts_t Aidge::ConvDepthWiseImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<1>&>(mOp);
+
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
+
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 3), "support for 3-dimensions tensors only");
+
+    // Find the correct kernel type
+    const auto outputDataType = op_.getOutput(0)->dataType();
+    const Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_key registrarKey = {
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvDepthWiseImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
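+    // For example, a Float64 input feeding a Float32 output with no
+    // {Float64, ...} kernel registered falls back to the all-Float32 kernel;
+    // the refCastFrom() calls below then convert the inputs to Float32.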
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(op_.strideDims(),
+               op_.dilationDims(),
+               op_.kernelDims(), // Conv attributes
+               op_.getInput(0)->template dims<3>(), // input dimensions
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+               );
+}
 
 Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -28,23 +81,37 @@ Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /
 }
 
 void Aidge::ConvDepthWiseImpl2D_cpu::forward() {
-    assert(mOp.getRawInput(0) && "missing input #0");
-    assert(mOp.getRawInput(1) && "missing input #1");
-    assert(mOp.getRawInput(2) && "missing input #2");
+    const auto& op_ = dynamic_cast<const ConvDepthWise_Op<2>&>(mOp);
 
-    assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() == 4) && "support for 4-dimensions tensors only");
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in ConvDepthWise Operator");
+    AIDGE_ASSERT(op_.getInput(2), "missing input #2 in ConvDepthWise Operator");
+
+    AIDGE_ASSERT((op_.getInput(0)->nbDims() == 4), "support for 4-dimensions tensors only");
 
     // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<ConvDepthWiseImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-                                                               std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-                                                               std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(),
-                                                               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<ConvDepthWiseImpl2DForward_cpu>::create(
+        {op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        op_.getInput(2)->dataType(),
+        op_.getOutput(0)->dataType()});
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = op_.getInput(2) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
 
     // Call kernel
-    kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawInput(1)),
-               getCPUPtr(mOp.getRawInput(2)),
-               getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+            op_.dilationDims(),
+            op_.kernelDims(),
+            op_.getInput(0)->template dims<4>(),
+            input0.getImpl()->rawPtr(),
+            input1.getImpl()->rawPtr(),
+            op_.getInput(2) ?  input2.getImpl()->rawPtr() : nullptr,
+            getCPUPtr(op_.getRawOutput(0)));
 }
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 7457a1a0b75af1f922c5a65ac88aabc813d00069..0be31befe2019d70b628db878443f14b1d622f1c 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -9,18 +9,71 @@
  *
  ********************************************************************************/
 
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+
 #include <cassert>
 #include <chrono>  // std::chrono::milliseconds
 #include <numeric> // std::accumulate
 #include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
 #include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Types.h"
 
-#include "aidge/backend/cpu/operator/ConvImpl.hpp"
-#include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp"
+Aidge::Elts_t Aidge::ConvImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvImpl1D_cpu::forward() {
+    const auto& op_ = static_cast<const Conv_Op<1>&>(mOp);
+
+    // FIXME: uncomment the following code once memory handling will work
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
+
+    // Find the correct kernel type
+    const auto outputDataType = op_.getOutput(0)->dataType();
+    const Registrar<ConvImpl1DForward_cpu>::registrar_key registrarKey = {
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fallback to the kernel with all types matching output type
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(op_.strideDims(),
+            op_.dilationDims(),
+            op_.kernelDims(),
+            op_.getInput(0)->template dims<3>(), // input dimensions
+            op_.outChannels(), // outChannels
+            input0.getImpl()->rawPtr(), // input
+            input1.getImpl()->rawPtr(), // weight
+            op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+            getCPUPtr(mOp.getRawOutput(0)) // output
+            );
+}
 
 Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -28,19 +81,18 @@ Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx
 }
 
 void Aidge::ConvImpl2D_cpu::forward() {
-    const auto& opTensor = static_cast<const OperatorTensor&>(mOp);
+    const auto& op_ = dynamic_cast<const Conv_Op<2>&>(mOp);
 
     // FIXME: uncomment the following code once memory handling will work
-    assert(mOp.getRawInput(0) && "missing input #0");
-    assert(mOp.getRawInput(1) && "missing input #1");
-    assert(mOp.getRawInput(2) && "missing input #2");
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Conv Operator.");
+    AIDGE_ASSERT(op_.getInput(1), "missing input #1 in Conv Operator.");
 
     // Find the correct kernel type
-    const auto outputDataType = opTensor.getOutput(0)->dataType();
+    const auto outputDataType = op_.getOutput(0)->dataType();
     const Registrar<ConvImpl2DForward_cpu>::registrar_key registrarKey = {
-        opTensor.getInput(0)->dataType(),
-        opTensor.getInput(1)->dataType(),
-        opTensor.getInput(2)->dataType(),
+        op_.getInput(0)->dataType(),
+        op_.getInput(1)->dataType(),
+        (op_.getInput(2) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
         outputDataType};
 
     Registrar<ConvImpl2DForward_cpu>::registrar_type kernelFunc;
@@ -59,12 +111,19 @@ void Aidge::ConvImpl2D_cpu::forward() {
     // call to forward(). We might put the following shared_ptr as members of
     // this class to avoid that.
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
-    const auto& input0 = opTensor.getInput(0)->refCastFrom(input0Fallback, *opTensor.getOutput(0));
-    const auto& input1 = opTensor.getInput(1)->refCastFrom(input1Fallback, *opTensor.getOutput(0));
-    const auto& input2 = opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0));
+    const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *op_.getOutput(0));
+    const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *op_.getOutput(0));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *op_.getOutput(0)) : Tensor();
 
     // Call kernel
-    kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), opTensor.getInput(0)->template dims<4>(),
-        input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
-        getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+            op_.dilationDims(),
+            op_.kernelDims(),
+            op_.getInput(0)->template dims<4>(), // input dimensions
+            op_.outChannels(), // outChannels
+            input0.getImpl()->rawPtr(), // input
+            input1.getImpl()->rawPtr(), // weight
+            op_.getInput(2) ? input2.getImpl()->rawPtr() : nullptr, // bias
+            getCPUPtr(mOp.getRawOutput(0)) // output
+            );
 }
diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp
index d9edf3a9959c1c80dbe85c93f7a1499260452c4c..f7eebb7b21512fb3b388b6927409fba9a1d92b34 100644
--- a/src/operator/FCImpl.cpp
+++ b/src/operator/FCImpl.cpp
@@ -29,14 +29,13 @@ void Aidge::FCImpl_cpu::forward()
     const FC_Op& op_ = dynamic_cast<const FC_Op&>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0");
     AIDGE_ASSERT(op_.getInput(1), "missing input #1");
-    AIDGE_ASSERT(op_.getInput(2), "missing input #2");
 
     // Find the correct kernel type
     const auto outputDataType = op_.getOutput(0)->dataType();
     const Registrar<FCImplForward_cpu>::registrar_key registrarKey = {
         op_.getInput(0)->dataType(),
         op_.getInput(1)->dataType(),
-        op_.getInput(2)->dataType(),
+        ((op_.getInput(2)) ? op_.getInput(2)->dataType() : op_.getInput(1)->dataType()),
         outputDataType};
 
     Registrar<FCImplForward_cpu>::registrar_type kernelFunc;
@@ -57,14 +56,16 @@ void Aidge::FCImpl_cpu::forward()
     std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
     const auto& input0 = op_.getInput(0)->refCastFrom(input0Fallback, *(op_.getOutput(0)));
     const auto& input1 = op_.getInput(1)->refCastFrom(input1Fallback, *(op_.getOutput(0)));
-    const auto& input2 = op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0)));
+    const auto& input2 = (op_.getInput(2)) ? op_.getInput(2)->refCastFrom(input2Fallback, *(op_.getOutput(0))) : Tensor();
 
     // Call kernel
     const auto batchSize = (input0.dims().size() > 1) ? input0.dims()[0] : 1;
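+    // input1 is the weight tensor, laid out as [nb output features, nb input features].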
-    kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
-        batchSize,
-        input0.size() / batchSize,
-        input0.getImpl()->rawPtr(), input1.getImpl()->rawPtr(), input2.getImpl()->rawPtr(),
+    kernelFunc(batchSize,
+        input1.dims()[1], // nb input features
+        input1.dims()[0], // nb output features
+        input0.getImpl()->rawPtr(),
+        input1.getImpl()->rawPtr(),
+        (op_.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr,
         getCPUPtr(mOp.getRawOutput(0)));
 }
 
@@ -75,14 +76,13 @@ void Aidge::FCImpl_cpu::backward()
     AIDGE_ASSERT(fc_grad, "missing ouput #0 gradient");
     AIDGE_ASSERT(op_.getInput(0)->grad(), "missing input #0 gradient");
     AIDGE_ASSERT(op_.getInput(1)->grad(), "missing input #1 gradient");
-    AIDGE_ASSERT(op_.getInput(2)->grad(), "missing input #2 gradient");
 
     // Find the correct kernel type
     const Registrar<FCImplBackward_cpu>::registrar_key registrarKey = {
         fc_grad->dataType(),
-        op_.getInput(0)->grad()->dataType(),
         op_.getInput(1)->grad()->dataType(),
-        op_.getInput(2)->grad()->dataType()};
+        (op_.getInput(2)) ? op_.getInput(2)->grad()->dataType() : op_.getInput(1)->grad()->dataType(),
+        op_.getInput(0)->grad()->dataType()};
 
     Registrar<FCImplBackward_cpu>::registrar_type kernelFunc;
     if (Registrar<FCImplBackward_cpu>::exists(registrarKey)) {
@@ -102,17 +102,17 @@ void Aidge::FCImpl_cpu::backward()
     std::shared_ptr<Tensor> input0gradFallback, input1gradFallback, input2gradFallback;
     const auto& input0grad = op_.getInput(0)->grad()->refCastFrom(input0gradFallback, *(op_.getOutput(0)));
     const auto& input1grad = op_.getInput(1)->grad()->refCastFrom(input1gradFallback, *(op_.getOutput(0)));
-    const auto& input2grad = op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0)));
+    const auto& input2grad = (op_.getInput(2)) ? op_.getInput(2)->grad()->refCastFrom(input2gradFallback, *(op_.getOutput(0))) : Tensor();
 
     // Call kernel
     const auto batchSize = (input0grad.dims().size() > 1) ? input0grad.dims()[0] : 1;
-    kernelFunc(dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(),
-        batchSize,
-        input0grad.size() / batchSize,
+    kernelFunc(batchSize,
+        input1grad.dims()[1], // nb input features
+        input1grad.dims()[0], // nb output features
         getCPUPtr(fc_grad),
         getCPUPtr(op_.getInput(0)),
         getCPUPtr(mOp.getRawInput(1)),
         input0grad.getImpl()->rawPtr(),
         input1grad.getImpl()->rawPtr(),
-        input2grad.getImpl()->rawPtr());
+        (op_.getInput(2)) ? input2grad.getImpl()->rawPtr() : nullptr);
 }
diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp
index 340af3eeaf370988f9b12d8535812c938e47078a..9d4f2a7edcdf263751ec1d9cea10cd4d60055610 100644
--- a/src/operator/LeakyReLUImpl.cpp
+++ b/src/operator/LeakyReLUImpl.cpp
@@ -9,18 +9,19 @@
  *
  ********************************************************************************/
 
-#include <cassert>
+#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
+
 #include <vector>
 
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp"
 #include "aidge/data/Tensor.hpp"
 #include "aidge/operator/LeakyReLU.hpp"
+#include "aidge/utils/Log.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/utils/Registrar.hpp"
-#include "aidge/backend/cpu/data/GetCPUPtr.h"
 
-#include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp"
-#include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp"
-#include "aidge/backend/cpu/operator/LeakyReLUImpl_backward_kernels.hpp"
 
 Aidge::Elts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -29,6 +30,7 @@ Aidge::Elts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIn
 
 void Aidge::LeakyReLUImpl_cpu::forward() {
     const LeakyReLU_Op& op_ = dynamic_cast<const LeakyReLU_Op&>(mOp);
+
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
@@ -39,7 +41,7 @@ void Aidge::LeakyReLUImpl_cpu::forward() {
         out0->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(),
+    kernelFunc(op_.negativeSlope(),
         in0->size(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawOutput(0)));
@@ -58,7 +60,7 @@ void Aidge::LeakyReLUImpl_cpu::backward() {
         out0->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(),
+    kernelFunc(op_.negativeSlope(),
         in0->size(),
         getCPUPtr(in0),
         getCPUPtr(out0));
diff --git a/src/operator/LnImpl.cpp b/src/operator/LnImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..12885a944be46a977463e900af4047319bb1c8b2
--- /dev/null
+++ b/src/operator/LnImpl.cpp
@@ -0,0 +1,65 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/operator/Ln.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+#include "aidge/backend/cpu/operator/LnImpl.hpp"
+#include "aidge/backend/cpu/operator/LnImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/LnImpl_backward_kernels.hpp"
+
+Aidge::Elts_t Aidge::LnImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::LnImpl_cpu::forward() {
+    const Ln_Op& op_ = static_cast<const Ln_Op&>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    AIDGE_ASSERT(in0, "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<LnImplForward_cpu>::create({
+        in0->dataType(),
+        out0->dataType()});
+
+    // Call kernel
+    kernelFunc(in0->size(),
+        getCPUPtr(mOp.getRawInput(0)),
+        getCPUPtr(mOp.getRawOutput(0)));
+}
+
+void Aidge::LnImpl_cpu::backward() {
+    const Ln_Op& op_ = dynamic_cast<const Ln_Op&>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<LnImplBackward_cpu>::create({
+        in0->dataType(),
+        gra_int0->dataType(),
+        gra_out0->dataType()
+    });
+
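+    // Since d(ln(x))/dx = 1/x, the backward kernel is expected to compute
+    // grad_input[i] = grad_output[i] / input[i], which is why the forward
+    // input (rather than the output) is passed below.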
+    // Call kernel
+    kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+}
diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp
index 94591eaa9848b24aeb7afa1e8b6b87a3e6e2b45f..2e6d67abbdd6776a1f75449a0f4562143cbaae87 100644
--- a/src/operator/MaxPoolingImpl.cpp
+++ b/src/operator/MaxPoolingImpl.cpp
@@ -9,17 +9,16 @@
  *
  ********************************************************************************/
 
-#include <cassert>
-#include <numeric>
-#include <thread>
+#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
+
 #include <vector>
 
-#include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp"
 #include "aidge/operator/MaxPooling.hpp"
+#include "aidge/utils/Log.hpp"
+#include "aidge/utils/Types.h"
 
-#include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp"
-#include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp"
 
 Aidge::Elts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -27,15 +26,20 @@ Aidge::Elts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*in
 }
 
 void Aidge::MaxPoolingImpl2D_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+    const auto& op_ = dynamic_cast<const MaxPooling_Op<2>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in MaxPooling Operator.");
 
     // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<MaxPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<MaxPoolingImpl2DForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()
+    });
 
     // Call kernel
-    kernelFunc(dynamic_cast<const MaxPooling_Op<2>&>(mOp).getStaticAttributes(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-               getCPUPtr(mOp.getRawInput(0)),
-               getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.strideDims(),
+               op_.kernelDims(),
+               op_.ceilMode(),
+               op_.getInput(0)->template dims<4>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
 }
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index cd420a6241723c5d3fa5836838f84ce6bfe965d1..b4b52d6be855b6a1f8c0a71a6a9169ee9690f34c 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -9,10 +9,6 @@
  *
  ********************************************************************************/
 
-#include <cassert>
-#include <chrono>  // std::chrono::milliseconds
-#include <numeric> // std::accumulate
-#include <thread>  // std::this_thread::sleep_for
 #include <vector>
 
 #include "aidge/utils/Types.h"
@@ -22,8 +18,40 @@
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"
 
-Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
-    assert(inputIdx == 0 && "operator has only one input");
+Aidge::Elts_t Aidge::PadImpl1D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0, "input index out of range. "
+        "{} Operator has only one input", mOp.type());
+    (void) inputIdx;
+
+    // Padding cannot be in-place!
+    // We must ensure that we do not override data that has not been consumed yet.
+    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
+    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
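+    // e.g. padding a {1, 4, 8} input into a {1, 4, 12} output protects
+    // 48 - 32 = 16 elements of the output.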
+    return Elts_t::DataElts(outputSize - inputSize);
+}
+
+void Aidge::PadImpl1D_cpu::forward() {
+    const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<PadImpl1DForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(op_.beginEndBorders(),
+               op_.borderType(),
+               op_.borderValue(),
+               op_.getInput(0)->template dims<3>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
+}
+
+Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
+    AIDGE_ASSERT(inputIdx == 0, "input index out of range. "
+        "{} Operator has only one input", mOp.type());
     (void) inputIdx;
 
     // Padding cannot be in-place!
@@ -34,15 +62,19 @@ Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) c
 }
 
 void Aidge::PadImpl2D_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+    const auto& op_ = dynamic_cast<const Pad_Op<2>&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
 
     // Find the correct kernel type
-    auto kernelFunc =
-            Registrar<PadImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const Pad_Op<2>&>(mOp).getStaticAttributes(),
-                        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(),
-                        getCPUPtr(mOp.getRawInput(0)),
-                        getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.beginEndBorders(),
+               op_.borderType(),
+               op_.borderValue(),
+               op_.getInput(0)->template dims<4>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
 }
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index 06859f09db169946175a93140e04f2e2a99e3362..4a0fb9f5d929e2ce731a21b5553e1b9257a32daa 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -28,13 +28,15 @@ Aidge::Elts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t
 }
 
 void Aidge::ReLUImpl_cpu::forward() {
-    std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
+    const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<ReLUImplForward_cpu>::create({
         in0->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        out0->dataType()});
 
     // Call kernel
     kernelFunc(in0->size(),
@@ -43,20 +45,20 @@ void Aidge::ReLUImpl_cpu::forward() {
 }
 
 void Aidge::ReLUImpl_cpu::backward() {
-    // reversing in and out Tensors
-        const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
+    const ReLU_Op& op_ = dynamic_cast<const ReLU_Op&>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
     std::shared_ptr<Tensor> out0  = op_.getOutput(0);
-    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
     std::shared_ptr<Tensor> gra_int0 = op_.getInput(0)->grad();
-    AIDGE_ASSERT(out0, "current {} operator output#0 has not gradient Tensor.", op_.type());
+    std::shared_ptr<Tensor> gra_out0 = op_.getOutput(0)->grad();
+    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<ReLUImplBackward_cpu>::create({
-        out0->dataType(),
-        gra_out0->dataType(),
-        gra_int0->dataType()
+        in0->dataType(),
+        gra_int0->dataType(),
+        gra_out0->dataType()
     });
 
     // Call kernel
-    kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    kernelFunc(gra_int0->size(), getCPUPtr(in0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
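+    // Note: the backward kernel is keyed on (and fed) the forward input in0,
+    // since ReLU'(x) is 1 for x > 0 and 0 otherwise: the expected computation
+    // is grad_input[i] = (in0[i] > 0) ? grad_output[i] : 0.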
 }
diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp
index a9f17a28a2a47ec7bc50820d587e8d0f359d2bb3..b4cd8ffa9b46aaa1c1d7a2eca947ed0254947fef 100644
--- a/src/operator/ReduceMeanImpl.cpp
+++ b/src/operator/ReduceMeanImpl.cpp
@@ -26,10 +26,11 @@ void Aidge::ReduceMeanImpl_cpu::forward() {
         op_.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(op_.getStaticAttributes(),
-               op_.getInput(0)->dims(),
-               op_.getInput(0)->getImpl()->rawPtr(),
-               op_.getOutput(0)->getImpl()->rawPtr());
+    kernelFunc(op_.axes(),
+               op_.keepDims(),
+               op_.getInput(0)->dims(),
+               op_.getInput(0)->getImpl()->rawPtr(),
+               op_.getOutput(0)->getImpl()->rawPtr());
 }
 
 // void Aidge::ReduceMeanImpl1D_cpu::forward() {
diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp
index d0b58702c73f01fb62114d335f5c2342908542ea..db4670836e702f536243aadec36c5ba85b2344c8 100644
--- a/src/operator/ScalingImpl.cpp
+++ b/src/operator/ScalingImpl.cpp
@@ -12,6 +12,7 @@
 #include <cassert>
 #include <numeric>    // std::accumulate
 #include <functional> // std::multiplies
+#include <vector>
 
 #include "aidge/operator/Scaling.hpp"
 
@@ -19,7 +20,6 @@
 #include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include <vector>
 
 Aidge::Elts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -27,16 +27,19 @@ Aidge::Elts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOInde
 }
 
 void Aidge::ScalingImpl_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+    const auto& op_ = dynamic_cast<const Scaling_Op&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Scaling Operator.");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<ScalingImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
 
     // Call kernel
-    kernelFunc(dynamic_cast<const Scaling_Op&>(mOp).getStaticAttributes(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
-        getCPUPtr(mOp.getRawInput(0)),
-        getCPUPtr(mOp.getRawOutput(0)));
+    kernelFunc(op_.scalingFactor(),
+            op_.quantizedNbBits(),
+            op_.isOutputUnsigned(),
+            op_.getInput(0)->size(),
+            getCPUPtr(mOp.getRawInput(0)),
+            getCPUPtr(mOp.getRawOutput(0)));
 }
diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp
index dd7ec26cb36777f79d382c815b60d2381544a0bd..ad69935c02e392d7aa1c9601acb827c5baf8970f 100644
--- a/src/operator/SigmoidImpl.cpp
+++ b/src/operator/SigmoidImpl.cpp
@@ -21,6 +21,7 @@
 
 #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
 #include "aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp"
 
 Aidge::Elts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -28,15 +29,36 @@ Aidge::Elts_t Aidge::SigmoidImpl_cpu::getNbRequiredProtected(const Aidge::IOInde
 }
 
 void Aidge::SigmoidImpl_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+    const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<SigmoidImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        in0->dataType(),
+        out0->dataType()});
 
     // Call kernel
-    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
+    kernelFunc(in0->size(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawOutput(0)));
 }
+
+void Aidge::SigmoidImpl_cpu::backward() {
+    const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+    std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad();
+    std::shared_ptr<Tensor> gra_out0 = out0->grad();
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<SigmoidImplBackward_cpu>::create({
+        out0->dataType(),
+        gra_in0->dataType(),
+        gra_out0->dataType()
+    });
+
+    // Call kernel
+    kernelFunc(gra_in0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_in0));
+}
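
The backward pass added here dispatches on the (output, input-gradient,
output-gradient) data types and invokes the kernel as (size, output, gradOutput,
gradInput). Since d/dx sigmoid(x) = y * (1 - y) with y = sigmoid(x), a kernel
with that signature can be sketched as below (illustration only; the registered
kernels live in SigmoidImpl_backward_kernels.hpp):

    #include <cstddef>  // std::size_t

    // Sketch: recover the input gradient from the saved output,
    // grad_in[i] = grad_out[i] * y[i] * (1 - y[i]).
    template <class O, class GI, class GO>
    void SigmoidBackwardSketch(const std::size_t inputLength,
                               const void* output_,
                               const void* gradOutput_,
                               void* gradInput_)
    {
        const O* output = static_cast<const O*>(output_);
        const GO* gradOutput = static_cast<const GO*>(gradOutput_);
        GI* gradInput = static_cast<GI*>(gradInput_);
        for (std::size_t i = 0; i < inputLength; ++i) {
            gradInput[i] = static_cast<GI>(gradOutput[i] * output[i] * (O(1) - output[i]));
        }
    }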
diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ffe4dcdd97b58758885b013d0c1770bd98a83ba
--- /dev/null
+++ b/src/operator/SliceImpl.cpp
@@ -0,0 +1,44 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cpu/operator/SliceImpl.hpp"
+
+#include <vector>
+
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp"
+#include "aidge/operator/Slice.hpp"
+#include "aidge/utils/Log.hpp"
+#include "aidge/utils/Types.h"
+
+Aidge::Elts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::SliceImpl_cpu::forward() {
+    const auto& op_ = dynamic_cast<const Slice_Op&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator.");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<SliceImplForward_cpu>::create({
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(op_.starts(),
+            op_.ends(),
+            op_.axes(),
+            op_.steps(),
+            op_.getInput(0)->dims(),
+            getCPUPtr(mOp.getRawInput(0)),
+            getCPUPtr(mOp.getRawOutput(0)));
+}
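
The Slice kernel receives the ONNX-style starts(), ends(), axes() and steps()
attributes together with the input dimensions. The core of any such kernel is
mapping each output coordinate back to a flat input offset; a sketch of that
arithmetic, assuming row-major storage and one (start, step) pair per axis
(illustration only, not the registered kernel):

    #include <cstddef>  // std::size_t
    #include <cstdint>  // std::int64_t
    #include <vector>

    // Sketch: flat input offset of one output element of an N-D slice.
    // outCoord is the coordinate in the output tensor; steps may be negative.
    std::size_t sliceInputOffset(const std::vector<std::size_t>& outCoord,
                                 const std::vector<std::int64_t>& starts,
                                 const std::vector<std::int64_t>& steps,
                                 const std::vector<std::size_t>& inDims)
    {
        std::size_t offset = 0;
        std::size_t stride = 1;
        for (std::size_t a = inDims.size(); a-- > 0;) {  // last axis is contiguous
            const std::int64_t inIdx =
                starts[a] + steps[a] * static_cast<std::int64_t>(outCoord[a]);
            offset += static_cast<std::size_t>(inIdx) * stride;
            stride *= inDims[a];
        }
        return offset;
    }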
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index 240267613e557c20edcc00e81f4bf20d17d9962f..5bc3699e2146e36a63b4a1602ca1cb86e3ff1e2f 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -28,19 +28,18 @@ Aidge::Elts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOInde
 }
 
 void Aidge::SoftmaxImpl_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims()>1);
+    const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp);
+    AIDGE_ASSERT(op_.getInput(0) && !op_.getInput(0)->empty(), "missing or empty input #0 in Softmax Operator.");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        op_.getInput(0)->dataType(),
+        op_.getOutput(0)->dataType()});
 
-    Softmax_Op::Attrs attr = dynamic_cast<const Softmax_Op&>(mOp).getStaticAttributes();
-    const int& axisIdx = static_cast<const int&>(std::get<0>(attr));
+    std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
 
     // Call kernel
-    kernelFunc(axisIdx,
+    kernelFunc(static_cast<std::size_t>(axis), // axisIdx
                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
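
The axis handling above replaces the raw attribute read with Python-style
normalization: a non-negative axis is used as-is, while a negative axis counts
back from the last dimension. For reference:

    #include <cstddef>  // std::size_t
    #include <cstdint>  // std::int32_t

    // Same normalization as in SoftmaxImpl_cpu::forward(). For nbDims == 4:
    //   axis  0 -> 0,   axis  2 -> 2,   axis -1 -> 3,   axis -4 -> 0
    std::int32_t normalizeAxis(const std::int32_t axis, const std::size_t nbDims) {
        return (axis >= 0) ? axis : static_cast<std::int32_t>(nbDims) + axis;
    }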
diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp
index 44e180739ed86e25d4be6d0beb693f73bdadbf35..a2469ed9b83679c0edf8d0a761abf9d3d046db6e 100644
--- a/src/operator/TanhImpl.cpp
+++ b/src/operator/TanhImpl.cpp
@@ -21,6 +21,7 @@
 
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
 #include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp"
 
 Aidge::Elts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
@@ -28,15 +29,36 @@ Aidge::Elts_t Aidge::TanhImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t
 }
 
 void Aidge::TanhImpl_cpu::forward() {
-    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+    const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp);
+    std::shared_ptr<Tensor> in0 = op_.getInput(0);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
     auto kernelFunc = Registrar<TanhImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+        in0->dataType(),
+        out0->dataType()});
 
     // Call kernel
-    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(),
+    kernelFunc(in0->size(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawOutput(0)));
 }
+
+void Aidge::TanhImpl_cpu::backward() {
+    const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp);
+    std::shared_ptr<Tensor> out0 = op_.getOutput(0);
+    AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
+    std::shared_ptr<Tensor> gra_in0 = op_.getInput(0)->grad();
+    std::shared_ptr<Tensor> gra_out0 = out0->grad();
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<TanhImplBackward_cpu>::create({
+        out0->dataType(),
+        gra_in0->dataType(),
+        gra_out0->dataType()
+    });
+
+    // Call kernel
+    kernelFunc(gra_in0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_in0));
+}
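
Tanh gets the same backward plumbing as Sigmoid above; only the derivative
changes. Since d/dx tanh(x) = 1 - y^2 with y = tanh(x), the kernel body reduces
to the following sketch (same caveats: illustration only, the registered kernels
live in TanhImpl_backward_kernels.hpp):

    #include <cstddef>  // std::size_t

    // Sketch: grad_in[i] = grad_out[i] * (1 - y[i] * y[i]).
    template <class O, class GI, class GO>
    void TanhBackwardSketch(const std::size_t inputLength,
                            const void* output_,
                            const void* gradOutput_,
                            void* gradInput_)
    {
        const O* output = static_cast<const O*>(output_);
        const GO* gradOutput = static_cast<const GO*>(gradOutput_);
        GI* gradInput = static_cast<GI*>(gradInput_);
        for (std::size_t i = 0; i < inputLength; ++i) {
            gradInput[i] = static_cast<GI>(gradOutput[i] * (O(1) - output[i] * output[i]));
        }
    }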
diff --git a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
index 43903100a163b4499ed96c44d77ad119534d2eaa..d5f2065b624de431b43edef9a83bf079905129dd 100644
--- a/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
+++ b/unit_tests/operator/Test_GlobalAveragePoolingImpl.cpp
@@ -237,7 +237,7 @@ TEST_CASE("[cpu/operator] GlobalAveragePooling",
             REQUIRE(Tres->dims().at(i) == op->getOutput(0)->dims().at(i));
           }
 
-          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres));
+          REQUIRE(approxEq<float>(*(op->getOutput(0)), *Tres, 1e-4f));
 
           delete[] array0;
           delete[] result;
diff --git a/unit_tests/operator/Test_MetaOperator.cpp b/unit_tests/operator/Test_MetaOperator.cpp
index aa9a3909619aac2bcd2718ab7aaa0f8f6699ed34..271a1e2f9860d92f840916f6b2e396993b0bea39 100644
--- a/unit_tests/operator/Test_MetaOperator.cpp
+++ b/unit_tests/operator/Test_MetaOperator.cpp
@@ -194,13 +194,19 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
     SECTION("LSTM(forward)") {
         auto pop = Pop();
         auto myLSTM = LSTM(32, 64, 0, true, "ltsm");
-        auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
+        auto op = std::dynamic_pointer_cast<MetaOperator_Op>(myLSTM->getOperator());
 
-        auto microGraph = std::dynamic_pointer_cast<MetaOperator_Op>(op)->getMicroGraph();
-        microGraph->save("lstm", false, false);
+        auto microGraph = op->getMicroGraph();
+        microGraph->save("lstm", false, true);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-        REQUIRE(myLSTM->nbData() == 1);
+        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+        for (size_t i = 1; i < 9; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+        }
+        for (size_t i = 9; i < 17; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+        }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -259,7 +265,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         microGraph->save("lstm", false, false);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-        REQUIRE(myLSTM->nbData() == 1);
+        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+        for (size_t i = 1; i < 9; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+        }
+        for (size_t i = 9; i < 17; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+        }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -316,7 +328,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         auto op = std::static_pointer_cast<OperatorTensor>(myLSTM->getOperator());
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-        REQUIRE(myLSTM->nbData() == 1);
+        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+        for (size_t i = 1; i < 9; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+        }
+        for (size_t i = 9; i < 17; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+        }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -344,13 +362,12 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myLSTM->input(8).first->getOperator()->setOutput(0, myInitR);
 
         auto g = getConnectedGraphView(myLSTM);
-        g->setDataType(DataType::Float32);
-        g->setBackend("cpu");
+        g->compile("cpu", DataType::Float32);
 
         g->save("lstm_seq", true, true);
 
         auto scheduler = SequentialScheduler(g);
-        scheduler.forward(true);
+        scheduler.forward();
         scheduler.saveSchedulingDiagram("lstm_seq_schedule");
 
         std::shared_ptr<Tensor> myHiddenState = std::make_shared<Tensor>(
@@ -378,7 +395,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myGraph->add(pop);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-        REQUIRE(myLSTM->nbData() == 1);
+        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+        for (size_t i = 1; i < 9; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+        }
+        for (size_t i = 9; i < 17; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+        }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
@@ -441,7 +464,13 @@ TEST_CASE("[cpu/operator] MetaOperator", "[MetaOperator][CPU]") {
         myGraph->add(pop);
 
         REQUIRE(myLSTM->nbInputs() == 3 + 8 + 8);
-        REQUIRE(myLSTM->nbData() == 1);
+        REQUIRE(myLSTM->inputCategory(0) == InputCategory::Data);
+        for (size_t i = 1; i < 9; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::Param);
+        }
+        for (size_t i = 9; i < 17; ++i) {
+            REQUIRE(myLSTM->inputCategory(i) == InputCategory::OptionalParam);
+        }
         REQUIRE(myLSTM->nbOutputs() == 2);
 
         std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(
diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2b9f89e62c09c04a7f848c362336418ef62aecce
--- /dev/null
+++ b/unit_tests/operator/Test_SliceImpl.cpp
@@ -0,0 +1,279 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Slice.hpp"
+
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") {
+    SECTION("1D Tensor") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> {
+            {0, 1, -2,-3, 4,-5,-6, 7, 8, 9}
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,3> {
+            {0, 1, -2}
+        });
+        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,1>{{0}});
+        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,1>{{3}});
+        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,1>{{0}});
+
+        std::shared_ptr<Node> mySlice = Slice();
+        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0,input0);
+        mySlice->getOperator()->associateInput(1,starts);
+        mySlice->getOperator()->associateInput(2,ends);
+        mySlice->getOperator()->associateInput(3,axes);
+        mySlice->getOperator()->setDataType(DataType::Int32);
+        mySlice->getOperator()->setBackend("cpu");
+        mySlice->forward();
+
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
+        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
+        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
+    }
+
+    SECTION("2D Tensor") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> {
+            {
+                { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> {
+            {
+                {-5,-6, 7},
+                {-5,-6, 7}
+            }
+        });
+        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0,5}});
+        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2,8}});
+        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0,1}});
+
+        std::shared_ptr<Node> mySlice = Slice();
+        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0,input0);
+        mySlice->getOperator()->associateInput(1,starts);
+        mySlice->getOperator()->associateInput(2,ends);
+        mySlice->getOperator()->associateInput(3,axes);
+        mySlice->getOperator()->setDataType(DataType::Int32);
+        mySlice->getOperator()->setBackend("cpu");
+        mySlice->forward();
+        // op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
+        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
+        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
+    }
+
+    SECTION("3D Tensor") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> {
+            {
+                {
+                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                },
+                {
+                    { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                    {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> {
+            {
+                {
+                    { 4,-5,-6}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,3>{{0,1,4}});
+        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,3>{{1,2,7}});
+        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,3>{{0,1,2}});
+
+        std::shared_ptr<Node> mySlice = Slice();
+        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0,input0);
+        mySlice->getOperator()->associateInput(1,starts);
+        mySlice->getOperator()->associateInput(2,ends);
+        mySlice->getOperator()->associateInput(3,axes);
+        mySlice->getOperator()->setDataType(DataType::Int32);
+        mySlice->getOperator()->setBackend("cpu");
+        mySlice->forward();
+        // mySlice->getOperator()->output(0).print();
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
+        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
+        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
+    }
+
+    SECTION("4D Tensor") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
+            {
+                {
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    },
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    }
+                },
+                {
+                    {
+                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    },
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
+                    }
+                }
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
+            {
+                {
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    },
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    }
+                },
+                {
+                    {
+                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    },
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
+                    }
+                }
+            }
+        });
+        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,4>{{0,0,0,0}});
+        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,4>{{2,2,2,10}});
+        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,4>{{0,1,2,3}});
+
+        std::shared_ptr<Node> mySlice = Slice();
+        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0,input0);
+        mySlice->getOperator()->associateInput(1,starts);
+        mySlice->getOperator()->associateInput(2,ends);
+        mySlice->getOperator()->associateInput(3,axes);
+        mySlice->getOperator()->setDataType(DataType::Int32);
+        mySlice->getOperator()->setBackend("cpu");
+        mySlice->forward();
+        // op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
+        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
+        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
+    }
+
+    SECTION("Attributes instead of inputs") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> {
+            {
+                {
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    },
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    }
+                },
+                {
+                    {
+                        { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3, 4,-5,-6, 7,-1,10}
+                    },
+                    {
+                        { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9},
+                        {-5, 4, 2,-3,11,-5,-6, 7,-1,10}
+                    }
+                }
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,1,1,5> {
+            {
+                {
+                    {
+                        { 0, 1, 2,-3, 4}
+                    }
+                }
+            }
+        });
+
+        std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,5}, {0,1,2,3}, {1,1,1,1});
+        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0,input0);
+        mySlice->getOperator()->setDataType(DataType::Int32);
+        mySlice->getOperator()->setBackend("cpu");
+        mySlice->forward();
+        // op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
+        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
+        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
+    }
+
+    SECTION("Different Steps") {
+        std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,4,2,8> {
+            {
+                {
+                    { 0, 1, 2,-3, 4,-5,-6,7},
+                    {-5, 4, 2,-3, 4,-5,-6,-7}
+                },
+                {
+                    { 10, 11, 12,-13, 14,-15,-16,17},
+                    {-15, 14, 12,-13, 14,-15,-16,-17}
+                },
+                {
+                    { 20, 21, 22,-23, 24,-25,-26,27},
+                    {-25, 24, 22,-23, 24,-25,-26,-27}
+                },
+                {
+                    { 30, 31, 32,-33, 34,-35,-36,37},
+                    {-35, 34, 32,-33, 34,-35,-36,-37}
+                }
+            }
+        });
+        std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,1,3> {
+            {
+                {
+                    { 7, 4, 1}
+                },
+                {
+                    { 27, 24, 21}
+                }
+            }
+        });
+
+        std::shared_ptr<Node> mySlice = Slice({0,0,7}, {4,1,0}, {0,1,2}, {2,1,-3});
+        // Steps are 2,1,-3, so the slice is:
+        // on axis 0: from 0 to 4 with a step of 2
+        // on axis 1: from 0 to 1 with a step of 1
+        // on axis 2: from 7 down to 0 with a step of -3 (elements taken in reverse order)
+        auto op = std::static_pointer_cast<OperatorTensor>(mySlice->getOperator());
+        mySlice->getOperator()->associateInput(0,input0);
+        mySlice->getOperator()->setDataType(DataType::Int32);
+        mySlice->getOperator()->setBackend("cpu");
+        mySlice->forward();
+        // op->getOutput(0)->print();
+        REQUIRE(*(op->getOutput(0)) == *expectedOutput);
+        REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims());
+        REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType());
+    }
+}
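
To verify the "Different Steps" expectations by hand, enumerate the indices
visited on each axis; a small helper reproducing that enumeration (end is
exclusive, matching the semantics used by the test above):

    #include <cstdint>  // std::int64_t
    #include <vector>

    // Indices visited on one axis for a (start, end, step) triple.
    std::vector<std::int64_t> axisIndices(std::int64_t start, std::int64_t end, std::int64_t step) {
        std::vector<std::int64_t> idx;
        if (step > 0) {
            for (std::int64_t i = start; i < end; i += step) { idx.push_back(i); }
        } else {
            for (std::int64_t i = start; i > end; i += step) { idx.push_back(i); }
        }
        return idx;
    }

    // axisIndices(0, 4,  2) -> {0, 2}       axis 0: outer rows 0 and 2
    // axisIndices(0, 1,  1) -> {0}          axis 1: row 0 only
    // axisIndices(7, 0, -3) -> {7, 4, 1}    axis 2: reverse traversal,
    //                                       giving {7, 4, 1} and {27, 24, 21}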
diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp
index 2c10cdf369d7d37ea67b70b9dfe3e76018da2a32..7c127548417492141c3ea1eeb9374042befe75d2 100644
--- a/unit_tests/recipies/Test_HorizontalTiling.cpp
+++ b/unit_tests/recipies/Test_HorizontalTiling.cpp
@@ -174,7 +174,7 @@ TEST_CASE("[core/recipes] Tiling(transformation)", "[Tiling][Recipes]") {
             REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput);
 
             GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv);
-            g->compile("cpu", DataType::Int32);
+            g->compile("cpu", DataType::Int32, 0, {{2,3,5,5}});  // FIXME: this call unexpectedly changes myInput's DataType from Int32 to Float32
             s.resetScheduling();
             s.forward();
 
diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp
index 01ccd37c319ee64deb15240b30cc369b37c9e47d..16112628053a35ef71d5819a53aacc85425da88d 100644
--- a/unit_tests/scheduler/Test_Scheduler.cpp
+++ b/unit_tests/scheduler/Test_Scheduler.cpp
@@ -416,7 +416,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward
     auto myProd = Producer(inputTensor, "prod");
     myProd -> addChild(gv);
     gv -> compile("cpu", DataType::Float32);
-    compile_gradient(gv);
+
     SequentialScheduler scheduler(gv);
     scheduler.forward();
     auto outNode = gv->getOrderedOutputs()[0].first;
@@ -432,7 +432,6 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(backward)", "[scheduler][backward
                                                                  {6.0f, 6.0f, 6.0f, 6.0f, 6.0f},
                                                                  {6.0f, 6.0f, 6.0f, 7.0f, 7.0f},
                                                                  {7.0f, 7.0f, 7.0f, 7.0f, 7.0f}}}}});
-    predictedOutput->initGrad();
     predictedOutput->setGrad(targetOutput);
     REQUIRE_NOTHROW(scheduler.backward());
 }
diff --git a/version.txt b/version.txt
index ee1372d33a29e27945406f0527f8af8e6ee119c9..7179039691ce07a214e7a815893fee97a97b1422 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.2.2
+0.2.3