diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index 2c52893315385f65910e8322202fd26d67d1f24f..740d72657b90c05ac3f21459f2716dc7935e2be4 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -35,15 +35,6 @@ using ConvImpl1D_cpu = OperatorImpl_cpu<Conv_Op<1>,
                             const void *,
                             const void *,
                             const void *,
-                            void *),
-    void(const std::array<DimSize_t, 1>&,
-                            const std::array<DimSize_t, 1>&,
-                            const std::array<DimSize_t, 1>&,
-                            bool,
-                            const std::array<DimSize_t, 3> &,
-                            const void *,
-                            const void *,
-                            const void *,
                             void *)>;
 
 using Conv2D_Op = Conv_Op<2>;
@@ -56,207 +47,11 @@ using ConvImpl2D_cpu = OperatorImpl_cpu<Conv_Op<2>,
                             const void *,
                             const void *,
                             const void *,
-                            void *),
-    void(const std::array<DimSize_t, 2>&,
-                            const std::array<DimSize_t, 2>&,
-                            const std::array<DimSize_t, 2>&,
-                            bool,
-                            const std::array<DimSize_t, 4> &,
-                            const void *,
-                            const void *,
-                            const void *,
                             void *)>;
 
 // Implementation entry point registration to Operator
 REGISTRAR(Conv1D_Op, "cpu", Aidge::ConvImpl1D_cpu::create);
 REGISTRAR(Conv2D_Op, "cpu", Aidge::ConvImpl2D_cpu::create);
-
-////////////////////////////////////////////////////////////////////////////////
-
-/**
- * @brief Forward kernel for 1D Convolution on CPU backend.
- * @tparam I Input data type.
- * @tparam W Weight data type.
- * @tparam B Bias data type.
- * @tparam O Output data type.
- * @param params tuple of Attributes from the Operator
- * @param inputDims Array of input dimensions.
- * @param input_ const input Tensor.
- * @param weights_ const weight Tensor.
- * @param biases_ const Biais Tensor.
- * @param output_ Output Tensor.
- */
-template <class I, class W, class B, class O>
-void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
-                            const std::array<DimSize_t, 1>& /*dilationDims*/,
-                            const std::array<DimSize_t, 1>& kernelDims,
-                            const std::array<DimSize_t, 3>& inputDims,
-                            DimSize_t outChannels,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_)
-{
-    // FIXME: missing convolution attributes as arguments
-    const I *input = static_cast<const I *>(input_);
-    const W *weights = static_cast<const W *>(weights_);
-    const B *biases = static_cast<const B *>(biases_);
-    O *output = static_cast<O *>(output_);
-
-    // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input  (batch, inCh, Xin, Yin)
-    // weight (outCh, inCh, kernelX, kernelY)
-    // does not take Dilation attribute into account
-    using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
-        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
-            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
-            // If bias = nullptr, set B(0)
-            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
-            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
-                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
-                for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
-                    const std::size_t oIndexFull = oIndex + ox;
-                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
-
-                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                        output[oIndexFull] += weights[wIndex + sx] *
-                                                input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
-                    }
-                }
-            }
-        }
-    }
-}
-
-REGISTRAR(ConvImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
-REGISTRAR(ConvImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
-REGISTRAR(ConvImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>, nullptr});
-REGISTRAR(ConvImpl1D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
-
-
-/**
- * @brief Forward kernel for 2D Convolution on CPU backend.
- * @tparam I Input data type.
- * @tparam W Weight data type.
- * @tparam B Bias data type.
- * @tparam O Output data type.
- * @param params tuple of Attributes from the Operator
- * @param inputDims Array of input dimensions.
- * @param input_ const input Tensor.
- * @param weights_ const weight Tensor.
- * @param biases_ const Biais Tensor.
- * @param output_ Output Tensor.
- */
-template <class I, class W, class B, class O>
-void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
-                            const std::array<DimSize_t, 2>& /*dilationDims*/,
-                            const std::array<DimSize_t, 2>& kernelDims,
-                            const std::array<DimSize_t, 4> &inputDims,
-                            DimSize_t outChannels,
-                            const void *input_,
-                            const void *weights_,
-                            const void *biases_,
-                            void *output_)
-{
-    // FIXME: missing convolution attributes as arguments
-    const I *input = static_cast<const I *>(input_);
-    const W *weights = static_cast<const W *>(weights_);
-    const B *biases = static_cast<const B *>(biases_);
-    O *output = static_cast<O *>(output_);
-
-    // output H size
-    const std::size_t oxSize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
-                                static_cast<float>(strideDims[0])));
-    // output W size
-    const std::size_t oySize =
-            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
-                                static_cast<float>(strideDims[1])));
-
-    // TODO: kernel computation
-    // output (batch, outCh, Xout, Yout)
-    // input  (batch, inCh, Xin, Yin)
-    // weight (outCh, inCh, kernelX, kernelY)
-    // does not take Dilation attribute into account
-    using signedsize = std::make_signed<std::size_t>::type;
-    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
-        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
-            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
-            // If bias = nullptr, set B(0)
-            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
-            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
-            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
-                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
-                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
-                for (std::size_t ox = 0; ox < oxSize; ++ox) {
-                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
-                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
-                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
-                    for (std::size_t oy = 0; oy < oySize; ++oy) {
-                        const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
-                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
-                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
-                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
-                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
-                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
-
-                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
-                            output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
-                                                   weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
-                                                   weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
-                                                   weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
-                        } else {
-                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
-                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
-                                    output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
-                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-REGISTRAR(ConvImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
-REGISTRAR(ConvImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
-REGISTRAR(ConvImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>, nullptr});
-REGISTRAR(ConvImpl2D_cpu,
-    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
-    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_CONVIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..1b37f74d2a8eaa6688792059fab5b6242296e531
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ConvImpl_kernels.hpp
@@ -0,0 +1,214 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/operator/Conv.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include "aidge/backend/cpu/data/GetCPUPtr.h"
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Attributes from the Operator
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 1>& strideDims,
+                            const std::array<DimSize_t, 1>& /*dilationDims*/,
+                            const std::array<DimSize_t, 1>& kernelDims,
+                            const std::array<DimSize_t, 3>& inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input  (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                                input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl1D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>, nullptr});
+
+
+/**
+ * @brief Forward kernel for 2D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params tuple of Attributes from the Operator
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 2>& strideDims,
+                            const std::array<DimSize_t, 2>& /*dilationDims*/,
+                            const std::array<DimSize_t, 2>& kernelDims,
+                            const std::array<DimSize_t, 4> &inputDims,
+                            DimSize_t outChannels,
+                            const void *input_,
+                            const void *weights_,
+                            const void *biases_,
+                            void *output_)
+{
+    // FIXME: missing convolution attributes as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - kernelDims[0] + strideDims[0]) /
+                                static_cast<float>(strideDims[0])));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[3] - kernelDims[1] + strideDims[1]) /
+                                static_cast<float>(strideDims[1])));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input  (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize * oySize;
+            // If bias = nullptr, set B(0)
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2] * inputDims[3];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * kernelDims[0] * kernelDims[1];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * strideDims[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > kernelDims[0] ? kernelDims[0] : inputDims[2] + difx);
+                    for (std::size_t oy = 0; oy < oySize; ++oy) {
+                        const signedsize dify = static_cast<signedsize>(- oy * strideDims[1]);
+                        const std::size_t syMin = static_cast<std::size_t>(std::max(dify, signedsize(0)));
+                        const std::size_t syMax = (static_cast<signedsize>(inputDims[3]) + dify) < 0 ? 0 : ((inputDims[3] + dify) > kernelDims[1] ? kernelDims[1] : inputDims[3] + dify);
+                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                        const signedsize ix = static_cast<signedsize>(ox * strideDims[0]);
+                        const signedsize iy = static_cast<signedsize>(oy * strideDims[1]);
+
+                        if (sxMin == 0 && syMin == 0 && sxMax == 3 && syMax == 3) {
+                            output[oIndexFull] += (weights[wIndex + 0*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 0*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+0)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 1*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+1)*inputDims[3] + static_cast<std::size_t>(iy+2)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 0] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+0)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 1] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+1)] +
+                                                   weights[wIndex + 2*kernelDims[1] + 2] * input[iIndex + static_cast<std::size_t>(ix+2)*inputDims[3] + static_cast<std::size_t>(iy+2)]);
+                        } else {
+                            for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                                for (std::size_t sy = syMin; sy < syMax; ++sy) {
+                                    output[oIndexFull] += weights[wIndex + sx*kernelDims[1] + sy] *
+                                                            input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))*inputDims[3] + static_cast<std::size_t>(iy+static_cast<signedsize>(sy))];
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<float, float, float, float>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float16, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<int, int, int, int>, nullptr});
+REGISTRAR(ConvImpl2D_cpu,
+    {{DataType::Any, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {ProdConso::inPlaceModel, Aidge::ConvImpl2D_cpu_forward_kernel<double, double, double, double>, nullptr});
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_CONVIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/OperatorImpl.hpp b/include/aidge/backend/cpu/operator/OperatorImpl.hpp
index 4e3100ec4172861c7341776f96c44f5c4032cb47..abf94ab9069a07e8f87819cb29c027b1adbfd9c6 100644
--- a/include/aidge/backend/cpu/operator/OperatorImpl.hpp
+++ b/include/aidge/backend/cpu/operator/OperatorImpl.hpp
@@ -22,7 +22,7 @@
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
-template <class Op, class FwdFunc, class BwdFunc>
+template <class Op, class FwdFunc, class BwdFunc = void()>
 class OperatorImpl_cpu : public OperatorImpl,
     public Registrable<OperatorImpl_cpu<Op, FwdFunc, BwdFunc>, ImplSpec, Impl<FwdFunc, BwdFunc>>
 {
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl.hpp b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
index 1b8622e287f985279e57e9838308dc2f844aec1a..5b900618abce83ff1c3822d4f61cc62c93f5081f 100644
--- a/include/aidge/backend/cpu/operator/ReLUImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReLUImpl.hpp
@@ -30,46 +30,6 @@ using ReLUImpl_cpu = OperatorImpl_cpu<ReLU_Op,
 
 // Implementation entry point registration to Operator
 REGISTRAR(ReLU_Op, "cpu", Aidge::ReLUImpl_cpu::create);
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Kernels
-template <class I, class O>
-void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-//#pragma omp parallel for if (inputLenght > 1024)
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = (input[i] > 0) ? input[i] : 0;
-    }
-}
-
-template <class I, class GI, class GO>
-void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                  const void* input_, const void* grad_output_,
-				  void* grad_input_) {
-    const I* input = static_cast<const I*>(input_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        grad_input[i] = (input[i] > 0) ? grad_output[i] : 0;
-    }
-}
-
-// Kernels registration to implementation entry point
-REGISTRAR(ReLUImpl_cpu,
-    {DataType::Float32},
-    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>});
-REGISTRAR(ReLUImpl_cpu,
-    {DataType::Float64},
-    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>});
-REGISTRAR(ReLUImpl_cpu,
-    {DataType::Int32},
-    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int, int>, Aidge::ReLUImpl_cpu_backward_kernel<int, int, int>});
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_RELUIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3131d6922fcb3930b9ab15fe7897cb3e30277447
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ReLUImpl_kernels.hpp
@@ -0,0 +1,66 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_
+
+#include <cstddef>  // std::size_t
+#include <memory>
+#include <tuple>    // std::tuple
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/operator/ReLU.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// Kernels
+template <class I, class O>
+void ReLUImpl_cpu_forward_kernel(std::size_t inputLenght,
+                                     const void* input_,
+                                     void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+//#pragma omp parallel for if (inputLenght > 1024)
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        output[i] = (input[i] > 0) ? input[i] : 0;
+    }
+}
+
+template <class I, class GI, class GO>
+void ReLUImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                  const void* input_, const void* grad_output_,
+				  void* grad_input_) {
+    const I* input = static_cast<const I*>(input_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        grad_input[i] = (input[i] > 0) ? grad_output[i] : 0;
+    }
+}
+
+// Kernels registration to implementation entry point
+REGISTRAR(ReLUImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<float, float>, Aidge::ReLUImpl_cpu_backward_kernel<float, float, float>});
+REGISTRAR(ReLUImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<double, double>, Aidge::ReLUImpl_cpu_backward_kernel<double, double, double>});
+REGISTRAR(ReLUImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::ReLUImpl_cpu_forward_kernel<int, int>, Aidge::ReLUImpl_cpu_backward_kernel<int, int, int>});
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_RELUIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
index b8a9d3bf8e3e0cbde08a6c07b224f4708ecce34d..ee1c36edecbe50cc1765da59737509a2b6333caf 100644
--- a/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
 #define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Sigmoid.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -21,34 +21,13 @@
 #include <vector>
 
 namespace Aidge {
-// class Sigmoid_Op;
+// Operator implementation entry point for the backend
+using SigmoidImpl_cpu = OperatorImpl_cpu<Sigmoid_Op,
+    void(const std::size_t, const void*, void*),
+    void(const std::size_t, const void*, const void*, void*)>;
 
-// compute kernel registry for forward and backward
-class SigmoidImplForward_cpu
-    : public Registrable<SigmoidImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> {
-};
-class SigmoidImplBackward_cpu
-    : public Registrable<SigmoidImplBackward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::size_t, const void*, const void*, void*)>> {
-};
-
-class SigmoidImpl_cpu : public OperatorImpl {
-public:
-    SigmoidImpl_cpu(const Sigmoid_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<SigmoidImpl_cpu> create(const Sigmoid_Op& op) {
-        return std::make_unique<SigmoidImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-	
-    void forward() override final;
-
-    void backward() override final;
-};
-
-namespace {
-static Registrar<Sigmoid_Op> registrarSigmoidImpl_cpu("cpu", Aidge::SigmoidImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(Sigmoid_Op, "cpu", Aidge::SigmoidImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp
deleted file mode 100644
index 4ceb3bd7ed9a3fb739591eee488f8035770fef18..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_
-
-#include <cstddef>  // std::size_t
-
-#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
-#include "aidge/utils/Registrar.hpp"
-
-namespace Aidge {
-template <class O, class GI, class GO>
-void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                     const void* output_, const void* grad_output_,
-				     void* grad_input_) {
-    const O* output = static_cast<const O*>(output_);
-    const GO* grad_output = static_cast<const GO*>(grad_output_);
-    GI* grad_input = static_cast<GI*>(grad_input_);
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
-    }
-}
-
-namespace {
-static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float32(
-    {DataType::Float32, DataType::Float32, DataType::Float32},
-    Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>);
-static Registrar<SigmoidImplBackward_cpu> registrarSigmoidImplBackward_cpu_Float64(
-    {DataType::Float64, DataType::Float64, DataType::Float64},
-    Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_BACKWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
deleted file mode 100644
index 24ba11a0bca7f3fa15f9ac1e2c13e29f88eaf074..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                    const void* input_,
-                                    void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-//#pragma omp parallel for if (inputLenght > 1024)
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-		if (input[i] > I(0)) {
-			output[i] = O(1) / (O(1) + std::exp(-input[i]));
-		} else {
-			output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
-		}
-    }
-}
-
-namespace {
-static Registrar<SigmoidImplForward_cpu> registrarSigmoidImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>);
-static Registrar<SigmoidImplForward_cpu> registrarSigmoidImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dfd71ce0a878efbeb779f3a67ad4ccc762bb8363
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp
@@ -0,0 +1,59 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void SigmoidImpl_cpu_forward_kernel(std::size_t inputLenght,
+                                    const void* input_,
+                                    void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+//#pragma omp parallel for if (inputLenght > 1024)
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+		if (input[i] > I(0)) {
+			output[i] = O(1) / (O(1) + std::exp(-input[i]));
+		} else {
+			output[i] = std::exp(input[i]) / (O(1) + std::exp(input[i]));
+		}
+    }
+}
+
+template <class O, class GI, class GO>
+void SigmoidImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                     const void* output_, const void* grad_output_,
+				     void* grad_input_) {
+    const O* output = static_cast<const O*>(output_);
+    const GO* grad_output = static_cast<const GO*>(grad_output_);
+    GI* grad_input = static_cast<GI*>(grad_input_);
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        grad_input[i] = output[i] * (O(1) - output[i]) * grad_output[i];
+    }
+}
+
+// Kernels registration to implementation entry point
+REGISTRAR(SigmoidImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<float, float>, Aidge::SigmoidImpl_cpu_backward_kernel<float, float, float>});
+REGISTRAR(SigmoidImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::SigmoidImpl_cpu_forward_kernel<double, double>, Aidge::SigmoidImpl_cpu_backward_kernel<double, double, double>});
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SIGMOIDIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp
index 0f67f8c5541f739c127c50435c97862f26cb03a3..fd98b38d7117eaa14e35fe3cb89abf95b2913997 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp
@@ -16,52 +16,25 @@
 #include <vector>
 #include <array>
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Slice.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 namespace Aidge {
-// class Slice_Op;
-
-// compute kernel registry for forward and backward
-class SliceImplForward_cpu
-    : public Registrable<SliceImplForward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const std::vector<std::int64_t>&,
+// Operator implementation entry point for the backend
+using SliceImpl_cpu = OperatorImpl_cpu<Slice_Op,
+    void(const std::vector<std::int64_t>&,
                             const std::vector<std::int64_t>&,
                             const std::vector<std::int8_t>&,
                             const std::vector<std::int64_t>&,
                             const std::vector<DimSize_t>&,
                             const void*,
-                            void*)>> {};
-class SliceImplBackward_cpu
-    : public Registrable<SliceImplBackward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const std::vector<std::int64_t>&,
-                            const std::vector<std::int64_t>&,
-                            const std::vector<std::int8_t>&,
-                            const std::vector<std::int64_t>&,
-                            const std::vector<DimSize_t>&,
-                            const void*,
-                            void*)>> {};
-
-class SliceImpl_cpu : public OperatorImpl {
-public:
-    SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) {
-        return std::make_unique<SliceImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
+                            void*)>;
 
-namespace {
-static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(Slice_Op, "cpu", Aidge::SliceImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp
similarity index 84%
rename from include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp
index 31e409369cc640bbda9f54c54652af7f72b509b6..4c37316d663212e303cdccb2c90bf55842f93575 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_
 
 #include <algorithm>
 #include <cmath>
@@ -88,14 +88,15 @@ void SliceImpl_cpu_forward_kernel(const std::vector<std::int64_t>& starts,
     }
 }
 
-namespace {
-static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, float>);
-static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, int>);
-static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, double>);
-}  // namespace
+REGISTRAR(SliceImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<float, float>, nullptr});
+REGISTRAR(SliceImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<double, double>, nullptr});
+REGISTRAR(SliceImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::SliceImpl_cpu_forward_kernel<int, int>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
index a09261d0ec79869465c2bd6291f057dfa8387c90..ec2c2696ed6e2ba8cad1536519298d9331921c07 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
 #define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Softmax.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -21,31 +21,12 @@
 #include <vector>
 
 namespace Aidge {
-// class Softmax_Op;
+// Operator implementation entry point for the backend
+using SoftmaxImpl_cpu = OperatorImpl_cpu<Softmax_Op,
+    void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>;
 
-// compute kernel registry for forward and backward
-class SoftmaxImplForward_cpu
-    : public Registrable<SoftmaxImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>> {
-};
-class SoftmaxImplBackward_cpu
-    : public Registrable<SoftmaxImplBackward_cpu, std::tuple<DataType, DataType>, std::function<void(std::size_t, const std::vector<DimSize_t>&, const void*, void*)>> {
-};
-
-class SoftmaxImpl_cpu : public OperatorImpl {
-public:
-    SoftmaxImpl_cpu(const Softmax_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<SoftmaxImpl_cpu> create(const Softmax_Op& op) {
-        return std::make_unique<SoftmaxImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-static Registrar<Softmax_Op> registrarSoftmaxImpl_cpu("cpu", Aidge::SoftmaxImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(Softmax_Op, "cpu", Aidge::SoftmaxImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
similarity index 75%
rename from include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
index 6ff8b3ddf39412aa6febdc188b7c27e8bfdcc178..6b52a307832674edd095d2c98b559d0adcb4062a 100644
--- a/include/aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_
 
 #include "aidge/utils/Registrar.hpp"
 #include <cstddef>
@@ -61,14 +61,15 @@ void SoftmaxImpl_cpu_forward_kernel(std::size_t axisIdx, const std::vector<DimSi
     }
 }
 
-namespace {
-static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>);
-static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>);
-static Registrar<SoftmaxImplForward_cpu> registrarSoftmaxImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>);
-}  // namespace
+REGISTRAR(SoftmaxImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<float, float>, nullptr});
+REGISTRAR(SoftmaxImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<double, double>, nullptr});
+REGISTRAR(SoftmaxImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::SoftmaxImpl_cpu_forward_kernel<int, int>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_SOFTMAXIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl.hpp b/include/aidge/backend/cpu/operator/SqrtImpl.hpp
index 5764fea4519b55389597db6ac0797239352b7dea..dba75d1c58fb19ab2284ee0e98a32bff7ac58557 100644
--- a/include/aidge/backend/cpu/operator/SqrtImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SqrtImpl.hpp
@@ -17,39 +17,19 @@
 #include <tuple>
 #include <vector>
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Sqrt.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
+// Operator implementation entry point for the backend
+using SqrtImpl_cpu = OperatorImpl_cpu<Sqrt_Op,
+    void(const std::size_t, const void*, void*),
+    void(const std::size_t, const void*, void*)>;
 
-// compute kernel registry for forward and backward
-class SqrtImplForward_cpu
-    : public Registrable<SqrtImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> {
-};
-class SqrtImplBackward_cpu
-    : public Registrable<SqrtImplBackward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> {
-};
-
-class SqrtImpl_cpu : public OperatorImpl {
-public:
-    SqrtImpl_cpu(const Sqrt_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<SqrtImpl_cpu> create(const Sqrt_Op& op) {
-        return std::make_unique<SqrtImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-
-    void forward() override final;
-
-    void backward() override final;
-};
-
-namespace {
-static Registrar<Sqrt_Op> registrarSqrtImpl_cpu("cpu", Aidge::SqrtImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(Sqrt_Op, "cpu", Aidge::SqrtImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp
deleted file mode 100644
index 9cf5118a5ac81520d7a180b6aba22417ca512890..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_
-
-#include <cmath>    // std::sqrt
-#include <cstddef>  // std::size_t
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i]))));
-    }
-}
-
-namespace {
-static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_backward_kernel<float, float>);
-static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_backward_kernel<int, int>);
-static Registrar<SqrtImplBackward_cpu> registrarSqrtImplBackward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_backward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_BACKWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp
deleted file mode 100644
index 886b978c2345ce555d229d684ba83f952be9e00e..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_
-
-#include <cmath>    // std::sqrt
-#include <cstddef>  // std::size_t
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i])));
-    }
-}
-
-namespace {
-static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::SqrtImpl_cpu_forward_kernel<float, float>);
-static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::SqrtImpl_cpu_forward_kernel<int, int>);
-static Registrar<SqrtImplForward_cpu> registrarSqrtImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::SqrtImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..0826251ea45583c8598ea0c320c5eda9672e5a25
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/SqrtImpl_kernels.hpp
@@ -0,0 +1,60 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_
+
+#include <cmath>    // std::sqrt
+#include <cstddef>  // std::size_t
+
+#include "aidge/utils/Registrar.hpp"
+
+#include "aidge/backend/cpu/operator/SqrtImpl.hpp"
+
+namespace Aidge {
+template <class I, class O>
+void SqrtImpl_cpu_forward_kernel(const std::size_t inputLenght,
+                                     const void* input_,
+                                     void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        output[i] = static_cast<O>(std::sqrt(static_cast<float>(input[i])));
+    }
+}
+
+template <class I, class O>
+void SqrtImpl_cpu_backward_kernel(const std::size_t inputLenght,
+                                     const void* input_,
+                                     void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        output[i] = static_cast<O>(0.5/(std::sqrt(static_cast<float>(input[i]))));
+    }
+}
+
+REGISTRAR(SqrtImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<float, float>, Aidge::SqrtImpl_cpu_backward_kernel<float, float>});
+REGISTRAR(SqrtImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<double, double>, Aidge::SqrtImpl_cpu_backward_kernel<double, double>});
+REGISTRAR(SqrtImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::SqrtImpl_cpu_forward_kernel<int, int>, Aidge::SqrtImpl_cpu_backward_kernel<int, int>});
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_SQRTIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/SubImpl.hpp b/include/aidge/backend/cpu/operator/SubImpl.hpp
index 6f9b9a6d5d3d18499e6a74a9139cee7253b5d95a..2bb22bda74edf7db09404fd5613b6714ddcdf513 100644
--- a/include/aidge/backend/cpu/operator/SubImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef AIDGE_CPU_OPERATOR_SUBIMPL_H_
 #define AIDGE_CPU_OPERATOR_SUBIMPL_H_
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Sub.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -21,31 +21,12 @@
 #include <vector>
 
 namespace Aidge {
-// class Sub_Op;
+// Operator implementation entry point for the backend
+using SubImpl_cpu = OperatorImpl_cpu<Sub_Op,
+    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>;
 
-// compute kernel registry for forward and backward
-class SubImplForward_cpu
-    : public Registrable<SubImplForward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>> {
-};
-class SubImplBackward_cpu
-    : public Registrable<SubImplBackward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>> {
-};
-
-class SubImpl_cpu : public OperatorImpl {
-public:
-    SubImpl_cpu(const Sub_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<SubImpl_cpu> create(const Sub_Op& op) {
-        return std::make_unique<SubImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-static Registrar<Sub_Op> registrarSubImpl_cpu("cpu", Aidge::SubImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(Sub_Op, "cpu", Aidge::SubImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_SUBIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
similarity index 62%
rename from include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
index 10e6f58bb44b63f2d8712dc0aa64e0660f3356b2..0486ed2105b23e95f9cdfcda578e14900fcb2c8e 100644
--- a/include/aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SubImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_
 
 #include "aidge/utils/Registrar.hpp"
 
@@ -49,20 +49,19 @@ void SubImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
 	}
 }
 
-namespace {
-static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::SubImpl_cpu_forward_kernel<float, float, float>);
-static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::SubImpl_cpu_forward_kernel<double, double, double>);
-static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>);
-static Registrar<SubImplForward_cpu> registrarSubImplForward_cpu_Int64(
-        {DataType::Int64, DataType::Int64, DataType::Int64},
-        Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>);
-}  // namespace
+// Kernels registration to implementation entry point
+REGISTRAR(SubImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<float, float, float>, nullptr});
+REGISTRAR(SubImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<double, double, double>, nullptr});
+REGISTRAR(SubImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int32_t, std::int32_t, std::int32_t>, nullptr});
+REGISTRAR(SubImpl_cpu,
+    {DataType::Int64},
+    {ProdConso::inPlaceModel, Aidge::SubImpl_cpu_forward_kernel<std::int64_t, std::int64_t, std::int64_t>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_SUBIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/TanhImpl.hpp b/include/aidge/backend/cpu/operator/TanhImpl.hpp
index 09864d3e50182df319762a2356c946c977b6253b..b1c2217bd29805eca2cf7b7906316756b75a74e0 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef AIDGE_CPU_OPERATOR_TANHIMPL_H_
 #define AIDGE_CPU_OPERATOR_TANHIMPL_H_
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Tanh.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -21,34 +21,13 @@
 #include <vector>
 
 namespace Aidge {
-// class Tanh_Op;
+// Operator implementation entry point for the backend
+using TanhImpl_cpu = OperatorImpl_cpu<Tanh_Op,
+    void(const std::size_t, const void*, void*),
+    void(const std::size_t, const void*, const void*, void*)>;
 
-// compute kernel registry for forward and backward
-class TanhImplForward_cpu
-    : public Registrable<TanhImplForward_cpu, std::tuple<DataType, DataType>, std::function<void(const std::size_t, const void*, void*)>> {
-};
-class TanhImplBackward_cpu
-    : public Registrable<TanhImplBackward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::size_t, const void*, const void*, void*)>> {
-};
-
-class TanhImpl_cpu : public OperatorImpl {
-public:
-    TanhImpl_cpu(const Tanh_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<TanhImpl_cpu> create(const Tanh_Op& op) {
-        return std::make_unique<TanhImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-	
-    void forward() override final;
-
-    void backward() override final;
-};
-
-namespace {
-static Registrar<Tanh_Op> registrarTanhImpl_cpu("cpu", Aidge::TanhImpl_cpu::create);
-}
+// Implementation entry point registration to Operator
+REGISTRAR(Tanh_Op, "cpu", Aidge::TanhImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_TANHIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp
deleted file mode 100644
index 9e57b6dfcb0da322f5b21944fb10ec7a10cd0ab8..0000000000000000000000000000000000000000
--- a/include/aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/********************************************************************************
- * Copyright (c) 2023 CEA-List
- *
- * This program and the accompanying materials are made available under the
- * terms of the Eclipse Public License 2.0 which is available at
- * http://www.eclipse.org/legal/epl-2.0.
- *
- * SPDX-License-Identifier: EPL-2.0
- *
- ********************************************************************************/
-
-#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_
-
-#include "aidge/utils/Registrar.hpp"
-
-#include "aidge/backend/cpu/operator/TanhImpl.hpp"
-
-namespace Aidge {
-template <class I, class O>
-void TanhImpl_cpu_forward_kernel(std::size_t inputLenght,
-                                     const void* input_,
-                                     void* output_) {
-
-    const I* input = static_cast<const I*>(input_);
-    O* output = static_cast<O*>(output_);
-
-//#pragma omp parallel for if (inputLenght > 1024)
-    for (std::size_t i = 0; i < inputLenght; ++i) {
-        output[i] = std::tanh(input[i]);
-    }
-}
-
-namespace {
-static Registrar<TanhImplForward_cpu> registrarTanhImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::TanhImpl_cpu_forward_kernel<float, float>);
-static Registrar<TanhImplForward_cpu> registrarTanhImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::TanhImpl_cpu_forward_kernel<double, double>);
-}  // namespace
-}  // namespace Aidge
-
-#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_FORWARD_KERNEL_H_ */
diff --git a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
similarity index 51%
rename from include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp
rename to include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
index 3a13c2cad21c35822fc6248590550e4716ee046d..fdcac210484b11f2220dcc2a6813efed503d1913 100644
--- a/include/aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/TanhImpl_kernels.hpp
@@ -9,15 +9,28 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_
 
-#include <cstddef>  // std::size_t
+#include "aidge/utils/Registrar.hpp"
 
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
-#include "aidge/utils/Registrar.hpp"
 
 namespace Aidge {
+template <class I, class O>
+void TanhImpl_cpu_forward_kernel(std::size_t inputLenght,
+                                     const void* input_,
+                                     void* output_) {
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+//#pragma omp parallel for if (inputLenght > 1024)
+    for (std::size_t i = 0; i < inputLenght; ++i) {
+        output[i] = std::tanh(input[i]);
+    }
+}
+
 template <class O, class GI, class GO>
 void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght,
                                   const void* output_, const void* grad_output_,
@@ -30,14 +43,13 @@ void TanhImpl_cpu_backward_kernel(const std::size_t inputLenght,
     }
 }
 
-namespace {
-static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float32(
-    {DataType::Float32, DataType::Float32, DataType::Float32},
-    Aidge::TanhImpl_cpu_backward_kernel<float, float, float>);
-static Registrar<TanhImplBackward_cpu> registrarTanhImplBackward_cpu_Float64(
-    {DataType::Float64, DataType::Float64, DataType::Float64},
-    Aidge::TanhImpl_cpu_backward_kernel<double, double, double>);
-}  // namespace
+// Kernels registration to implementation entry point
+REGISTRAR(TanhImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<float, float>, Aidge::TanhImpl_cpu_backward_kernel<float, float, float>});
+REGISTRAR(TanhImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::TanhImpl_cpu_forward_kernel<double, double>, Aidge::TanhImpl_cpu_backward_kernel<double, double, double>});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_BACKWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_TANHIMPL_KERNELS_H_ */
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index 8df5934c4daa08a04aec0abf705c41252095d751..0f2a77a2a0a60be0f4e4f99bfd46be22e6ef7686 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -10,6 +10,7 @@
  ********************************************************************************/
 
 #include "aidge/backend/cpu/operator/ConvImpl.hpp"
+#include "aidge/backend/cpu/operator/ConvImpl_kernels.hpp"
 
 #include <cassert>
 #include <chrono>  // std::chrono::milliseconds
diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp
index 5f4170472cc99ee179faaf41853a5088c743eb2a..832f91aad347fc081439ec487d06b14b0e2fe8da 100644
--- a/src/operator/ReLUImpl.cpp
+++ b/src/operator/ReLUImpl.cpp
@@ -19,6 +19,7 @@
 #include "aidge/utils/ErrorHandling.hpp"
 
 #include "aidge/backend/cpu/operator/ReLUImpl.hpp"
+#include "aidge/backend/cpu/operator/ReLUImpl_kernels.hpp"
 
 template <>
 void Aidge::ReLUImpl_cpu::forward() {
diff --git a/src/operator/SigmoidImpl.cpp b/src/operator/SigmoidImpl.cpp
index 7e00f6f1944bb73c40324a9d5cb45a0f24a4626a..cdcbac85df3a38fea9b7100324e0618949262fc9 100644
--- a/src/operator/SigmoidImpl.cpp
+++ b/src/operator/SigmoidImpl.cpp
@@ -20,9 +20,9 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/SigmoidImpl.hpp"
-#include "aidge/backend/cpu/operator/SigmoidImpl_forward_kernels.hpp"
-#include "aidge/backend/cpu/operator/SigmoidImpl_backward_kernels.hpp"
+#include "aidge/backend/cpu/operator/SigmoidImpl_kernels.hpp"
 
+template <>
 void Aidge::SigmoidImpl_cpu::forward() {
 	const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
@@ -30,16 +30,15 @@ void Aidge::SigmoidImpl_cpu::forward() {
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SigmoidImplForward_cpu>::create({
-        in0->dataType(),
-	    out0->dataType()});
+    const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(in0->size(),
+    impl.forward(in0->size(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawOutput(0)));
 }
 
+template <>
 void Aidge::SigmoidImpl_cpu::backward() {
     const Sigmoid_Op& op_ = dynamic_cast<const Sigmoid_Op&>(mOp);
     std::shared_ptr<Tensor> out0  = op_.getOutput(0);
@@ -48,12 +47,8 @@ void Aidge::SigmoidImpl_cpu::backward() {
     AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SigmoidImplBackward_cpu>::create({
-        out0->dataType(),
-	gra_int0->dataType(),
-        gra_out0->dataType()        
-    });
+    const auto impl = Registrar<SigmoidImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
 }
diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp
index a7664262a63e0e3b0e3ffdc775b7c11702b00e6a..945c1bc752feb8e6a194b1aff99b26f01a6a0e69 100644
--- a/src/operator/SliceImpl.cpp
+++ b/src/operator/SliceImpl.cpp
@@ -14,22 +14,21 @@
 #include <vector>
 
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
-#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/SliceImpl_kernels.hpp"
 #include "aidge/operator/Slice.hpp"
 #include "aidge/utils/Log.hpp"
 #include "aidge/utils/Types.h"
 
+template <>
 void Aidge::SliceImpl_cpu::forward() {
     const auto& op_ = dynamic_cast<const Slice_Op&>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Slice Operator.");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SliceImplForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
+    const auto impl = Registrar<SliceImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(op_.starts(),
+    impl.forward(op_.starts(),
             op_.ends(),
             op_.axes(),
             op_.steps(),
@@ -37,3 +36,8 @@ void Aidge::SliceImpl_cpu::forward() {
             getCPUPtr(mOp.getRawInput(0)),
             getCPUPtr(mOp.getRawOutput(0)));
 }
+
+template <>
+void Aidge::SliceImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Slice_Op on backend cpu");
+}
diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp
index 07eaec6cbb9ab0b10705d51b0749ac9ae5b83daa..8b6933f22f3673476f4a9f1e261fbcdc09857300 100644
--- a/src/operator/SoftmaxImpl.cpp
+++ b/src/operator/SoftmaxImpl.cpp
@@ -20,22 +20,25 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp"
-#include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/SoftmaxImpl_kernels.hpp"
 
+template <>
 void Aidge::SoftmaxImpl_cpu::forward() {
     const auto& op_ = dynamic_cast<const Softmax_Op&>(mOp);
     AIDGE_ASSERT(!op_.getInput(0)->empty(), "Softmax input empty");
+    std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
-
-    std::int32_t axis = (op_.axis() >= 0) ? op_.axis() : op_.getInput(0)->nbDims() + op_.axis();
+    const auto impl = Registrar<SoftmaxImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(static_cast<std::size_t>(axis), // axisIdx
+    impl.forward(static_cast<std::size_t>(axis), // axisIdx
                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
                std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
                std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
 }
+
+template <>
+void Aidge::SoftmaxImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Softmax_Op on backend cpu");
+}
diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp
index 128135b2b5e415e3aaebcfd9975ec70950577ce9..25bdb42fd5140ef4f64d704fc3a5ccf237f17f81 100644
--- a/src/operator/SqrtImpl.cpp
+++ b/src/operator/SqrtImpl.cpp
@@ -19,25 +19,24 @@
 #include "aidge/utils/Types.h"
 
 #include "aidge/backend/cpu/operator/SqrtImpl.hpp"
-#include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp"
-#include "aidge/backend/cpu/operator/SqrtImpl_backward_kernels.hpp"
+#include "aidge/backend/cpu/operator/SqrtImpl_kernels.hpp"
 
+template <>
 void Aidge::SqrtImpl_cpu::forward() {
     std::shared_ptr<Tensor> in0 = std::static_pointer_cast<Tensor>(mOp.getRawInput(0));
     std::shared_ptr<Tensor> out0 = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0));
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({
-        in0->dataType(),
-        out0->dataType()});
+    const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(in0->size(),
+    impl.forward(in0->size(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawOutput(0)));
 }
 
+template <>
 void Aidge::SqrtImpl_cpu::backward() {
     // reversing in and out Data for backprop
     const Sqrt_Op& op_ = dynamic_cast<const Sqrt_Op&>(mOp);
@@ -46,12 +45,10 @@ void Aidge::SqrtImpl_cpu::backward() {
     AIDGE_ASSERT(out0grad, "missing output #0");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({
-        out0grad->dataType(),
-        in0grad->dataType()});
+    const auto impl = Registrar<SqrtImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(out0grad->size(),
+    impl.backward(out0grad->size(),
         getCPUPtr(out0grad),
         getCPUPtr(in0grad));
 }
\ No newline at end of file
diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp
index 24f2c982a4f305d4a27b579bbe6b61a41a96de41..d43771b967889183801cb93418c967ce9d9c8453 100644
--- a/src/operator/SubImpl.cpp
+++ b/src/operator/SubImpl.cpp
@@ -21,26 +21,28 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/SubImpl.hpp"
-#include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/SubImpl_kernels.hpp"
 
+template <>
 void Aidge::SubImpl_cpu::forward() {
-
-    // Find the correct kernel type
-    auto kernelFunc = Registrar<SubImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
     const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                                    std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
     const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                                    std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
+    // Find the correct kernel type
+    const auto impl = Registrar<SubImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
     // Call kernel
-    kernelFunc(inputDims0,
+    impl.forward(inputDims0,
         inputDims1,
         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawInput(1)),
         getCPUPtr(mOp.getRawOutput(0)));
 }
+
+template <>
+void Aidge::SubImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Sub_Op on backend cpu");
+}
diff --git a/src/operator/TanhImpl.cpp b/src/operator/TanhImpl.cpp
index 9fe054f103a2e1c5500dd86bb70735455f316cb2..ed8dce08b9f710c9e5830b2c72ffef71013edb6e 100644
--- a/src/operator/TanhImpl.cpp
+++ b/src/operator/TanhImpl.cpp
@@ -20,9 +20,9 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/TanhImpl.hpp"
-#include "aidge/backend/cpu/operator/TanhImpl_forward_kernels.hpp"
-#include "aidge/backend/cpu/operator/TanhImpl_backward_kernels.hpp"
+#include "aidge/backend/cpu/operator/TanhImpl_kernels.hpp"
 
+template <>
 void Aidge::TanhImpl_cpu::forward() {
 	const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp);
     std::shared_ptr<Tensor> in0 = op_.getInput(0);
@@ -30,16 +30,15 @@ void Aidge::TanhImpl_cpu::forward() {
     AIDGE_ASSERT(in0, "missing input #0");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<TanhImplForward_cpu>::create({
-        in0->dataType(),
-	    out0->dataType()});
+    const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(in0->size(),
+    impl.forward(in0->size(),
         getCPUPtr(mOp.getRawInput(0)),
         getCPUPtr(mOp.getRawOutput(0)));
 }
 
+template <>
 void Aidge::TanhImpl_cpu::backward() {
     const Tanh_Op& op_ = dynamic_cast<const Tanh_Op&>(mOp);
     std::shared_ptr<Tensor> out0  = op_.getOutput(0);
@@ -48,13 +47,9 @@ void Aidge::TanhImpl_cpu::backward() {
     AIDGE_ASSERT(out0, "missing output #0 for current {} operator", op_.type());
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<TanhImplBackward_cpu>::create({
-        out0->dataType(),
-	gra_int0->dataType(),
-        gra_out0->dataType()        
-    });
+    const auto impl = Registrar<TanhImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
+    impl.backward(gra_int0->size(), getCPUPtr(out0), getCPUPtr(gra_out0), getCPUPtr(gra_int0));
 }