diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
index a49c5ee87c418ba094b5930e995ada669299e1af..c296aebd8d702e232134c2a36c310cacc1446735 100644
--- a/include/aidge/backend/cpu/operator/PadImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -17,7 +17,7 @@
 #include <tuple>
 #include <vector>
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Pad.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -27,75 +27,39 @@ namespace Aidge {
 class Pad_ProdConso_cpu : public ProdConso {
 public:
     Pad_ProdConso_cpu(const Operator& op): ProdConso(op) {}
+
+    static std::unique_ptr<ProdConso> defaultModel(const Operator& op) {
+        return std::make_unique<Pad_ProdConso_cpu>(op);
+    }
+
     Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
 };
 
-// class Pad_Op;
-// compute kernel registry for forward and backward
-class PadImpl1DForward_cpu
-    : public Registrable<PadImpl1DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 2>&,
+// Operator implementation entry point for the backend
+using Pad1D_Op = Pad_Op<1>;
+using PadImpl1D_cpu = OperatorImpl_cpu<Pad_Op<1>,
+    void(const std::array<DimSize_t, 2>&,
                             const PadBorderType,
                             const double,
                             const std::array<DimSize_t, 3> &,
                             const void *,
-                            void *)>> {};
-
-class PadImpl1D_cpu : public OperatorImpl {
-public:
-    PadImpl1D_cpu(const Pad_Op<1> &op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<PadImpl1D_cpu> create(const Pad_Op<1> &op) {
-        return std::make_unique<PadImpl1D_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<Pad_ProdConso_cpu>(mOp); };
-    void forward() override;
-};
-
-namespace {
-// add cpu backend to Pad_Op<1> implementation registry
-static Registrar<Pad_Op<1>> registrarPadImpl1D_cpu("cpu", Aidge::PadImpl1D_cpu::create);
-}  // namespace
+                            void *)>;
 
+// Register the implementation entry point with the Operator
+REGISTRAR(Pad1D_Op, "cpu", Aidge::PadImpl1D_cpu::create);
 
-// compute kernel registry for forward and backward
-class PadImpl2DForward_cpu
-    : public Registrable<PadImpl2DForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 4>&,
+// Operator implementation entry point for the backend
+using Pad2D_Op = Pad_Op<2>;
+using PadImpl2D_cpu = OperatorImpl_cpu<Pad_Op<2>,
+    void(const std::array<DimSize_t, 4>&,
                             const PadBorderType,
                             const double,
                             const std::array<DimSize_t, 4> &,
                             const void *,
-                            void *)>> {};
-class PadImpl2DBackward_cpu
-    : public Registrable<PadImpl2DBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         std::function<void(const std::array<DimSize_t, 4>&,
-                            const PadBorderType,
-                            const double,
-                            const std::array<DimSize_t, 4> &,
-                            const void *,
-                            void *)>> {};
-
-class PadImpl2D_cpu : public OperatorImpl {
-public:
-    PadImpl2D_cpu(const Pad_Op<2> &op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<PadImpl2D_cpu> create(const Pad_Op<2> &op) {
-        return std::make_unique<PadImpl2D_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<Pad_ProdConso_cpu>(mOp); };
-    void forward() override;
-};
+                            void *)>;
 
-namespace {
-// add cpu backend to Pad_Op<2> implementation registry
-static Registrar<Pad_Op<2>> registrarPadImpl2D_cpu("cpu", Aidge::PadImpl2D_cpu::create);
-}  // namespace
+// Register the implementation entry point with the Operator
+REGISTRAR(Pad2D_Op, "cpu", Aidge::PadImpl2D_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_PADIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
similarity index 81%
rename from include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
index 26c873c8fe7f140b09b31d0f1a9d4125acbcf50f..679c0b17263d92de51584a316e829eda588e3563 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_
 
 #include <algorithm>  // std::max, std::min
 #include <array>
@@ -88,17 +88,16 @@ void PadImpl1D_cpu_forward_kernel(const std::array<DimSize_t, 2>& beginEndBorder
     }
 }
 
-namespace {
-static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32},
-        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>);
-static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32},
-        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>);
-static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64},
-        PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>);
-}  // namespace
+// Register the kernels with the implementation entry point
+REGISTRAR(PadImpl1D_cpu,
+    {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr});
+REGISTRAR(PadImpl1D_cpu,
+    {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr});
+REGISTRAR(PadImpl1D_cpu,
+    {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl1D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr});
 
 
 /**
@@ -178,17 +177,16 @@ void PadImpl2D_cpu_forward_kernel(const std::array<DimSize_t, 4>& beginEndBorder
     }
 }
 
-namespace {
-static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32},
-        Aidge::PadImpl2D_cpu_forward_kernel<float, float>);
-static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32},
-        Aidge::PadImpl2D_cpu_forward_kernel<std::int32_t, std::int32_t>);
-static Registrar<PadImpl2DForward_cpu> registrarPadImpl2DForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64},
-        Aidge::PadImpl2D_cpu_forward_kernel<double, double>);
-}  // namespace
+// Register the kernels with the implementation entry point
+REGISTRAR(PadImpl2D_cpu,
+    {{DataType::Float32, DataFormat::NCHW}, {DataType::Float32, DataFormat::NCHW}},
+    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float32>, cpptype_t<DataType::Float32>>, nullptr});
+REGISTRAR(PadImpl2D_cpu,
+    {{DataType::Float64, DataFormat::NCHW}, {DataType::Float64, DataFormat::NCHW}},
+    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Float64>, cpptype_t<DataType::Float64>>, nullptr});
+REGISTRAR(PadImpl2D_cpu,
+    {{DataType::Int32, DataFormat::NCHW}, {DataType::Int32, DataFormat::NCHW}},
+    {Pad_ProdConso_cpu::defaultModel, Aidge::PadImpl2D_cpu_forward_kernel<cpptype_t<DataType::Int32>, cpptype_t<DataType::Int32>>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_PADIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_PADIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/PowImpl.hpp b/include/aidge/backend/cpu/operator/PowImpl.hpp
index 120ca24695854b3258bdb1484ba82fa3a7c9dd87..daf23177fb57bee4111c92654ad94dfae3e50f08 100644
--- a/include/aidge/backend/cpu/operator/PowImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef AIDGE_CPU_OPERATOR_POWIMPL_H_
 #define AIDGE_CPU_OPERATOR_POWIMPL_H_
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Pow.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -21,32 +21,13 @@
 #include <vector>
 
 namespace Aidge {
-// class Pow_Op;
+// Operator implementation entry point for the backend
+using PowImpl_cpu = OperatorImpl_cpu<Pow_Op,
+    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*),
+    void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>;
 
-// compute kernel registry for forward and backward
-class PowImplForward_cpu
-    : public Registrable<PowImplForward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*,void*)>> {
-};
-class PowImplBackward_cpu
-    : public Registrable<PowImplBackward_cpu, std::tuple<DataType, DataType, DataType>, std::function<void(const std::vector<std::size_t>&, const std::vector<std::size_t>&, const std::vector<std::size_t>&, const void*, const void*, void*)>> {
-};
-
-class PowImpl_cpu : public OperatorImpl {
-public:
-    PowImpl_cpu(const Pow_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<PowImpl_cpu> create(const Pow_Op& op) {
-        return std::make_unique<PowImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-    void backward() override;
-};
-
-namespace {
-static Registrar<Pow_Op> registrarPowImpl_cpu("cpu", Aidge::PowImpl_cpu::create);
-}
+// Register the implementation entry point with the Operator
+REGISTRAR(Pow_Op, "cpu", Aidge::PowImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_POWIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
similarity index 65%
rename from include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
index 1146cfa77464f8bd1c33a0ec0113415dcf599b53..7c6b2db8bc049a2123315d2efbc292ff6688c930 100644
--- a/include/aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PowImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_
 
 #include "aidge/utils/Registrar.hpp"
 #include <cmath>
@@ -47,17 +47,16 @@ void PowImpl_cpu_forward_kernel(const std::vector<std::size_t>& input1Dims,
 	}
 }
 
-namespace {
-static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32, DataType::Float32},
-        Aidge::PowImpl_cpu_forward_kernel<float, float, float>);
-static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32, DataType::Int32},
-        Aidge::PowImpl_cpu_forward_kernel<int, int, int>);
-static Registrar<PowImplForward_cpu> registrarPowImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64, DataType::Float64},
-        Aidge::PowImpl_cpu_forward_kernel<double, double, double>);
-}  // namespace
+// Register the kernels with the implementation entry point
+REGISTRAR(PowImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<float, float, float>, nullptr});
+REGISTRAR(PowImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<double, double, double>, nullptr});
+REGISTRAR(PowImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::PowImpl_cpu_forward_kernel<int, int, int>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_POWIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_POWIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
index 075dee200a10fb0b53d88499287fa33a0a715316..1c50805d5af768dfc160488fda1e8fadfa798454 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl.hpp
@@ -17,116 +17,22 @@
 #include <tuple>
 #include <vector>
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/ReduceMean.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
-// class ReduceMean_Op;
-
-// Every DIM
-class ReduceMeanImplForward_cpu
-    : public Registrable<ReduceMeanImplForward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const std::vector<std::int32_t>&,
+// Operator implementation entry point for the backend
+using ReduceMeanImpl_cpu = OperatorImpl_cpu<ReduceMean_Op,
+    void(const std::vector<std::int32_t>&,
                             DimSize_t,
                             const std::vector<DimSize_t>&,
                             const void *,
-                            void *)>> {};
-class ReduceMeanImpl1DBackward_cpu
-    : public Registrable<ReduceMeanImpl1DBackward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const std::vector<std::int32_t>&,
-                            DimSize_t,
-                            const std::vector<DimSize_t>&,
-                            const void *,
-                            void *)>> {};
-
-class ReduceMeanImpl_cpu : public OperatorImpl {
-   public:
-    ReduceMeanImpl_cpu(const ReduceMean_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ReduceMeanImpl_cpu> create(const ReduceMean_Op &op) {
-        return std::make_unique<ReduceMeanImpl_cpu>(op);
-    }
-
-   public:
-    void forward() override;
-};
-
-// // compute kernel registry for forward and backward
-// // DIM 1
-// class ReduceMeanImpl1DForward_cpu
-//     : public Registrable<ReduceMeanImpl1DForward_cpu,
-//                          std::tuple<DataType, DataType>,
-//                          std::function<void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)>> {};
-// class ReduceMeanImpl1DBackward_cpu
-//     : public Registrable<ReduceMeanImpl1DBackward_cpu,
-//                          std::tuple<DataType, DataType>,
-//                          std::function<void(const ReduceMean_Op<1>::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)>> {};
-
-// // DIM 2
-// class ReduceMeanImpl2DForward_cpu
-//     : public Registrable<ReduceMeanImpl2DForward_cpu,
-//                          std::tuple<DataType, DataType>,
-//                          std::function<void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)>> {};
-// class ReduceMeanImpl2DBackward_cpu
-//     : public Registrable<ReduceMeanImpl2DBackward_cpu,
-//                          std::tuple<DataType, DataType>,
-//                          std::function<void(const ReduceMean_Op<2>::Attrs &, const std::vector<DimSize_t>&, const void *,  void *)>> {};
-// // DIM 3
-// class ReduceMeanImpl3DForward_cpu
-//     : public Registrable<ReduceMeanImpl3DForward_cpu,
-//                          std::tuple<DataType, DataType>,
-//                          std::function<void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)>> {};
-// class ReduceMeanImpl3DBackward_cpu
-//     : public Registrable<ReduceMeanImpl3DBackward_cpu,
-//                          std::tuple<DataType, DataType>,
-//                          std::function<void(const ReduceMean_Op<3>::Attrs &, const std::vector<DimSize_t>&, const void *, void *)>> {};
-
-// class ReduceMeanImpl1D_cpu : public OperatorImpl {
-//    public:
-//     ReduceMeanImpl1D_cpu(const ReduceMean_Op<1>& op) : OperatorImpl(op, "cpu") {}
-
-//     static std::unique_ptr<ReduceMeanImpl1D_cpu> create(const ReduceMean_Op<1> &op) {
-//         return std::make_unique<ReduceMeanImpl1D_cpu>(op);
-//     }
-
-//    public:
-//     void forward() override;
-// };
-
-// class ReduceMeanImpl2D_cpu : public OperatorImpl {
-//    public:
-//     ReduceMeanImpl2D_cpu(const ReduceMean_Op<2>& op) : OperatorImpl(op, "cpu") {}
-
-//     static std::unique_ptr<ReduceMeanImpl2D_cpu> create(const ReduceMean_Op<2> &op) {
-//         return std::make_unique<ReduceMeanImpl2D_cpu>(op);
-//     }
-
-//    public:
-//     void forward() override;
-// };
-
-// class ReduceMeanImpl3D_cpu : public OperatorImpl {
-//    public:
-//     ReduceMeanImpl3D_cpu(const ReduceMean_Op<3>& op) : OperatorImpl(op, "cpu") {}
-
-//     static std::unique_ptr<ReduceMeanImpl3D_cpu> create(const ReduceMean_Op<3> &op) {
-//         return std::make_unique<ReduceMeanImpl3D_cpu>(op);
-//     }
+                            void *)>;
 
-//    public:
-//     void forward() override;
-// };
-namespace {
-// add cpu backend to ReduceMean_Op<2> implementation registry
-static Registrar<ReduceMean_Op> registrarReduceMeanImpl_cpu("cpu", Aidge::ReduceMeanImpl_cpu::create);
-// static Registrar<ReduceMean_Op<1>> registrarReduceMeanImpl1D_cpu("cpu", Aidge::ReduceMeanImpl1D_cpu::create);
-// static Registrar<ReduceMean_Op<2>> registrarReduceMeanImpl2D_cpu("cpu", Aidge::ReduceMeanImpl2D_cpu::create);
-// static Registrar<ReduceMean_Op<3>> registrarReduceMeanImpl3D_cpu("cpu", Aidge::ReduceMeanImpl3D_cpu::create);
-}  // namespace
+// Register the implementation entry point with the Operator
+REGISTRAR(ReduceMean_Op, "cpu", Aidge::ReduceMeanImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
similarity index 63%
rename from include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
index fb14893fdc96f9d91f1b8ee6375fd536a7a8a1c7..b84779f169f6e420cbb801afbd9028f1a267c48a 100644
--- a/include/aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_
 
 #include <algorithm>   // std::for_each
 #include <cstddef>     // std::size_t
@@ -107,38 +107,16 @@ void ReduceMeanImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
     }
 }
 
-namespace {
-static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>);
-static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int>);
-static Registrar<ReduceMeanImplForward_cpu> registrarReduceMeanImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>);
-
-// // DIM = 1
-// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float32(
-//         {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,1>);
-// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Int32(
-//         {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,1>);
-// static Registrar<ReduceMeanImpl1DForward_cpu> registrarReduceMeanImplForward_1D_cpu_Float64(
-//         {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,1>);
-
-// // DIM = 2
-// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float32(
-//         {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,2>);
-// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Int32(
-//         {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,2>);
-// static Registrar<ReduceMeanImpl2DForward_cpu> registrarReduceMeanImplForward_2D_cpu_Float64(
-//         {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,2>);
-
-// // DIM = 3
-// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float32(
-//         {DataType::Float32, DataType::Float32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float,3>);
-// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Int32(
-//         {DataType::Int32, DataType::Int32}, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int,3>);
-// static Registrar<ReduceMeanImpl3DForward_cpu> registrarReduceMeanImplForward_3D_cpu_Float64(
-//         {DataType::Float64, DataType::Float64}, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double,3>);
-}  // namespace
+// Register the kernels with the implementation entry point
+REGISTRAR(ReduceMeanImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<float, float>, nullptr});
+REGISTRAR(ReduceMeanImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<double, double>, nullptr});
+REGISTRAR(ReduceMeanImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::ReduceMeanImpl_cpu_forward_kernel<int, int>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_REDUCEMEANIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
index 0f37ef5d2ae4c2752d0930ddbf082f87e0bfe825..4138c62c24149c15cfad5e85e8f50889b2b6a433 100644
--- a/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceSumImpl.hpp
@@ -17,44 +17,22 @@
 #include <tuple>
 #include <vector>
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/ReduceSum.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
-class ReduceSumImplForward_cpu
-    : public Registrable<ReduceSumImplForward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const std::vector<std::int32_t>&,
+// Operator implementation entry point for the backend
+using ReduceSumImpl_cpu = OperatorImpl_cpu<ReduceSum_Op,
+    void(const std::vector<std::int32_t>&,
                             DimSize_t,
                             const std::vector<DimSize_t>&,
                             const void *,
-                            void *)>> {};
-class ReduceSumImpl1DBackward_cpu
-    : public Registrable<ReduceSumImpl1DBackward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const std::vector<std::int32_t>&,
-                            DimSize_t,
-                            const std::vector<DimSize_t>&,
-                            const void *,
-                            void *)>> {};
-
-class ReduceSumImpl_cpu : public OperatorImpl {
-   public:
-    ReduceSumImpl_cpu(const ReduceSum_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ReduceSumImpl_cpu> create(const ReduceSum_Op &op) {
-        return std::make_unique<ReduceSumImpl_cpu>(op);
-    }
-
-   public:
-    void forward() override;
-};
+                            void *)>;
 
-namespace {
-static Registrar<ReduceSum_Op> registrarReduceSumImpl_cpu("cpu", Aidge::ReduceSumImpl_cpu::create);
-}  // namespace
+// Register the implementation entry point with the Operator
+REGISTRAR(ReduceSum_Op, "cpu", Aidge::ReduceSumImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp
similarity index 85%
rename from include/aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp
index f215065af459a886a34a43d958ecd9e09ada4041..a6ed329f66304692856305f094d6db860ba338a3 100644
--- a/include/aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_
 
 #include <algorithm>   // std::for_each
 #include <cstddef>     // std::size_t
@@ -105,14 +105,16 @@ void ReduceSumImpl_cpu_forward_kernel(const std::vector<std::int32_t>& axes,
     }
 }
 
-namespace {
-static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>);
-static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ReduceSumImpl_cpu_forward_kernel<int, int>);
-static Registrar<ReduceSumImplForward_cpu> registrarReduceSumImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>);
-}  // namespace
+// Register the kernels with the implementation entry point
+REGISTRAR(ReduceSumImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<float, float>, nullptr});
+REGISTRAR(ReduceSumImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<double, double>, nullptr});
+REGISTRAR(ReduceSumImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::ReduceSumImpl_cpu_forward_kernel<int, int>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_FORWARD_KERNEL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_REDUCESUMIMPL_KERNELS_H_ */
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl.hpp b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
index ca53f2f15c856a51cd304b19133048036e38244f..c1cc247c548701d43e01b1e92d02f42a11cfc710 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl.hpp
@@ -12,7 +12,7 @@
 #ifndef __AIDGE_CPU_OPERATOR_ScalingIMPL_H__
 #define __AIDGE_CPU_OPERATOR_ScalingIMPL_H__
 
-#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
 #include "aidge/operator/Scaling.hpp"
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/utils/Types.h"
@@ -22,43 +22,17 @@
 #include <array>
 
 namespace Aidge {
-// class Scaling_Op;
-
-// compute kernel registry for forward and backward
-class ScalingImplForward_cpu
-    : public Registrable<ScalingImplForward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const float,
-                            const std::size_t,
-                            const bool,
-                            std::size_t,
-                            const void*,
-                            void*)>> {};
-class ScalingImplBackward_cpu
-    : public Registrable<ScalingImplBackward_cpu,
-                        std::tuple<DataType, DataType>,
-                        std::function<void(const float,
-                            const std::size_t,
-                            const bool,
-                            std::size_t,
-                            const void*,
-                            void*)>> {};
-
-class ScalingImpl_cpu : public OperatorImpl {
-public:
-    ScalingImpl_cpu(const Scaling_Op& op) : OperatorImpl(op, "cpu") {}
-
-    static std::unique_ptr<ScalingImpl_cpu> create(const Scaling_Op& op) {
-        return std::make_unique<ScalingImpl_cpu>(op);
-    }
-
-    std::shared_ptr<ProdConso> getProdConso() const override { return std::make_unique<ProdConso>(mOp, true); };
-    void forward() override;
-};
-
-namespace {
-static Registrar<Scaling_Op> registrarScalingImpl_cpu("cpu", Aidge::ScalingImpl_cpu::create);
-}
+// Operator implementation entry point for the backend
+using ScalingImpl_cpu = OperatorImpl_cpu<Scaling_Op,
+    void(const float,
+        const std::size_t,
+        const bool,
+        std::size_t,
+        const void*,
+        void*)>;
+
+// Register the implementation entry point with the Operator
+REGISTRAR(Scaling_Op, "cpu", Aidge::ScalingImpl_cpu::create);
 }  // namespace Aidge
 
 #endif /* __AIDGE_CPU_OPERATOR_ScalingIMPL_H__ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp
similarity index 79%
rename from include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
rename to include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp
index c654265dd6f650129201037976d89da4b0f39d96..89b9ef73e4a4faa098844027f07fb2096b77e3e8 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl_kernels.hpp
@@ -9,8 +9,8 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
-#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
+#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_
 
 #include <cmath>
 #include <cstddef>
@@ -92,14 +92,16 @@ void ScalingImpl_cpu_forward_kernel(const float scalingFactor,
     }
 }
 
-namespace {
-static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32}, Aidge::ScalingImpl_cpu_forward_kernel<float, float>);
-static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32}, Aidge::ScalingImpl_cpu_forward_kernel<int, int>);
-static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64}, Aidge::ScalingImpl_cpu_forward_kernel<double, double>);
-}  // namespace
+// Register the kernels with the implementation entry point
+REGISTRAR(ScalingImpl_cpu,
+    {DataType::Float32},
+    {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<float, float>, nullptr});
+REGISTRAR(ScalingImpl_cpu,
+    {DataType::Float64},
+    {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<double, double>, nullptr});
+REGISTRAR(ScalingImpl_cpu,
+    {DataType::Int32},
+    {ProdConso::inPlaceModel, Aidge::ScalingImpl_cpu_forward_kernel<int, int>, nullptr});
 }  // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_KERNELS_H_ */
\ No newline at end of file
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index 7e5364e647d73dd23f3c89058b251944c7ab07a3..cdae21f8ed2757128f6a36b661b0897a4ba65f89 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -16,7 +16,7 @@
 #include "aidge/operator/Conv.hpp"
 
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
-#include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/PadImpl_kernels.hpp"
 
 Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(Aidge::IOIndex_t inputIdx) const {
     AIDGE_ASSERT(inputIdx == 0, "input index out of range."
@@ -31,17 +31,16 @@ Aidge::Elts_t Aidge::Pad_ProdConso_cpu::getNbRequiredProtected(Aidge::IOIndex_t
     return Elts_t::DataElts(outputSize - inputSize);
 }
 
+template <>
 void Aidge::PadImpl1D_cpu::forward() {
     const auto& op_ = dynamic_cast<const Pad_Op<1>&>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<PadImpl1DForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
+    const auto impl = Registrar<PadImpl1D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-     kernelFunc(op_.beginEndBorders(),
+    impl.forward(op_.beginEndBorders(),
                 op_.borderType(),
                 op_.borderValue(),
                 op_.getInput(0)->template dims<3>(),
@@ -49,20 +48,29 @@ void Aidge::PadImpl1D_cpu::forward() {
                 getCPUPtr(mOp.getRawOutput(0)));
 }
 
+template <>
+void Aidge::PadImpl1D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<1> on the cpu backend");
+}
+
+template <>
 void Aidge::PadImpl2D_cpu::forward() {
     const auto& op_ = dynamic_cast<const Pad_Op<2>&>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Pad Operator.");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
+    const auto impl = Registrar<PadImpl2D_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(op_.beginEndBorders(),
+    impl.forward(op_.beginEndBorders(),
                 op_.borderType(),
                 op_.borderValue(),
                 op_.getInput(0)->template dims<4>(),
                 getCPUPtr(mOp.getRawInput(0)),
                 getCPUPtr(mOp.getRawOutput(0)));
 }
+
+template <>
+void Aidge::PadImpl2D_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Pad_Op<2> on the cpu backend");
+}
diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp
index 97f510b96c2cad74534ce3a17e6e47f80fa6fbe2..fe16bb955973d99e022c61043e8144aeaf6801a1 100644
--- a/src/operator/PowImpl.cpp
+++ b/src/operator/PowImpl.cpp
@@ -21,22 +21,20 @@
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
 #include "aidge/backend/cpu/operator/PowImpl.hpp"
-#include "aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/PowImpl_kernels.hpp"
 
+template <>
 void Aidge::PowImpl_cpu::forward() {
-    // Find the correct kernel type
-    auto kernelFunc = Registrar<PowImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
-
     const std::vector<std::size_t> inputDims0 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                                    std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims());
     const std::vector<std::size_t> inputDims1 = getBroadcastedDims(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
                                                                    std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims());
 
+    // Find the correct kernel type
+    const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
     // Call kernel
-    kernelFunc(inputDims0,
+    impl.forward(inputDims0,
         inputDims1,
         std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(),
         getCPUPtr(mOp.getRawInput(0)),
@@ -44,21 +42,20 @@ void Aidge::PowImpl_cpu::forward() {
         getCPUPtr(mOp.getRawOutput(0)));
 }
 
+template <>
 void Aidge::PowImpl_cpu::backward() {
     // Find the correct kernel type
     const Pow_Op& op_ = dynamic_cast<const Pow_Op&>(mOp);
-    auto kernelFunc = Registrar<PowImplForward_cpu>::create({
-        op_.getOutput(0)->grad()->dataType(),
-        op_.getInput(0)->grad()->dataType(),
-        op_.getInput(1)->grad()->dataType()});
-
     const std::vector<std::size_t> input0gradDims = getBroadcastedDims(op_.getInput(0)->grad()->dims(),
                                                                    op_.getOutput(0)->grad()->dims());
     const std::vector<std::size_t> input1gradDims = getBroadcastedDims(op_.getInput(1)->grad()->dims(),
                                                                    op_.getOutput(0)->grad()->dims());
 
+    // Find the correct kernel type
+    const auto impl = Registrar<PowImpl_cpu>::create(getBestMatch(getRequiredSpec()));
+
     // Call kernel
-    kernelFunc(op_.getOutput(0)->grad()->dims(),
+    impl.backward(op_.getOutput(0)->grad()->dims(),
                input0gradDims,
                input1gradDims,
                getCPUPtr(mOp.getRawOutput(0)),
diff --git a/src/operator/ReduceMeanImpl.cpp b/src/operator/ReduceMeanImpl.cpp
index b4cd8ffa9b46aaa1c1d7a2eca947ed0254947fef..622672569372ff4e9f135e36255095f4246d5920 100644
--- a/src/operator/ReduceMeanImpl.cpp
+++ b/src/operator/ReduceMeanImpl.cpp
@@ -16,23 +16,29 @@
 
 #include "aidge/utils/Types.h"
 #include "aidge/operator/ReduceMean.hpp"
-#include "aidge/backend/cpu/operator/ReduceMeanImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/ReduceMeanImpl_kernels.hpp"
 
+template <>
 void Aidge::ReduceMeanImpl_cpu::forward() {
     const ReduceMean_Op& op_ = dynamic_cast<const ReduceMean_Op&>(mOp);
+
     // Find the correct kernel type
-    auto kernelFunc = Registrar<ReduceMeanImplForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
+    const auto impl = Registrar<ReduceMeanImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(op_.axes(),
+    impl.forward(op_.axes(),
                 op_.keepDims(),
                 op_.getInput(0)->dims(),
                 op_.getInput(0)->getImpl()->rawPtr(),
                 op_.getOutput(0)->getImpl()->rawPtr());
 }
 
+template <>
+void Aidge::ReduceMeanImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceMean_Op on the cpu backend");
+}
+
+
 // void Aidge::ReduceMeanImpl1D_cpu::forward() {
 
 //     // Find the correct kernel type
diff --git a/src/operator/ReduceSumImpl.cpp b/src/operator/ReduceSumImpl.cpp
index d9b7eea71c6f6bd078ad6e98f1058ca1dafd1c11..aad0801835a74ecefb046f3dc64729ae1f8bd8bb 100644
--- a/src/operator/ReduceSumImpl.cpp
+++ b/src/operator/ReduceSumImpl.cpp
@@ -16,19 +16,24 @@
 
 #include "aidge/utils/Types.h"
 #include "aidge/operator/ReduceSum.hpp"
-#include "aidge/backend/cpu/operator/ReduceSumImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/ReduceSumImpl_kernels.hpp"
 
+template <>
 void Aidge::ReduceSumImpl_cpu::forward() {
     const ReduceSum_Op& op_ = dynamic_cast<const ReduceSum_Op&>(mOp);
+
     // Find the correct kernel type
-    auto kernelFunc = Registrar<ReduceSumImplForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
+    const auto impl = Registrar<ReduceSumImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(op_.axes(),
+    impl.forward(op_.axes(),
                 op_.keepDims(),
                 op_.getInput(0)->dims(),
                 op_.getInput(0)->getImpl()->rawPtr(),
                 op_.getOutput(0)->getImpl()->rawPtr());
 }
+
+template <>
+void Aidge::ReduceSumImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for ReduceSum_Op on the cpu backend");
+}
diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp
index 30e0a30fa9d3fc14df458ad4364f050a78d8b0a4..1e7a408f267c5eb2d60d188f0ed2ba0394222561 100644
--- a/src/operator/ScalingImpl.cpp
+++ b/src/operator/ScalingImpl.cpp
@@ -17,24 +17,28 @@
 #include "aidge/operator/Scaling.hpp"
 
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
-#include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp"
+#include "aidge/backend/cpu/operator/ScalingImpl_kernels.hpp"
 #include "aidge/utils/Types.h"
 #include "aidge/backend/cpu/data/GetCPUPtr.h"
 
+template <>
 void Aidge::ScalingImpl_cpu::forward() {
     const auto& op_ = dynamic_cast<const Scaling_Op&>(mOp);
     AIDGE_ASSERT(op_.getInput(0), "missing input #0 in Scaling Operator.");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<ScalingImplForward_cpu>::create({
-        op_.getInput(0)->dataType(),
-        op_.getOutput(0)->dataType()});
+    const auto impl = Registrar<ScalingImpl_cpu>::create(getBestMatch(getRequiredSpec()));
 
     // Call kernel
-    kernelFunc(op_.scalingFactor(),
+    impl.forward(op_.scalingFactor(),
             op_.quantizedNbBits(),
             op_.isOutputUnsigned(),
             op_.getInput(0)->size(),
             getCPUPtr(mOp.getRawInput(0)),
             getCPUPtr(mOp.getRawOutput(0)));
 }
+
+template <>
+void Aidge::ScalingImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(std::runtime_error, "Backward not yet implemented for Scaling_Op on the cpu backend");
+}