diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
index 5258c4c3e7376c3883b119503ee9e6765de844d5..df8e1a7e7b02a4ad032d6f09fae3ae2cd8a42eff 100644
--- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp
@@ -9,13 +9,12 @@
  *
  ********************************************************************************/
 
-#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H
-#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H
+#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_
 
 #include <cmath>
-
+#include <cstddef>
 #include "aidge/utils/Registrar.hpp"
-
 #include "aidge/backend/cpu/operator/ScalingImpl.hpp"
 
 //TODO : improve propagate, n2d2 :
@@ -61,12 +60,13 @@ const O& clamp(const O& x, const O& min, const O& max)
 }
 
 template<class O>
-O saturate(O value, std::size_t quantizedNbBits, bool isOutputUnsigned) {
+O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) {
+    // TODO: no assertions in kernel
     assert(quantizedNbBits > 0);
 
-    const O min = isOutputUnsigned?0:
+    const O min = isOutputUnsigned ? 0 :
                   -(1ll << (quantizedNbBits - 1ll));
-    const O max = isOutputUnsigned?(1ll << quantizedNbBits) - 1ll:
+    const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll :
                   (1ll << (quantizedNbBits - 1ll)) - 1ll;
 
     return clamp(value, min, max);
@@ -81,8 +81,8 @@ void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs,
     const I* input = static_cast<const I*>(input_);
     O* output = static_cast<O*>(output_);
     const I& scalingFactor = static_cast<const I&>(std::get<0>(attrs));
-    std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs));
-    bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs));
+    const std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs));
+    const bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs));
 
     for (std::size_t i = 0; i < inputLenght; ++i) {
         output[i] = input[i] * scalingFactor;
@@ -103,4 +103,4 @@ static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64
 } // namespace
 } // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H */
+#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp
index d6f7caad4febf72764892f956886c8fb6875d0ab..1cba5906064c51a4f0da2f1f3682b0828a080d43 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp
@@ -26,15 +26,17 @@ namespace Aidge {
 // compute kernel registry for forward and backward
 class SliceImplForward_cpu
-    : public Registrable<SliceImplForward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const std::vector<DimSize_t>&, const void*, DimSize_t, const void*, const void*, const void*, void*)> {
-};
+    : public Registrable<SliceImplForward_cpu, std::tuple<DataType>,
+                         void(const typename Slice_Op::Attrs&,
+                              const std::vector<std::size_t>,
+                              const void*,
+                              void*)> {};
 
 class SliceImplBackward_cpu
-    : public Registrable<SliceImplBackward_cpu,
-                         std::tuple<DataType, DataType>,
-                         void(const std::vector<DimSize_t>&, const void*, DimSize_t, const void*, const void*, const void*, void*)> {
-};
+    : public Registrable<SliceImplBackward_cpu, std::tuple<DataType>,
+                         void(const typename Slice_Op::Attrs&,
+                              const std::vector<std::size_t>,
+                              const void*,
+                              void*)> {};
 
 class SliceImpl_cpu : public OperatorImpl {
 public:
@@ -61,4 +63,4 @@ static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::c
 }
 } // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
index 9381039bb4d8fcdc5aa34c9d34d96ec50a73d651..80c036cded168aea017f3ae8e2c004474b3977f3 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
@@ -13,94 +13,82 @@
 #define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_
 
 #include "aidge/utils/Registrar.hpp"
-
+#include "aidge/operator/Slice.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
+#include <vector>
+#include <cstddef>
 
-namespace Aidge {
-template <class I, class O>
-void SliceImpl_cpu_forward_kernel(const std::vector<DimSize_t>& inputDims,
-                                  const void* input_,
-                                  DimSize_t nbSlices,
-                                  const void* axes_,
-                                  const void* starts_,
-                                  const void* ends_,
-                                  void* output_) {
-    const I* input = static_cast<const I*>(input_);
-    const int* axes = static_cast<const int*>(axes_);
-    const int* starts = static_cast<const int*>(starts_);
-    const int* ends = static_cast<const int*>(ends_);
-    O* output = static_cast<O*>(output_);
-
-    // Calculate the total number of elements in the input array
-    size_t totalElements = 1;
-    for (size_t dimSize : inputDims) {
-        totalElements *= dimSize;
-    }
-
-    // Create a temporary arrays to store intermediate input/output for each slice op
-    std::vector<I> tempInArray(input, input + totalElements);
-    std::vector<I> tempOutArray(input, input + totalElements);
-    std::vector<size_t> currentDims = inputDims;
+#include "aidge/data/Data.hpp"
 
-    size_t copiedElems = 0;
-    // Loop over each slice operation
-    for(size_t i=0; i< nbSlices; ++i)
+namespace Aidge {
+template <class I>
+void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
+                                  const std::vector<std::size_t> inputDims,
+                                  const void* input_,
+                                  void* output_) {
+    std::vector<std::size_t> slicedDims = inputDims;
+
+    std::size_t beginning = 0;
+    DimSize_t nbAxes = std::get<2>(attrs).size();
+    for(std::size_t i=0; i<nbAxes;++i)
     {
-        copiedElems = 0;
-        I* tempOutArrayPtr = tempOutArray.data();
-        // Extract parameters for the current slice, make sure indexes are positive
-        size_t axisIdx = axes[i]>=0?axes[i]:(axes[i]+currentDims.size());
-        size_t startIdx = starts[i]>=0?starts[i]:(starts[i]+currentDims[axisIdx]);
-        size_t endIdx = ends[i]>=0?ends[i]:(ends[i]+currentDims[axisIdx]);
-
-
-        // Compute the size of the slice over each element on the axis
-        size_t strideOnCurrDim = 1;
-        for(size_t j=(axisIdx+1); j<currentDims.size(); ++j)
-        {
-            strideOnCurrDim *= currentDims[j];
-        }
-        size_t sliceSize = (endIdx - startIdx + 1) * strideOnCurrDim;
+        // For each slice operation get the params and cast them to size_t
+        int axis_ = std::get<2>(attrs)[i];
+        int start_ = std::get<0>(attrs)[i];
+        int end_ = std::get<1>(attrs)[i];
+        std::size_t axis = axis_>=0?axis_:axis_+inputDims.size();
+        std::size_t start = start_>=0?start_:start_+inputDims[axis];
+        std::size_t end = end_>=0?end_:end_+inputDims[axis];
+        std::size_t stride=1;
+        for(std::size_t j = inputDims.size()-1; j>axis; --j)
+            stride*=inputDims[j];
+        beginning += start * stride;
+        std::size_t sliceLength = end - start + 1;
+        slicedDims[axis] = sliceLength;
+    }
 
-        // For each slice operation, we will slice all elements on the axis (subSlice)
-        // the number of sublices is the product of dimension previous to the slice dimension
-        size_t nbSubSlices = 1;
-        for(size_t j=0; j<axisIdx; ++j)
-        {
-            nbSubSlices*=currentDims[j];
-        }
-        // Operate the slice over each element of the dim we want to slice
-        for(size_t s=0; s<nbSubSlices; ++s)
-        {
-            // Compute the pointer postion on input
-            std::size_t copyStartPos = s * strideOnCurrDim * currentDims[axisIdx] + startIdx * strideOnCurrDim;
-            const I* copyPtr = std::next(tempInArray.data(), copyStartPos);
-            // Copy slice to output array and update pointer
-            std::copy_n(copyPtr, sliceSize , tempOutArrayPtr);
-            tempOutArrayPtr += sliceSize ;
-            copiedElems+= sliceSize ;
-        }
+    const I* input = static_cast<const I*>(input_) + beginning;
+    I* output = static_cast<I*>(output_);
+    const std::size_t nbDims = slicedDims.size();
 
-        // Update the input for the next slice operation
-        tempInArray.assign(tempOutArray.begin(), tempOutArray.begin() + copiedElems);
-        currentDims[axisIdx] = endIdx - startIdx + 1;
+    // for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractedDims = {1,3,3,2}
+    std::vector<std::size_t> substractedDims = std::vector<std::size_t>(nbDims);
+    for (std::size_t i = 0; i < nbDims; ++i) {
+        substractedDims[i] = inputDims[i] - slicedDims[i];
     }
-    std::copy_n(tempInArray.data(), copiedElems, output);
+    // for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1}
+    std::vector<std::size_t> prodSlicedDims = std::vector<std::size_t>(nbDims);
+    std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims+1);
+    prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
+    prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
+    prodInputDims[nbDims] = 1; // sentinel so that prodInputDims[idx+1] stays valid for the innermost axis
+    for (std::size_t i = 2; i <= nbDims; ++i) {
+        prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1]*slicedDims[nbDims - i];
+        prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1]*inputDims[nbDims - i];
+    }
+
+    std::size_t j = 0; // linear index in the output
+    std::size_t i = 0; // linear index in the input
+    for (; j < prodSlicedDims[0];) {
+        output[j] = input[i++];
+        ++j;
+        for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
+            i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx]*prodInputDims[idx+1] : 0; // skip the non-sliced remainder of axis idx
+        }
+    }
 }
 
 namespace {
+
 static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32(
-        {DataType::Float32, DataType::Float32},
-        Aidge::SliceImpl_cpu_forward_kernel<float, float>);
+        {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float>);
 
 static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32(
-        {DataType::Int32, DataType::Int32},
-        Aidge::SliceImpl_cpu_forward_kernel<int, int>);
+        {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int>);
 
 static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64(
-        {DataType::Float64, DataType::Float64},
-        Aidge::SliceImpl_cpu_forward_kernel<double, double>);
+        {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double>);
 } // namespace
 } // namespace Aidge
 
-#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
\ No newline at end of file
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */
diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp
index 6a528afdbc1399994139f5a1b8336d04ec582159..32d31f046465425a269d6f8e3fc52eaad31c663a 100644
--- a/src/operator/SliceImpl.cpp
+++ b/src/operator/SliceImpl.cpp
@@ -18,22 +18,31 @@
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp"
 #include "aidge/utils/Types.h"
-
+#include <vector>
+#include <cassert>
+#include <tuple>
+#include <numeric>    // std::accumulate
+#include <functional> // std::multiplies
 
 Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const {
     assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input");
 
     // Requires the whole tensors
-    return std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0];
+    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims();
+
+    return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1),
+                           std::multiplies<NbElts_t>());
 }
 
 Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; }
 
 Aidge::NbElts_t Aidge::SliceImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx,
-                         const std::vector<Aidge::DimSize_t>& inputsSize) const {
+                                                        const std::vector<Aidge::DimSize_t>& inputsSize) const {
     (void)outputIdx;
     (void)inputsSize;
-    return std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims()[0];
+    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims();
+    return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1),
+                           std::multiplies<NbElts_t>());
 }
 
 Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const {
@@ -52,27 +61,24 @@ void Aidge::SliceImpl_cpu::updateConsummerProducer() {
 }
 
 void Aidge::SliceImpl_cpu::forward() {
-    for (IOIndex_t i = 0; i < 4; ++i)
-        assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i)) && ("missing input"));
-
-
-    assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->nbDims() == 1) && "input #1 must either be a tensor of rank 1");
-    assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->nbDims() == 1) && "input #2 must either be a tensor of rank 1");
-    assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(3))->nbDims() == 1) && "input #3 must either be a tensor of rank 1");
+    // FIXME: uncomment the following code once memory handling works
+    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
 
     // Find the correct kernel type
-    auto kernelFunc = Registrar<SliceImplForward_cpu>::create({
-        std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(),
-        std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+    auto kernelFunc = Registrar<SliceImplForward_cpu>::create(
+        {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()});
 
     // Call kernel
-    kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dims()[0],
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawInput(3))->getImpl()->rawPtr(),
-               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr());
+    kernelFunc(dynamic_cast<const Slice_Op&>(mOp).getStaticAttributes(),
+               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(),
+               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(),
+               std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()
+               );
+
+    // each input is consumed by the minimum amount for a forward pass
+    mNbConsumedData[0] += getNbRequiredData(0);
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
 }
 
 void Aidge::SliceImpl_cpu::backward() { printf("Not implemented yet.\n"); }
diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp
index e9392d3a01dabf427813b653916a79f07093c41d..8d35c885871a83c88f2852fbd946a1fb7f74dfee 100644
--- a/unit_tests/operator/Test_SliceImpl.cpp
+++ b/unit_tests/operator/Test_SliceImpl.cpp
@@ -29,21 +29,15 @@ TEST_CASE("[cpu/operator] Slice(forward)") {
                 {5, 6, 7, 8}
             }
         });
-        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0, 1}});
-        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{1, 1}});
-        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{1, 3}});
         std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,1,3> {
             {
                 {6, 7, 8}
            }
        });
 
-        std::shared_ptr<Node> mySlice = Slice();
+        std::shared_ptr<Node> mySlice = Slice({1, 1}, {1, 3}, {0, 1});
         auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
         op->associateInput(0, input);
-        op->associateInput(1, axes);
-        op->associateInput(2, starts);
-        op->associateInput(3, ends);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         op->computeOutputDims();
@@ -69,9 +63,6 @@ TEST_CASE("[cpu/operator] Slice(forward)") {
                 }
             }
         });
-        std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{1, 2}});
-        std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0, 2}});
-        std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2, 2}});
         std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,3,1> {
             {
                 {
@@ -87,12 +78,9 @@ TEST_CASE("[cpu/operator] Slice(forward)") {
             }
         });
 
-        std::shared_ptr<Node> mySlice = Slice();
+        std::shared_ptr<Node> mySlice = Slice({0, 2}, {2, 2}, {1, 2});
         auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator());
         op->associateInput(0, input);
-        op->associateInput(1, axes);
-        op->associateInput(2, starts);
-        op->associateInput(3, ends);
         op->setDataType(DataType::Int32);
         op->setBackend("cpu");
         op->computeOutputDims();
diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b71a01d130a783caf5c643dfb0c3757b1c524e5e
--- /dev/null
+++ b/unit_tests/recipies/Test_HorizontalTiling.cpp
@@ -0,0 +1,208 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <set>
+
+#include "aidge/graph/GraphView.hpp"
+#include "aidge/graph/OpArgs.hpp"
+#include "aidge/operator/Conv.hpp"
+#include "aidge/operator/ReLU.hpp"
+#include "aidge/recipies/Recipies.hpp"
+#include "aidge/scheduler/Scheduler.hpp"
+#include "aidge/operator/Concat.hpp"
+
+
+namespace Aidge {
+
+TEST_CASE("[core/recipies] Tiling(transformation)", "[Tiling][Recipies]") {
+
+    SECTION("Transform a pre-generated GraphView") {
+
+        SECTION("Simple Node: Conv") {
+            std::shared_ptr<Node> myReLU = ReLU("myReLU");
+            std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv");
+            std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> {
+                {
+                    {
+                        {{ 0, 1, 2},
+                         { 3, 4, 5},
+                         { 6, 7, 8}},
+                        {{ 9, 10, 11},
+                         { 12, 13, 14},
+                         { 15, 16, 17}},
+                        {{ 18, 19, 20},
+                         { 21, 22, 23},
+                         { 24, 25, 26}}
+                    },
+                    {
+                        {{ 27, 28, 29},
+                         { 30, 31, 32},
+                         { 33, 34, 35}},
+                        {{ 36, 37, 38},
+                         { 39, 40, 41},
+                         { 42, 43, 44}},
+                        {{ 45, 46, 47},
+                         { 48, 49, 50},
+                         { 51, 52, 53}}
+                    },
+                    {
+                        {{ 54, 55, 56},
+                         { 57, 58, 59},
+                         { 60, 61, 62}},
+                        {{ 63, 64, 65},
+                         { 66, 67, 68},
+                         { 69, 70, 71}},
+                        {{ 72, 73, 74},
+                         { 75, 76, 77},
+                         { 78, 79, 80}}
+                    },
+                    {
+                        {{ 81, 82, 83},
+                         { 84, 85, 86},
+                         { 87, 88, 89}},
+                        {{ 90, 91, 92},
+                         { 93, 94, 95},
+                         { 96, 97, 98}},
+                        {{ 99, 100, 101},
+                         {102, 103, 104},
+                         {105, 106, 107}}
+                    }
+                }
+            });
+            std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}});
+            std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW
+                {
+                    {
+                        {{ 0, 1, 2, 3, 4},
+                         { 5, 6, 7, 8, 9},
+                         { 10, 11, 12, 13, 14},
+                         { 15, 16, 17, 18, 19},
+                         { 20, 21, 22, 23, 24}},
+
+                        {{ 25, 26, 27, 28, 29},
+                         { 30, 31, 32, 33, 34},
+                         { 35, 36, 37, 38, 39},
+                         { 40, 41, 42, 43, 44},
+                         { 45, 46, 47, 48, 49}},
+
+                        {{ 50, 51, 52, 53, 54},
+                         { 55, 56, 57, 58, 59},
+                         { 60, 61, 62, 63, 64},
+                         { 65, 66, 67, 68, 69},
+                         { 70, 71, 72, 73, 74}}
+                    },
+                    {
+                        {{ 75, 76, 77, 78, 79},
+                         { 80, 81, 82, 83, 84},
+                         { 85, 86, 87, 88, 89},
+                         { 90, 91, 92, 93, 94},
+                         { 95, 96, 97, 98, 99}},
+
+                        {{100, 101, 102, 103, 104},
+                         {105, 106, 107, 108, 109},
+                         {110, 111, 112, 113, 114},
+                         {115, 116, 117, 118, 119},
+                         {120, 121, 122, 123, 124}},
+
+                        {{125, 126, 127, 128, 129},
+                         {130, 131, 132, 133, 134},
+                         {135, 136, 137, 138, 139},
+                         {140, 141, 142, 143, 144},
+                         {145, 146, 147, 148, 149}}
+                    }
+                }
+            });
+            std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> {
+                {
+                    {
+                        {{ 15226, 15577, 15928},
+                         { 16981, 17332, 17683},
+                         { 18736, 19087, 19438}},
+
+                        {{ 37818, 38898, 39978},
+                         { 43218, 44298, 45378},
+                         { 48618, 49698, 50778}},
+
+                        {{ 60426, 62235, 64044},
+                         { 69471, 71280, 73089},
+                         { 78516, 80325, 82134}},
+
+                        {{ 83016, 85554, 88092},
+                         { 95706, 98244, 100782},
+                         {108396, 110934, 113472}}
+                    },
+                    {
+                        {{ 41551, 41902, 42253},
+                         { 43306, 43657, 44008},
+                         { 45061, 45412, 45763}},
+
+                        {{118818, 119898, 120978},
+                         {124218, 125298, 126378},
+                         {129618, 130698, 131778}},
+
+                        {{196101, 197910, 199719},
+                         {205146, 206955, 208764},
+                         {214191, 216000, 217809}},
+
+                        {{273366, 275904, 278442},
+                         {286056, 288594, 291132},
+                         {298746, 301284, 303822}}
+                    }
+                }
+            });
+            myReLU->getOperator()->associateInput(0, myInput);
+            myReLU->addChild(myConv, 0, 0);
+            myConv->getOperator()->setInput(1, myWeights);
+            myConv->getOperator()->setInput(2, myBias);
+            std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->computeOutputDims();
+
+            std::shared_ptr<GraphView> g = std::make_shared<GraphView>();
+            g->add({myReLU, myConv});
+            g->compile("cpu", DataType::Int32);
+            std::set<std::shared_ptr<Node>> tiledConv = getConvHorizontalTiling(myConv, 2, 3);
+
+            SequentialScheduler s(g);
+            s.forward();
+            REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput);
+
+            GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv);
+            g->compile("cpu", DataType::Int32);
+            s.resetScheduling();
+            s.forward();
+
+            REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>((*g->outputNodes().begin())->getOperator())->getOutput(0)) == *myOutput);
+        }
+    }
+}
+}
+    // std::shared_ptr<GraphView> g = Sequential({
+    //     Conv(3, 16, {3,3}, "conv1"),
+    //     ReLU("relu1"),
+    //     Conv(16, 32, {1,1}, "conv2"),
+    //     Conv(32, 16, {1,1}, "conv3"),
+    //     Conv(16, 10, {3,3}, "conv4"),
+    //     ReLU("relu2")
+    // });
+
+    // for (auto& individualConv : g->match("Conv")) {
+    //     auto tiledConv = horizontalTiling(individualConv);
+    //     g->replace(individualConv, tiledConv);
+    // }
+    // }
+
+    // SECTION("Create the GraphView with tiled layers") {
+    //     std::shared_ptr<GraphView> g;
+    //     g->addChild(horizontalTiling(Conv()))
+    // }
+
+// }
+// } // namespace Aidge
\ No newline at end of file