diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
index dc2741154b87b0b7cdf0426d4efb8e767ce3242b..d3a6ef32bdd269e08fa8320c554aa251b54bb80b 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
@@ -15,6 +15,7 @@
 #include <algorithm>
 #include <cmath>
 #include <cstddef>
+#include <iterator>
 
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
@@ -29,36 +30,37 @@ void SliceImpl_cpu_forward_kernel(const Slice_Op::Attrs &attrs, const std::vecto
     const std::size_t nbDims = inputDims.size();
     std::vector<DimSize_t> dims = inputDims;
     DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>());
-    I* outputAccumulation = new I[totalSize];
     const I* inputAccumulation = input;
+    I* outputAccumulation = nullptr;
     const std::size_t nbAxes = std::get<0>(attrs).size();
     for (std::size_t i = 0; i < nbAxes; ++i) {
-        DimIdx_t axis = std::get<2>(attrs)[i] >= 0 ?
-                            static_cast<DimIdx_t>(std::get<2>(attrs)[i]) :
-                            static_cast<DimIdx_t>(std::get<2>(attrs)[i] + static_cast<DimIdx_t>(inputDims.size()));
-        DimSize_t start = std::get<0>(attrs)[i] >= 0 ?
-                             static_cast<DimSize_t>(std::get<0>(attrs)[i]) :
-                             static_cast<DimSize_t>(std::get<0>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis]));
-        DimSize_t end = std::get<1>(attrs)[i] >= 0 ?
-                           static_cast<DimSize_t>(std::get<1>(attrs)[i]) :
-                           static_cast<DimSize_t>(std::get<1>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis]));
-        std::int64_t step = std::get<3>(attrs)[i];
+        const DimIdx_t axis = std::get<2>(attrs)[i] >= 0 ?
+                                    static_cast<DimIdx_t>(std::get<2>(attrs)[i]) :
+                                    static_cast<DimIdx_t>(std::get<2>(attrs)[i] + static_cast<DimIdx_t>(inputDims.size()));
+        const DimSize_t start = std::min(std::get<0>(attrs)[i] >= 0 ?
+                                                static_cast<DimSize_t>(std::get<0>(attrs)[i]) :
+                                                static_cast<DimSize_t>(std::get<0>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis])),
+                                         dims[axis]-1);
+        const DimSize_t end = std::get<1>(attrs)[i] >= 0 ?
+                                        static_cast<DimSize_t>(std::get<1>(attrs)[i]) :
+                                        static_cast<DimSize_t>(std::get<1>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis]));
+        const std::int64_t step = std::get<3>(attrs)[i];
 
-        std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step)));
+        const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step)));
 
-        if ( i > 0) {
-            outputAccumulation = new I[totalSize];
-        }
+        outputAccumulation = new I[totalSize];
         const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>());
         const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>());
         for (std::size_t outer = 0; outer < stride_pre; ++outer)
         {
+            const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post;
+            const std::size_t idx_out = outer * stride_post * sliceSize;
             std::size_t addedSlices = 0;
             for (std::size_t inner = 0; inner < sliceSize; ++inner)
             {
-                std::size_t idx_in = outer * stride_post * dims[axis] + (start + inner * step) * stride_post;
-                std::size_t idx_out = outer * stride_post * sliceSize + addedSlices * stride_post;
-                std::copy_n(std::next(inputAccumulation, idx_in), stride_post, std::next(outputAccumulation, idx_out));
+                std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post),
+                            stride_post,
+                            std::next(outputAccumulation, idx_out + addedSlices * stride_post));
                 addedSlices++;
             }
         }