diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp index a5262f481008ad06e83fb7adecc25e0ebe0c1b1f..df2299f6b2a4a298cdd56c1901372fb25d6c8f35 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp @@ -12,8 +12,10 @@ #ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ #define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ +#include <algorithm> #include <cmath> #include <cstddef> + #include "aidge/utils/Registrar.hpp" #include "aidge/backend/cpu/operator/SliceImpl.hpp" @@ -34,34 +36,29 @@ void SliceImpl_cpu_forward_kernel(const Slice_Op::Attrs &attrs, const std::vecto DimIdx_t axis = std::get<2>(attrs)[i] >= 0 ? static_cast<DimIdx_t>(std::get<2>(attrs)[i]) : static_cast<DimIdx_t>(std::get<2>(attrs)[i] + static_cast<DimIdx_t>(inputDims.size())); - std::int64_t start = std::get<0>(attrs)[i] >= 0 ? - std::get<0>(attrs)[i] : - std::get<0>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis]); - std::int64_t end = std::get<1>(attrs)[i] >= 0 ? - std::get<1>(attrs)[i] : - std::get<1>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis]); + DimSize_t start = std::get<0>(attrs)[i] >= 0 ? + static_cast<DimSize_t>(std::get<0>(attrs)[i]) : + static_cast<DimSize_t>(std::get<0>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis])); + DimSize_t end = std::get<1>(attrs)[i] >= 0 ? + static_cast<DimSize_t>(std::get<1>(attrs)[i]) : + static_cast<DimSize_t>(std::get<1>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis])); std::int64_t step = std::get<3>(attrs)[i]; - std::size_t sliceSize = static_cast<std::size_t>((end - start) / std::abs(step)); + std::size_t sliceSize = static_cast<std::size_t>(std::ceil(static_cast<float>(end - start) / static_cast<float>(step))); if ( i > 0) { outputAccumulation = new I[totalSize]; } const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>()); const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>()); - std::int64_t firstElem = step > 0 ? start : end; - std::int64_t lastElem = step > 0 ? end : start; - - for (std::size_t outer = 0; outer < stride_pre; outer++) + for (std::size_t outer = 0; outer < stride_pre; ++outer) { std::size_t addedSlices = 0; - for (std::int64_t inner = firstElem; inner < lastElem; inner+=step) + for (std::size_t inner = 0; inner < sliceSize; ++inner) { - size_t idx = outer * stride_post * dims[axis] + inner * stride_post; - size_t idx_out = outer * stride_post * sliceSize + addedSlices * stride_post; - if (idx < totalSize) { - std::copy_n(std::next(inputAccumulation, idx), stride_post, std::next(outputAccumulation, idx_out)); - } + std::size_t idx_in = outer * stride_post * dims[axis] + (start + inner * step) * stride_post; + std::size_t idx_out = outer * stride_post * sliceSize + addedSlices * stride_post; + std::copy_n(std::next(inputAccumulation, idx_in), stride_post, std::next(outputAccumulation, idx_out)); addedSlices++; } } @@ -77,7 +74,6 @@ void SliceImpl_cpu_forward_kernel(const Slice_Op::Attrs &attrs, const std::vecto } // Copy elements from inputAccumulation to output while dividing by divisor std::copy_n(inputAccumulation, totalSize, output); - // op.getOutput(0)->getImpl()->copy(inputAccumulation, totalSize); if (outputAccumulation) { delete[] outputAccumulation; }