diff --git a/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp index 307b6d99e97dd3a4017ef4b45c109dec690a5e2a..9fd5e5b58ed8e850c0a902e2de93b65cc75d274a 100644 --- a/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp @@ -33,6 +33,14 @@ void TransposeImpl_cpu_forward_kernel( const typename Transpose_Op<DIM>::Attrs& totalElements *= dimSize; } + std::vector<std::size_t> outStrides(DIM, 1); + for (size_t i = 0; i < DIM; ++i) { + for (size_t j = i+1; j < DIM; ++j) + { + outStrides[i] *= outputDims[j]; + } + } + std::vector<size_t> indices(outputDims.size(), 0); for (size_t i = 0; i < totalElements; ++i) { size_t idx = 0; @@ -42,20 +50,15 @@ void TransposeImpl_cpu_forward_kernel( const typename Transpose_Op<DIM>::Attrs& permutedIndices[j] = indices[std::get<0>(attrs)[j]]; } - // Compute the position of the next element to copy from input - for (size_t j = 0; j < DIM; ++j) { - size_t currsize = 1; - for(size_t k=j+1; k< DIM; ++k) - currsize*= inputDims[k]; - idx += permutedIndices[j] * currsize; + for (int j = DIM -1; j >=0; --j) { + idx += permutedIndices[j] * outStrides[j]; } - // Copy the value in output - output[i] = input[idx]; + output[idx] = input[i]; // Update indices for the next iteration for (int j = DIM - 1; j >= 0; --j) { - if (indices[j] < outputDims[j] - 1) { + if (indices[j] < inputDims[j] - 1) { indices[j]++; break; } else {