diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index a1417de1517a8212b4b4308e5128a5ee3fce1e39..11f9c264098d5a238d0d1f8e6bc4fac0cc099549 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -32,6 +32,7 @@ #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/SigmoidImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl.hpp" +#include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/TanhImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7370eb5ee0a65ce9191a450271f816f873ac0737 --- /dev/null +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -0,0 +1,52 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ +#define __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ + +#include "aidge/backend/OperatorImpl.hpp" +#include "aidge/operator/Slice.hpp" +#include "aidge/utils/Registrar.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <memory> +#include <vector> +#include <array> + +namespace Aidge { +// class Slice_Op; + +// compute kernel registry for forward and backward +class SliceImplForward_cpu + : public Registrable<SliceImplForward_cpu, std::tuple<DataType, DataType>, void(const Slice_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> { +}; +class SliceImplBackward_cpu + : public Registrable<SliceImplBackward_cpu, std::tuple<DataType, DataType>, void(const Slice_Op::Attrs&, const std::vector<DimSize_t>&, const void*, void*)> { +}; + +class SliceImpl_cpu : public OperatorImpl { +public: + SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op, "cpu") {} + + static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) { + return std::make_unique<SliceImpl_cpu>(op); + } + + Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; + void forward() override; +}; + +namespace { +static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create); +} +} // namespace Aidge + +#endif /* __AIDGE_CPU_OPERATOR_SLICEIMPL_H__ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d3a6ef32bdd269e08fa8320c554aa251b54bb80b --- /dev/null +++ b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp @@ -0,0 +1,94 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <iterator> + +#include "aidge/utils/Registrar.hpp" +#include "aidge/backend/cpu/operator/SliceImpl.hpp" + +namespace Aidge { + +template<class I, class O> +void SliceImpl_cpu_forward_kernel(const Slice_Op::Attrs &attrs, const std::vector<DimSize_t>&inputDims, const void *input_, void *output_){ + const I* input = static_cast<const I*>(input_); + O* output = static_cast<O*>(output_); + + const std::size_t nbDims = inputDims.size(); + std::vector<DimSize_t> dims = inputDims; + DimSize_t totalSize = std::accumulate(inputDims.cbegin(), inputDims.cend(), std::size_t(1), std::multiplies<std::size_t>()); + const I* inputAccumulation = input; + I* outputAccumulation = nullptr; + const std::size_t nbAxes = std::get<0>(attrs).size(); + for (std::size_t i = 0; i < nbAxes; ++i) { + const DimIdx_t axis = std::get<2>(attrs)[i] >= 0 ? + static_cast<DimIdx_t>(std::get<2>(attrs)[i]) : + static_cast<DimIdx_t>(std::get<2>(attrs)[i] + static_cast<DimIdx_t>(inputDims.size())); + const DimSize_t start = std::min(std::get<0>(attrs)[i] >= 0 ? + static_cast<DimSize_t>(std::get<0>(attrs)[i]) : + static_cast<DimSize_t>(std::get<0>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis])), + dims[axis]-1); + const DimSize_t end = std::get<1>(attrs)[i] >= 0 ? + static_cast<DimSize_t>(std::get<1>(attrs)[i]) : + static_cast<DimSize_t>(std::get<1>(attrs)[i] + static_cast<std::int64_t>(inputDims[axis])); + const std::int64_t step = std::get<3>(attrs)[i]; + + const std::size_t sliceSize = static_cast<std::size_t>(std::ceil((static_cast<float>(end) - static_cast<float>(start)) / static_cast<float>(step))); + + outputAccumulation = new I[totalSize]; + const std::size_t stride_pre = std::accumulate(dims.cbegin(), dims.cbegin() + axis, 1, std::multiplies<std::size_t>()); + const std::size_t stride_post = std::accumulate(dims.crbegin(), dims.crbegin() + nbDims -1 - axis, 1, std::multiplies<std::size_t>()); + for (std::size_t outer = 0; outer < stride_pre; ++outer) + { + const std::size_t idx_in = outer * stride_post * dims[axis] + start * stride_post; + const std::size_t idx_out = outer * stride_post * sliceSize; + std::size_t addedSlices = 0; + for (std::size_t inner = 0; inner < sliceSize; ++inner) + { + std::copy_n(std::next(inputAccumulation, idx_in + inner * step * stride_post), + stride_post, + std::next(outputAccumulation, idx_out + addedSlices * stride_post)); + addedSlices++; + } + } + totalSize /= dims[axis]; + totalSize *= sliceSize; + dims[axis] = sliceSize; + + if (inputAccumulation != input) { + delete[] inputAccumulation; + } + inputAccumulation = outputAccumulation; + + } + // Copy elements from inputAccumulation to output while dividing by divisor + std::copy_n(inputAccumulation, totalSize, output); + if (outputAccumulation) { + delete[] outputAccumulation; + } +} + +namespace { +static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, float>); +static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, int>); +static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, double>); +} // namespace +} // namespace Aidge + +#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */ diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f6dc901f4759cc86515b1482a4566db4668a48c8 --- /dev/null +++ b/src/operator/SliceImpl.cpp @@ -0,0 +1,42 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <cassert> +#include <numeric> // std::accumulate +#include <functional> // std::multiplies + +#include "aidge/operator/Slice.hpp" + +#include "aidge/backend/cpu/operator/SliceImpl.hpp" +#include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" +#include <vector> + +Aidge::Elts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return Elts_t::DataElts(0); +} + +void Aidge::SliceImpl_cpu::forward() { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu>::create({ + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + kernelFunc(dynamic_cast<const Slice_Op&>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); +} diff --git a/unit_tests/operator/Test_SliceImpl.cpp b/unit_tests/operator/Test_SliceImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2b9f89e62c09c04a7f848c362336418ef62aecce --- /dev/null +++ b/unit_tests/operator/Test_SliceImpl.cpp @@ -0,0 +1,279 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> + +#include "aidge/data/Tensor.hpp" +#include "aidge/operator/Slice.hpp" + +using namespace Aidge; + +TEST_CASE("[cpu/operator] Slice(forward)", "[Slice][CPU]") { + SECTION("1D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array1D<int,10> { + {0, 1, -2,-3, 4,-5,-6, 7, 8, 9} + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array1D<int,3> { + {0, 1, -2} + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,1>{{0}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,1>{{3}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,1>{{0}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("2D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array2D<int,2,10> { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array2D<int,2,3> { + { + {-5,-6, 7}, + {-5,-6, 7} + } + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,2>{{0,5}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,2>{{2,8}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,2>{{0,1}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("3D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,2,2,10> { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,1,1,3> { + { + { + { 4,-5,-6} + } + } + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,3>{{0,1,4}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,3>{{1,2,7}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,3>{{0,1,2}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // mySlice->getOperator()->output(0).print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("4D Tensor") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }, + { + { + { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3,11,-5,-6, 7,-1,10} + } + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }, + { + { + { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3,11,-5,-6, 7,-1,10} + } + } + } + }); + std::shared_ptr<Tensor> starts = std::make_shared<Tensor>(Array1D<int,4>{{0,0,0,0}}); + std::shared_ptr<Tensor> ends = std::make_shared<Tensor>(Array1D<int,4>{{2,2,2,10}}); + std::shared_ptr<Tensor> axes = std::make_shared<Tensor>(Array1D<int,4>{{0,1,2,3}}); + + std::shared_ptr<Node> mySlice = Slice(); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->associateInput(1,starts); + mySlice->getOperator()->associateInput(2,ends); + mySlice->getOperator()->associateInput(3,axes); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("Attributes instead of inputs") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array4D<int,2,2,2,10> { + { + { + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + } + }, + { + { + { 0, 1, 2,-3, 6,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3, 4,-5,-6, 7,-1,10} + }, + { + { 0, 1, 2,-3, 4,-5,-6, 7, 8, 9}, + {-5, 4, 2,-3,11,-5,-6, 7,-1,10} + } + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array4D<int,1,1,1,5> { + { + { + { + { 0, 1, 2,-3, 4} + } + } + } + }); + + std::shared_ptr<Node> mySlice = Slice({0,0,0,0}, {1,1,1,5}, {0,1,2,3}, {1,1,1,1}); + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + // op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } + + SECTION("Different Steps") { + std::shared_ptr<Tensor> input0 = std::make_shared<Tensor>(Array3D<int,4,2,8> { + { + { + { 0, 1, 2,-3, 4,-5,-6,7}, + {-5, 4, 2,-3, 4,-5,-6,-7} + }, + { + { 10, 11, 12,-13, 14,-15,-16,17}, + {-15, 14, 12,-13, 14,-15,-16,-17} + }, + { + { 20, 21, 22,-23, 24,-25,-26,27}, + {-25, 24, 22,-23, 24,-25,-26,-27} + }, + { + { 30, 31, 32,-33, 34,-35,-36,37}, + {-35, 34, 32,-33, 34,-35,-36,-37} + } + } + }); + std::shared_ptr<Tensor> expectedOutput = std::make_shared<Tensor>(Array3D<int,2,1,3> { + { + { + { 7, 4, 1} + }, + { + { 27, 24, 21} + } + } + }); + + std::shared_ptr<Node> mySlice = Slice({0,0,7}, {4,1,0}, {0,1,2}, {2,1,-3}); + // Steps are 2,1,-3 so the slice will be: + // on Axis 0: from 0 to 4 by step of 2 + // on Axis 1: from 0 to 1 by step of 1 + // on Axis 2: from 7 to 0 by step of -3 (reverse the order of elements) + auto op = std::static_pointer_cast<OperatorTensor>(mySlice -> getOperator()); + mySlice->getOperator()->associateInput(0,input0); + mySlice->getOperator()->setDataType(DataType::Int32); + mySlice->getOperator()->setBackend("cpu"); + mySlice->forward(); + op->getOutput(0)->print(); + REQUIRE(*(op->getOutput(0)) == *expectedOutput); + REQUIRE(op->getOutput(0)->dims() == expectedOutput->dims()); + REQUIRE(op->getOutput(0)->dataType() == expectedOutput->dataType()); + } +}