diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp
index 539a3128c8c7afb8dad06799e657f70d22db1e9c..0c8ab84d26ab6e88e9e9bf3f1ee4d3b7f1b0f257 100644
--- a/include/aidge/backend/cpu.hpp
+++ b/include/aidge/backend/cpu.hpp
@@ -30,6 +30,7 @@
 #include "aidge/backend/cpu/operator/ConstantOfShapeImpl.hpp"
 #include "aidge/backend/cpu/operator/DivImpl.hpp"
 #include "aidge/backend/cpu/operator/ErfImpl.hpp"
+#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
 #include "aidge/backend/cpu/operator/FCImpl.hpp"
 #include "aidge/backend/cpu/operator/FoldImpl.hpp"
 #include "aidge/backend/cpu/operator/GlobalAveragePoolingImpl.hpp"
diff --git a/include/aidge/backend/cpu/operator/ExpandImpl.hpp b/include/aidge/backend/cpu/operator/ExpandImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..adfc6ab1ef2e6550c6307fb93d0079ea3b5fc5a2
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ExpandImpl.hpp
@@ -0,0 +1,35 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
+#define AIDGE_CPU_OPERATOR_EXPANDIMPL_H_
+
+#include <memory>
+#include <vector>
+
+#include "aidge/backend/cpu/operator/OperatorImpl.hpp"
+#include "aidge/operator/Expand.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+// Operator implementation entry point for the backend
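+// Forward kernel signature, in argument order: the input data tensor, the
+// expand-shape tensor (its values are read as Int64), a raw pointer to the
+// already-allocated output buffer, and the output dimensions (typically
+// computed beforehand by forwardDims()).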
+using ExpandImpl_cpu = OperatorImpl_cpu<Expand_Op,
+                                        void(const std::shared_ptr<Tensor> &,
+                                             const std::shared_ptr<Tensor> &,
+                                             void *,
+                                             const std::vector<DimSize_t> &)>;
+
+// Implementation entry point registration to Operator
+REGISTRAR(Expand_Op, "cpu", Aidge::ExpandImpl_cpu::create);
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp b/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3f4341c333ffd4032a90b8ee5d50a8475e81253e
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/ExpandImpl_kernels.hpp
@@ -0,0 +1,215 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
+#define AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_
+
+#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
+#include "aidge/utils/Registrar.hpp"
+
+#include <aidge/data/Data.hpp>
+#include <aidge/data/Tensor.hpp>
+#include <aidge/data/half.hpp>
+#include <aidge/scheduler/ProdConso.hpp>
+#include <aidge/utils/Types.h>
+#include <cmath>
+#include <cstdint> // std::int32_t, std::int64_t
+#include <memory>
+#include <numeric>
+
+namespace {
+// assumes values are contiguous in memory
+template <class IO>
+void expandContiguousArray(const std::size_t inputStackSize,
+                           const std::size_t outputStackSize,
+                           const IO *input,
+                           IO *output) {
+    for (std::size_t i = 0; i < outputStackSize; ++i) {
+        output[i] = (inputStackSize == 1) ? input[0] : input[i];
+    }
+}
+} // namespace
+
+namespace Aidge {
+
+template <class IO>
+void ExpandImpl_cpu_forward_kernel(
+    const std::shared_ptr<Tensor> &inData,
+    const std::shared_ptr<Tensor> &_inExpandShape,
+    void *_output,
+    const std::vector<DimSize_t> &outputDims) {
+
+    // retrieve the values of inputShape and the input dimensions,
+    // as the process will require modifying them
+    IO *output = static_cast<IO *>(_output);
+    std::vector<DimSize_t> inExpandShape(_inExpandShape->size());
+    for (DimSize_t i = 0; i < _inExpandShape->size(); ++i) {
+        inExpandShape[i] = _inExpandShape->get<std::int64_t>(i);
+    }
+    std::vector<DimSize_t> inDataDims = inData->dims();
+
+    // Example with 2 tensors
+    // [5,2,1,7] & [2,6,7]
+    // 1. Same number of dimensions, adding 1s to the left of the "smallest"
+    //    tensor -> [5,2,1,7] & [1,2,6,7]
+    // 2. Find the highest equal dimension -> 3
+    //    Exception: if the first diverging dimension is the last one, then ->
+    //    4 (dims.size())
+    // 3. Compute the highest number of contiguous data -> 7
+    // 4. Compute stride and offset step for the broadcast mechanism
+    // 5. Call a simple kernel
+
+    // ## Compute compatible input dimensions
+    // special case for equal dimensions: the kernel is called with the entire
+    // arrays at once
+    if (inDataDims == inExpandShape) {
+        const std::size_t input0ContiguousSize =
+            std::accumulate(inDataDims.cbegin(),
+                            inDataDims.cend(),
+                            static_cast<std::size_t>(1),
+                            std::multiplies<std::size_t>());
+        for (std::size_t i = 0; i < input0ContiguousSize; ++i) {
+            output[i] = inData->get<IO>(i);
+        }
+        return;
+    }
+
+    // set dimensions to be of equal size by filling the smallest one with
+    // ones.
+    if (inDataDims.size() > inExpandShape.size()) {
+        inExpandShape.insert(inExpandShape.cbegin(),
+                             inDataDims.size() - inExpandShape.size(),
+                             static_cast<DimSize_t>(1));
+    } else if (_inExpandShape->size() > inDataDims.size()) {
+        inDataDims.insert(inDataDims.cbegin(),
+                          inExpandShape.size() - inDataDims.size(),
+                          static_cast<DimSize_t>(1));
+    }
+
+    const std::size_t nbDims = inDataDims.size();
+
+    // Find the highest equal dimension
+    std::size_t contiguousIdx = nbDims;
+    while (contiguousIdx-- > 0) {
+        if (inDataDims[contiguousIdx] != inExpandShape[contiguousIdx]) {
+            break;
+        }
+    }
+    if (contiguousIdx == (nbDims - 1)) {
+        // last dimensions of one of the input Tensors are of size 1
+        const std::vector<std::size_t> &dims =
+            (inDataDims[contiguousIdx] == 1) ? inDataDims : inExpandShape;
+        while ((contiguousIdx + 1 > 0) && (dims[contiguousIdx] == 1)) {
+            --contiguousIdx;
+        }
+    }
+    ++contiguousIdx;
+
+    // Compute the highest number of contiguous data for each Tensor
+    const std::size_t inputDataContiguousSize =
+        std::accumulate(inDataDims.cbegin() + contiguousIdx,
+                        inDataDims.cend(),
+                        static_cast<std::size_t>(1),
+                        std::multiplies<std::size_t>());
+    const std::size_t outputContiguousSize =
+        std::accumulate(outputDims.cbegin() + contiguousIdx,
+                        outputDims.cend(),
+                        static_cast<std::size_t>(1),
+                        std::multiplies<std::size_t>());
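+
+    // Worked example for the shapes above ([5,2,1,7] expanded by [2,6,7],
+    // padded to [1,2,6,7], output [5,2,6,7]): contiguousIdx = 3, so the
+    // trailing 7 elements form one contiguous block. The stack dimensions
+    // [5,2,1] (input) vs [5,2,6] (output) give strideStepIn = {1, 1, 0}:
+    // the input offset stays put while the 6 broadcast copies of dimension 2
+    // are emitted, and advances by one contiguous block otherwise.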
+
+    // initialize strides to iterate through data because of broadcasting
+    std::unique_ptr<std::int32_t[]> stridePostIn =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    std::unique_ptr<std::int32_t[]> strideStepIn =
+        std::make_unique<std::int32_t[]>(contiguousIdx);
+    if (contiguousIdx > 0) {
+        stridePostIn[contiguousIdx - 1] = 1;
+        for (std::size_t i = contiguousIdx - 2;
+             i != static_cast<std::size_t>(-1);
+             --i) {
+            stridePostIn[i] = stridePostIn[i + 1] *
+                              static_cast<std::int32_t>(inDataDims[i + 1]);
+        }
+        for (std::size_t i = 0; i != contiguousIdx; ++i) {
+            strideStepIn[i] = (inDataDims[i] == 1) ? 1 - stridePostIn[i] : 1;
+        }
+    }
+
+    // variables for array offsets
+    std::size_t offsetInData = 0;
+    std::size_t offsetOut = 0;
+
+    std::size_t dim = contiguousIdx - 1;
+    const std::size_t nbStacks =
+        std::accumulate(outputDims.cbegin(),
+                        outputDims.cbegin() + contiguousIdx,
+                        static_cast<std::size_t>(1),
+                        std::multiplies<std::size_t>());
+
+    for (std::size_t stack = 0; stack < nbStacks;) {
+        expandContiguousArray<IO>(
+            inputDataContiguousSize,
+            outputContiguousSize,
+            &static_cast<const IO *>(
+                inData->getImpl()
+                    ->rawPtr())[offsetInData * inputDataContiguousSize],
+            &output[offsetOut * outputContiguousSize]);
+        if (++stack < nbStacks) {
+            std::size_t tmpStack = stack;
+            while (tmpStack % outputDims[dim] == 0) {
+                tmpStack /= outputDims[dim];
+                dim--;
+            }
+            offsetInData += strideStepIn[dim];
+            ++offsetOut;
+            dim = contiguousIdx - 1;
+        }
+    }
+}
+
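+// Kernel registrations: one entry per supported data type. Each entry maps
+// the input data types ({data, shape}) and the output data type to an
+// in-place producer-consumer model and a forward kernel; the backward slot is
+// nullptr because backward is not implemented for Expand on this backend.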
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Int16, DataType::Int64}, {DataType::Int16}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<std::int16_t>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Int32, DataType::Int64}, {DataType::Int32}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<std::int32_t>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Int64, DataType::Int64}, {DataType::Int64}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<std::int64_t>,
+           nullptr});
+
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Float16, DataType::Int64}, {DataType::Float16}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<half_float::half>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Float32, DataType::Int64}, {DataType::Float32}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<float>,
+           nullptr});
+REGISTRAR(ExpandImpl_cpu,
+          {{DataType::Float64, DataType::Int64}, {DataType::Float64}},
+          {ProdConso::inPlaceModel,
+           Aidge::ExpandImpl_cpu_forward_kernel<double>,
+           nullptr});
+} // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_EXPANDIMPL_KERNELS_H_ */
diff --git a/src/operator/ExpandImpl.cpp b/src/operator/ExpandImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dfd4d2d82edc4dfb5bbaec6f5b33bf1c00bf3c75
--- /dev/null
+++ b/src/operator/ExpandImpl.cpp
@@ -0,0 +1,56 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include "aidge/backend/cpu/operator/ExpandImpl.hpp"
+
+#include <vector>
+
+#include "aidge/backend/cpu/operator/ExpandImpl_kernels.hpp"
+#include "aidge/data/Data.hpp"
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Expand.hpp"
+#include "aidge/utils/ErrorHandling.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+
+namespace Aidge {
+
+template <> void ExpandImpl_cpu::forward() {
+    const Expand_Op &op_ = static_cast<const Expand_Op &>(mOp);
+    // Check that the inputs are provided
+    AIDGE_ASSERT(op_.getInput(0),
+                 "{}: missing input 0: {}",
+                 Expand_Op::Type,
+                 Expand_Op::getInputsName()[0]);
+    AIDGE_ASSERT(op_.getInput(1),
+                 "{}: missing input 1: {}",
+                 Expand_Op::Type,
+                 Expand_Op::getInputsName()[1]);
+
+    // Find the correct kernel type
+    const auto impl =
+        Registrar<ExpandImpl_cpu>::create(getBestMatch(getRequiredSpec()));
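+
+    // The kernel writes directly into the raw output buffer, so the output
+    // tensor must already be allocated (e.g. by a prior forwardDims call)
+    // before forward() runs.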
+    // Call kernel
+    impl.forward(op_.getInput(0),
+                 op_.getInput(1),
+                 op_.getOutput(0)->getImpl()->rawPtr(),
+                 op_.getOutput(0)->dims());
+}
+
+template <> void ExpandImpl_cpu::backward() {
+    AIDGE_THROW_OR_ABORT(
+        std::runtime_error,
+        "Backward not yet implemented for Expand_Op on backend cpu");
+}
+
+} // namespace Aidge
diff --git a/unit_tests/operator/Test_ExpandImpl.cpp b/unit_tests/operator/Test_ExpandImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3fcb5e4460388712abb99dc4aa2f1fd0f274d841
--- /dev/null
+++ b/unit_tests/operator/Test_ExpandImpl.cpp
@@ -0,0 +1,113 @@
+/********************************************************************************
+ * Copyright (c) 2024 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <aidge/data/Data.hpp>
+#include <aidge/operator/OperatorTensor.hpp>
+#include <aidge/utils/ArrayHelpers.hpp>
+#include <aidge/utils/TensorUtils.hpp>
+#include <aidge/utils/Types.h>
+#include <catch2/catch_test_macros.hpp>
+#include <cstdint>
+#include <cstdlib>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/filler/Filler.hpp"
+#include "aidge/operator/Expand.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+using std::shared_ptr;
+
+using namespace Aidge;
+
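+// Shared test setup: associates the data and shape inputs on the CPU backend
+// and aligns the output data type with the input data; the expected outputs
+// are set to Int32, matching the integer test data used throughout.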
+void setupTestExpand(shared_ptr<Tensor> inputData,
+                     shared_ptr<Tensor> inputShape,
+                     shared_ptr<OperatorTensor> &op,
+                     shared_ptr<Tensor> &expectedOutput) {
+
+    op->getOutput(0)->setDataType(inputData->dataType());
+
+    inputData->setBackend("cpu");
+    op->associateInput(0, inputData);
+
+    inputShape->setBackend("cpu");
+    op->associateInput(1, inputShape);
+
+    expectedOutput->setBackend("cpu");
+    expectedOutput->setDataType(DataType::Int32);
+}
+
+TEST_CASE("[cpu/operator] Expand(forward)", "[Expand][CPU]") {
+    auto node = Expand();
+    auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator());
+    op->setBackend("cpu");
+
+    SECTION("Expand shape is bigger than inputData") {
+        auto inputData = std::make_shared<Tensor>(Array1D<int, 2>({1, 3}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 4>({1, 3, 4, 2}));
+        auto expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 1, 3, 4, 2>({{{{{1, 3}, {1, 3}, {1, 3}, {1, 3}},
+                                        {{1, 3}, {1, 3}, {1, 3}, {1, 3}},
+                                        {{1, 3}, {1, 3}, {1, 3}, {1, 3}}}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("Expand shape has less dimensions than inputData") {
+        auto inputData = std::make_shared<Tensor>(
+            Array3D<int, 2, 1, 3>({{{2, 1, 3}, {2, 1, 3}}}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 2>({2, 3}));
+        auto expectedOutput = std::make_shared<Tensor>(Array3D<int, 2, 2, 3>(
+            {{{{2, 1, 3}, {2, 1, 3}}, {{2, 1, 3}, {2, 1, 3}}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("Expand shape = {1} leads to input equal to output.") {
+        auto inputData = std::make_shared<Tensor>(
+            Array4D<int, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 1>({1}));
+        auto expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 2, 1, 3, 1>({{{2, 1, 3}, {2, 1, 3}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+    SECTION("The only common dimension is the last one & it is equal to 1") {
+        auto inputData = std::make_shared<Tensor>(
+            Array4D<int, 1, 1, 3, 1>({{{{2, 1, 3}}}}));
+        auto inputShape =
+            std::make_shared<Tensor>(Array1D<std::int64_t, 3>({2, 1, 1}));
+        auto expectedOutput = std::make_shared<Tensor>(
+            Array4D<int, 1, 2, 3, 1>({{{{2, 1, 3}, {2, 1, 3}}}}));
+        setupTestExpand(inputData, inputShape, op, expectedOutput);
+
+        // forwardDims has already been tested in core
+        CHECK(op->forwardDims(true));
+        REQUIRE_NOTHROW(op->forward());
+        CHECK(approxEq<int>(*expectedOutput, *op->getOutput(0)));
+    }
+}
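
Usage sketch (not part of the patch): a minimal example of driving the new CPU
Expand implementation, mirroring the unit tests above. It assumes aidge_core
and this backend are built and linked; all calls follow the test file.

    #include <cstdint>
    #include <memory>

    #include "aidge/backend/cpu.hpp"
    #include "aidge/data/Tensor.hpp"
    #include "aidge/operator/Expand.hpp"
    #include "aidge/operator/OperatorTensor.hpp"
    #include "aidge/utils/ArrayHelpers.hpp"

    int main() {
        using namespace Aidge;

        auto node = Expand();
        auto op = std::static_pointer_cast<OperatorTensor>(node->getOperator());
        op->setBackend("cpu");

        // data dims {2} with values {1, 3}; shape values {2, 2} -> output dims {2, 2}
        auto data  = std::make_shared<Tensor>(Array1D<int, 2>({1, 3}));
        auto shape = std::make_shared<Tensor>(Array1D<std::int64_t, 2>({2, 2}));
        data->setBackend("cpu");
        shape->setBackend("cpu");

        op->associateInput(0, data);
        op->associateInput(1, shape);
        op->getOutput(0)->setDataType(data->dataType());

        op->forwardDims(true); // resolves and allocates the {2, 2} output
        op->forward();         // each of the 2 output rows is {1, 3}
        return 0;
    }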