diff --git a/include/aidge/backend/cpu/operator/TransposeImpl.hpp b/include/aidge/backend/cpu/operator/TransposeImpl.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3bcbeda6b6263fcbe7e33cd907b8f13bd62b6471
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/TransposeImpl.hpp
@@ -0,0 +1,94 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_TransposeIMPL_H_
+#define AIDGE_CPU_OPERATOR_TransposeIMPL_H_
+
+#include "aidge/backend/OperatorImpl.hpp"
+#include "aidge/operator/Transpose.hpp"
+#include "aidge/utils/Registrar.hpp"
+#include "aidge/utils/Types.h"
+#include <memory>
+#include <tuple>
+#include <vector>
+
+namespace Aidge {
+// class Transpose_Op;
+
+// Compute kernel registries for forward and backward, one per supported rank.
+// Each registry is keyed by the (input, output) data types; the registered
+// kernel is called as (attrs, inputDims, outputDims, input, output).
+class TransposeImpl2DForward_cpu
+    : public Registrable<TransposeImpl2DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<2>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
+};
+class TransposeImpl3DForward_cpu
+    : public Registrable<TransposeImpl3DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<3>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
+};
+class TransposeImpl4DForward_cpu
+    : public Registrable<TransposeImpl4DForward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<4>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
+};
+class TransposeImpl2DBackward_cpu
+    : public Registrable<TransposeImpl2DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<2>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
+};
+class TransposeImpl3DBackward_cpu
+    : public Registrable<TransposeImpl3DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<3>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
+};
+class TransposeImpl4DBackward_cpu
+    : public Registrable<TransposeImpl4DBackward_cpu, std::tuple<DataType, DataType>, void( const typename Transpose_Op<4>::Attrs& attrs, const std::vector<DimSize_t>&, const std::vector<DimSize_t>&, const void*, void*)> {
+};
+
+
+/// CPU implementation of Transpose_Op<2>; forward() dispatches to the kernel
+/// registered in TransposeImpl2DForward_cpu for the tensors' data types.
+class TransposeImpl2D_cpu : public OperatorImpl {
+public:
+    TransposeImpl2D_cpu(const Transpose_Op<2>& op) : OperatorImpl(op) {}
+
+    static std::unique_ptr<TransposeImpl2D_cpu> create(const Transpose_Op<2>& op) {
+        return std::make_unique<TransposeImpl2D_cpu>(op);
+    }
+
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+/// CPU implementation of Transpose_Op<3>; same structure as TransposeImpl2D_cpu.
+class TransposeImpl3D_cpu : public OperatorImpl {
+public:
+    TransposeImpl3D_cpu(const Transpose_Op<3>& op) : OperatorImpl(op) {}
+
+    static std::unique_ptr<TransposeImpl3D_cpu> create(const Transpose_Op<3>& op) {
+        return std::make_unique<TransposeImpl3D_cpu>(op);
+    }
+
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+/// CPU implementation of Transpose_Op<4>; same structure as TransposeImpl2D_cpu.
+class TransposeImpl4D_cpu : public OperatorImpl {
+public:
+    TransposeImpl4D_cpu(const Transpose_Op<4>& op) : OperatorImpl(op) {}
+
+    static std::unique_ptr<TransposeImpl4D_cpu> create(const Transpose_Op<4>& op) {
+        return std::make_unique<TransposeImpl4D_cpu>(op);
+    }
+
+    NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+static Registrar<Transpose_Op<2>> registrarTransposeImpl2D_cpu("cpu", Aidge::TransposeImpl2D_cpu::create);
+static Registrar<Transpose_Op<3>> registrarTransposeImpl3D_cpu("cpu", Aidge::TransposeImpl3D_cpu::create);
+static Registrar<Transpose_Op<4>> registrarTransposeImpl4D_cpu("cpu", Aidge::TransposeImpl4D_cpu::create);
+}
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_TransposeIMPL_H_ */
diff --git a/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..79901424189b9327682376909bee087ae9eef47a
--- /dev/null
+++ b/include/aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp
@@ -0,0 +1,124 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#ifndef AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_
+#define AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_
+
+#include "aidge/utils/Registrar.hpp"
+#include <array>
+#include <cassert>
+#include <cstddef>
+#include <cmath>
+#include <tuple>
+#include <vector>
+#include "aidge/data/Data.hpp"
+#include "aidge/utils/Types.h"
+
+#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief Forward kernel of the CPU Transpose operator for rank-DIM tensors.
+ *
+ * Output axis j is taken from input axis order[j], where order is the first
+ * attribute of the operator (OutputDimsOrder). Both tensors are addressed as
+ * dense row-major buffers, and the two buffers must not overlap.
+ *
+ * @tparam I   Element type of the input buffer.
+ * @tparam O   Element type of the output buffer.
+ * @tparam DIM Number of dimensions of both tensors.
+ * @param attrs      Operator attributes; std::get<0>(attrs) is the axis order.
+ * @param inputDims  Dimensions of the input tensor (DIM entries).
+ * @param outputDims Dimensions of the output tensor (DIM entries), expected to
+ *                   satisfy outputDims[j] == inputDims[order[j]].
+ * @param input_     Input buffer holding the elements of type I.
+ * @param output_    Output buffer receiving the elements of type O.
+ */
+template <class I, class O, DimSize_t DIM>
+void TransposeImpl_cpu_forward_kernel(const typename Transpose_Op<DIM>::Attrs& attrs,
+                                      const std::vector<DimSize_t>& inputDims,
+                                      const std::vector<DimSize_t>& outputDims,
+                                      const void* input_,
+                                      void* output_)
+{
+    assert(inputDims.size() == DIM && "input rank must match the kernel rank");
+    assert(outputDims.size() == DIM && "output rank must match the kernel rank");
+
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+    const auto& outputDimsOrder = std::get<0>(attrs);
+
+    // Row-major strides of the input, and total number of elements to copy.
+    std::array<std::size_t, DIM> inputStrides{};
+    std::size_t totalElements = 1;
+    for (std::size_t axis = DIM; axis-- > 0;) {
+        inputStrides[axis] = totalElements;
+        totalElements *= inputDims[axis];
+    }
+
+    // Moving one step along output axis j moves one step along input axis
+    // outputDimsOrder[j]: look up that input stride once, outside the copy loop.
+    std::array<std::size_t, DIM> permutedStrides{};
+    for (std::size_t j = 0; j < DIM; ++j) {
+        const std::size_t inputAxis = static_cast<std::size_t>(outputDimsOrder[j]);
+        assert(inputAxis < DIM && "axis order entry out of range");
+        assert(outputDims[j] == inputDims[inputAxis] && "output dims do not match the permuted input dims");
+        permutedStrides[j] = inputStrides[inputAxis];
+    }
+
+    // Walk the output in row-major order; `indices` is the current output coordinate.
+    std::array<std::size_t, DIM> indices{};
+    for (std::size_t i = 0; i < totalElements; ++i) {
+        // Position in the input of the element stored at output coordinate `indices`
+        std::size_t idx = 0;
+        for (std::size_t j = 0; j < DIM; ++j) {
+            idx += indices[j] * permutedStrides[j];
+        }
+
+        // Copy the value in output
+        output[i] = input[idx];
+
+        // Update indices for the next iteration (last axis varies fastest)
+        for (std::size_t j = DIM; j-- > 0;) {
+            if (indices[j] + 1 < outputDims[j]) {
+                ++indices[j];
+                break;
+            }
+            indices[j] = 0;
+        }
+    }
+}
+namespace {
+// DIM = 2
+static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 2>);
+static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 2>);
+static Registrar<TransposeImpl2DForward_cpu> registrarTransposeImpl2DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 2>);
+// DIM = 3
+static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 3>);
+static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 3>);
+static Registrar<TransposeImpl3DForward_cpu> registrarTransposeImpl3DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 3>);
+// DIM = 4
+static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::TransposeImpl_cpu_forward_kernel<float, float, 4>);
+static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::TransposeImpl_cpu_forward_kernel<int, int, 4>);
+static Registrar<TransposeImpl4DForward_cpu> registrarTransposeImpl4DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::TransposeImpl_cpu_forward_kernel<double, double, 4>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* AIDGE_CPU_OPERATOR_TRANSPOSEIMPL_FORWARD_KERNEL_H_ */
diff --git a/src/operator/TransposeImpl.cpp b/src/operator/TransposeImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7485351c67e5e79219b6d4ac4ca2d58606eeebe6
--- /dev/null
+++ b/src/operator/TransposeImpl.cpp
@@ -0,0 +1,90 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <cassert>
+#include <chrono>  // std::chrono::milliseconds
+#include <numeric> // std::accumulate
+#include <thread>  // std::this_thread::sleep_for
+#include <vector>
+
+#include "aidge/utils/Types.h"
+#include "aidge/operator/Transpose.hpp"
+
+#include "aidge/backend/cpu/operator/TransposeImpl.hpp"
+#include "aidge/backend/cpu/operator/TransposeImpl_forward_kernels.hpp"
+
+// NOTE(review): the forward kernel reads the input at permuted positions while
+// it fills the output sequentially, so it is only safe when input and output
+// are distinct buffers. Confirm that returning 0 here cannot lead the scheduler
+// to make the output alias the input.
+Aidge::NbElts_t Aidge::TransposeImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+Aidge::NbElts_t Aidge::TransposeImpl3D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+
+Aidge::NbElts_t Aidge::TransposeImpl4D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return 0;
+}
+
+void Aidge::TransposeImpl2D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(0)->nbDims() == 2 && "input #0 must have the same size as axes attributes (2)");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+            Registrar<TransposeImpl2DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Transpose_Op<2>&>(mOp).getStaticAttributes(),
+               mOp.getInput(0)->dims(),
+               mOp.getOutput(0)->dims(),
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
+
+void Aidge::TransposeImpl3D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(0)->nbDims() == 3 && "input #0 must have the same size as axes attributes (3)");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+            Registrar<TransposeImpl3DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Transpose_Op<3>&>(mOp).getStaticAttributes(),
+               mOp.getInput(0)->dims(),
+               mOp.getOutput(0)->dims(),
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
+
+void Aidge::TransposeImpl4D_cpu::forward() {
+    assert(mOp.getInput(0) && "missing input #0");
+    assert(mOp.getInput(0)->nbDims() == 4 && "input #0 must have the same size as axes attributes (4)");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+            Registrar<TransposeImpl4DForward_cpu>::create({mOp.getInput(0)->dataType(), mOp.getOutput(0)->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Transpose_Op<4>&>(mOp).getStaticAttributes(),
+               mOp.getInput(0)->dims(),
+               mOp.getOutput(0)->dims(),
+               mOp.getInput(0)->getImpl()->rawPtr(),
+               mOp.getOutput(0)->getImpl()->rawPtr());
+}
diff --git a/unit_tests/operator/Test_TransposeImpl.cpp b/unit_tests/operator/Test_TransposeImpl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..327d4f8db5570cd9258259eb1844b4b3fc74c607
--- /dev/null
+++ b/unit_tests/operator/Test_TransposeImpl.cpp
@@ -0,0 +1,99 @@
+/********************************************************************************
+ * Copyright (c) 2023 CEA-List
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0.
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *
+ ********************************************************************************/
+
+#include <catch2/catch_test_macros.hpp>
+#include <memory>
+
+#include "aidge/data/Tensor.hpp"
+#include "aidge/operator/Transpose.hpp"
+
+#include "aidge/backend/cpu.hpp"
+
+#include <iostream>
+using namespace Aidge;
+
+TEST_CASE("[cpu/operator] Transpose(forward)") {
+    std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array3D<float,2,3,4> {
+        {
+            {{0.42507452, 0.11244237, 0.43243718, 0.62354952},
+             {0.90250170, 0.48719984, 0.45781207, 0.92536664},
+             {0.06348717, 0.91678733, 0.64452291, 0.00484818}},
+
+            {{0.66873497, 0.99508536, 0.55714869, 0.84887981},
+             {0.41666120, 0.92365038, 0.80034822, 0.38721532},
+             {0.52037925, 0.53937608, 0.66380072, 0.36330253}}
+        }
+    });
+    std::shared_ptr<Tensor> output = std::make_shared<Tensor>(Array3D<float,2,4,3> {
+        {
+            {{0.42507452, 0.90250170, 0.06348717},
+             {0.11244237, 0.48719984, 0.91678733},
+             {0.43243718, 0.45781207, 0.64452291},
+             {0.62354952, 0.92536664, 0.00484818}},
+
+            {{0.66873497, 0.41666120, 0.52037925},
+             {0.99508536, 0.92365038, 0.53937608},
+             {0.55714869, 0.80034822, 0.66380072},
+             {0.84887981, 0.38721532, 0.36330253}}
+        }
+    });
+    std::shared_ptr<Node> myTranspose = Transpose<3>(std::array<DimSize_t,3>{{0,2,1}});
+    myTranspose->getOperator()->setDatatype(DataType::Float32);
+    myTranspose->getOperator()->setBackend("cpu");
+    myTranspose->getOperator()->associateInput(0,input);
+    myTranspose->getOperator()->computeOutputDims();
+    myTranspose->forward();
+
+    REQUIRE(*(myTranspose->getOperator()->getOutput(0)) == *output);
+}
+
+// A cyclic axis order is not its own inverse, so this case fails if the kernel
+// applies the permutation in the wrong direction.
+TEST_CASE("[cpu/operator] Transpose(forward) - cyclic axis order") {
+    std::shared_ptr<Tensor> input = std::make_shared<Tensor>(Array3D<float,2,3,4> {
+        {
+            {{ 0.0,  1.0,  2.0,  3.0},
+             { 4.0,  5.0,  6.0,  7.0},
+             { 8.0,  9.0, 10.0, 11.0}},
+
+            {{12.0, 13.0, 14.0, 15.0},
+             {16.0, 17.0, 18.0, 19.0},
+             {20.0, 21.0, 22.0, 23.0}}
+        }
+    });
+    // With order {1,2,0}: output[a][b][c] == input[c][a][b]
+    std::shared_ptr<Tensor> output = std::make_shared<Tensor>(Array3D<float,3,4,2> {
+        {
+            {{ 0.0, 12.0},
+             { 1.0, 13.0},
+             { 2.0, 14.0},
+             { 3.0, 15.0}},
+
+            {{ 4.0, 16.0},
+             { 5.0, 17.0},
+             { 6.0, 18.0},
+             { 7.0, 19.0}},
+
+            {{ 8.0, 20.0},
+             { 9.0, 21.0},
+             {10.0, 22.0},
+             {11.0, 23.0}}
+        }
+    });
+    std::shared_ptr<Node> myTranspose = Transpose<3>(std::array<DimSize_t,3>{{1,2,0}});
+    myTranspose->getOperator()->setDatatype(DataType::Float32);
+    myTranspose->getOperator()->setBackend("cpu");
+    myTranspose->getOperator()->associateInput(0,input);
+    myTranspose->getOperator()->computeOutputDims();
+    myTranspose->forward();
+
+    REQUIRE(*(myTranspose->getOperator()->getOutput(0)) == *output);
+}