diff --git a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp index 5258c4c3e7376c3883b119503ee9e6765de844d5..df8e1a7e7b02a4ad032d6f09fae3ae2cd8a42eff 100644 --- a/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp @@ -9,13 +9,12 @@ * ********************************************************************************/ -#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H -#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H +#ifndef AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ +#define AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ #include <cmath> - +#include <cstddef> #include "aidge/utils/Registrar.hpp" - #include "aidge/backend/cpu/operator/ScalingImpl.hpp" //TODO : improve propagate, n2d2 : @@ -61,12 +60,13 @@ const O& clamp(const O& x, const O& min, const O& max) } template<class O> -O saturate(O value, std::size_t quantizedNbBits, bool isOutputUnsigned) { +O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) { + // TODO: no assertions in kernel assert(quantizedNbBits > 0); - const O min = isOutputUnsigned?0: + const O min = isOutputUnsigned ? 0 : -(1ll << (quantizedNbBits - 1ll)); - const O max = isOutputUnsigned?(1ll << quantizedNbBits) - 1ll: + const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll : (1ll << (quantizedNbBits - 1ll)) - 1ll; return clamp(value, min, max); @@ -81,8 +81,8 @@ void ScalingImpl_cpu_forward_kernel(const Scaling_Op::Attrs& attrs, const I* input = static_cast<const I*>(input_); O* output = static_cast<O*>(output_); const I& scalingFactor = static_cast<const I&>(std::get<0>(attrs)); - std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs)); - bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs)); + const std::size_t quantizedNbBits = static_cast<std::size_t>(std::get<1>(attrs)); + const bool isOutputUnsigned = static_cast<bool>(std::get<2>(attrs)); for (std::size_t i = 0; i < inputLenght; ++i) { output[i] = input[i] * scalingFactor; @@ -103,4 +103,4 @@ static Registrar<ScalingImplForward_cpu> registrarScalingImplForward_cpu_Float64 } // namespace } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H */ +#endif /* AIDGE_CPU_OPERATOR_SCALINGIMPL_FORWARD_KERNEL_H_ */ \ No newline at end of file diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp index 06c7cf29638c2ef0881db111702eeba83863a9e2..80e2f0fcef83a369561095f8e55a437f7acc9675 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -26,95 +26,26 @@ namespace Aidge { // class Slice_Op; // compute kernel registry for forward and backward -template <DimIdx_t DIM> class SliceImplForward_cpu - : public Registrable<SliceImplForward_cpu<DIM>, std::tuple<DataType>, - void(const typename Slice_Op<DIM>::Attrs&, - const std::array<std::size_t, DIM>, + : public Registrable<SliceImplForward_cpu, std::tuple<DataType>, + void(const typename Slice_Op::Attrs&, + const std::vector<std::size_t>, const void*, void*)> {}; -template <DimIdx_t DIM> class SliceImplBackward_cpu - : public Registrable<SliceImplBackward_cpu<DIM>, std::tuple<DataType>, - void(const typename Slice_Op<DIM>::Attrs&, - const std::array<std::size_t, DIM>, + : public Registrable<SliceImplBackward_cpu, std::tuple<DataType>, + void(const typename 
Slice_Op::Attrs&, + const std::vector<std::size_t>, const void*, void*)> {}; -template <DimIdx_t DIM> -class SliceImpl_cpu : public OperatorImpl { - public: - SliceImpl_cpu(const Slice_Op<DIM>& op) : OperatorImpl(op) {} - - static std::unique_ptr<SliceImpl_cpu<DIM>> create(const Slice_Op<DIM>& op) { - return std::make_unique<SliceImpl_cpu<DIM>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); - } - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final { return 0; } - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); - } - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final { - return mNbConsumedData[0]; - } - NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final { - return mNbProducedData[0]; - } - void updateConsummerProducer() override final { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); - } - - void forward() override { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<DIM>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<DIM>(), - std::get<1>(std::static_pointer_cast<const Slice_Op<DIM>&>(mOp).getStaticAttributes()), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); - - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); - } - - void backward() override { printf("Not implemented yet.\n"); } -}; -/******************************************************************************/ - -template <> -class SliceImpl_cpu<1> : public OperatorImpl { +class SliceImpl_cpu : public OperatorImpl { public: - SliceImpl_cpu(const Slice_Op<1>& op) : OperatorImpl(op) {} + SliceImpl_cpu(const Slice_Op& op) : OperatorImpl(op) {} - static std::unique_ptr<SliceImpl_cpu<1>> create(const Slice_Op<1>& op) { - return std::make_unique<SliceImpl_cpu<1>>(op); + static std::unique_ptr<SliceImpl_cpu> create(const Slice_Op& op) { + return std::make_unique<SliceImpl_cpu>(op); } public: @@ -127,89 +58,14 @@ public: void updateConsummerProducer() override final; void forward() override; - void backward() override; -}; - -/******************************************************************************/ - -template <> -class SliceImpl_cpu<2> : public OperatorImpl { - public: - SliceImpl_cpu(const 
Slice_Op<2>& op) : OperatorImpl(op) {}
-    static std::unique_ptr<SliceImpl_cpu<2>> create(const Slice_Op<2>& op) {
-        return std::make_unique<SliceImpl_cpu<2>>(op);
-    }
-
-  public:
-    NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
-                               const std::vector<DimSize_t>& inputsSize) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward() override;
-    void backward() override;
-};
-
-/******************************************************************************/
-
-template <>
-class SliceImpl_cpu<3> : public OperatorImpl {
-  public:
-    SliceImpl_cpu(const Slice_Op<3>& op) : OperatorImpl(op) {}
-
-    static std::unique_ptr<SliceImpl_cpu<3>> create(const Slice_Op<3>& op) {
-        return std::make_unique<SliceImpl_cpu<3>>(op);
-    }
-
-  public:
-    NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
-                               const std::vector<DimSize_t>& inputsSize) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward() override;
    void backward() override;
};
-/******************************************************************************/
-
-template <>
-class SliceImpl_cpu<4> : public OperatorImpl {
-  public:
-    SliceImpl_cpu(const Slice_Op<4>& op) : OperatorImpl(op) {}
-
-    static std::unique_ptr<SliceImpl_cpu<4>> create(const Slice_Op<4>& op) {
-        return std::make_unique<SliceImpl_cpu<4>>(op);
-    }
-
-  public:
-    NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getRequiredMemory(const IOIndex_t outputIdx,
-                               const std::vector<DimSize_t>& inputsSize) const override final;
-    NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final;
-    NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final;
-    void updateConsummerProducer() override final;
-
-    void forward() override;
-    void backward() override;
-};
-
-
 namespace {
-static Registrar<Slice_Op<1>> registrarSliceImpl_1D_cpu("cpu", Aidge::SliceImpl_cpu<1>::create);
-static Registrar<Slice_Op<2>> registrarSliceImpl_2D_cpu("cpu", Aidge::SliceImpl_cpu<2>::create);
-static Registrar<Slice_Op<3>> registrarSliceImpl_3D_cpu("cpu", Aidge::SliceImpl_cpu<3>::create);
-static Registrar<Slice_Op<4>> registrarSliceImpl_4D_cpu("cpu", Aidge::SliceImpl_cpu<4>::create);
+static Registrar<Slice_Op> registrarSliceImpl_cpu("cpu", Aidge::SliceImpl_cpu::create);
} // namespace
} // namespace Aidge
-#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */
+#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_H_ */
\ No newline at end of file
diff --git a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
index bbf4ccbae77089ca75dfee34f5bc5b0dd7d3697d..7eb4b9dc2cb8dddc8b7fdaf4d63b8f1d39d879b0 100644
--- a/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
+++ 
b/include/aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp
@@ -15,46 +15,47 @@
 #include "aidge/utils/Registrar.hpp"
 #include "aidge/operator/Slice.hpp"
 #include "aidge/backend/cpu/operator/SliceImpl.hpp"
-#include <array>
+#include <vector>
 #include <cstddef>
 #include "aidge/data/Data.hpp"
 
 namespace Aidge {
-template <class I, std::size_t DIM>
-void SliceImpl_cpu_forward_kernel(const typename Slice_Op<DIM>::Attrs& attrs,
-                                  const std::array<std::size_t, DIM> inputDims,
+template <class I>
+void SliceImpl_cpu_forward_kernel(const typename Slice_Op::Attrs& attrs,
+                                  const std::vector<std::size_t> inputDims,
                                   const void* input_,
                                   void* output_) {
     const I* input = static_cast<const I*>(input_) + std::get<0>(attrs);
     I* output = static_cast<I*>(output_);
-    const std::array<std::size_t, DIM> slicedDims = std::get<1>(attrs);
+    const std::vector<std::size_t> slicedDims = std::get<1>(attrs);
+    const std::size_t nbDims = slicedDims.size();
 
     // for inputDims = {4,5,5,3} & slicedDims = {3,2,2,1}, substractedDims = {1,3,3,2}
-    std::array<std::size_t, DIM> substractedDims;
-    for (std::size_t i = 0; i < DIM; ++i) {
+    std::vector<std::size_t> substractedDims = std::vector<std::size_t>(nbDims);
+    for (std::size_t i = 0; i < nbDims; ++i) {
         substractedDims[i] = inputDims[i] - slicedDims[i];
     }
 
     // for slicedDims = {3,2,2,1}, prodSlicedDims = {12,4,2,1}
-    std::array<std::size_t, DIM> prodSlicedDims;
-    std::array<std::size_t, DIM+1> prodInputDims;
-    prodSlicedDims[DIM - 1] = slicedDims[DIM - 1];
-    prodInputDims[DIM - 1] = inputDims[DIM - 1];
-    prodInputDims[DIM] = 1;
-    for (std::size_t i = 2; i <= DIM; ++i) {
-        prodSlicedDims[DIM - i] = prodSlicedDims[DIM - i + 1]*slicedDims[DIM - i];
-        prodInputDims[DIM - i] = prodInputDims[DIM - i + 1]*inputDims[DIM - i];
+    std::vector<std::size_t> prodSlicedDims = std::vector<std::size_t>(nbDims);
+    std::vector<std::size_t> prodInputDims = std::vector<std::size_t>(nbDims+1);
+    prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
+    prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
+    prodInputDims[nbDims] = 1;
+    for (std::size_t i = 2; i <= nbDims; ++i) {
+        prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1]*slicedDims[nbDims - i];
+        prodInputDims[nbDims - i] = prodInputDims[nbDims - i + 1]*inputDims[nbDims - i];
     }
 
     std::size_t j = 0;
     std::size_t i = 0;
     for (; j < prodSlicedDims[0];) {
         output[j] = input[i++];
-        ++j;
-        for (std::size_t idx = DIM - 1; idx > 0; --idx) {
-            i += j % prodSlicedDims[idx] == 0 ? substractedDims[idx]*prodInputDims[idx+1] : 0;
+        ++j;
+        for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
+            i += j % prodSlicedDims[idx] == 0 ? 
substractedDims[idx]*prodInputDims[idx+1] : 0; } } } @@ -62,37 +63,13 @@ void SliceImpl_cpu_forward_kernel(const typename Slice_Op<DIM>::Attrs& attrs, namespace { // DIM = 1 -static Registrar<SliceImplForward_cpu<1>> registrarSliceImplForward_1D_cpu_Float32( - {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 1>); -static Registrar<SliceImplForward_cpu<1>> registrarSliceImplForward_1D_cpu_Int32( - {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 1>); -static Registrar<SliceImplForward_cpu<1>> registrarSliceImplForward_1D_cpu_Float64( - {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 1>); - -// DIM = 2 -static Registrar<SliceImplForward_cpu<2>> registrarSliceImplForward_2D_cpu_Float32( - {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 2>); -static Registrar<SliceImplForward_cpu<2>> registrarSliceImplForward_2D_cpu_Int32( - {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 2>); -static Registrar<SliceImplForward_cpu<2>> registrarSliceImplForward_2D_cpu_Float64( - {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 2>); - -// DIM = 3 -static Registrar<SliceImplForward_cpu<3>> registrarSliceImplForward_3D_cpu_Float32( - {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 3>); -static Registrar<SliceImplForward_cpu<3>> registrarSliceImplForward_3D_cpu_Int32( - {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 3>); -static Registrar<SliceImplForward_cpu<3>> registrarSliceImplForward_3D_cpu_Float64( - {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 3>); - -// DIM = 4 -static Registrar<SliceImplForward_cpu<4>> registrarSliceImplForward_4D_cpu_Float32( - {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float, 4>); -static Registrar<SliceImplForward_cpu<4>> registrarSliceImplForward_4D_cpu_Int32( - {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int, 4>); -static Registrar<SliceImplForward_cpu<4>> registrarSliceImplForward_4D_cpu_Float64( - {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double, 4>); +static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float32( + {DataType::Float32}, Aidge::SliceImpl_cpu_forward_kernel<float>); +static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Int32( + {DataType::Int32}, Aidge::SliceImpl_cpu_forward_kernel<int>); +static Registrar<SliceImplForward_cpu> registrarSliceImplForward_cpu_Float64( + {DataType::Float64}, Aidge::SliceImpl_cpu_forward_kernel<double>); } // namespace } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_SLICEIMPL_FORWARD_KERNEL_H_ */ diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp index 3ae56e1a4f613a4188dc51659853e07674e74768..b60bbe60188f416f28ff2562875dce6e5ee15bd5 100644 --- a/src/operator/SliceImpl.cpp +++ b/src/operator/SliceImpl.cpp @@ -22,231 +22,55 @@ #include <cassert> #include <tuple> - -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); - - // Requires the whole tensors - return std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>()[0]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } - -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - 
(void)outputIdx; - (void)inputsSize; - return std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<1>()[0]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} - -void Aidge::SliceImpl_cpu<1>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - -void Aidge::SliceImpl_cpu<1>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<1>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<1>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); - - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - -void Aidge::SliceImpl_cpu<1>::backward() { printf("Not implemented yet.\n"); } - -///////////////////////////////////////////////////////////////////////// - -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(); - return inputDims[0]*inputDims[1]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } - -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<2>(); - return outputDims[0]*outputDims[1]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} - -void Aidge::SliceImpl_cpu<2>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - -void Aidge::SliceImpl_cpu<2>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<2>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(), 
- std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); - - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - -void Aidge::SliceImpl_cpu<2>::backward() { printf("Not implemented yet.\n"); } - -//////////////////////////////////////////////////////////////////////////// - -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(); - - return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } - -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<3>(); - return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} - -void Aidge::SliceImpl_cpu<3>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - -void Aidge::SliceImpl_cpu<3>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<3>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<3>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); - - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); -} - -void Aidge::SliceImpl_cpu<3>::backward() { printf("Not implemented yet.\n"); } - -////////////////////////////////////////////////////////////////////////////// - -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(); + const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(); 
return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } +Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { +Aidge::NbElts_t Aidge::SliceImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t>& inputsSize) const { (void)outputIdx; (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<4>(); + const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { return mNbConsumedData[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { +Aidge::NbElts_t Aidge::SliceImpl_cpu::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { return mNbProducedData[0]; } -void Aidge::SliceImpl_cpu<4>::updateConsummerProducer() { +void Aidge::SliceImpl_cpu::updateConsummerProducer() { // each input is consumed by the minimum amount for a forward pass mNbConsumedData[0] += getNbRequiredData(0); mNbProducedData[0] += getRequiredMemory(0, {}); } -void Aidge::SliceImpl_cpu<4>::forward() { +void Aidge::SliceImpl_cpu::forward() { // FIXME: uncomment the following code once memory handling will work assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<4>>::create( + auto kernelFunc = Registrar<SliceImplForward_cpu>::create( {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<4>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() + kernelFunc(dynamic_cast<const Slice_Op&>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() ); // each input is consumed by the minimum amount for a forward pass @@ -255,4 +79,4 @@ void Aidge::SliceImpl_cpu<4>::forward() { mNbProducedData[0] += getRequiredMemory(0, {}); } -void Aidge::SliceImpl_cpu<4>::backward() { printf("Not implemented yet.\n"); } \ No newline at end of file +void Aidge::SliceImpl_cpu::backward() { printf("Not implemented yet.\n"); } \ No newline at end of file diff --git a/unit_tests/recipies/Test_HorizontalTiling.cpp b/unit_tests/recipies/Test_HorizontalTiling.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b71a01d130a783caf5c643dfb0c3757b1c524e5e --- /dev/null +++ b/unit_tests/recipies/Test_HorizontalTiling.cpp @@ -0,0 +1,208 @@ 
+/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#include <catch2/catch_test_macros.hpp> +#include <set> + +#include "aidge/graph/GraphView.hpp" +#include "aidge/graph/OpArgs.hpp" +#include "aidge/operator/Conv.hpp" +#include "aidge/operator/ReLU.hpp" +#include "aidge/recipies/Recipies.hpp" +#include "aidge/scheduler/Scheduler.hpp" +#include "aidge/operator/Concat.hpp" + + +namespace Aidge { + +TEST_CASE("[core/recipies] Tiling(transformation)", "[Tiling][Recipies]") { + + SECTION("Transform a pre-generated GraphView") { + + SECTION("Simple Node: Conv") { + std::shared_ptr<Node> myReLU = ReLU("myReLU"); + std::shared_ptr<Node> myConv = Conv(3,4,{3,3}, "myconv"); + std::shared_ptr<Tensor> myWeights = std::make_shared<Tensor>(Array4D<int,4,3,3,3> { + { + { + {{ 0, 1, 2}, + { 3, 4, 5}, + { 6, 7, 8}}, + {{ 9, 10, 11}, + { 12, 13, 14}, + { 15, 16, 17}}, + {{ 18, 19, 20}, + { 21, 22, 23}, + { 24, 25, 26}} + }, + { + {{ 27, 28, 29}, + { 30, 31, 32}, + { 33, 34, 35}}, + {{ 36, 37, 38}, + { 39, 40, 41}, + { 42, 43, 44}}, + {{ 45, 46, 47}, + { 48, 49, 50}, + { 51, 52, 53}} + }, + { + {{ 54, 55, 56}, + { 57, 58, 59}, + { 60, 61, 62}}, + {{ 63, 64, 65}, + { 66, 67, 68}, + { 69, 70, 71}}, + {{ 72, 73, 74}, + { 75, 76, 77}, + { 78, 79, 80}} + }, + { + {{ 81, 82, 83}, + { 84, 85, 86}, + { 87, 88, 89}}, + {{ 90, 91, 92}, + { 93, 94, 95}, + { 96, 97, 98}}, + {{ 99, 100, 101}, + {102, 103, 104}, + {105, 106, 107}} + } + } + }); + std::shared_ptr<Tensor> myBias = std::make_shared<Tensor>(Array1D<int,4> {{7,0,9,0}}); + std::shared_ptr<Tensor> myInput = std::make_shared<Tensor>(Array4D<int,2,3,5,5> { //NCHW + { + { + {{ 0, 1, 2, 3, 4}, + { 5, 6, 7, 8, 9}, + { 10, 11, 12, 13, 14}, + { 15, 16, 17, 18, 19}, + { 20, 21, 22, 23, 24}}, + + {{ 25, 26, 27, 28, 29}, + { 30, 31, 32, 33, 34}, + { 35, 36, 37, 38, 39}, + { 40, 41, 42, 43, 44}, + { 45, 46, 47, 48, 49}}, + + {{ 50, 51, 52, 53, 54}, + { 55, 56, 57, 58, 59}, + { 60, 61, 62, 63, 64}, + { 65, 66, 67, 68, 69}, + { 70, 71, 72, 73, 74}} + }, + { + {{ 75, 76, 77, 78, 79}, + { 80, 81, 82, 83, 84}, + { 85, 86, 87, 88, 89}, + { 90, 91, 92, 93, 94}, + { 95, 96, 97, 98, 99}}, + + {{100, 101, 102, 103, 104}, + {105, 106, 107, 108, 109}, + {110, 111, 112, 113, 114}, + {115, 116, 117, 118, 119}, + {120, 121, 122, 123, 124}}, + + {{125, 126, 127, 128, 129}, + {130, 131, 132, 133, 134}, + {135, 136, 137, 138, 139}, + {140, 141, 142, 143, 144}, + {145, 146, 147, 148, 149}} + } + } + }); + std::shared_ptr<Tensor> myOutput = std::make_shared<Tensor>(Array4D<int,2,4,3,3> { + { + { + {{ 15226, 15577, 15928}, + { 16981, 17332, 17683}, + { 18736, 19087, 19438}}, + + {{ 37818, 38898, 39978}, + { 43218, 44298, 45378}, + { 48618, 49698, 50778}}, + + {{ 60426, 62235, 64044}, + { 69471, 71280, 73089}, + { 78516, 80325, 82134}}, + + {{ 83016, 85554, 88092}, + { 95706, 98244, 100782}, + {108396, 110934, 113472}} + }, + { + {{ 41551, 41902, 42253}, + { 43306, 43657, 44008}, + { 45061, 45412, 45763}}, + + {{118818, 119898, 120978}, + {124218, 125298, 126378}, + {129618, 130698, 131778}}, + + {{196101, 197910, 199719}, + {205146, 206955, 208764}, + {214191, 216000, 217809}}, + + {{273366, 275904, 278442}, + 
{286056, 288594, 291132}, + {298746, 301284, 303822}} + } + } + }); + myReLU->getOperator()->associateInput(0, myInput); + myReLU->addChild(myConv, 0, 0); + myConv->getOperator()->setInput(1, myWeights); + myConv->getOperator()->setInput(2, myBias); + std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->computeOutputDims(); + + std::shared_ptr<GraphView> g = std::make_shared<GraphView>(); + g->add({myReLU, myConv}); + g->compile("cpu", DataType::Int32); + std::set<std::shared_ptr<Node>> tiledConv = getConvHorizontalTiling(myConv, 2, 3); + + SequentialScheduler s(g); + s.forward(); + REQUIRE(*(std::dynamic_pointer_cast<Conv_Op<2>>(myConv->getOperator())->getOutput(0)) == *myOutput); + + GraphView::replace({myConv, myConv->getParent(1), myConv->getParent(2)}, tiledConv); + g->compile("cpu", DataType::Int32); + s.resetScheduling(); + s.forward(); + + REQUIRE(*(std::dynamic_pointer_cast<OperatorTensor>((*g->outputNodes().begin())->getOperator())->getOutput(0)) == *myOutput); + } + } +} +} + // std::shared_ptr<GraphView> g = Sequential({ + // Conv(3, 16, {3,3}, "conv1"), + // ReLU("relu1"), + // Conv(16, 32, {1,1}, "conv2"), + // Conv(32, 16, {1,1}, "conv3"), + // Conv(16, 10, {3,3}, "conv4"), + // ReLU("relu2") + // }); + + // for (auto& individualConv : g->match("Conv")) { + // auto tiledConv = horizontalTiling(individualConv); + // g->replace(individualConv, tiledConv); + // } + // } + + // SECTION("Create the GraphView with tiled layers") { + // std::shared_ptr<GraphView> g; + // g->addChild(horizontalTiling(Conv())) + // } + +// } +// } // namespace Aidge \ No newline at end of file
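
Note on the quantization path in ScalingImpl_forward_kernels.hpp: saturate() clamps to [0, 2^n - 1] when the output is unsigned and to [-2^(n-1), 2^(n-1) - 1] when it is signed. Below is a minimal standalone sketch of that bound computation; it is not part of the patch, the function body is reproduced from the kernel above, and main() with its sample values is illustrative only.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Bound computation reproduced from the patched saturate() above.
template <class O>
O saturate(const O value, const std::size_t quantizedNbBits, const bool isOutputUnsigned) {
    const O min = isOutputUnsigned ? 0 : -(1ll << (quantizedNbBits - 1ll));
    const O max = isOutputUnsigned ? (1ll << quantizedNbBits) - 1ll
                                   : (1ll << (quantizedNbBits - 1ll)) - 1ll;
    return (value < min) ? min : (value > max) ? max : value;  // same effect as clamp()
}

int main() {
    // 8-bit signed range is [-128, 127]; 8-bit unsigned range is [0, 255].
    std::cout << saturate<std::int64_t>(300, 8, false) << '\n';  // prints 127
    std::cout << saturate<std::int64_t>(300, 8, true)  << '\n';  // prints 255
    std::cout << saturate<std::int64_t>(-42, 8, true)  << '\n';  // prints 0
    return 0;
}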
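
Note on the slice kernel: the index arithmetic in the now rank-generic SliceImpl_cpu_forward_kernel is the least obvious part of the patch. The following self-contained sketch, also not part of the patch and with all names local to it, runs the same walk on the example from the kernel's comments (inputDims = {4,5,5,3}, slicedDims = {3,2,2,1}) with a start offset of 0. The input is filled with its own flat indices so the positions the kernel copies are directly visible in the output.

#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
    const std::vector<std::size_t> inputDims  = {4, 5, 5, 3};
    const std::vector<std::size_t> slicedDims = {3, 2, 2, 1};
    const std::size_t nbDims = slicedDims.size();

    // Input filled with its own flat indices: input[k] == k.
    std::vector<int> input(4 * 5 * 5 * 3);
    std::iota(input.begin(), input.end(), 0);

    // substractedDims[i]: input elements the slice skips in dimension i (here {1,3,3,2}).
    std::vector<std::size_t> substractedDims(nbDims);
    for (std::size_t i = 0; i < nbDims; ++i) {
        substractedDims[i] = inputDims[i] - slicedDims[i];
    }

    // Suffix products: prodSlicedDims = {12,4,2,1}, prodInputDims = {300,75,15,3,1}.
    std::vector<std::size_t> prodSlicedDims(nbDims);
    std::vector<std::size_t> prodInputDims(nbDims + 1);
    prodSlicedDims[nbDims - 1] = slicedDims[nbDims - 1];
    prodInputDims[nbDims - 1] = inputDims[nbDims - 1];
    prodInputDims[nbDims] = 1;
    for (std::size_t i = 2; i <= nbDims; ++i) {
        prodSlicedDims[nbDims - i] = prodSlicedDims[nbDims - i + 1] * slicedDims[nbDims - i];
        prodInputDims[nbDims - i]  = prodInputDims[nbDims - i + 1] * inputDims[nbDims - i];
    }

    // Same walk as the kernel: j indexes the contiguous output, i the strided input.
    std::vector<int> output(prodSlicedDims[0]);
    std::size_t i = 0;
    for (std::size_t j = 0; j < prodSlicedDims[0];) {
        output[j] = input[i++];
        ++j;
        // Each time j finishes a full row of dimension idx in the output,
        // skip the input elements that fall outside the slice.
        for (std::size_t idx = nbDims - 1; idx > 0; --idx) {
            i += (j % prodSlicedDims[idx] == 0) ? substractedDims[idx] * prodInputDims[idx + 1] : 0;
        }
    }

    for (const int v : output) { std::cout << v << ' '; }
    std::cout << '\n';
    return 0;
}

Compiled as C++11 or later, this prints 0 3 15 18 75 78 90 93 150 153 165 168, the flat input indices of the slice [0:3, 0:2, 0:2, 0:1]. That is exactly the inner loop's invariant: whenever j completes a row of prodSlicedDims[idx] output elements, i jumps over the substractedDims[idx]*prodInputDims[idx+1] input elements that the slice excludes.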