diff --git a/include/aidge/backend/cpu/data/GetCPUPtr.h b/include/aidge/backend/cpu/data/GetCPUPtr.h new file mode 100644 index 0000000000000000000000000000000000000000..38ea848afc29fa4c23ff500f97e0c57954695021 --- /dev/null +++ b/include/aidge/backend/cpu/data/GetCPUPtr.h @@ -0,0 +1,23 @@ +/******************************************************************************** + * Copyright (c) 2023 CEA-List + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0. + * + * SPDX-License-Identifier: EPL-2.0 + * + ********************************************************************************/ + +#ifndef AIDGE_CPU_DATA_GETCPUPTR_H_ +#define AIDGE_CPU_DATA_GETCPUPTR_H_ + +#include "aidge/data/Tensor.hpp" + +namespace Aidge { +inline void *getCPUPtr(std::shared_ptr<Aidge::Data> const &data) { + return std::static_pointer_cast<Tensor>(data)->getImpl()->rawPtr(); +} +} // namespace Aidge + +#endif // AIDGE_CPU_DATA_GETCPUPTR_H_ \ No newline at end of file diff --git a/include/aidge/backend/cpu/data/TensorImpl.hpp b/include/aidge/backend/cpu/data/TensorImpl.hpp index 012ff5af1c15e73fe76114a23ec62f9ef023bce2..c89e980fc9c177646c1cf7ca4f87fdbd043d31e0 100644 --- a/include/aidge/backend/cpu/data/TensorImpl.hpp +++ b/include/aidge/backend/cpu/data/TensorImpl.hpp @@ -7,73 +7,77 @@ #include "aidge/utils/Types.h" namespace Aidge { -template <class T> -class TensorImpl_cpu : public TensorImpl { - private: - const Tensor &mTensor; // Impl needs to access Tensor information, but is not - // supposed to change it! - std::vector<T> mData; - - public: - static constexpr const char *Backend = "cpu"; - - TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} - - bool operator==(const TensorImpl &otherImpl) const override final { - std::size_t i = 0; - for (; i < mTensor.size() && - mData[i] == reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; - ++i) { - } - return i == mTensor.size(); +template <class T> class TensorImpl_cpu : public TensorImpl { +private: + const Tensor &mTensor; // Impl needs to access Tensor information, but is not + // supposed to change it! + std::vector<T> mData; + +public: + static constexpr const char *Backend = "cpu"; + + TensorImpl_cpu(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {} + + bool operator==(const TensorImpl &otherImpl) const override final { + std::size_t i = 0; + for (; i < mTensor.size() && + mData[i] == + reinterpret_cast<const TensorImpl_cpu<T> &>(otherImpl).data()[i]; + ++i) { } + return i == mTensor.size(); + } - static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) { - return std::make_unique<TensorImpl_cpu<T>>(tensor); - } + static std::unique_ptr<TensorImpl_cpu> create(const Tensor &tensor) { + return std::make_unique<TensorImpl_cpu<T>>(tensor); + } - // native interface - const std::vector<T> &data() const { return mData; } + // native interface + const std::vector<T> &data() const { return mData; } - std::size_t scalarSize() const override { return sizeof(T); } + std::size_t scalarSize() const override { return sizeof(T); } - void copy(const void *src, NbElts_t length) override { - std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, - static_cast<T *>(rawPtr())); - } + void copy(const void *src, NbElts_t length) override { + std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length, + static_cast<T *>(rawPtr())); + } - void *rawPtr() override { - lazyInit(mData); - return mData.data(); - }; + void *rawPtr() override { + lazyInit(mData); + return mData.data(); + }; - void* getRaw(std::size_t idx){ - return static_cast<void*>(static_cast<T *>(rawPtr()) + idx); - }; + void *getRaw(std::size_t idx) { + return static_cast<void *>(static_cast<T *>(rawPtr()) + idx); + }; - virtual ~TensorImpl_cpu() = default; + virtual ~TensorImpl_cpu() = default; - void setRawPtr(void *ptr) override final { - T *newPtr = static_cast<T *>(ptr); - mData = std::vector<T>(newPtr, newPtr + mTensor.size()); - }; + void setRawPtr(void *ptr) override final { + T *newPtr = static_cast<T *>(ptr); + mData = std::vector<T>(newPtr, newPtr + mTensor.size()); + }; - private: - void lazyInit(std::vector<T> &data) { - assert(mTensor.dataType() == NativeType<T>::type); +private: + void lazyInit(std::vector<T> &data) { + assert(mTensor.dataType() == NativeType<T>::type); - if (data.size() != mTensor.size()) data.resize(mTensor.size()); - } + if (data.size() != mTensor.size()) + data.resize(mTensor.size()); + } }; namespace { -static Registrar<Tensor> registrarTensorImpl_cpu_Float64( - {"cpu", DataType::Float64}, Aidge::TensorImpl_cpu<double>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Float32( - {"cpu", DataType::Float32}, Aidge::TensorImpl_cpu<float>::create); -static Registrar<Tensor> registrarTensorImpl_cpu_Int32( - {"cpu", DataType::Int32}, Aidge::TensorImpl_cpu<int>::create); -} // namespace -} // namespace Aidge +static Registrar<Tensor> + registrarTensorImpl_cpu_Float64({"cpu", DataType::Float64}, + Aidge::TensorImpl_cpu<double>::create); +static Registrar<Tensor> + registrarTensorImpl_cpu_Float32({"cpu", DataType::Float32}, + Aidge::TensorImpl_cpu<float>::create); +static Registrar<Tensor> + registrarTensorImpl_cpu_Int32({"cpu", DataType::Int32}, + Aidge::TensorImpl_cpu<int>::create); +} // namespace +} // namespace Aidge #endif /* AIDGE_CPU_DATA_TENSORIMPL_H_ */ diff --git a/include/aidge/backend/cpu/operator/SliceImpl.hpp b/include/aidge/backend/cpu/operator/SliceImpl.hpp index 06c7cf29638c2ef0881db111702eeba83863a9e2..1ba346f394029c3ff759539c90532818ab355e63 100644 --- a/include/aidge/backend/cpu/operator/SliceImpl.hpp +++ b/include/aidge/backend/cpu/operator/SliceImpl.hpp @@ -29,187 +29,203 @@ namespace Aidge { template <DimIdx_t DIM> class SliceImplForward_cpu : public Registrable<SliceImplForward_cpu<DIM>, std::tuple<DataType>, - void(const typename Slice_Op<DIM>::Attrs&, - const std::array<std::size_t, DIM>, - const void*, - void*)> {}; + void(const typename Slice_Op<DIM>::Attrs &, + const std::array<std::size_t, DIM>, const void *, + void *)> {}; template <DimIdx_t DIM> class SliceImplBackward_cpu : public Registrable<SliceImplBackward_cpu<DIM>, std::tuple<DataType>, - void(const typename Slice_Op<DIM>::Attrs&, - const std::array<std::size_t, DIM>, - const void*, - void*)> {}; + void(const typename Slice_Op<DIM>::Attrs &, + const std::array<std::size_t, DIM>, const void *, + void *)> {}; -template <DimIdx_t DIM> -class SliceImpl_cpu : public OperatorImpl { - public: - SliceImpl_cpu(const Slice_Op<DIM>& op) : OperatorImpl(op) {} - - static std::unique_ptr<SliceImpl_cpu<DIM>> create(const Slice_Op<DIM>& op) { - return std::make_unique<SliceImpl_cpu<DIM>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); - } - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final { return 0; } - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); - } - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final { - return mNbConsumedData[0]; - } - NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final { - return mNbProducedData[0]; - } - void updateConsummerProducer() override final { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); - } - - void forward() override { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<DIM>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<DIM>(), - std::get<1>(std::static_pointer_cast<const Slice_Op<DIM>&>(mOp).getStaticAttributes()), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); - - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); - - mNbProducedData[0] += getRequiredMemory(0, {}); - } - - void backward() override { printf("Not implemented yet.\n"); } +template <DimIdx_t DIM> class SliceImpl_cpu : public OperatorImpl { +public: + SliceImpl_cpu(const Slice_Op<DIM> &op) : OperatorImpl(op) {} + + static std::unique_ptr<SliceImpl_cpu<DIM>> create(const Slice_Op<DIM> &op) { + return std::make_unique<SliceImpl_cpu<DIM>>(op); + } + +public: + NbElts_t + getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "requires valid input"); + + // Requires the whole tensors + const auto &inputDims = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(); + + return std::accumulate(inputDims.begin(), inputDims.end(), + static_cast<NbElts_t>(1), + std::multiplies<NbElts_t>()); + } + NbElts_t + getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final { + return 0; + } + NbElts_t getRequiredMemory( + const IOIndex_t outputIdx, + const std::vector<DimSize_t> &inputsSize) const override final { + (void)outputIdx; + (void)inputsSize; + const auto &outputDims = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), + static_cast<NbElts_t>(1), + std::multiplies<NbElts_t>()); + } + NbElts_t + getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final { + return mNbConsumedData[0]; + } + NbElts_t + getNbProducedData(const IOIndex_t /*outputIdx*/) const override final { + return mNbProducedData[0]; + } + void updateConsummerProducer() override final { + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); + + mNbProducedData[0] += getRequiredMemory(0, {}); + } + + void forward() override { + // FIXME: uncomment the following code once memory handling will work + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu<DIM>>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); + + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) + ->template dims<DIM>(), + std::get<1>(std::static_pointer_cast<const Slice_Op<DIM> &>(mOp) + .getStaticAttributes()), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); + + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); + + mNbProducedData[0] += getRequiredMemory(0, {}); + } + + void backward() override { printf("Not implemented yet.\n"); } }; /******************************************************************************/ -template <> -class SliceImpl_cpu<1> : public OperatorImpl { +template <> class SliceImpl_cpu<1> : public OperatorImpl { public: - SliceImpl_cpu(const Slice_Op<1>& op) : OperatorImpl(op) {} + SliceImpl_cpu(const Slice_Op<1> &op) : OperatorImpl(op) {} - static std::unique_ptr<SliceImpl_cpu<1>> create(const Slice_Op<1>& op) { - return std::make_unique<SliceImpl_cpu<1>>(op); - } + static std::unique_ptr<SliceImpl_cpu<1>> create(const Slice_Op<1> &op) { + return std::make_unique<SliceImpl_cpu<1>>(op); + } public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward() override; - void backward() override; + NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t + getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getRequiredMemory( + const IOIndex_t outputIdx, + const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; + + void forward() override; + void backward() override; }; /******************************************************************************/ -template <> -class SliceImpl_cpu<2> : public OperatorImpl { - public: - SliceImpl_cpu(const Slice_Op<2>& op) : OperatorImpl(op) {} - - static std::unique_ptr<SliceImpl_cpu<2>> create(const Slice_Op<2>& op) { - return std::make_unique<SliceImpl_cpu<2>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward() override; - void backward() override; +template <> class SliceImpl_cpu<2> : public OperatorImpl { +public: + SliceImpl_cpu(const Slice_Op<2> &op) : OperatorImpl(op) {} + + static std::unique_ptr<SliceImpl_cpu<2>> create(const Slice_Op<2> &op) { + return std::make_unique<SliceImpl_cpu<2>>(op); + } + +public: + NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t + getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getRequiredMemory( + const IOIndex_t outputIdx, + const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; + + void forward() override; + void backward() override; }; /******************************************************************************/ -template <> -class SliceImpl_cpu<3> : public OperatorImpl { - public: - SliceImpl_cpu(const Slice_Op<3>& op) : OperatorImpl(op) {} - - static std::unique_ptr<SliceImpl_cpu<3>> create(const Slice_Op<3>& op) { - return std::make_unique<SliceImpl_cpu<3>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward() override; - void backward() override; +template <> class SliceImpl_cpu<3> : public OperatorImpl { +public: + SliceImpl_cpu(const Slice_Op<3> &op) : OperatorImpl(op) {} + + static std::unique_ptr<SliceImpl_cpu<3>> create(const Slice_Op<3> &op) { + return std::make_unique<SliceImpl_cpu<3>>(op); + } + +public: + NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t + getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getRequiredMemory( + const IOIndex_t outputIdx, + const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; + + void forward() override; + void backward() override; }; /******************************************************************************/ -template <> -class SliceImpl_cpu<4> : public OperatorImpl { - public: - SliceImpl_cpu(const Slice_Op<4>& op) : OperatorImpl(op) {} - - static std::unique_ptr<SliceImpl_cpu<4>> create(const Slice_Op<4>& op) { - return std::make_unique<SliceImpl_cpu<4>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, - const std::vector<DimSize_t>& inputsSize) const override final; - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; - void updateConsummerProducer() override final; - - void forward() override; - void backward() override; -}; +template <> class SliceImpl_cpu<4> : public OperatorImpl { +public: + SliceImpl_cpu(const Slice_Op<4> &op) : OperatorImpl(op) {} + static std::unique_ptr<SliceImpl_cpu<4>> create(const Slice_Op<4> &op) { + return std::make_unique<SliceImpl_cpu<4>>(op); + } +public: + NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t + getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getRequiredMemory( + const IOIndex_t outputIdx, + const std::vector<DimSize_t> &inputsSize) const override final; + NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; + NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; + + void forward() override; + void backward() override; +}; namespace { -static Registrar<Slice_Op<1>> registrarSliceImpl_1D_cpu("cpu", Aidge::SliceImpl_cpu<1>::create); -static Registrar<Slice_Op<2>> registrarSliceImpl_2D_cpu("cpu", Aidge::SliceImpl_cpu<2>::create); -static Registrar<Slice_Op<3>> registrarSliceImpl_3D_cpu("cpu", Aidge::SliceImpl_cpu<3>::create); -static Registrar<Slice_Op<4>> registrarSliceImpl_4D_cpu("cpu", Aidge::SliceImpl_cpu<4>::create); -} // namespace -} // namespace Aidge +static Registrar<Slice_Op<1>> + registrarSliceImpl_1D_cpu("cpu", Aidge::SliceImpl_cpu<1>::create); +static Registrar<Slice_Op<2>> + registrarSliceImpl_2D_cpu("cpu", Aidge::SliceImpl_cpu<2>::create); +static Registrar<Slice_Op<3>> + registrarSliceImpl_3D_cpu("cpu", Aidge::SliceImpl_cpu<3>::create); +static Registrar<Slice_Op<4>> + registrarSliceImpl_4D_cpu("cpu", Aidge::SliceImpl_cpu<4>::create); +} // namespace +} // namespace Aidge #endif /* AIDGE_CPU_OPERATOR_LEAKYRELUIMPL_H_ */ diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index 851aaa5c6bcd1acc3e8bc17b11dd00143c543b5b..24301fe1619afd93e5841781f224574b35f3fc44 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -13,71 +13,87 @@ #include <numeric> // std::accumulate #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getRawInput(inputIdx) && "requires valid input"); +Aidge::NbElts_t +Aidge::AddImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { + assert(mOp.getRawInput(inputIdx) && "requires valid input"); - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims(); - return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); + // Requires the whole tensors + const auto &inputDims = + std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims(); + return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), + std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! - return 0; +Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // for the direct convolution algorithm, convolutions can be in-place, if + // there is no padding! + return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); +Aidge::NbElts_t Aidge::AddImpl_cpu::getRequiredMemory( + const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const { + // Requires the whole tensors, regardless of available data on inputs + assert(outputIdx == 0 && "operator has only one output"); + (void)outputIdx; + + const auto &outputDims = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), + std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const { - assert(inputIdx < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; +Aidge::NbElts_t +Aidge::AddImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const { + assert(inputIdx < mNbConsumedData.size()); + return mNbConsumedData[inputIdx]; } -Aidge::NbElts_t Aidge::AddImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const { - assert(outputIdx < mNbProducedData.size()); - return mNbProducedData[outputIdx]; +Aidge::NbElts_t +Aidge::AddImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const { + assert(outputIdx < mNbProducedData.size()); + return mNbProducedData[outputIdx]; } -void Aidge::AddImpl_cpu::updateConsummerProducer() { - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); +void Aidge::AddImpl_cpu::updateConsummerProducer() { + for (IOIndex_t inputIdx = 0; + static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += + getNbRequiredData(inputIdx); // each input is consumed by the minimum + // amount for a forward pass + mNbProducedData[0] += getRequiredMemory(0, {}); } -void Aidge::AddImpl_cpu::forward() { - assert(mOp.getRawInput(0) && "missing input in Add operator"); - DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); - for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) { - assert(mOp.getRawInput(i) && "missing input in Add operator"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput); - } - - auto kernelFunc = Registrar<AddImplForward_cpu>::create({ - datatypeFirstInput, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - std::vector<const void*> opInputs; - for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { - opInputs.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->getImpl()->rawPtr()); - } - - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - opInputs, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); +void Aidge::AddImpl_cpu::forward() { + assert(mOp.getRawInput(0) && "missing input in Add operator"); + DataType datatypeFirstInput = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); + for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) { + assert(mOp.getRawInput(i) && "missing input in Add operator"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == + datatypeFirstInput); + } + + auto kernelFunc = Registrar<AddImplForward_cpu>::create( + {datatypeFirstInput, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + std::vector<const void *> opInputs; + for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { + opInputs.push_back(getCPUPtr(mOp.getRawInput(i))); + } + + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + opInputs, getCPUPtr(mOp.getRawOutput(0))); } \ No newline at end of file diff --git a/src/operator/AvgPoolingImpl.cpp b/src/operator/AvgPoolingImpl.cpp index ad236f004ec9d806d43b9549adcec1c094573a1d..0c59dd96b514eb2240746b8a5f42d765c16c4a94 100644 --- a/src/operator/AvgPoolingImpl.cpp +++ b/src/operator/AvgPoolingImpl.cpp @@ -14,27 +14,30 @@ #include <thread> #include <vector> -#include "aidge/utils/Types.h" #include "aidge/operator/AvgPooling.hpp" +#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/AvgPoolingImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::AvgPoolingImpl2D_cpu::getNbRequiredProtected( + IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::AvgPoolingImpl2D_cpu::forward() { - assert(mOp.getRawInput(0) && "missing input #0"); + assert(mOp.getRawInput(0) && "missing input #0"); - // Find the correct kernel type - auto kernelFunc = - Registrar<AvgPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<AvgPoolingImpl2DForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const AvgPooling_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc( + dynamic_cast<const AvgPooling_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/BatchNormImpl.cpp b/src/operator/BatchNormImpl.cpp index 4cfd4b1bef0a027dfd28c95235dd864101ced3c6..c2bdbd8170aab5b8d808bf45b89a536841ac3757 100644 --- a/src/operator/BatchNormImpl.cpp +++ b/src/operator/BatchNormImpl.cpp @@ -13,39 +13,38 @@ #include <numeric> // std::accumulate #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/BatchNorm.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::BatchNormImpl2D_cpu::getNbRequiredProtected( + IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::BatchNormImpl2D_cpu::forward() { - assert(mOp.getRawInput(0) && "missing input #0"); - assert(mOp.getRawInput(1) && "missing input #1"); - assert(mOp.getRawInput(2) && "missing input #2"); - assert(mOp.getRawInput(3) && "missing input #3"); - assert(mOp.getRawInput(4) && "missing input #4"); + assert(mOp.getRawInput(0) && "missing input #0"); + assert(mOp.getRawInput(1) && "missing input #1"); + assert(mOp.getRawInput(2) && "missing input #2"); + assert(mOp.getRawInput(3) && "missing input #3"); + assert(mOp.getRawInput(4) && "missing input #4"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims() == 4); - // Find the correct kernel type - auto kernelFunc = - Registrar<BatchNormImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + assert(std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->nbDims() == 4); + // Find the correct kernel type + auto kernelFunc = Registrar<BatchNormImpl2DForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const BatchNorm_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(3))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(4))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr(), - true); + // Call kernel + kernelFunc( + dynamic_cast<const BatchNorm_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawInput(2)), getCPUPtr(mOp.getRawInput(3)), + getCPUPtr(mOp.getRawInput(4)), getCPUPtr(mOp.getRawOutput(0)), true); } diff --git a/src/operator/ConcatImpl.cpp b/src/operator/ConcatImpl.cpp index d46054480bab433f3493ffac7fbff48f27f2b570..3d6ca04454122a85c049d965e3607fc84af0df43 100644 --- a/src/operator/ConcatImpl.cpp +++ b/src/operator/ConcatImpl.cpp @@ -13,77 +13,96 @@ #include <numeric> // std::accumulate #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/data/Data.hpp" #include "aidge/data/Tensor.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/ConcatImpl.hpp" #include "aidge/backend/cpu/operator/ConcatImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getRawInput(inputIdx) && "requires valid input"); +Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredData( + const Aidge::IOIndex_t inputIdx) const { + assert(mOp.getRawInput(inputIdx) && "requires valid input"); - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims(); - return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); + // Requires the whole tensors + const auto &inputDims = + std::static_pointer_cast<Tensor>(mOp.getRawInput(inputIdx))->dims(); + return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), + std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! - return 0; +Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // for the direct convolution algorithm, convolutions can be in-place, if + // there is no padding! + return 0; } -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); +Aidge::NbElts_t Aidge::ConcatImpl_cpu::getRequiredMemory( + const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const { + // Requires the whole tensors, regardless of available data on inputs + assert(outputIdx == 0 && "operator has only one output"); + (void)outputIdx; + + const auto &outputDims = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), + std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const { - assert(inputIdx < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; +Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbConsumedData( + const Aidge::IOIndex_t inputIdx) const { + assert(inputIdx < mNbConsumedData.size()); + return mNbConsumedData[inputIdx]; } -Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbProducedData(const Aidge::IOIndex_t outputIdx) const { - assert(outputIdx < mNbProducedData.size()); - return mNbProducedData[outputIdx]; +Aidge::NbElts_t Aidge::ConcatImpl_cpu::getNbProducedData( + const Aidge::IOIndex_t outputIdx) const { + assert(outputIdx < mNbProducedData.size()); + return mNbProducedData[outputIdx]; } -void Aidge::ConcatImpl_cpu::updateConsummerProducer() { - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); +void Aidge::ConcatImpl_cpu::updateConsummerProducer() { + for (IOIndex_t inputIdx = 0; + static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx] += + getNbRequiredData(inputIdx); // each input is consumed by the minimum + // amount for a forward pass + mNbProducedData[0] += getRequiredMemory(0, {}); } -void Aidge::ConcatImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input in Concat operator"); - DataType datatypeFirstInput = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); - for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i)) && "missing input in Concat operator"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == datatypeFirstInput); - } - - auto kernelFunc = Registrar<ConcatImplForward_cpu>::create({ - datatypeFirstInput, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - std::vector<const void*> opInputs; - std::vector<DimSize_t> opInputAxis; - for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { - opInputs.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->getImpl()->rawPtr()); - opInputAxis.push_back(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dims()[dynamic_cast<const Concat_Op&>(mOp).template getAttr<DimSize_t>("Axis")]); - } - - kernelFunc(dynamic_cast<const Concat_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), - opInputAxis, - opInputs, - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); +void Aidge::ConcatImpl_cpu::forward() { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input in Concat operator"); + DataType datatypeFirstInput = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(); + for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i)) && + "missing input in Concat operator"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(i))->dataType() == + datatypeFirstInput); + } + + auto kernelFunc = Registrar<ConcatImplForward_cpu>::create( + {datatypeFirstInput, + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + std::vector<const void *> opInputs; + std::vector<DimSize_t> opInputAxis; + for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { + opInputs.push_back(getCPUPtr(mOp.getRawInput(i))); + opInputAxis.push_back( + std::static_pointer_cast<Tensor>(mOp.getRawInput(i)) + ->dims()[dynamic_cast<const Concat_Op &>(mOp) + .template getAttr<DimSize_t>("Axis")]); + } + + kernelFunc(dynamic_cast<const Concat_Op &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims(), + opInputAxis, opInputs, getCPUPtr(mOp.getRawOutput(0))); } -void Aidge::ConcatImpl_cpu::backward() { printf("Not implemented yet.\n"); } \ No newline at end of file +void Aidge::ConcatImpl_cpu::backward() { printf("Not implemented yet.\n"); } \ No newline at end of file diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp index 4a722a5e4b00412617eb998f8cbfb36eb0c46035..89c95963163ee79b39f9d0483037b2a9ba6bcfc0 100644 --- a/src/operator/ConvDepthWiseImpl.cpp +++ b/src/operator/ConvDepthWiseImpl.cpp @@ -15,35 +15,39 @@ #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/ConvDepthWise.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected( + IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::ConvDepthWiseImpl2D_cpu::forward() { - assert(mOp.getRawInput(0) && "missing input #0"); - assert(mOp.getRawInput(1) && "missing input #1"); - assert(mOp.getRawInput(2) && "missing input #2"); - - assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() == 4) && "support for 4-dimensions tensors only"); - - // Find the correct kernel type - auto kernelFunc = - Registrar<ConvDepthWiseImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const ConvDepthWise_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + assert(mOp.getRawInput(0) && "missing input #0"); + assert(mOp.getRawInput(1) && "missing input #1"); + assert(mOp.getRawInput(2) && "missing input #2"); + + assert( + (std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() == 4) && + "support for 4-dimensions tensors only"); + + // Find the correct kernel type + auto kernelFunc = Registrar<ConvDepthWiseImpl2DForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + kernelFunc( + dynamic_cast<const ConvDepthWise_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawInput(2)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp index 87b54afbfd0b4c2d3bb57812d07575bc0e255626..029f4dd03695729bf21c72f21528c99f84ce7ef2 100644 --- a/src/operator/ConvImpl.cpp +++ b/src/operator/ConvImpl.cpp @@ -15,32 +15,36 @@ #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Conv.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t +Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::ConvImpl2D_cpu::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(mOp.getRawInput(0) && "missing input #0"); - assert(mOp.getRawInput(1) && "missing input #1"); - assert(mOp.getRawInput(2) && "missing input #2"); + // FIXME: uncomment the following code once memory handling will work + assert(mOp.getRawInput(0) && "missing input #0"); + assert(mOp.getRawInput(1) && "missing input #1"); + assert(mOp.getRawInput(2) && "missing input #2"); - // Find the correct kernel type - auto kernelFunc = - Registrar<ConvImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<ConvImpl2DForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const Conv_Op<2>&>(mOp).getStaticAttributes(), std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc( + dynamic_cast<const Conv_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawInput(2)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/DivImpl.cpp b/src/operator/DivImpl.cpp index 2e913df1a4c42a2c6132a4096a92c1ab0eeab0c0..c8cd75ae90f012c1bf7defdd1ccac31d16ebf057 100644 --- a/src/operator/DivImpl.cpp +++ b/src/operator/DivImpl.cpp @@ -15,28 +15,34 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Div.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/DivImpl.hpp" #include "aidge/backend/cpu/operator/DivImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::DivImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::DivImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::DivImpl_cpu::forward() { - // Find the correct kernel type - auto kernelFunc = Registrar<DivImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<DivImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)))->size(), - std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>( + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))) + ->size(), + std::static_pointer_cast<Tensor>( + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))) + ->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/FCImpl.cpp b/src/operator/FCImpl.cpp index 1e5450d330ee89bdceb30aca846800d7764ca911..89d295e622b352f3b300d034f6ffbd300aa8d672 100644 --- a/src/operator/FCImpl.cpp +++ b/src/operator/FCImpl.cpp @@ -15,42 +15,43 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/FC.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/FCImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl_forward_kernels.hpp" -void Aidge::FCImpl_cpu::forward() -{ - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && "missing input #2"); +void Aidge::FCImpl_cpu::forward() { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && + "missing input #1"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(2)) && + "missing input #2"); - // Find the correct kernel type - auto kernelFunc = Registrar<FCImplForward_cpu>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<FCImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - // if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) { - // kernelFunc( - // mOp.getStaticAttributes(), - // std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - // std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->getImpl()->rawPtr(), - // mOp.mInputs[1]->getImpl()->rawPtr(), - // mOp.mInputs[2]->getImpl()->rawPtr(), - // mOp.getOutput(0)->getImpl()->rawPtr()); - // } - // else - kernelFunc( - dynamic_cast<const FC_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(2))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + // if (std::static_pointer_cast<Tensor>(mOp.getRawInput(0)->nbDims() == 4) { + // kernelFunc( + // mOp.getStaticAttributes(), + // std::static_pointer_cast<Tensor>(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template + // dims<4>(), getCPUPtr(mOp.getRawInput(0), + // mOp.mInputs[1]->getImpl()->rawPtr(), + // mOp.mInputs[2]->getImpl()->rawPtr(), + // mOp.getOutput(0)->getImpl()->rawPtr()); + // } + // else + kernelFunc(dynamic_cast<const FC_Op &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawInput(2)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/LeakyReLUImpl.cpp b/src/operator/LeakyReLUImpl.cpp index 9ea0f2400d954bf5fd25428a7bf46fbd188b4994..e8f709de5f84345d5d544d9c41e1124f4d5bb710 100644 --- a/src/operator/LeakyReLUImpl.cpp +++ b/src/operator/LeakyReLUImpl.cpp @@ -15,28 +15,31 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/LeakyReLU.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/LeakyReLUImpl.hpp" #include "aidge/backend/cpu/operator/LeakyReLUImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::LeakyReLUImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::LeakyReLUImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<LeakyReLUImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const LeakyReLU_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<LeakyReLUImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + kernelFunc(dynamic_cast<const LeakyReLU_Op &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/MatMulImpl.cpp b/src/operator/MatMulImpl.cpp index 0ad9bd6ceb4ac27c35e72e21477864980bcecfd9..1482a94bee83380b792a391e22b2c3f0c8cc07c5 100644 --- a/src/operator/MatMulImpl.cpp +++ b/src/operator/MatMulImpl.cpp @@ -15,41 +15,40 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/MatMul.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/MatMulImpl.hpp" #include "aidge/backend/cpu/operator/MatMulImpl_forward_kernels.hpp" -void Aidge::MatMulImpl_cpu::forward() -{ - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && "missing input #1"); - - // Find the correct kernel type - auto kernelFunc = Registrar<MatMulImplForward_cpu>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - // Call kernel - // if (mOp.getInput(0)->nbDims() == 4) { - // kernelFunc( - // mOp.getStaticAttributes(), - // std::static_pointer_cast<Tensor>(mOp.getInput(0))->template dims<4>(), - // mOp.getInput(0))->getImpl()->rawPtr(), - // mOp.mInputs[1]->getImpl()->rawPtr(), - // mOp.mInputs[2]->getImpl()->rawPtr(), - // std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)->getImpl()->rawPtr()); - // } - // else - kernelFunc( - dynamic_cast<const MatMul_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); - - +void Aidge::MatMulImpl_cpu::forward() { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(1)) && + "missing input #1"); + + // Find the correct kernel type + auto kernelFunc = Registrar<MatMulImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + // if (mOp.getInput(0)->nbDims() == 4) { + // kernelFunc( + // mOp.getStaticAttributes(), + // std::static_pointer_cast<Tensor>(mOp.getInput(0))->template + // dims<4>(), mOp.getInput(0))->getImpl()->rawPtr(), + // mOp.mInputs[1]->getImpl()->rawPtr(), + // mOp.mInputs[2]->getImpl()->rawPtr(), + // getCPUPtr(mOp.getRawOutput(0)); + // } + // else + kernelFunc(dynamic_cast<const MatMul_Op &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0], + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/MaxPoolingImpl.cpp b/src/operator/MaxPoolingImpl.cpp index 00a279707424e0fce4eb0af07cdade80fe2dffd9..7e614c6cfdae5d0e723fe41b1e6ec281bba6ace3 100644 --- a/src/operator/MaxPoolingImpl.cpp +++ b/src/operator/MaxPoolingImpl.cpp @@ -14,27 +14,31 @@ #include <thread> #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/MaxPooling.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::MaxPoolingImpl2D_cpu::getNbRequiredProtected( + IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::MaxPoolingImpl2D_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = - Registrar<MaxPoolingImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<MaxPoolingImpl2DForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const MaxPooling_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc( + dynamic_cast<const MaxPooling_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/MulImpl.cpp b/src/operator/MulImpl.cpp index dfd33445a527459245137f4ca6e6d5e8f416d82c..a53d2d903dbc1ed7d851245fc694f026bbde92e6 100644 --- a/src/operator/MulImpl.cpp +++ b/src/operator/MulImpl.cpp @@ -15,28 +15,30 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Mul.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/MulImpl.hpp" #include "aidge/backend/cpu/operator/MulImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::MulImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::MulImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::MulImpl_cpu::forward() { - // Find the correct kernel type - auto kernelFunc = Registrar<MulImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<MulImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp index a5bf1c52ba39a2f805f2b28562cbab19191ac754..e0c7d928e9f0f58a2e11b6c766314463f38a43b0 100644 --- a/src/operator/PadImpl.cpp +++ b/src/operator/PadImpl.cpp @@ -15,33 +15,40 @@ #include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/utils/Types.h" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Conv.hpp" +#include "aidge/utils/Types.h" #include "aidge/backend/cpu/operator/PadImpl.hpp" #include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const { - assert(inputIdx == 0 && "operator has only one input"); - (void) inputIdx; - - // Padding cannot be in-place! - // We must ensure that we do not override data that has not been consummed yet. - const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(); - const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(); - return (outputSize - inputSize); +Aidge::NbElts_t +Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const { + assert(inputIdx == 0 && "operator has only one input"); + (void)inputIdx; + + // Padding cannot be in-place! + // We must ensure that we do not override data that has not been consummed + // yet. + const auto inputSize = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(); + const auto outputSize = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size(); + return (outputSize - inputSize); } void Aidge::PadImpl2D_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = - Registrar<PadImpl2DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const Pad_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<PadImpl2DForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + kernelFunc( + dynamic_cast<const Pad_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/PowImpl.cpp b/src/operator/PowImpl.cpp index 30fafa9b3d060eada9468fc9a2bf94ec6aeab3c0..443d71d3a4f5c999a71ad9314258e2556bbb2e9e 100644 --- a/src/operator/PowImpl.cpp +++ b/src/operator/PowImpl.cpp @@ -15,28 +15,30 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Pow.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/PowImpl.hpp" #include "aidge/backend/cpu/operator/PowImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::PowImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::PowImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::PowImpl_cpu::forward() { - // Find the correct kernel type - auto kernelFunc = Registrar<PowImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<PowImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/ReLUImpl.cpp b/src/operator/ReLUImpl.cpp index 2819bb864cb552adfcd1c52e57b734fa0a9d0ce1..7eff3a5c39f8bb19b01b29472ad4549cab31dd49 100644 --- a/src/operator/ReLUImpl.cpp +++ b/src/operator/ReLUImpl.cpp @@ -15,27 +15,30 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/ReLU.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/ReLUImpl.hpp" #include "aidge/backend/cpu/operator/ReLUImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::ReLUImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::ReLUImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = Registrar<ReLUImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<ReLUImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/ScalingImpl.cpp b/src/operator/ScalingImpl.cpp index c2d2b17245811ab20de310e4656bc67b1a28b257..06a9b2708c7489cc96c1549a712030d54be2a302 100644 --- a/src/operator/ScalingImpl.cpp +++ b/src/operator/ScalingImpl.cpp @@ -10,32 +10,35 @@ ********************************************************************************/ #include <cassert> -#include <numeric> // std::accumulate #include <functional> // std::multiplies +#include <numeric> // std::accumulate +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Scaling.hpp" + #include "aidge/backend/cpu/operator/ScalingImpl.hpp" #include "aidge/backend/cpu/operator/ScalingImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" #include <vector> -Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::ScalingImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::ScalingImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<ScalingImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - // Call kernel - kernelFunc(dynamic_cast<const Scaling_Op&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<ScalingImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + kernelFunc(dynamic_cast<const Scaling_Op &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/SliceImpl.cpp b/src/operator/SliceImpl.cpp index 3ae56e1a4f613a4188dc51659853e07674e74768..228edb323ccd09448f5b3e6b12423b99605c0e0c 100644 --- a/src/operator/SliceImpl.cpp +++ b/src/operator/SliceImpl.cpp @@ -10,249 +10,290 @@ ********************************************************************************/ #include <cassert> -#include <numeric> // std::accumulate #include <functional> // std::multiplies +#include <numeric> // std::accumulate #include "aidge/operator/Slice.hpp" +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/backend/cpu/operator/SliceImpl.hpp" #include "aidge/backend/cpu/operator/SliceImpl_forward_kernels.hpp" #include "aidge/utils/Types.h" -#include <vector> #include <cassert> #include <tuple> +#include <vector> -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); +Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredData( + const Aidge::IOIndex_t /*inputIdx*/) const { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "requires valid input"); - // Requires the whole tensors - return std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>()[0]; + // Requires the whole tensors + return std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) + ->template dims<1>()[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } +Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + return 0; +} -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - return std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<1>()[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getRequiredMemory( + const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &inputsSize) const { + (void)outputIdx; + (void)inputsSize; + return std::static_pointer_cast<Tensor>(mOp.getRawOutput(0)) + ->template dims<1>()[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbConsumedData( + const Aidge::IOIndex_t /*inputIdx*/) const { + return mNbConsumedData[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<1>::getNbProducedData( + const Aidge::IOIndex_t /*outputIdx*/) const { + return mNbProducedData[0]; } void Aidge::SliceImpl_cpu<1>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<1>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + // FIXME: uncomment the following code once memory handling will work + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<1>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu<1>>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<1>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); + // Call kernel + kernelFunc( + dynamic_cast<const Slice_Op<1> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<1>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<1>::backward() { printf("Not implemented yet.\n"); } ///////////////////////////////////////////////////////////////////////// -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); +Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredData( + const Aidge::IOIndex_t /*inputIdx*/) const { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "requires valid input"); - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(); - return inputDims[0]*inputDims[1]; + // Requires the whole tensors + const auto &inputDims = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(); + return inputDims[0] * inputDims[1]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } +Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + return 0; +} -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<2>(); - return outputDims[0]*outputDims[1]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getRequiredMemory( + const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &inputsSize) const { + (void)outputIdx; + (void)inputsSize; + const auto &outputDims = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<2>(); + return outputDims[0] * outputDims[1]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbConsumedData( + const Aidge::IOIndex_t /*inputIdx*/) const { + return mNbConsumedData[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<2>::getNbProducedData( + const Aidge::IOIndex_t /*outputIdx*/) const { + return mNbProducedData[0]; } void Aidge::SliceImpl_cpu<2>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<2>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + // FIXME: uncomment the following code once memory handling will work + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<2>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu<2>>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<2>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); + // Call kernel + kernelFunc( + dynamic_cast<const Slice_Op<2> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<2>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<2>::backward() { printf("Not implemented yet.\n"); } //////////////////////////////////////////////////////////////////////////// -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); +Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredData( + const Aidge::IOIndex_t /*inputIdx*/) const { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "requires valid input"); - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(); + // Requires the whole tensors + const auto &inputDims = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(); - return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); + return std::accumulate(inputDims.begin(), inputDims.end(), + static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } +Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + return 0; +} -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<3>(); - return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); +Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getRequiredMemory( + const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &inputsSize) const { + (void)outputIdx; + (void)inputsSize; + const auto &outputDims = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<3>(); + return std::accumulate(outputDims.begin(), outputDims.end(), + static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbConsumedData( + const Aidge::IOIndex_t /*inputIdx*/) const { + return mNbConsumedData[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<3>::getNbProducedData( + const Aidge::IOIndex_t /*outputIdx*/) const { + return mNbProducedData[0]; } void Aidge::SliceImpl_cpu<3>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<3>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + // FIXME: uncomment the following code once memory handling will work + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<3>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu<3>>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<3>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); + // Call kernel + kernelFunc( + dynamic_cast<const Slice_Op<3> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<3>::backward() { printf("Not implemented yet.\n"); } ////////////////////////////////////////////////////////////////////////////// -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredData(const Aidge::IOIndex_t /*inputIdx*/) const { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "requires valid input"); +Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredData( + const Aidge::IOIndex_t /*inputIdx*/) const { + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "requires valid input"); - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(); + // Requires the whole tensors + const auto &inputDims = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(); - return std::accumulate(inputDims.begin(), inputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); + return std::accumulate(inputDims.begin(), inputDims.end(), + static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { return 0; } +Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + return 0; +} -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, - const std::vector<Aidge::DimSize_t>& inputsSize) const { - (void)outputIdx; - (void)inputsSize; - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<4>(); - return std::accumulate(outputDims.begin(), outputDims.end(), static_cast<NbElts_t>(1), - std::multiplies<NbElts_t>()); +Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getRequiredMemory( + const Aidge::IOIndex_t outputIdx, + const std::vector<Aidge::DimSize_t> &inputsSize) const { + (void)outputIdx; + (void)inputsSize; + const auto &outputDims = + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->template dims<4>(); + return std::accumulate(outputDims.begin(), outputDims.end(), + static_cast<NbElts_t>(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbConsumedData(const Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbConsumedData( + const Aidge::IOIndex_t /*inputIdx*/) const { + return mNbConsumedData[0]; } -Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbProducedData(const Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; +Aidge::NbElts_t Aidge::SliceImpl_cpu<4>::getNbProducedData( + const Aidge::IOIndex_t /*outputIdx*/) const { + return mNbProducedData[0]; } void Aidge::SliceImpl_cpu<4>::updateConsummerProducer() { - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<4>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + // FIXME: uncomment the following code once memory handling will work + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = Registrar<SliceImplForward_cpu<4>>::create( - {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<SliceImplForward_cpu<4>>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType()}); - // Call kernel - kernelFunc(dynamic_cast<const Slice_Op<4>&>(mOp).getStaticAttributes(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr() - ); + // Call kernel + kernelFunc( + dynamic_cast<const Slice_Op<4> &>(mOp).getStaticAttributes(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<4>(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); - // each input is consumed by the minimum amount for a forward pass - mNbConsumedData[0] += getNbRequiredData(0); + // each input is consumed by the minimum amount for a forward pass + mNbConsumedData[0] += getNbRequiredData(0); - mNbProducedData[0] += getRequiredMemory(0, {}); + mNbProducedData[0] += getRequiredMemory(0, {}); } void Aidge::SliceImpl_cpu<4>::backward() { printf("Not implemented yet.\n"); } \ No newline at end of file diff --git a/src/operator/SoftmaxImpl.cpp b/src/operator/SoftmaxImpl.cpp index 1ce9564a19e9dc4b992705d3604a820065b1f725..e4c8486d72c144c81645b3919adbb526637dadbd 100644 --- a/src/operator/SoftmaxImpl.cpp +++ b/src/operator/SoftmaxImpl.cpp @@ -15,33 +15,38 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Softmax.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/SoftmaxImpl.hpp" #include "aidge/backend/cpu/operator/SoftmaxImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::SoftmaxImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::SoftmaxImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims()>1); - - // Find the correct kernel type - auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - DimSize_t batchSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0]; - DimSize_t channelSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[1]; - DimSize_t featureSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1()/channelSize; - // Call kernel - kernelFunc(batchSize, - channelSize, - featureSize, - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() > 1); + + // Find the correct kernel type + auto kernelFunc = Registrar<SoftmaxImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + DimSize_t batchSize = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[0]; + DimSize_t channelSize = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dims()[1]; + DimSize_t featureSize = + std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->sizeM1() / + channelSize; + // Call kernel + kernelFunc(batchSize, channelSize, featureSize, getCPUPtr(mOp.getRawInput(0)), + getCPUPtr(mOp.getRawOutput(0))); } diff --git a/src/operator/SqrtImpl.cpp b/src/operator/SqrtImpl.cpp index b9a01ff9146921b55dede775e1e3f61deed21a8e..af67f0763403df5f84aa19bdaccfb2458496d942 100644 --- a/src/operator/SqrtImpl.cpp +++ b/src/operator/SqrtImpl.cpp @@ -15,27 +15,30 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Sqrt.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/SqrtImpl.hpp" #include "aidge/backend/cpu/operator/SqrtImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::SqrtImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::SqrtImpl_cpu::forward() { - assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0"); + assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && + "missing input #0"); - // Find the correct kernel type - auto kernelFunc = Registrar<SqrtImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + // Find the correct kernel type + auto kernelFunc = Registrar<SqrtImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawOutput(0))); } \ No newline at end of file diff --git a/src/operator/SubImpl.cpp b/src/operator/SubImpl.cpp index 7d33d935f8aed4eef741ef81e316f387ad676abe..38f89d570968c92ad6e4a73f9a148546b8a40a2e 100644 --- a/src/operator/SubImpl.cpp +++ b/src/operator/SubImpl.cpp @@ -15,29 +15,31 @@ #include <thread> // std::this_thread::sleep_for #include <vector> +#include "aidge/backend/cpu/data/GetCPUPtr.h" #include "aidge/operator/Sub.hpp" #include "aidge/utils/Types.h" + #include "aidge/backend/cpu/operator/SubImpl.hpp" #include "aidge/backend/cpu/operator/SubImpl_forward_kernels.hpp" -Aidge::NbElts_t Aidge::SubImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // this implementation can be in-place - return 0; +Aidge::NbElts_t Aidge::SubImpl_cpu::getNbRequiredProtected( + const Aidge::IOIndex_t /*inputIdx*/) const { + // this implementation can be in-place + return 0; } void Aidge::SubImpl_cpu::forward() { - // Find the correct kernel type - auto kernelFunc = Registrar<SubImplForward_cpu>::create({ - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); - - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->size(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->getImpl()->rawPtr(), - std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->getImpl()->rawPtr()); + // Find the correct kernel type + auto kernelFunc = Registrar<SubImplForward_cpu>::create( + {std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->dataType(), + std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()}); + + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size(), + std::static_pointer_cast<Tensor>(mOp.getRawInput(1))->size(), + getCPUPtr(mOp.getRawInput(0)), getCPUPtr(mOp.getRawInput(1)), + getCPUPtr(mOp.getRawOutput(0))); }