From c295836dc02d6500d0ef173265416aee48d2c97c Mon Sep 17 00:00:00 2001 From: NAUD Maxence <maxence.naud@cea.fr> Date: Thu, 19 Oct 2023 08:37:16 +0000 Subject: [PATCH] [Upd] Add Operator to support an undefined number of inputs. Remove templates --- include/aidge/backend/cpu.hpp | 2 +- .../aidge/backend/cpu/operator/AddImpl.hpp | 176 ++-------------- .../cpu/operator/AddImpl_forward_kernels.hpp | 74 ++----- src/operator/AddImpl.cpp | 199 +++--------------- unit_tests/operator/Test_AddImpl.cpp | 8 +- unit_tests/scheduler/Test_Scheduler.cpp | 2 +- 6 files changed, 73 insertions(+), 388 deletions(-) diff --git a/include/aidge/backend/cpu.hpp b/include/aidge/backend/cpu.hpp index 4e0b4ab2..82a087f7 100644 --- a/include/aidge/backend/cpu.hpp +++ b/include/aidge/backend/cpu.hpp @@ -17,7 +17,7 @@ #include "aidge/backend/cpu/operator/AvgPoolingImpl.hpp" #include "aidge/backend/cpu/operator/MaxPoolingImpl.hpp" #include "aidge/backend/cpu/operator/BatchNormImpl.hpp" -#include "aidge/backend/cpu/operator/ConcatImpl.hpp.hpp" +// #include "aidge/backend/cpu/operator/ConcatImpl.hpp.hpp" #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp" #include "aidge/backend/cpu/operator/ConvImpl.hpp" #include "aidge/backend/cpu/operator/FCImpl.hpp" diff --git a/include/aidge/backend/cpu/operator/AddImpl.hpp b/include/aidge/backend/cpu/operator/AddImpl.hpp index 99c83e2c..d4019cca 100644 --- a/include/aidge/backend/cpu/operator/AddImpl.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl.hpp @@ -23,177 +23,37 @@ namespace Aidge { // class Add_Op<2>; // compute kernel registry for forward and backward -template <DimIdx_t NUM> -class AddImplForward_cpu; -template <DimIdx_t NUM> -class AddImplBackward_cpu; - -template <> -class AddImplForward_cpu<1> - : public Registrable<AddImplForward_cpu<1>, std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {}; -template <> -class AddImplBackward_cpu<1> - : public Registrable<AddImplBackward_cpu<1>, std::tuple<DataType, 
DataType>, void(const std::size_t, const void*, void*)> {}; - -template <> -class AddImplForward_cpu<2> : public Registrable<AddImplForward_cpu<2>, std::tuple<DataType, DataType, DataType>, - void(const std::size_t, const void*, const void*, void*)> {}; -template <> -class AddImplBackward_cpu<2> : public Registrable<AddImplBackward_cpu<2>, std::tuple<DataType, DataType, DataType>, - void(const std::size_t, const void*, const void*, void*)> {}; - -template <> -class AddImplForward_cpu<3> : public Registrable<AddImplForward_cpu<3>, std::tuple<DataType, DataType, DataType, DataType>, - void(const std::size_t, const void*, const void*, const void*, void*)> { -}; -template <> -class AddImplBackward_cpu<3> - : public Registrable<AddImplBackward_cpu<3>, std::tuple<DataType, DataType, DataType, DataType>, - void(const std::size_t, const void*, const void*, const void*, void*)> {}; - -template <DimIdx_t NUM> -class AddImpl_cpu : public OperatorImpl { - private: - const Add_Op<NUM>& mOp; - std::array<NbElts_t, NUM> mNbConsumedData = {}; - std::array<NbElts_t, 1> mNbProducedData = {}; - - public: - AddImpl_cpu(const Add_Op<NUM>& op) : mOp(op) {} - - static std::unique_ptr<AddImpl_cpu<NUM>> create(const Add_Op<NUM>& op) { - return std::make_unique<AddImpl_cpu<NUM>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); - } - - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
- return 0; - } - - NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); - } - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final { - assert(inputIdx < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; - } - - NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final { - assert(outputIdx < mNbProducedData.size()); - return mNbProducedData[outputIdx]; - } - void updateConsummerProducer() override final; - - void forward() override { - // nothing - } - - void backward() override { printf("Not implemented yet.\n"); } -}; - -template <> -class AddImpl_cpu<1> : public OperatorImpl { - private: - const Add_Op<1>& mOp; - std::array<NbElts_t, 1> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - AddImpl_cpu(const Add_Op<1>& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} - - static std::unique_ptr<AddImpl_cpu<1>> create(const Add_Op<1>& op) { - return std::make_unique<AddImpl_cpu<1>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t /*inputIdx*/) const override final; +class AddImplForward_cpu + : public Registrable<AddImplForward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<const void*>, void*)> {}; - NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, - const std::vector<DimSize_t> &/*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t /*inputIdx*/) const override final; - - NbElts_t 
getNbProducedData(const IOIndex_t /*outputIdx*/) const override final; - void updateConsummerProducer() override final; - - void forward() override; +class AddImplBackward_cpu + : public Registrable<AddImplBackward_cpu, std::tuple<DataType, DataType>, void(const std::size_t, const std::vector<const void*>, void*)> {}; - void backward() override; -}; - -template <> -class AddImpl_cpu<2> : public OperatorImpl { - private: - const Add_Op<2>& mOp; - std::array<NbElts_t, 2> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; - - public: - AddImpl_cpu(const Add_Op<2>& op) : mOp(op), mNbConsumedData({0, 0}), mNbProducedData({0}) {} - - static std::unique_ptr<AddImpl_cpu<2>> create(const Add_Op<2>& op) { - return std::make_unique<AddImpl_cpu<2>>(op); - } - - public: - NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; - NbElts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final; - - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, - const std::vector<DimSize_t>& /*inputsSize*/) const override final; - - NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; - - NbElts_t getNbProducedData(const IOIndex_t /*outputIdx*/) const override final; - void updateConsummerProducer() override final; - - void forward() override; - - void backward() override; -}; - -template <> -class AddImpl_cpu<3> : public OperatorImpl { - private: - const Add_Op<3>& mOp; - std::array<NbElts_t, 3> mNbConsumedData; - std::array<NbElts_t, 1> mNbProducedData; +class AddImpl_cpu : public OperatorImpl { +private: + const Add_Op& mOp; + std::vector<NbElts_t> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData = {}; - public: - AddImpl_cpu(const Add_Op<3>& op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {} +public: + AddImpl_cpu(const Add_Op& op) : mOp(op), mNbConsumedData(std::vector<NbElts_t>(op.nbInputs())) {} - static std::unique_ptr<AddImpl_cpu<3>> create(const Add_Op<3>& op) { - return 
std::make_unique<AddImpl_cpu<3>>(op); + static std::unique_ptr<AddImpl_cpu> create(const Add_Op& op) { + return std::make_unique<AddImpl_cpu>(op); } - public: +public: NbElts_t getNbRequiredData(const IOIndex_t inputIdx) const override final; NbElts_t getNbRequiredProtected(const IOIndex_t /*inputIdx*/) const override final; - NbElts_t getRequiredMemory(const IOIndex_t /*outputIdx*/, const std::vector<DimSize_t>& /*inputsSize*/) const override final; + NbElts_t getRequiredMemory(const IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final; NbElts_t getNbConsumedData(const IOIndex_t inputIdx) const override final; NbElts_t getNbProducedData(const IOIndex_t outputIdx) const override final; + void updateConsummerProducer() override final; void forward() override; @@ -202,9 +62,7 @@ class AddImpl_cpu<3> : public OperatorImpl { }; namespace { -static Registrar<Add_Op<1>> registrarAddImpl1I_cpu("cpu", Aidge::AddImpl_cpu<1>::create); -static Registrar<Add_Op<2>> registrarAddImpl2I_cpu("cpu", Aidge::AddImpl_cpu<2>::create); -static Registrar<Add_Op<3>> registrarAddImpl3I_cpu("cpu", Aidge::AddImpl_cpu<3>::create); +static Registrar<Add_Op> registrarAddImpl_cpu("cpu", Aidge::AddImpl_cpu::create); } // namespace } // namespace Aidge diff --git a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp index 221e36dc..198bcbac 100644 --- a/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp +++ b/include/aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp @@ -18,70 +18,30 @@ namespace Aidge { -template <class I1, class O> -void AddImpl1I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { +template <class I, class O> +void AddImpl_cpu_forward_kernel(const std::size_t inputLength, const std::vector<const void*> inputs_, void* output_) { // FIXME: missing Add attributes as arguments - const I1* input1 = static_cast<const 
I1*>(input1_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex]; - } -} - -template <class I1, class I2, class O> -void AddImpl2I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, - void* output_) { - // FIXME: missing Add attributes as arguments - const I1* input1 = static_cast<const I1*>(input1_); - const I2* input2 = static_cast<const I2*>(input2_); - O* output = static_cast<O*>(output_); - - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex] + input2[oIndex]; + std::vector<const I*> inputs; + for (const auto& input_ : inputs_) { + inputs.push_back(static_cast<const I*>(input_)); } -} - -template <class I1, class I2, class I3, class O> -void AddImpl3I_cpu_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, - const void* input3_, void* output_) { - // FIXME: missing Add attributes as arguments - const I1* input1 = static_cast<const I1*>(input1_); - const I2* input2 = static_cast<const I2*>(input2_); - const I3* input3 = static_cast<const I3*>(input3_); O* output = static_cast<O*>(output_); - for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { - output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex]; - } + for (std::size_t iIndex = 0; iIndex < inputs.size(); ++iIndex) { + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = (iIndex == 0) ? inputs[0][oIndex] : output[oIndex] + inputs[iIndex][oIndex]; + } + } } namespace { -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float32( - {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_cpu_forward_kernel<float, float>); -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Int32( - {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_cpu_forward_kernel<int, int>); -static Registrar<AddImplForward_cpu<1>> registrarAddImpl1IForward_cpu_Float64( - {DataType::Float64,
DataType::Float64}, Aidge::AddImpl1I_cpu_forward_kernel<double, double>); - -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::AddImpl2I_cpu_forward_kernel<float, float, float>); -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_cpu_forward_kernel<int, int, int>); -static Registrar<AddImplForward_cpu<2>> registrarAddImpl2IForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_cpu_forward_kernel<double, double, double>); - -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float32( - {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, - Aidge::AddImpl3I_cpu_forward_kernel<float, float, float, float>); -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Int32( - {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, - Aidge::AddImpl3I_cpu_forward_kernel<int, int, int, int>); -static Registrar<AddImplForward_cpu<3>> registrarAddImpl3IForward_cpu_Float64( - {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, - Aidge::AddImpl3I_cpu_forward_kernel<double, double, double, double>); +static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float32( + {DataType::Float32, DataType::Float32}, Aidge::AddImpl_cpu_forward_kernel<float, float>); +static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Int32( + {DataType::Int32, DataType::Int32}, Aidge::AddImpl_cpu_forward_kernel<int, int>); +static Registrar<AddImplForward_cpu> registrarAddImplForward_cpu_Float64( + {DataType::Float64, DataType::Float64}, Aidge::AddImpl_cpu_forward_kernel<double, double>); } // namespace } // namespace Aidge -#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */ +#endif /* AIDGE_CPU_OPERATOR_ADDIMPL_CPU_FORWARD_KERNEL_H_ */ \ No 
newline at end of file diff --git a/src/operator/AddImpl.cpp b/src/operator/AddImpl.cpp index be792333..36eff221 100644 --- a/src/operator/AddImpl.cpp +++ b/src/operator/AddImpl.cpp @@ -10,209 +10,76 @@ ********************************************************************************/ #include <cassert> -#include <chrono> // std::chrono::milliseconds #include <numeric> // std::accumulate -#include <thread> // std::this_thread::sleep_for #include <vector> -#include "aidge/operator/Conv.hpp" #include "aidge/utils/Types.h" +#include "aidge/data/Data.hpp" +#include "aidge/data/Tensor.hpp" #include "aidge/backend/cpu/operator/AddImpl.hpp" #include "aidge/backend/cpu/operator/AddImpl_forward_kernels.hpp" -////////////////////////////////// -// AddImpl_cpu<1> -////////////////////////////////// - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { - assert(mOp.getInput(0) && "requires valid input"); - // Requires the whole tensors - return static_cast<int>(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size()); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
- return 0; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getRequiredMemory(const Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size(); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const { - return mNbConsumedData[0]; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; -} -void Aidge::AddImpl_cpu<1>::updateConsummerProducer(){ - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} -void Aidge::AddImpl_cpu<1>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(mOp.getInput(0) && "missing input #0"); - - // Find the correct kernel type - auto kernelFunc = Registrar<AddImplForward_cpu<1>>::create({ - mOp.getInput(0)->dataType(), - mOp.getOutput(0)->dataType()}); - - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), - mOp.getInput(0)->getImpl()->rawPtr(), - mOp.getOutput(0)->getImpl()->rawPtr()); - -} - -void Aidge::AddImpl_cpu<1>::backward() { - printf("Not implemented yet.\n"); -} - - -////////////////////////////////// -// AddImpl_cpu<2> -////////////////////////////////// - - -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { +Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { assert(mOp.getInput(inputIdx) && "requires valid input"); // Requires the whole tensors const auto& inputDims = 
std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), - NbElts_t(1), std::multiplies<NbElts_t>()); + return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { +Aidge::NbElts_t Aidge::AddImpl_cpu::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! return 0; } -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { +Aidge::NbElts_t Aidge::AddImpl_cpu::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { // Requires the whole tensors, regardless of available data on inputs assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; // avoid unused warning + (void) outputIdx; const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - NbElts_t(1), std::multiplies<NbElts_t>()); + return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); } -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; +Aidge::NbElts_t Aidge::AddImpl_cpu::getNbConsumedData(const Aidge::IOIndex_t inputIdx) const { + assert(inputIdx < mNbConsumedData.size()); + return mNbConsumedData[inputIdx]; } -Aidge::NbElts_t Aidge::AddImpl_cpu<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { - return mNbProducedData[0]; +Aidge::NbElts_t Aidge::AddImpl_cpu::getNbProducedData(const 
Aidge::IOIndex_t outputIdx) const { + assert(outputIdx < mNbProducedData.size()); + return mNbProducedData[outputIdx]; } -void Aidge::AddImpl_cpu<2>::updateConsummerProducer(){ - // Update producer-consumer data + +void Aidge::AddImpl_cpu::updateConsummerProducer() { for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass mNbProducedData[0]+= getRequiredMemory(0, {}); -} -void Aidge::AddImpl_cpu<2>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(mOp.getInput(0) && "missing input #0"); - assert(mOp.mInputs[1] && "missing input #1"); - - // Find the correct kernel type - auto kernelFunc = Registrar<AddImplForward_cpu<2>>::create({ - mOp.getInput(0)->dataType(), - mOp.mInputs[1]->dataType(), - mOp.getOutput(0)->dataType()}); - - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), - mOp.getInput(0)->getImpl()->rawPtr(), - mOp.mInputs[1]->getImpl()->rawPtr(), - mOp.getOutput(0)->getImpl()->rawPtr()); } -void Aidge::AddImpl_cpu<2>::backward() { - printf("Not implemented yet.\n"); -} - - -////////////////////////////////// -// AddImpl_cpu<3> -////////////////////////////////// - - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredData(const Aidge::IOIndex_t inputIdx) const { - assert(mOp.getInput(inputIdx) && "requires valid input"); - - // Requires the whole tensors - const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); - - return std::accumulate(inputDims.begin(), inputDims.end(), - Aidge::NbElts_t(1), std::multiplies<Aidge::NbElts_t>()); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbRequiredProtected(const Aidge::IOIndex_t /*inputIdx*/) const { - // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
- return 0; -} +void Aidge::AddImpl_cpu::forward() { + assert(mOp.getInput(0) && "missing input in Add operator"); + DataType datatypeFirstInput = mOp.getInput(0)->dataType(); + for (IOIndex_t i = 1; i < mOp.nbInputs(); ++i) { + assert(mOp.getInput(i) && "missing input in Add operator"); + assert(mOp.getInput(i)->dataType() == datatypeFirstInput); + } -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getRequiredMemory(const Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { - // Requires the whole tensors, regardless of available data on inputs - assert(outputIdx == 0 && "operator has only one output"); - (void) outputIdx; // avoid unused warning - - const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); - return std::accumulate(outputDims.begin(), outputDims.end(), - NbElts_t(1), std::multiplies<NbElts_t>()); -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { - assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); - return mNbConsumedData[inputIdx]; -} - -Aidge::NbElts_t Aidge::AddImpl_cpu<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const { - assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size()); - return mNbProducedData[static_cast<std::size_t>(outputIdx)]; -} -void Aidge::AddImpl_cpu<3>::updateConsummerProducer(){ - // Update producer-consumer data - for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) - mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass - - mNbProducedData[0]+= getRequiredMemory(0, {}); -} -void Aidge::AddImpl_cpu<3>::forward() { - // FIXME: uncomment the following code once memory handling will work - assert(mOp.getInput(0) && "missing input #0"); - assert(mOp.mInputs[1] && "missing input #1"); - assert(mOp.mInputs[2] && "missing input #2"); - - // Find the correct kernel type - auto 
kernelFunc = Registrar<AddImplForward_cpu<3>>::create({ - mOp.getInput(0)->dataType(), - mOp.mInputs[1]->dataType(), - mOp.mInputs[2]->dataType(), + auto kernelFunc = Registrar<AddImplForward_cpu>::create({ + datatypeFirstInput, mOp.getOutput(0)->dataType()}); - // Call kernel - kernelFunc(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(), - mOp.getInput(0)->getImpl()->rawPtr(), - mOp.mInputs[1]->getImpl()->rawPtr(), - mOp.mInputs[2]->getImpl()->rawPtr(), - mOp.getOutput(0)->getImpl()->rawPtr()); + std::vector<const void*> opInputs; + for (IOIndex_t i = 0; i < mOp.nbInputs(); ++i) { + opInputs.push_back(mOp.getInput(i)->getImpl()->rawPtr()); + } + kernelFunc(mOp.getInput(0)->size(), + opInputs, + mOp.getOutput(0)->getImpl()->rawPtr()); } -void Aidge::AddImpl_cpu<3>::backward() { - printf("Not implemented yet.\n"); -} +void Aidge::AddImpl_cpu::backward() { printf("Not implemented yet.\n"); } \ No newline at end of file diff --git a/unit_tests/operator/Test_AddImpl.cpp b/unit_tests/operator/Test_AddImpl.cpp index 18d98d16..579ded6c 100644 --- a/unit_tests/operator/Test_AddImpl.cpp +++ b/unit_tests/operator/Test_AddImpl.cpp @@ -18,7 +18,7 @@ using namespace Aidge; -TEST_CASE("[cpu/operator] Add(forward)") { +TEST_CASE("[cpu/operator] Add(forward)", "[Add]") { std::shared_ptr<Tensor> input1 = std::make_shared<Tensor>(Array4D<int,3,3,3,2> { { // { // @@ -40,7 +40,7 @@ TEST_CASE("[cpu/operator] Add(forward)") { }); // SECTION("One input") { - std::shared_ptr<Node> myAdd = Add<1>(); + std::shared_ptr<Node> myAdd = Add(1); myAdd->getOperator()->setBackend("cpu"); myAdd->getOperator()->setDatatype(DataType::Int32); myAdd->getOperator()->associateInput(0, input1); @@ -71,7 +71,7 @@ TEST_CASE("[cpu/operator] Add(forward)") { } }); - std::shared_ptr<Node> myAdd = Add<2>(); + std::shared_ptr<Node> myAdd = Add(2); myAdd->getOperator()->setDatatype(DataType::Int32); myAdd->getOperator()->setBackend("cpu"); myAdd->getOperator()->associateInput(0, input1); @@ -103,7 +103,7 
@@ TEST_CASE("[cpu/operator] Add(forward)") { } }); - std::shared_ptr<Node> myAdd = Add<3>(); + std::shared_ptr<Node> myAdd = Add(3); myAdd->getOperator()->setDatatype(DataType::Int32); myAdd->getOperator()->setBackend("cpu"); myAdd->getOperator()->associateInput(0, input1); diff --git a/unit_tests/scheduler/Test_Scheduler.cpp b/unit_tests/scheduler/Test_Scheduler.cpp index 78ab8d5b..3fc1e445 100644 --- a/unit_tests/scheduler/Test_Scheduler.cpp +++ b/unit_tests/scheduler/Test_Scheduler.cpp @@ -147,7 +147,7 @@ TEST_CASE("[cpu/scheduler] SequentialScheduler(forward)") { Conv(3, 3, {1, 1}, "conv1.1"), Conv(3, 3, {1, 1}, "conv1.2"), Conv(3, 3, {1, 1}, "conv1.3")}), - Add<3>("add1"), + Add(3, "add1"), Conv(3, 2, {1, 1}, "conv2"), FC(5, false, "out")}); g->setBackend("cpu"); -- GitLab