diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d55a224c5bf0ef432001c16565e5f220d175ae7..ccd4ee8694a487490a1727ad5d3fa5669f72576c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,4 +17,4 @@ if (PYBIND) endif() add_subdirectory(${CMAKE_SOURCE_DIR}/aidge/_Core) -# add_subdirectory(${CMAKE_SOURCE_DIR}/aidge/_CPU) \ No newline at end of file +add_subdirectory(${CMAKE_SOURCE_DIR}/aidge/_CPU) \ No newline at end of file diff --git a/aidge/_CPU/CMakeLists.txt b/aidge/_CPU/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c6a8d775f86d7d4378927fb3c36704691d307e36 --- /dev/null +++ b/aidge/_CPU/CMakeLists.txt @@ -0,0 +1,30 @@ + +# project(Aidge_Core) + +if (PYBIND) + generate_python_binding(aidge_cpu cpu) +endif() + +add_library(cpu STATIC) + +# Add include directories +target_include_directories(cpu PUBLIC "include") + +# Containers module +file(GLOB_RECURSE src_files "src/*.cpp") +target_sources(cpu PRIVATE ${src_files}) + +target_link_libraries(cpu PUBLIC core) + +set_property(TARGET cpu PROPERTY POSITION_INDEPENDENT_CODE ON) + +if (PYBIND) + target_include_directories(cpu PUBLIC $<BUILD_INTERFACE:${PYTHON_INCLUDE_DIRS}>) + target_link_libraries(cpu PRIVATE ${PYTHON_LIBRARIES}) +endif() + + +# Activate compile time reducer for aidge_core +set_target_properties(cpu PROPERTIES COTIRE_ADD_UNITY_BUILD FALSE) +# set_target_properties(n2d2_cpu_lib PROPERTIES COTIRE_CXX_PREFIX_HEADER_INIT "include/utils/Precompiled.hpp") +cotire(cpu) \ No newline at end of file diff --git a/aidge/_CPU/include/data/TensorImpl.hpp b/aidge/_CPU/include/data/TensorImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..43016476a2563860632db2d89168b3f4e5333129 --- /dev/null +++ b/aidge/_CPU/include/data/TensorImpl.hpp @@ -0,0 +1,73 @@ +#ifndef TensorImpl_ref_cpp_H_ +#define TensorImpl_ref_cpp_H_ + +#include "backend/TensorImpl.hpp" +#include "data/Tensor.hpp" +#include "utils/Registrar.hpp" +#include "utils/Types.h" + +namespace Aidge { +template <class T> +class TensorImpl_ref_cpp : public TensorImpl { + private: + const Tensor &mTensor; // Impl needs to access Tensor information, but is not + // supposed to change it! 
+    std::vector<T> mData;
+
+   public:
+    static constexpr const char *Backend = "cpu";
+
+    TensorImpl_ref_cpp(const Tensor &tensor) : TensorImpl(Backend), mTensor(tensor) {}
+
+    bool operator==(const TensorImpl &otherImpl) const override final {
+        std::size_t i = 0;
+        for (; i < mTensor.size() &&
+               mData[i] == reinterpret_cast<const TensorImpl_ref_cpp<T> &>(otherImpl).data()[i];
+             ++i) {
+        }
+        return i == mTensor.size();
+    }
+
+    static std::unique_ptr<TensorImpl_ref_cpp> create(const Tensor &tensor) {
+        return std::make_unique<TensorImpl_ref_cpp<T>>(tensor);
+    }
+
+    // native interface
+    const std::vector<T> &data() const { return mData; }
+
+    std::size_t scalarSize() const override { return sizeof(T); }
+
+    void copy(const void *src, NbElts_t length) override {
+        std::copy(static_cast<const T *>(src), static_cast<const T *>(src) + length,
+                  static_cast<T *>(rawPtr()));
+    }
+
+    void *rawPtr() override {
+        lazyInit(mData);
+        return mData.data();
+    };
+
+    virtual ~TensorImpl_ref_cpp() = default;
+
+    void setRawPtr(void *ptr) override final {
+        T *newPtr = static_cast<T *>(ptr);
+        mData = std::vector<T>(newPtr, newPtr + mTensor.size());
+    };
+
+   private:
+    void lazyInit(std::vector<T> &data) {
+        assert(mTensor.dataType() == NativeType<T>::type);
+
+        if (data.size() != mTensor.size()) data.resize(mTensor.size());
+    }
+};
+
+namespace {
+static Registrar<Tensor> registrarTensorImpl_ref_cpp_Float32(
+        {"cpu", DataType::Float32}, Aidge::TensorImpl_ref_cpp<float>::create);
+static Registrar<Tensor> registrarTensorImpl_ref_cpp_Int32({"cpu", DataType::Int32},
+                                                            Aidge::TensorImpl_ref_cpp<int>::create);
+} // namespace
+} // namespace Aidge
+
+#endif /* TensorImpl_ref_cpp_H_ */ diff --git a/aidge/_CPU/include/operator/AddImpl.hpp b/aidge/_CPU/include/operator/AddImpl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eb5caf72e70a15e77a4079de3edea8acbea42156 --- /dev/null +++ b/aidge/_CPU/include/operator/AddImpl.hpp @@ -0,0 +1,196 @@ +#ifndef AddImpl_ref_cpp_H_
+#define AddImpl_ref_cpp_H_
+
+#include "utils/Types.h"
+
+#include "backend/OperatorImpl.hpp"
+#include "operator/Add.hpp"
+#include "utils/Registrar.hpp"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+// class Add_Op<2>;
+
+// compute kernel registry for forward and backward
+template <DimIdx_t NUM>
+class AddImplForward_ref_cpp;
+template <DimIdx_t NUM>
+class AddImplBackward_ref_cpp;
+
+template <>
+class AddImplForward_ref_cpp<1>
+    : public Registrable<std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
+template <>
+class AddImplBackward_ref_cpp<1>
+    : public Registrable<std::tuple<DataType, DataType>, void(const std::size_t, const void*, void*)> {};
+
+template <>
+class AddImplForward_ref_cpp<2> : public Registrable<std::tuple<DataType, DataType, DataType>,
+                                                     void(const std::size_t, const void*, const void*, void*)> {};
+template <>
+class AddImplBackward_ref_cpp<2> : public Registrable<std::tuple<DataType, DataType, DataType>,
+                                                      void(const std::size_t, const void*, const void*, void*)> {};
+
+template <>
+class AddImplForward_ref_cpp<3> : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
+                                                     void(const std::size_t, const void*, const void*, const void*, void*)> {
+};
+template <>
+class AddImplBackward_ref_cpp<3>
+    : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const std::size_t, const void*, const void*, const void*, void*)> {};
+
+template <DimIdx_t NUM>
+class AddImpl_ref_cpp : public OperatorImpl {
+ 
private: + const Add_Op<NUM>& mOp; + std::array<NbElts_t, NUM> mNbConsumedData = {}; + std::array<NbElts_t, 1> mNbProducedData = {}; + + public: + AddImpl_ref_cpp(const Add_Op<NUM>& op) : mOp(op) {} + + static std::unique_ptr<AddImpl_ref_cpp<NUM>> create(const Add_Op<NUM>& op) { + return std::make_unique<AddImpl_ref_cpp<NUM>>(op); + } + + public: + NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final { + assert(mOp.getInput(inputIdx) && "requires valid input"); + + // Requires the whole tensors + const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); + return std::accumulate(inputDims.begin(), inputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); + } + + NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final { + // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! + return 0; + } + + NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final { + // Requires the whole tensors, regardless of available data on inputs + assert(outputIdx == 0 && "operator has only one output"); + + const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>()); + } + + NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final { + assert(inputIdx < mNbConsumedData.size()); + return mNbConsumedData[inputIdx]; + } + + NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final { + assert(outputIdx < mNbProducedData.size()); + return mNbProducedData[outputIdx]; + } + + void forward() { + // nothing + } + + void backward() { printf("Not implemented yet.\n"); } +}; + +template <> +class AddImpl_ref_cpp<1> : public OperatorImpl { + private: + const Add_Op<1>& mOp; + std::array<NbElts_t, 1> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData; + + public: + AddImpl_ref_cpp(const Add_Op<1>& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {} + + static std::unique_ptr<AddImpl_ref_cpp<1>> create(const Add_Op<1>& op) { + return std::make_unique<AddImpl_ref_cpp<1>>(op); + } + + public: + NbElts_t getNbRequiredData(IOIndex_t /*inputIdx*/) const override final; + + NbElts_t getNbRequiredProtected(IOIndex_t /*inputIdx*/) const override final; + + NbElts_t getRequiredMemory(IOIndex_t /*outputIdx*/, + const std::vector<DimSize_t>& /*inputsSize*/) const override final; + + NbElts_t getNbConsumedData(IOIndex_t /*inputIdx*/) const override final; + + NbElts_t getNbProducedData(IOIndex_t /*outputIdx*/) const override final; + + void forward(); + + void backward(); +}; + +template <> +class AddImpl_ref_cpp<2> : public OperatorImpl { + private: + const Add_Op<2>& mOp; + std::array<NbElts_t, 2> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData; + + public: + AddImpl_ref_cpp(const Add_Op<2>& op) : mOp(op), mNbConsumedData({0, 0}), mNbProducedData({0}) {} + + static std::unique_ptr<AddImpl_ref_cpp<2>> create(const Add_Op<2>& op) { + return std::make_unique<AddImpl_ref_cpp<2>>(op); + } + + public: + NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final; + + NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final; + + NbElts_t getRequiredMemory(IOIndex_t /*outputIdx*/, + const std::vector<DimSize_t>& /*inputsSize*/) const override final; + + NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final; + + NbElts_t getNbProducedData(IOIndex_t /*outputIdx*/) const 
override final; + + void forward(); + + void backward(); +}; + +template <> +class AddImpl_ref_cpp<3> : public OperatorImpl { + private: + const Add_Op<3>& mOp; + std::array<NbElts_t, 3> mNbConsumedData; + std::array<NbElts_t, 1> mNbProducedData; + + public: + AddImpl_ref_cpp(const Add_Op<3>& op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {} + + static std::unique_ptr<AddImpl_ref_cpp<3>> create(const Add_Op<3>& op) { + return std::make_unique<AddImpl_ref_cpp<3>>(op); + } + + public: + NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final; + + NbElts_t getNbRequiredProtected(IOIndex_t /*inputIdx*/) const override final; + + NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t>& /*inputsSize*/) const override final; + + NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final; + + NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final; + + void forward(); + + void backward(); +}; + +namespace { +static Registrar<Add_Op<1>> registrarAddImpl1I_ref_cpp("cpu", Aidge::AddImpl_ref_cpp<1>::create); +static Registrar<Add_Op<2>> registrarAddImpl2I_ref_cpp("cpu", Aidge::AddImpl_ref_cpp<2>::create); +static Registrar<Add_Op<3>> registrarAddImpl3I_ref_cpp("cpu", Aidge::AddImpl_ref_cpp<3>::create); +} // namespace +} // namespace Aidge + +#endif /* AddImpl_ref_cpp_H_ */ \ No newline at end of file diff --git a/aidge/_CPU/include/operator/AddImpl_kernels.hpp b/aidge/_CPU/include/operator/AddImpl_kernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..adbb04769751bd7e59ab9771d267b9732997db13 --- /dev/null +++ b/aidge/_CPU/include/operator/AddImpl_kernels.hpp @@ -0,0 +1,76 @@ +#ifndef AddImpl_ref_cpp_forward_kernel_H_ +#define AddImpl_ref_cpp_forward_kernel_H_ + +#include "utils/Registrar.hpp" + +#include "operator/AddImpl.hpp" + +namespace Aidge { + +template <class I1, class O> +void AddImpl1I_ref_cpp_forward_kernel(const std::size_t inputLength, const void* input1_, void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex]; + } +} + +template <class I1, class I2, class O> +void AddImpl2I_ref_cpp_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, + void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + const I2* input2 = static_cast<const I2*>(input2_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex] + input2[oIndex]; + } +} + +template <class I1, class I2, class I3, class O> +void AddImpl3I_ref_cpp_forward_kernel(const std::size_t inputLength, const void* input1_, const void* input2_, + const void* input3_, void* output_) { + // FIXME: missing Add parameters as arguments + const I1* input1 = static_cast<const I1*>(input1_); + const I2* input2 = static_cast<const I2*>(input2_); + const I3* input3 = static_cast<const I3*>(input3_); + O* output = static_cast<O*>(output_); + + for (std::size_t oIndex = 0; oIndex < inputLength; ++oIndex) { + output[oIndex] = input1[oIndex] + input2[oIndex] + input3[oIndex]; + } +} + +namespace { +static Registrar<AddImplForward_ref_cpp<1>> registrarAddImpl1IForward_ref_cpp_Float32( + {DataType::Float32, DataType::Float32}, Aidge::AddImpl1I_ref_cpp_forward_kernel<float, float>); 
+static Registrar<AddImplForward_ref_cpp<1>> registrarAddImpl1IForward_ref_cpp_Int32( + {DataType::Int32, DataType::Int32}, Aidge::AddImpl1I_ref_cpp_forward_kernel<int, int>); +static Registrar<AddImplForward_ref_cpp<1>> registrarAddImpl1IForward_ref_cpp_Float64( + {DataType::Float64, DataType::Float64}, Aidge::AddImpl1I_ref_cpp_forward_kernel<double, double>); + +static Registrar<AddImplForward_ref_cpp<2>> registrarAddImpl2IForward_ref_cpp_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::AddImpl2I_ref_cpp_forward_kernel<float, float, float>); +static Registrar<AddImplForward_ref_cpp<2>> registrarAddImpl2IForward_ref_cpp_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32}, Aidge::AddImpl2I_ref_cpp_forward_kernel<int, int, int>); +static Registrar<AddImplForward_ref_cpp<2>> registrarAddImpl2IForward_ref_cpp_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64}, Aidge::AddImpl2I_ref_cpp_forward_kernel<double, double, double>); + +static Registrar<AddImplForward_ref_cpp<3>> registrarAddImpl3IForward_ref_cpp_Float32( + {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32}, + Aidge::AddImpl3I_ref_cpp_forward_kernel<float, float, float, float>); +static Registrar<AddImplForward_ref_cpp<3>> registrarAddImpl3IForward_ref_cpp_Int32( + {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32}, + Aidge::AddImpl3I_ref_cpp_forward_kernel<int, int, int, int>); +static Registrar<AddImplForward_ref_cpp<3>> registrarAddImpl3IForward_ref_cpp_Float64( + {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64}, + Aidge::AddImpl3I_ref_cpp_forward_kernel<double, double, double, double>); +} // namespace +} // namespace Aidge + +#endif /* AddImpl_ref_cpp_forward_kernel_H_ */ diff --git a/aidge/_CPU/python_binding/pybind_cpu.cpp b/aidge/_CPU/python_binding/pybind_cpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6b5b7cd16a4cac9b418984dc798caed5595373aa --- /dev/null +++ b/aidge/_CPU/python_binding/pybind_cpu.cpp @@ -0,0 +1,22 @@ +#include <pybind11/pybind11.h> +// Need to call this header to register every impl +#include "data/TensorImpl.hpp" +#include "operator/AddImpl.hpp" + +namespace py = pybind11; + +namespace Aidge { +void init_registrar(py::module& m){ + + + +} + +void init_Aidge(py::module& m){ + init_registrar(m); +} + +PYBIND11_MODULE(aidge_cpu, m) { + init_Aidge(m); +} +} diff --git a/aidge/_CPU/src/operator/AddImpl.cpp b/aidge/_CPU/src/operator/AddImpl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a90a1ba0b0afb22ba93c066937115f12c41efa63 --- /dev/null +++ b/aidge/_CPU/src/operator/AddImpl.cpp @@ -0,0 +1,203 @@ + +#include <cassert> +#include <numeric> +#include <chrono> +#include <thread> +#include <vector> + +#include "operator/Conv.hpp" + +#include "operator/AddImpl.hpp" +#include "operator/AddImpl_kernels.hpp" +#include "utils/Types.h" + +////////////////////////////////// +// AddImpl_ref_cpp<1> +////////////////////////////////// + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<1>::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const { + assert(mOp.getInput(0) && "requires valid input"); + // Requires the whole tensors + return static_cast<int>(std::static_pointer_cast<Tensor>(mOp.getInput(0))->size()); +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<1>::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const { + // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
+ return 0; +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<1>::getRequiredMemory(Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { + // Requires the whole tensors, regardless of available data on inputs + return std::static_pointer_cast<Tensor>(mOp.getOutput(0))->size(); +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<1>::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const { + return mNbConsumedData[0]; +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<1>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { + return mNbProducedData[0]; +} + +void Aidge::AddImpl_ref_cpp<1>::forward() { + // FIXME: uncomment the following code once memory handling will work + assert(mOp.mInputs[0] && "missing input #0"); + + // Find the correct kernel type + auto kernelFunc = Registrar<AddImplForward_ref_cpp<1>>::create({ + mOp.mInputs[0]->dataType(), + mOp.mOutput->dataType()}); + + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.mInputs[0])->size(), + mOp.mInputs[0]->getImpl()->rawPtr(), + mOp.mOutput->getImpl()->rawPtr()); + + // Update producer-consumer data + for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass + + mNbProducedData[0]+= getRequiredMemory(0, {}); +} + +void Aidge::AddImpl_ref_cpp<1>::backward() { + printf("Not implemented yet.\n"); +} + + +////////////////////////////////// +// AddImpl_ref_cpp<2> +////////////////////////////////// + + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<2>::getNbRequiredData(Aidge::IOIndex_t inputIdx) const { + assert(mOp.getInput(inputIdx) && "requires valid input"); + + // Requires the whole tensors + const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); + + return std::accumulate(inputDims.begin(), inputDims.end(), + NbElts_t(1), std::multiplies<NbElts_t>()); +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<2>::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const { + // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
+ return 0; +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<2>::getRequiredMemory(Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { + // Requires the whole tensors, regardless of available data on inputs + assert(outputIdx == 0 && "operator has only one output"); + + const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), + NbElts_t(1), std::multiplies<NbElts_t>()); +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<2>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { + assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); + return mNbConsumedData[static_cast<std::size_t>(inputIdx)]; +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<2>::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const { + return mNbProducedData[0]; +} + +void Aidge::AddImpl_ref_cpp<2>::forward() { + // FIXME: uncomment the following code once memory handling will work + assert(mOp.mInputs[0] && "missing input #0"); + assert(mOp.mInputs[1] && "missing input #1"); + + // Find the correct kernel type + auto kernelFunc = Registrar<AddImplForward_ref_cpp<2>>::create({ + mOp.mInputs[0]->dataType(), + mOp.mInputs[1]->dataType(), + mOp.mOutput->dataType()}); + + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.mInputs[0])->size(), + mOp.mInputs[0]->getImpl()->rawPtr(), + mOp.mInputs[1]->getImpl()->rawPtr(), + mOp.mOutput->getImpl()->rawPtr()); + + // Update producer-consumer data + for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx]+= getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum amount for a forward pass + + mNbProducedData[0]+= getRequiredMemory(0, {}); +} + +void Aidge::AddImpl_ref_cpp<2>::backward() { + printf("Not implemented yet.\n"); +} + + +////////////////////////////////// +// AddImpl_ref_cpp<3> +////////////////////////////////// + + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<3>::getNbRequiredData(Aidge::IOIndex_t inputIdx) const { + assert(mOp.getInput(inputIdx) && "requires valid input"); + + // Requires the whole tensors + const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims(); + + return std::accumulate(inputDims.begin(), inputDims.end(), + Aidge::NbElts_t(1), std::multiplies<Aidge::NbElts_t>()); +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<3>::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const { + // for the direct convolution algorithm, convolutions can be in-place, if there is no padding! 
+ return 0; +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<3>::getRequiredMemory(Aidge::IOIndex_t outputIdx, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const { + // Requires the whole tensors, regardless of available data on inputs + assert(outputIdx == 0 && "operator has only one output"); + + const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims(); + return std::accumulate(outputDims.begin(), outputDims.end(), + NbElts_t(1), std::multiplies<NbElts_t>()); +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<3>::getNbConsumedData(Aidge::IOIndex_t inputIdx) const { + assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size()); + return mNbConsumedData[inputIdx]; +} + +Aidge::NbElts_t Aidge::AddImpl_ref_cpp<3>::getNbProducedData(Aidge::IOIndex_t outputIdx) const { + assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size()); + return mNbProducedData[static_cast<std::size_t>(outputIdx)]; +} + +void Aidge::AddImpl_ref_cpp<3>::forward() { + // FIXME: uncomment the following code once memory handling will work + assert(mOp.mInputs[0] && "missing input #0"); + assert(mOp.mInputs[1] && "missing input #1"); + assert(mOp.mInputs[2] && "missing input #2"); + + // Find the correct kernel type + auto kernelFunc = Registrar<AddImplForward_ref_cpp<3>>::create({ + mOp.mInputs[0]->dataType(), + mOp.mInputs[1]->dataType(), + mOp.mInputs[2]->dataType(), + mOp.mOutput->dataType()}); + + // Call kernel + kernelFunc(std::static_pointer_cast<Tensor>(mOp.mInputs[0])->size(), + mOp.mInputs[0]->getImpl()->rawPtr(), + mOp.mInputs[1]->getImpl()->rawPtr(), + mOp.mInputs[2]->getImpl()->rawPtr(), + mOp.mOutput->getImpl()->rawPtr()); + + // Update producer-consumer data + for (IOIndex_t inputIdx = 0; static_cast<NbElts_t>(inputIdx) < mNbConsumedData.size(); ++inputIdx) + mNbConsumedData[inputIdx]+= getNbRequiredData(inputIdx); // each input is consumed by the minimum amount for a forward pass + + mNbProducedData[0]+= getRequiredMemory(0, {}); +} + +void Aidge::AddImpl_ref_cpp<3>::backward() { + printf("Not implemented yet.\n"); +} \ No newline at end of file diff --git a/aidge/__init__.py b/aidge/__init__.py index 6dce7358f07360c5e615e9dcce6124ba6914fc00..d6da5b0907470f0fc984921d6a00324d0d70f4fe 100644 --- a/aidge/__init__.py +++ b/aidge/__init__.py @@ -1 +1,2 @@ -from aidge.aidge_core import * \ No newline at end of file +from aidge.aidge_core import * +from aidge.aidge_cpu import * \ No newline at end of file
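
Usage note (not part of the patch): the anonymous-namespace `Registrar` objects added above are what make the CPU backend discoverable, so including `operator/AddImpl_kernels.hpp` and linking the `cpu` target is enough to look kernels up by data type. Below is a minimal sketch of how the 2-input forward kernel could be exercised directly, mirroring the `Registrar<AddImplForward_ref_cpp<2>>::create(...)` lookup performed in `AddImpl_ref_cpp<2>::forward()`. The `main` function and the sample arrays are illustrative only; the sketch assumes the headers and core types (`Registrar`, `DataType`) that this patch already relies on.

```cpp
// Sketch: call the registered 2-input Add kernel directly, bypassing Tensor/graph setup.
#include <array>
#include <cassert>

#include "operator/AddImpl_kernels.hpp"  // pulls in the kernel registrations added by this patch

int main() {
    std::array<float, 4> in0{1.f, 2.f, 3.f, 4.f};
    std::array<float, 4> in1{10.f, 20.f, 30.f, 40.f};
    std::array<float, 4> out{};

    // Same lookup that AddImpl_ref_cpp<2>::forward() performs, keyed on the input/output data types.
    auto kernelFunc = Aidge::Registrar<Aidge::AddImplForward_ref_cpp<2>>::create(
        {Aidge::DataType::Float32, Aidge::DataType::Float32, Aidge::DataType::Float32});

    // Registered signature: (inputLength, input1, input2, output).
    kernelFunc(out.size(), in0.data(), in1.data(), out.data());

    assert(out[0] == 11.f && out[3] == 44.f);
    return 0;
}
```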