From f43bebf77f0286f0c9d065f9b332f6871290416c Mon Sep 17 00:00:00 2001
From: Vincent TEMPLIER <vincent.templier@cea.fr>
Date: Wed, 26 Jul 2023 13:47:17 +0000
Subject: [PATCH] Add other operators in CPU library

Add "cpu" backend implementations and forward kernels for the Conv2D, FC
and ReLU operators, plus an implementation for Producer. Forward kernels
are registered for Float32, Int32 and Float64; backward passes are not
implemented yet.
---
 aidge/_CPU/include/operator/ConvImpl.hpp     |  58 ++++++++
 .../include/operator/ConvImpl_kernels.hpp    | 132 ++++++++++++++++++
 aidge/_CPU/include/operator/FCImpl.hpp       |  52 +++++++
 .../_CPU/include/operator/FCImpl_kernels.hpp | 132 ++++++++++++++++++
 aidge/_CPU/include/operator/ProducerImpl.hpp |  46 ++++++
 aidge/_CPU/include/operator/ReLUImpl.hpp     |  53 +++++++
 .../include/operator/ReLUImpl_kernels.hpp    |  42 ++++++
 aidge/_CPU/src/operator/ConvImpl.cpp         |  77 ++++++++++
 aidge/_CPU/src/operator/FCImpl.cpp           | 116 +++++++++++++++
 aidge/_CPU/src/operator/ProducerImpl.cpp     |  72 ++++++++++
 aidge/_CPU/src/operator/ReLUImpl.cpp         |  69 +++++++++
 11 files changed, 849 insertions(+)
 create mode 100644 aidge/_CPU/include/operator/ConvImpl.hpp
 create mode 100644 aidge/_CPU/include/operator/ConvImpl_kernels.hpp
 create mode 100644 aidge/_CPU/include/operator/FCImpl.hpp
 create mode 100644 aidge/_CPU/include/operator/FCImpl_kernels.hpp
 create mode 100644 aidge/_CPU/include/operator/ProducerImpl.hpp
 create mode 100644 aidge/_CPU/include/operator/ReLUImpl.hpp
 create mode 100644 aidge/_CPU/include/operator/ReLUImpl_kernels.hpp
 create mode 100644 aidge/_CPU/src/operator/ConvImpl.cpp
 create mode 100644 aidge/_CPU/src/operator/FCImpl.cpp
 create mode 100644 aidge/_CPU/src/operator/ProducerImpl.cpp
 create mode 100644 aidge/_CPU/src/operator/ReLUImpl.cpp

diff --git a/aidge/_CPU/include/operator/ConvImpl.hpp b/aidge/_CPU/include/operator/ConvImpl.hpp
new file mode 100644
index 00000000..5b7f7dca
--- /dev/null
+++ b/aidge/_CPU/include/operator/ConvImpl.hpp
@@ -0,0 +1,58 @@
+#ifndef ConvImpl2D_ref_cpp_H_
+#define ConvImpl2D_ref_cpp_H_
+
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "backend/OperatorImpl.hpp"
+#include "operator/Conv.hpp"
+#include "utils/Registrar.hpp"
+#include "utils/Types.h"
+
+namespace Aidge {
+// class Conv_Op;
+
+// compute kernel registry for forward and backward
+class ConvImpl2DForward_ref_cpp
+    : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
+                              const void *, const void *, void *)> {};
+class ConvImpl2DBackward_ref_cpp
+    : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const Conv_Op<2>::Parameters &, const std::array<DimSize_t, 4> &, const void *,
+                              const void *, const void *, void *)> {};
+
+class ConvImpl2D_ref_cpp : public OperatorImpl {
+   private:
+    const Conv_Op<2> &mOp;
+    std::array<NbElts_t, 3> mNbConsumedData;
+    std::array<NbElts_t, 1> mNbProducedData;
+
+   public:
+    ConvImpl2D_ref_cpp(const Conv_Op<2> &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
+
+    static std::unique_ptr<ConvImpl2D_ref_cpp> create(const Conv_Op<2> &op) {
+        return std::make_unique<ConvImpl2D_ref_cpp>(op);
+    }
+
+   public:
+    NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
+    NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
+
+    void forward();
+
+    void backward();
+};
+
+namespace {
+// add ref_cpp backend to Conv_Op<2> implementation registry
+static Registrar<Conv_Op<2>> registrarConvImpl2D_ref_cpp("cpu", Aidge::ConvImpl2D_ref_cpp::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* ConvImpl2D_ref_cpp_H_ */
\ No newline at end of file
diff --git a/aidge/_CPU/include/operator/ConvImpl_kernels.hpp b/aidge/_CPU/include/operator/ConvImpl_kernels.hpp
new file mode 100644
index 00000000..ddd1764d
--- /dev/null
+++ b/aidge/_CPU/include/operator/ConvImpl_kernels.hpp
@@ -0,0 +1,132 @@
+
+
+#ifndef ConvImpl2D_ref_cpp_forward_kernel_H_
+#define ConvImpl2D_ref_cpp_forward_kernel_H_
+
+#include "utils/Registrar.hpp"
+
+#include "operator/ConvImpl.hpp"
+#include "utils/Types.h"
+#include <array>
+#include <algorithm>
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for 2D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params Operator parameters; <0> is read as the strides, <3> as the output channel count, <4> as the kernel dims.
+ * @param dims Array of input dimensions, read as (batch, inCh, Xin, Yin).
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const Bias Tensor; when null the output accumulation starts from zero.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl2D_ref_cpp_forward_kernel(const Conv_Op<2>::Parameters &params, const std::array<DimSize_t, 4> &dims,
+                                       const void *input_, const void *weights_, const void *biases_, void *output_) {
+    // FIXME: missing convolution parameters as arguments
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+/*
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(static_cast<float>(dims[0] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
+                                static_cast<float>(std::get<0>(params)[0]));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(static_cast<float>(dims[1] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
+                                static_cast<float>(std::get<0>(params)[1]));
+
+    // TODO: kernel computation
+    // output (Xout, Yout, outCh, batch)
+    // input (Xin, Yin, inCh, batch)
+    // weight (kernelX, kernelY, inCh, outCh)
+    // does not take Dilation parameter into account
+    for (std::size_t ox = 0; ox < oxSize; ++ox) {
+        for (std::size_t oy = 0; oy < oySize; ++oy) {
+            const std::size_t ix = ox * std::get<0>(params)[0];
+            const std::size_t iy = oy * std::get<0>(params)[1];
+
+            for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
+                const std::size_t oIndex = dims[3] * (outCh + std::get<3>(params) * (oy + oySize * ox));
+                B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+                for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                    output[oIndex + batch] = biasVal;
+                }
+                for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
+                    for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
+                        for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
+                            const std::size_t wIndex =
+                                    outCh + std::get<3>(params) * (inCh + dims[2] * (sy + std::get<4>(params)[1] * sx));
+                            std::size_t iIndex = dims[3] * (inCh + dims[2] * ((iy + sy) + dims[1] * (ix + sx)));
+                            for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+                                output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+*/
+
+
+    // output H size
+    const std::size_t oxSize =
+            static_cast<std::size_t>(static_cast<float>(dims[2] - std::get<4>(params)[0] + std::get<0>(params)[0]) /
+                                static_cast<float>(std::get<0>(params)[0]));
+    // output W size
+    const std::size_t oySize =
+            static_cast<std::size_t>(static_cast<float>(dims[3] - std::get<4>(params)[1] + std::get<0>(params)[1]) /
+                                static_cast<float>(std::get<0>(params)[1]));
+
+    // TODO: kernel computation
+    // output (batch, outCh, Xout, Yout)
+    // input (batch, inCh, Xin, Yin)
+    // weight (outCh, inCh, kernelX, kernelY)
+    // does not take Dilation parameter into account
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < std::get<3>(params); ++outCh) {
+            const std::size_t oIndex = (outCh + batch*std::get<3>(params)) * oxSize * oySize;
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize*oySize), biasVal);
+            for (std::size_t inCh = 0; inCh < dims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*dims[1]) * dims[2] * dims[3];
+                const std::size_t wIndex = (inCh + outCh*dims[1]) * std::get<4>(params)[0] * std::get<4>(params)[1];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    for (std::size_t oy = 0; oy < oySize; ++oy) {
+                        const std::size_t oIndexFull = oIndex + ox*oySize + oy;
+                        const std::size_t ix = ox * std::get<0>(params)[0];
+                        const std::size_t iy = oy * std::get<0>(params)[1];
+                        for (std::size_t sx = 0; sx < std::get<4>(params)[0]; ++sx) {
+                            for (std::size_t sy = 0; sy < std::get<4>(params)[1]; ++sy) {
+                                output[oIndexFull] += weights[wIndex + sx*std::get<4>(params)[1] + sy] *
+                                                        input[iIndex + (ix+sx)*dims[3] + (iy+sy)];
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvImpl2DForward_ref_cpp> registrarConvImpl2DForward_ref_cpp_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvImpl2D_ref_cpp_forward_kernel<float, float, float, float>);
+static Registrar<ConvImpl2DForward_ref_cpp> registrarConvImpl2DForward_ref_cpp_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvImpl2D_ref_cpp_forward_kernel<int, int, int, int>);
+static Registrar<ConvImpl2DForward_ref_cpp> registrarConvImpl2DForward_ref_cpp_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvImpl2D_ref_cpp_forward_kernel<double, double, double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* ConvImpl2D_ref_cpp_forward_kernel_H_ */
diff --git a/aidge/_CPU/include/operator/FCImpl.hpp b/aidge/_CPU/include/operator/FCImpl.hpp
new file mode 100644
index 00000000..dd973409
--- /dev/null
+++ b/aidge/_CPU/include/operator/FCImpl.hpp
@@ -0,0 +1,52 @@
+#ifndef FCImpl_ref_cpp_H_
+#define FCImpl_ref_cpp_H_
+
+#include "backend/OperatorImpl.hpp"
+#include "operator/FC.hpp"
+#include "utils/Registrar.hpp"
+#include "utils/Types.h"
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+namespace Aidge {
+// class FC_Op;
+
+// compute kernel registry for forward and backward
+class FCImplForward_ref_cpp : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
+                                                 void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t,
+                                                      const void *, const void *, const void *, void *)> {};
+class FCImplBackward_ref_cpp : public Registrable<std::tuple<DataType, DataType, DataType, DataType>,
+                                                  void(const FC_Op::Parameters &, const DimSize_t, const DimSize_t,
+                                                       const void *, const void *, const void *, void *)> {};
+
+class FCImpl_ref_cpp : public OperatorImpl {
+   private:
+    const FC_Op &mOp;
+    std::array<NbElts_t, 3> mNbConsumedData;
+    std::array<NbElts_t, 1> mNbProducedData;
+
+   public:
+    FCImpl_ref_cpp(const FC_Op &op) : mOp(op), mNbConsumedData({0, 0, 0}), mNbProducedData({0}) {}
+
+    static std::unique_ptr<FCImpl_ref_cpp> create(const FC_Op &op) { return std::make_unique<FCImpl_ref_cpp>(op); }
+
+   public:
+    NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
+    NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
+
+    void forward();
+
+    void backward();
+};
+
+namespace {
+static Registrar<FC_Op> registrarFCImpl_ref_cpp("cpu", Aidge::FCImpl_ref_cpp::create);
+}
+}  // namespace Aidge
+
+#endif /* FCImpl_ref_cpp_H_ */
\ No newline at end of file
diff --git a/aidge/_CPU/include/operator/FCImpl_kernels.hpp b/aidge/_CPU/include/operator/FCImpl_kernels.hpp
new file mode 100644
index 00000000..9b0f9ef6
--- /dev/null
+++ b/aidge/_CPU/include/operator/FCImpl_kernels.hpp
@@ -0,0 +1,132 @@
+#ifndef FCImpl_ref_cpp_forward_kernel_H_
+#define FCImpl_ref_cpp_forward_kernel_H_
+
+#include "utils/Registrar.hpp"
+#include <algorithm>
+#include <numeric>
+
+#include "operator/FCImpl.hpp"
+
+namespace Aidge {
+// template <class I, class W, class B, class O>
+// void FCImpl_ref_cpp_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 4>& dims,
+//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
+//     // FIXME: missing FC parameters as arguments
+//     const I* input = static_cast<const I*>(input_);
+//     const W* weights = static_cast<const W*>(weights_);
+//     const B* biases = static_cast<const B*>(biases_);
+//     O* output = static_cast<O*>(output_);
+
+//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
+//         std::size_t oIndex = outIdx * dims[3];
+//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
+//         for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+//             output[oIndex + batch] = bias;
+//         }
+//     }
+
+//     for (std::size_t ix = 0; ix < dims[0]; ++ix) {
+//         for (std::size_t iy = 0; iy < dims[1]; ++iy) {
+//             for (std::size_t inCh = 0; inCh < dims[2]; ++inCh) {
+//                 const std::size_t iIndex = dims[3] * (inCh + dims[2] * (iy + dims[1] * ix));
+//                 for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
+//                     const std::size_t oIndex = dims[3] * outCh;
+//                     const std::size_t wIndex = (inCh + dims[2] * (iy + dims[1] * ix)) * std::get<0>(params) +
+//                                           outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
+//                     for (std::size_t batch = 0; batch < dims[3]; ++batch) {
+//                         output[oIndex + batch] += weights[wIndex] * input[iIndex + batch];
+//                     }
+//                 }
+//             }
+//         }
+//     }
+// }
+
+// template <class I, class W, class B, class O>
+// void FCImpl_ref_cpp_forward_kernel(const FC_Op::Parameters& params, const std::array<DimSize_t, 2>& dims,
+//                                    const void* input_, const void* weights_, const void* biases_, void* output_) {
+//     // FIXME: missing FC parameters as arguments
+//     const I* input = static_cast<const I*>(input_);
+//     const W* weights = static_cast<const W*>(weights_);
+//     const B* biases = static_cast<const B*>(biases_);
+//     O* output = static_cast<O*>(output_);
+
+//     // let's have I.dims() = [N, C, H, W] instead of [H, W, C, N]
+
+//     for (std::size_t outIdx = 0; outIdx < std::get<0>(params); ++outIdx) {
+//         std::size_t oIndex = outIdx * dims[0];
+//         const B bias = std::get<1>(params) ? B(0) : biases[outIdx];
+//         for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+//             output[oIndex + batch] = bias;
+//         }
+//     }
+
+//     for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+//         const std::size_t oIndex = dims[1] * batch;
+//         for (std::size_t i = 0; i < dims[1]; ++i) {
+//             for (std::size_t outCh = 0; outCh < std::get<0>(params); ++outCh) {
+//                 std::size_t wIndex = i * std::get<0>(params) + outCh;  // (iIndex*std::get<0>(params) + oIndex)/dims[3];
+//                 output[oIndex + outCh] += weights[wIndex] * input[i + batch];
+//             }
+//         }
+//     }
+// }
+
+/**
+ * @brief Forward kernel for the fully-connected operator on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param params Operator parameters; <0> is read as the output count, <1> as the "no bias" flag.
+ * @param batchSize Number of input samples.
+ * @param oneInputSize Number of elements in one input sample.
+ * @param input_ Input values, batchSize rows of oneInputSize elements.
+ * @param weights_ Weight values, one row of oneInputSize elements per output.
+ * @param biases_ One bias value per output; not read when the "no bias" flag is set.
+ * @param output_ Output values, batchSize rows of <0> elements.
+ */
+template <class I, class W, class B, class O>
+void FCImpl_ref_cpp_forward_kernel(const FC_Op::Parameters& params, const DimSize_t batchSize, const DimSize_t oneInputSize,
+                                   const void* input_, const void* weights_, const void* biases_, void* output_) {
+    // FIXME: missing FC parameters as arguments
+    const I* input = static_cast<const I*>(input_);
+    const W* weights = static_cast<const W*>(weights_);
+    const B* biases = static_cast<const B*>(biases_);
+    O* output = static_cast<O*>(output_);
+
+    if (std::get<1>(params)) {
+        std::fill(output, output+(batchSize*std::get<0>(params)), B(0));
+    }
+    else {
+        for (std::size_t batch = 0; batch < batchSize; ++batch) {
+            std::copy(biases, biases+std::get<0>(params), output+(batch*std::get<0>(params)));
+        }
+    }
+
+    for (std::size_t batch = 0; batch < batchSize; ++batch) {
+        for (std::size_t out = 0; out < std::get<0>(params); ++out) {
+            output[out + batch*std::get<0>(params)] = std::inner_product(input + batch*oneInputSize,
+                                                        input + (batch + 1)*oneInputSize,
+                                                        weights + out*oneInputSize,
+                                                        output[out + batch*std::get<0>(params)]);
+        }
+    }
+}
+
+
+namespace {
+static Registrar<FCImplForward_ref_cpp> registrarFCImpl2DForward_ref_cpp_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::FCImpl_ref_cpp_forward_kernel<float, float, float, float>);
+static Registrar<FCImplForward_ref_cpp> registrarFCImpl2DForward_ref_cpp_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::FCImpl_ref_cpp_forward_kernel<int, int, int, int>);
+static Registrar<FCImplForward_ref_cpp> registrarFCImpl2DForward_ref_cpp_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::FCImpl_ref_cpp_forward_kernel<double, double, double, double>);
+}  // namespace
+
+}  // namespace Aidge
+
+#endif /* FCImpl_ref_cpp_forward_kernel_H_ */
diff --git a/aidge/_CPU/include/operator/ProducerImpl.hpp b/aidge/_CPU/include/operator/ProducerImpl.hpp
new file mode 100644
index 00000000..71cef1d4
--- /dev/null
+++ b/aidge/_CPU/include/operator/ProducerImpl.hpp
@@ -0,0 +1,46 @@
+#ifndef ProducerImpl_ref_cpp_H_
+#define ProducerImpl_ref_cpp_H_
+
+#include "utils/Types.h"
+
+#include "backend/OperatorImpl.hpp"
+#include "operator/Producer.hpp"
+#include "utils/Registrar.hpp"
+#include <memory>
+#include <vector>
+
+namespace Aidge {
+template <DimIdx_t DIM>
+class ProducerImpl_ref_cpp : public OperatorImpl {
+   private:
+    const Producer_Op<DIM> &mOp;
+
+   public:
+    ProducerImpl_ref_cpp(const Producer_Op<DIM> &op) : mOp(op) {}
+
+    static std::unique_ptr<ProducerImpl_ref_cpp> create(const Producer_Op<DIM> &op) {
+        return std::make_unique<ProducerImpl_ref_cpp>(op);
+    }
+
+   public:
+    NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
+    NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t> &inputsSize) const override final;
+    NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
+
+    void forward();
+
+    void backward();
+};
+
+namespace {
+static Registrar<Producer_Op<1>> registrarProducer1DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<1>::create);
+static Registrar<Producer_Op<2>> registrarProducer2DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<2>::create);
+static Registrar<Producer_Op<3>> registrarProducer3DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<3>::create);
+static Registrar<Producer_Op<4>> registrarProducer4DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<4>::create);
+static Registrar<Producer_Op<5>> registrarProducer5DImpl_ref_cpp("cpu", Aidge::ProducerImpl_ref_cpp<5>::create);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* ProducerImpl_ref_cpp_H_ */
\ No newline at end of file
diff --git a/aidge/_CPU/include/operator/ReLUImpl.hpp b/aidge/_CPU/include/operator/ReLUImpl.hpp
new file mode 100644
index 00000000..f3001626
--- /dev/null
+++ b/aidge/_CPU/include/operator/ReLUImpl.hpp
@@ -0,0 +1,53 @@
+#ifndef ReLUImpl_ref_cpp_H_
+#define ReLUImpl_ref_cpp_H_
+
+#include "backend/OperatorImpl.hpp"
+#include "operator/ReLU.hpp"
+#include "utils/Registrar.hpp"
+#include <array>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+namespace Aidge {
+// class ReLU_Op;
+
+// compute kernel registry for forward and backward
+class ReLUImplForward_ref_cpp
+    : public Registrable<std::tuple<DataType, DataType>, void(const ReLU_Op::Parameters&, std::size_t, const void*, void*)> {
+};
+class ReLUImplBackward_ref_cpp
+    : public Registrable<std::tuple<DataType, DataType>, void(const ReLU_Op::Parameters&, std::size_t, const void*, void*)> {
+};
+
+class ReLUImpl_ref_cpp : public OperatorImpl {
+   private:
+    const ReLU_Op& mOp;
+    std::array<NbElts_t, 1> mNbConsumedData;
+    std::array<NbElts_t, 1> mNbProducedData;
+
+   public:
+    ReLUImpl_ref_cpp(const ReLU_Op& op) : mOp(op), mNbConsumedData({0}), mNbProducedData({0}) {}
+
+    static std::unique_ptr<ReLUImpl_ref_cpp> create(const ReLU_Op& op) {
+        return std::make_unique<ReLUImpl_ref_cpp>(op);
+    }
+
+   public:
+    NbElts_t getNbRequiredData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbRequiredProtected(IOIndex_t inputIdx) const override final;
+    NbElts_t getRequiredMemory(IOIndex_t outputIdx, const std::vector<DimSize_t>& inputsSize) const override final;
+    NbElts_t getNbConsumedData(IOIndex_t inputIdx) const override final;
+    NbElts_t getNbProducedData(IOIndex_t outputIdx) const override final;
+
+    void forward();
+
+    void backward();
+};
+
+namespace {
+static Registrar<ReLU_Op> registrarReLUImpl_ref_cpp("cpu", Aidge::ReLUImpl_ref_cpp::create);
+}
+}  // namespace Aidge
+
+#endif /* ReLUImpl_ref_cpp_H_ */
\ No newline at end of file
diff --git a/aidge/_CPU/include/operator/ReLUImpl_kernels.hpp b/aidge/_CPU/include/operator/ReLUImpl_kernels.hpp
new file mode 100644
index 00000000..11be9714
--- /dev/null
+++ b/aidge/_CPU/include/operator/ReLUImpl_kernels.hpp
@@ -0,0 +1,42 @@
+#ifndef ReLUImpl_ref_cpp_forward_kernel_H_
+#define ReLUImpl_ref_cpp_forward_kernel_H_
+
+#include "utils/Registrar.hpp"
+
+#include "operator/ReLUImpl.hpp"
+
+namespace Aidge {
+/**
+ * @brief Forward kernel for the ReLU operator on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param params Operator parameters; <0> is converted to I and multiplies negative inputs.
+ * @param inputLength Number of elements to process.
+ * @param input_ Input values.
+ * @param output_ Output values, one per input element.
+ */
+template <class I, class O>
+void ReLUImpl_ref_cpp_forward_kernel(const ReLU_Op::Parameters& params,
+                                     std::size_t inputLength,
+                                     const void* input_,
+                                     void* output_) {
+    // FIXME: missing ReLU parameters as arguments
+    const I* input = static_cast<const I*>(input_);
+    O* output = static_cast<O*>(output_);
+
+    for (std::size_t i = 0; i < inputLength; ++i) {
+        output[i] = input[i] >= 0 ? input[i] : input[i] * static_cast<I>(std::get<0>(params));
+    }
+}
+
+namespace {
+static Registrar<ReLUImplForward_ref_cpp> registrarReLUImplForward_ref_cpp_Float32(
+        {DataType::Float32, DataType::Float32}, Aidge::ReLUImpl_ref_cpp_forward_kernel<float, float>);
+static Registrar<ReLUImplForward_ref_cpp> registrarReLUImplForward_ref_cpp_Int32(
+        {DataType::Int32, DataType::Int32}, Aidge::ReLUImpl_ref_cpp_forward_kernel<int, int>);
+static Registrar<ReLUImplForward_ref_cpp> registrarReLUImplForward_ref_cpp_Float64(
+        {DataType::Float64, DataType::Float64}, Aidge::ReLUImpl_ref_cpp_forward_kernel<double, double>);
+}  // namespace
+}  // namespace Aidge
+
+#endif /* ReLUImpl_ref_cpp_forward_kernel_H_ */
diff --git a/aidge/_CPU/src/operator/ConvImpl.cpp b/aidge/_CPU/src/operator/ConvImpl.cpp
new file mode 100644
index 00000000..eb90a3af
--- /dev/null
+++ b/aidge/_CPU/src/operator/ConvImpl.cpp
@@ -0,0 +1,77 @@
+
+#include "operator/ConvImpl.hpp"
+
+#include <cassert>
+#include <chrono>
+#include <cstdio>
+#include <functional>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include "operator/ConvImpl_kernels.hpp"
+#include "operator/Conv.hpp"
+#include "utils/Types.h"
+
+Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbRequiredData(Aidge::IOIndex_t inputIdx) const {
+    assert(mOp.getInput(inputIdx) && "requires valid input");
+
+    // Requires the whole tensors
+    const auto &inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
+
+    return std::accumulate(inputDims.begin(), inputDims.end(), Aidge::NbElts_t(1), std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // for the direct convolution algorithm, convolutions can be in-place, if
+    // there is no padding!
+    return 0;
+}
+
+Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getRequiredMemory(Aidge::IOIndex_t outputIdx,
+                                                         const std::vector<Aidge::DimSize_t> & /*inputsSize*/) const {
+    // Requires the whole tensors, regardless of available data on inputs
+    assert(outputIdx == 0 && "operator has only one output");
+
+    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
+    return std::accumulate(outputDims.begin(), outputDims.end(), NbElts_t(1), std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbConsumedData(Aidge::IOIndex_t inputIdx) const {
+    assert(static_cast<std::size_t>(inputIdx) < mNbConsumedData.size());
+    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
+}
+
+Aidge::NbElts_t Aidge::ConvImpl2D_ref_cpp::getNbProducedData(Aidge::IOIndex_t outputIdx) const {
+    assert((outputIdx == 0) && (static_cast<std::size_t>(outputIdx) < mNbProducedData.size()));
+    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
+}
+
+void Aidge::ConvImpl2D_ref_cpp::forward() {
+    // FIXME: uncomment the following code once memory handling will work
+    assert(mOp.mInputs[0] && "missing input #0");
+    assert(mOp.mInputs[1] && "missing input #1");
+    assert(mOp.mInputs[2] && "missing input #2");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+            Registrar<ConvImpl2DForward_ref_cpp>::create({mOp.mInputs[0]->dataType(), mOp.mInputs[1]->dataType(),
+                                                          mOp.mInputs[2]->dataType(), mOp.mOutput->dataType()});
+
+    // Call kernel
+    kernelFunc(mOp.getParams(), std::static_pointer_cast<Tensor>(mOp.mInputs[0])->dims<4>(),
+               mOp.mInputs[0]->getImpl()->rawPtr(), mOp.mInputs[1]->getImpl()->rawPtr(),
+               mOp.mInputs[2]->getImpl()->rawPtr(), mOp.mOutput->getImpl()->rawPtr());
+
+    // FIXME: Dummy wait for some earlier scheduler tests
+    std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<ConvParam::OutChannels>()));
+
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx] += getNbRequiredData(static_cast<IOIndex_t>(inputIdx));  // each input is consumed by the minimum
+                                                                                           // amount for a forward pass
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
+
+void Aidge::ConvImpl2D_ref_cpp::backward() { std::printf("Not implemented yet.\n"); }
diff --git a/aidge/_CPU/src/operator/FCImpl.cpp b/aidge/_CPU/src/operator/FCImpl.cpp
new file mode 100644
index 00000000..63b530df
--- /dev/null
+++ b/aidge/_CPU/src/operator/FCImpl.cpp
@@ -0,0 +1,116 @@
+
+#include <cassert>
+#include <chrono>
+#include <cstdio>
+#include <functional>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include "operator/FC.hpp"
+
+#include "operator/FCImpl.hpp"
+#include "operator/FCImpl_kernels.hpp"
+#include "utils/Types.h"
+
+Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getNbRequiredData(Aidge::IOIndex_t inputIdx) const
+{
+    assert(mOp.getInput(inputIdx) && "requires valid input");
+
+    // Requires the whole tensors
+    const auto &inputDims
+        = std::static_pointer_cast<Tensor>(mOp.getInput(inputIdx))->dims();
+
+    return std::accumulate(
+        inputDims.begin(),
+        inputDims.end(),
+        Aidge::NbElts_t(1),
+        std::multiplies<Aidge::NbElts_t>());
+}
+
+Aidge::NbElts_t
+    Aidge::FCImpl_ref_cpp::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const
+{
+    // this implementation reports no protected input data
+    // (the kernel reads its inputs and writes the operator's output tensor)
+    return 0;
+}
+
+Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getRequiredMemory(
+    IOIndex_t outputIdx, const std::vector<DimSize_t> & /*inputsSize*/) const
+{
+    // Requires the whole tensors, regardless of available data on inputs
+    assert(outputIdx == 0 && "operator has only one output");
+
+    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
+    return std::accumulate(
+        outputDims.begin(),
+        outputDims.end(),
+        static_cast<NbElts_t>(1),
+        std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getNbConsumedData(Aidge::IOIndex_t inputIdx) const
+{
+    assert((inputIdx >= 0) && (static_cast<IONb_t>(inputIdx) < mNbConsumedData.size()));
+    return mNbConsumedData[static_cast<std::size_t>(inputIdx)];
+}
+
+Aidge::NbElts_t Aidge::FCImpl_ref_cpp::getNbProducedData(Aidge::IOIndex_t outputIdx) const
+{
+    assert(static_cast<std::size_t>(outputIdx) < mNbProducedData.size());
+    return mNbProducedData[static_cast<std::size_t>(outputIdx)];
+}
+
+void Aidge::FCImpl_ref_cpp::forward()
+{
+    // FIXME: uncomment the following code once memory handling will work
+    assert(mOp.mInputs[0] && "missing input #0");
+    assert(mOp.mInputs[1] && "missing input #1");
+    assert(mOp.mInputs[2] && "missing input #2");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<FCImplForward_ref_cpp>::create(
+        {mOp.mInputs[0]->dataType(),
+         mOp.mInputs[1]->dataType(),
+         mOp.mInputs[2]->dataType(),
+         mOp.mOutput->dataType()});
+
+    // Call kernel
+    // if (mOp.mInputs[0]->nbDims() == 4) {
+    //     kernelFunc(
+    //         mOp.getParams(),
+    //         std::static_pointer_cast<Tensor>(mOp.mInputs[0])->dims<4>(),
+    //         mOp.mInputs[0]->getImpl()->rawPtr(),
+    //         mOp.mInputs[1]->getImpl()->rawPtr(),
+    //         mOp.mInputs[2]->getImpl()->rawPtr(),
+    //         mOp.mOutput->getImpl()->rawPtr());
+    // }
+    // else
+    kernelFunc(
+        mOp.getParams(),
+        mOp.mInputs[0]->dims()[0],
+        mOp.mInputs[0]->sizeM1(),
+        mOp.mInputs[0]->getImpl()->rawPtr(),
+        mOp.mInputs[1]->getImpl()->rawPtr(),
+        mOp.mInputs[2]->getImpl()->rawPtr(),
+        mOp.mOutput->getImpl()->rawPtr());
+
+
+
+    // FIXME: Dummy wait for some earlier scheduler tests
+    std::this_thread::sleep_for(std::chrono::milliseconds(mOp.get<FCParam::OutChannels>()));
+
+    // Update producer-consumer data
+    for (std::size_t inputIdx = 0; inputIdx < mNbConsumedData.size(); ++inputIdx)
+        mNbConsumedData[inputIdx]
+            += getNbRequiredData(static_cast<IOIndex_t>(inputIdx)); // each input is consumed by the minimum
+                                              // amount for a forward pass
+
+    mNbProducedData[0] += getRequiredMemory(0, {});
+}
+
+void Aidge::FCImpl_ref_cpp::backward()
+{
+    std::printf("Not implemented yet.\n");
+}
diff --git a/aidge/_CPU/src/operator/ProducerImpl.cpp b/aidge/_CPU/src/operator/ProducerImpl.cpp
new file mode 100644
index 00000000..5db6ae51
--- /dev/null
+++ b/aidge/_CPU/src/operator/ProducerImpl.cpp
@@ -0,0 +1,72 @@
+
+#include <cassert>
+#include <cstdio>
+#include <functional>
+#include <numeric>
+#include <vector>
+
+#include "data/Tensor.hpp"
+#include "operator/Producer.hpp"
+#include "utils/Types.h"
+
+#include "operator/ProducerImpl.hpp"
+
+template<Aidge::DimIdx_t DIM>
+Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbRequiredData(
+    Aidge::IOIndex_t /*inputIdx*/) const
+{
+    return 0;
+}
+
+template<Aidge::DimIdx_t DIM>
+Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbConsumedData(
+    Aidge::IOIndex_t /*inputIdx*/) const
+{
+    return 0;
+}
+
+template<Aidge::DimIdx_t DIM>
+Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbRequiredProtected(
+    Aidge::IOIndex_t /*inputIdx*/) const
+{
+    return 0;
+}
+
+template<Aidge::DimIdx_t DIM>
+Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getRequiredMemory(
+    IOIndex_t outputIdx, const std::vector<DimSize_t> & /*inputsSize*/) const
+{
+    // Requires the whole tensors, regardless of available data on inputs
+    assert(outputIdx == 0 && "operator has only one output");
+
+    const auto &outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
+    return std::accumulate(
+        outputDims.begin(),
+        outputDims.end(),
+        NbElts_t(1),
+        std::multiplies<NbElts_t>());
+}
+
+template<Aidge::DimIdx_t DIM>
+Aidge::NbElts_t Aidge::ProducerImpl_ref_cpp<DIM>::getNbProducedData(
+    Aidge::IOIndex_t /*outputIdx*/) const
+{
+    return getRequiredMemory(0, {});
+}
+
+template<Aidge::DimIdx_t DIM> void Aidge::ProducerImpl_ref_cpp<DIM>::forward()
+{
+}
+
+template<Aidge::DimIdx_t DIM> void Aidge::ProducerImpl_ref_cpp<DIM>::backward()
+{
+    std::printf("Not implemented yet.\n");
+}
+
+// The member definitions above are only visible in this file, so instantiate
+// every dimension that ProducerImpl.hpp registers for the "cpu" backend.
+template class Aidge::ProducerImpl_ref_cpp<1>;
+template class Aidge::ProducerImpl_ref_cpp<2>;
+template class Aidge::ProducerImpl_ref_cpp<3>;
+template class Aidge::ProducerImpl_ref_cpp<4>;
+template class Aidge::ProducerImpl_ref_cpp<5>;
diff --git a/aidge/_CPU/src/operator/ReLUImpl.cpp b/aidge/_CPU/src/operator/ReLUImpl.cpp
new file mode 100644
index 00000000..a345ee92
--- /dev/null
+++ b/aidge/_CPU/src/operator/ReLUImpl.cpp
@@ -0,0 +1,69 @@
+
+#include <cassert>
+#include <chrono>
+#include <cstdio>
+#include <functional>
+#include <numeric>
+#include <thread>
+#include <vector>
+
+#include "operator/ReLU.hpp"
+
+#include "operator/ReLUImpl.hpp"
+#include "operator/ReLUImpl_kernels.hpp"
+#include "utils/Types.h"
+
+// FIXME: replace whole Tensor with minimum needed data quantity
+Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbRequiredData(Aidge::IOIndex_t /*inputIdx*/) const {
+    assert(mOp.getInput(0) && "requires valid input");
+
+    // Requires the whole tensors
+    const auto& inputDims = std::static_pointer_cast<Tensor>(mOp.getInput(0))->dims();
+
+    return std::accumulate(inputDims.begin(), inputDims.end(),
+                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbRequiredProtected(Aidge::IOIndex_t /*inputIdx*/) const {
+    // this implementation reports no protected input data
+    return 0;
+}
+
+Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getRequiredMemory(Aidge::IOIndex_t /*outputIdx*/, const std::vector<Aidge::DimSize_t>& /*inputsSize*/) const {
+    const auto& outputDims = std::static_pointer_cast<Tensor>(mOp.getOutput(0))->dims();
+    return std::accumulate(outputDims.begin(), outputDims.end(),
+                        static_cast<NbElts_t>(1), std::multiplies<NbElts_t>());
+}
+
+Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbConsumedData(Aidge::IOIndex_t /*inputIdx*/) const {
+    return mNbConsumedData[0];
+}
+
+Aidge::NbElts_t Aidge::ReLUImpl_ref_cpp::getNbProducedData(Aidge::IOIndex_t /*outputIdx*/) const {
+    return mNbProducedData[0];
+}
+
+void Aidge::ReLUImpl_ref_cpp::forward() {
+    // FIXME: uncomment the following code once memory handling will work
+    assert(mOp.mInputs[0] && "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc = Registrar<ReLUImplForward_ref_cpp>::create({
+        mOp.mInputs[0]->dataType(),
+        mOp.mOutput->dataType()});
+
+    // Call kernel
+    kernelFunc(mOp.getParams(),
+        std::static_pointer_cast<Tensor>(mOp.getInput(0))->size(),
+        mOp.mInputs[0]->getImpl()->rawPtr(),
+        mOp.mOutput->getImpl()->rawPtr());
+
+
+    mNbConsumedData[0]+= getNbRequiredData(0); // each input is consumed by the minimum amount for a forward pass
+
+    mNbProducedData[0]+= getRequiredMemory(0, {});
+}
+
+void Aidge::ReLUImpl_ref_cpp::backward() {
+    std::printf("Not implemented yet.\n");
+}
-- 
GitLab