diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
index a61a7299ed6bd5c5a3e41c09e9d5b5f1f7ae3326..aa3b10970b1cd9beeead4353ff0c2b3d65fd9a83 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp
@@ -25,6 +25,29 @@
 namespace Aidge {
 // class ConvDepthWise_Op;
 
+// compute kernel registry for forward and backward
+class ConvDepthWiseImpl1DForward_cpu
+    : public Registrable<ConvDepthWiseImpl1DForward_cpu,
+                         std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const ConvDepthWise_Op<1>::Attrs &, const std::array<DimSize_t, 3> &, const void *,
+                              const void *, const void *, void *)> {};
+
+class ConvDepthWiseImpl1D_cpu : public OperatorImpl {
+public:
+    ConvDepthWiseImpl1D_cpu(const ConvDepthWise_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ConvDepthWiseImpl1D_cpu> create(const ConvDepthWise_Op<1> &op) {
+        return std::make_unique<ConvDepthWiseImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to ConvDepthWise_Op<1> implementation registry
+static Registrar<ConvDepthWise_Op<1>> registrarConvDepthWiseImpl1D_cpu("cpu", Aidge::ConvDepthWiseImpl1D_cpu::create);
+} // namespace
 
 // compute kernel registry for forward and backward
 class ConvDepthWiseImpl2DForward_cpu
diff --git a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
index 9537e34a85aaf70597d04e0065807d4d69dedd22..db44ffe4313e6a6e03ecd279dc0262fece00b567 100644
--- a/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp
@@ -23,6 +23,76 @@
 #include "aidge/utils/Types.h"
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D depthwise convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param attrs Tuple of attributes from the operator.
+ * @param inputDims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvDepthWiseImpl1D_cpu_forward_kernel(const ConvDepthWise_Op<1>::Attrs &attrs, const std::array<DimSize_t, 3> &inputDims,
+                                            const void *input_, const void *weights_, const void *biases_, void *output_) {
+    // attrs: stride (std::get<0>), kernel dims (std::get<2>); dilation (std::get<1>) is not used
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+
+    // output X size
+    const std::size_t oxSize =
+        static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
+                                            static_cast<float>(std::get<0>(attrs)[0])));
+
+    // Kernel computation
+    // output (batch, ch, Xout)
+    // input (batch, ch, Xin)
+    // weight (ch, kernelX)
+    // does not take the Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < inputDims[1]; ++ch) {
+            const std::size_t oIndex = (ch + batch*inputDims[1]) * oxSize;
+            B biasVal = (biases != nullptr) ? biases[ch] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            const std::size_t iIndex = (ch + batch*inputDims[1]) * inputDims[2];
+            const std::size_t wIndex = ch * std::get<2>(attrs)[0];
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
+                const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx);
+                const std::size_t oIndexFull = oIndex + ox;
+                const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
+
+                for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                    output[oIndexFull] += weights[wIndex + sx] *
+                                          input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<int, int, int, int>);
+static Registrar<ConvDepthWiseImpl1DForward_cpu> registrarConvDepthWiseImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvDepthWiseImpl1D_cpu_forward_kernel<double, double, double, double>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D ConvDepthWiseolution on CPU backend.
  * @tparam I Input data type.
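Reviewer note (not part of the patch): the depthwise kernel above applies one length-`kernelX` filter per channel, walking flattened buffers through the `oIndex`/`iIndex`/`wIndex` offsets. Below is a minimal standalone sketch of that indexing with made-up sizes and values (stride 1, no padding, no dilation); it does not use Aidge at all.

```cpp
// Standalone illustration of the depthwise 1D indexing scheme (hypothetical data).
#include <array>
#include <cstddef>
#include <iostream>

int main() {
    constexpr std::size_t batch = 1, channels = 2, inLen = 5;
    constexpr std::size_t kernelLen = 3, stride = 1;
    // Unpadded output length: (inLen - kernelLen) / stride + 1
    constexpr std::size_t outLen = (inLen - kernelLen) / stride + 1;

    // input (batch, ch, Xin); weights (ch, kernelX): one filter per channel
    const std::array<float, channels * inLen> input{1, 2, 3, 4, 5,  5, 4, 3, 2, 1};
    const std::array<float, channels * kernelLen> weights{1, 0, -1,  0.5f, 0.5f, 0.5f};
    const std::array<float, channels> biases{0.0f, 1.0f};
    std::array<float, batch * channels * outLen> output{};

    for (std::size_t b = 0; b < batch; ++b) {
        for (std::size_t ch = 0; ch < channels; ++ch) {
            // Flattened offsets, as in the kernel above
            const std::size_t oIndex = (ch + b * channels) * outLen;
            const std::size_t iIndex = (ch + b * channels) * inLen;
            const std::size_t wIndex = ch * kernelLen;
            for (std::size_t ox = 0; ox < outLen; ++ox) {
                float acc = biases[ch];  // start from the bias, like the std::fill above
                for (std::size_t sx = 0; sx < kernelLen; ++sx)
                    acc += weights[wIndex + sx] * input[iIndex + ox * stride + sx];
                output[oIndex + ox] = acc;
            }
        }
    }

    for (float v : output) std::cout << v << ' ';  // prints: -2 -2 -2 7 5.5 4
    std::cout << '\n';
    return 0;
}
```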
diff --git a/include/aidge/backend/cpu/operator/ConvImpl.hpp b/include/aidge/backend/cpu/operator/ConvImpl.hpp
index 12af5860316ba0bc9f6c3eafc551037f531da6d7..1d85b31fbdbb6ac9a61ddba08d3f2c3df8ca91e3 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl.hpp
@@ -27,6 +27,32 @@ namespace Aidge {
 // class Conv_Op;
 
 // compute kernel registry for forward and backward
+// Conv 1D
+class ConvImpl1DForward_cpu
+    : public Registrable<ConvImpl1DForward_cpu,
+                         std::tuple<DataType, DataType, DataType, DataType>,
+                         void(const Conv_Op<1>::Attrs &, const std::array<DimSize_t, 3> &, DimSize_t, const void *,
+                              const void *, const void *, void *)> {};
+
+class ConvImpl1D_cpu : public OperatorImpl {
+ public:
+    ConvImpl1D_cpu(const Conv_Op<1>& op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<ConvImpl1D_cpu> create(const Conv_Op<1> &op) {
+        return std::make_unique<ConvImpl1D_cpu>(op);
+    }
+
+ public:
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Conv_Op<1> implementation registry
+static Registrar<Conv_Op<1>> registrarConvImpl1D_cpu("cpu", Aidge::ConvImpl1D_cpu::create);
+} // namespace
+
+// Conv 2D
 class ConvImpl2DForward_cpu
     : public Registrable<ConvImpl2DForward_cpu,
                          std::tuple<DataType, DataType, DataType, DataType>,
diff --git a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
index c2e5e4ca345bf145b0d02f1643dd48139f563c66..718fc879fcc1124260901cdd06b059da6e8c7395 100644
--- a/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/ConvImpl_forward_kernels.hpp
@@ -23,6 +23,82 @@
 #include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Convolution on CPU backend.
+ * @tparam I Input data type.
+ * @tparam W Weight data type.
+ * @tparam B Bias data type.
+ * @tparam O Output data type.
+ * @param attrs Tuple of attributes from the operator.
+ * @param inputDims Array of input dimensions.
+ * @param outChannels Number of output channels.
+ * @param input_ const input Tensor.
+ * @param weights_ const weight Tensor.
+ * @param biases_ const bias Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class W, class B, class O>
+void ConvImpl1D_cpu_forward_kernel(const Conv_Op<1>::Attrs &attrs, const std::array<DimSize_t, 3> &inputDims, DimSize_t outChannels,
+                                   const void *input_, const void *weights_, const void *biases_, void *output_) {
+    // attrs: stride (std::get<0>), kernel dims (std::get<2>); dilation (std::get<1>) is not used
+    const I *input = static_cast<const I *>(input_);
+    const W *weights = static_cast<const W *>(weights_);
+    const B *biases = static_cast<const B *>(biases_);
+    O *output = static_cast<O *>(output_);
+
+    // output X size
+    const std::size_t oxSize =
+        static_cast<std::size_t>(std::floor(static_cast<float>(inputDims[2] - std::get<2>(attrs)[0] + std::get<0>(attrs)[0]) /
+                                            static_cast<float>(std::get<0>(attrs)[0])));
+
+    // Kernel computation
+    // output (batch, outCh, Xout)
+    // input (batch, inCh, Xin)
+    // weight (outCh, inCh, kernelX)
+    // does not take the Dilation attribute into account
+    using signedsize = std::make_signed<std::size_t>::type;
+    for (std::size_t batch = 0; batch < inputDims[0]; ++batch) {
+        for (std::size_t outCh = 0; outCh < outChannels; ++outCh) {
+            const std::size_t oIndex = (outCh + batch*outChannels) * oxSize;
+            // If biases is nullptr, use a zero bias
+            B biasVal = (biases != nullptr) ? biases[outCh] : B(0);
+            std::fill(output + oIndex, output+(oIndex+oxSize), biasVal);
+            for (std::size_t inCh = 0; inCh < inputDims[1]; ++inCh) {
+                const std::size_t iIndex = (inCh + batch*inputDims[1]) * inputDims[2];
+                const std::size_t wIndex = (inCh + outCh*inputDims[1]) * std::get<2>(attrs)[0];
+                for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                    const signedsize difx = static_cast<signedsize>(- ox * std::get<0>(attrs)[0]);
+                    const std::size_t sxMin = static_cast<std::size_t>(std::max(difx, signedsize(0)));
+                    const std::size_t sxMax = (static_cast<signedsize>(inputDims[2]) + difx) < 0 ? 0 : ((inputDims[2] + difx) > std::get<2>(attrs)[0] ? std::get<2>(attrs)[0] : inputDims[2] + difx);
+                    const std::size_t oIndexFull = oIndex + ox;
+                    const signedsize ix = static_cast<signedsize>(ox * std::get<0>(attrs)[0]);
+
+                    for (std::size_t sx = sxMin; sx < sxMax; ++sx) {
+                        output[oIndexFull] += weights[wIndex + sx] *
+                                              input[iIndex + static_cast<std::size_t>(ix+static_cast<signedsize>(sx))];
+                    }
+                }
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32, DataType::Float32, DataType::Float32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<float, float, float, float>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float16(
+        {DataType::Float16, DataType::Float16, DataType::Float16, DataType::Float16},
+        Aidge::ConvImpl1D_cpu_forward_kernel<half_float::half, half_float::half, half_float::half, half_float::half>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32, DataType::Int32, DataType::Int32},
+        Aidge::ConvImpl1D_cpu_forward_kernel<int, int, int, int>);
+static Registrar<ConvImpl1DForward_cpu> registrarConvImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64, DataType::Float64, DataType::Float64},
+        Aidge::ConvImpl1D_cpu_forward_kernel<double, double, double, double>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D Convolution on CPU backend.
  * @tparam I Input data type.
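Reviewer note (not part of the patch): both 1D convolution kernels compute `oxSize` as `floor((Xin - kernelX + stride) / stride)`. A quick standalone check, on a few made-up sizes, that this equals the textbook `(Xin - kernelX) / stride + 1` for an unpadded, undilated convolution:

```cpp
// Sanity check of the output-length formula used by the 1D kernels (illustrative only).
#include <cmath>
#include <cstddef>
#include <iostream>

// Same expression as the oxSize computation in the kernels above
std::size_t oxSize(std::size_t inLen, std::size_t kernelLen, std::size_t stride) {
    return static_cast<std::size_t>(
        std::floor(static_cast<float>(inLen - kernelLen + stride) / static_cast<float>(stride)));
}

int main() {
    for (std::size_t inLen : {5u, 7u, 10u})
        for (std::size_t kernelLen : {1u, 3u})
            for (std::size_t stride : {1u, 2u, 3u}) {
                // Textbook output length of an unpadded, undilated 1D convolution
                const std::size_t expected = (inLen - kernelLen) / stride + 1;
                std::cout << "in=" << inLen << " k=" << kernelLen << " s=" << stride
                          << " -> " << oxSize(inLen, kernelLen, stride)
                          << " (expected " << expected << ")\n";
            }
    return 0;
}
```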
diff --git a/include/aidge/backend/cpu/operator/PadImpl.hpp b/include/aidge/backend/cpu/operator/PadImpl.hpp
index b3c91a43419e9a5e9e1299f4a2118a51b6b64fc7..72d60fc16f6d730a5cbd4941da03bfbcf72ff85b 100644
--- a/include/aidge/backend/cpu/operator/PadImpl.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl.hpp
@@ -25,6 +25,30 @@
 namespace Aidge {
 // class Pad_Op;
 
+// compute kernel registry for forward and backward
+class PadImpl1DForward_cpu
+    : public Registrable<PadImpl1DForward_cpu,
+                         std::tuple<DataType, DataType>,
+                         void(const Pad_Op<1>::Attrs &, const std::array<DimSize_t, 3> &, const void *,
+                              void *)> {};
+
+class PadImpl1D_cpu : public OperatorImpl {
+public:
+    PadImpl1D_cpu(const Pad_Op<1> &op) : OperatorImpl(op, "cpu") {}
+
+    static std::unique_ptr<PadImpl1D_cpu> create(const Pad_Op<1> &op) {
+        return std::make_unique<PadImpl1D_cpu>(op);
+    }
+
+    Elts_t getNbRequiredProtected(const IOIndex_t inputIdx) const override final;
+    void forward() override;
+};
+
+namespace {
+// add cpu backend to Pad_Op<1> implementation registry
+static Registrar<Pad_Op<1>> registrarPadImpl1D_cpu("cpu", Aidge::PadImpl1D_cpu::create);
+} // namespace
+
 // compute kernel registry for forward and backward
 class PadImpl2DForward_cpu
diff --git a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
index f6f00bc4df661921708e605f44056a77bb8125f4..c9f6b708d1aaeed71d0836fa3b6feb08c1093559 100644
--- a/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
+++ b/include/aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp
@@ -22,6 +22,81 @@
 #include <algorithm>
 
 namespace Aidge {
+/**
+ * @brief Forward kernel for 1D Padding on CPU backend.
+ * @tparam I Input data type.
+ * @tparam O Output data type.
+ * @param attrs Tuple of attributes from the operator.
+ * @param dims Array of input dimensions.
+ * @param input_ const input Tensor.
+ * @param output_ Output Tensor.
+ */
+template <class I, class O>
+void PadImpl1D_cpu_forward_kernel(const Pad_Op<1>::Attrs &attrs, const std::array<DimSize_t, 3> &dims,
+                                  const void *input_, void *output_)
+{
+    const I *input = static_cast<const I *>(input_);
+    O *output = static_cast<O *>(output_);
+
+    const std::size_t oxSize = dims[2] + std::get<0>(attrs)[0] + std::get<0>(attrs)[1];
+
+    for (std::size_t batch = 0; batch < dims[0]; ++batch) {
+        for (std::size_t ch = 0; ch < dims[1]; ++ch) {
+            const std::size_t iIndex = (ch + batch*dims[1]) * dims[2];
+            const std::size_t oIndex = (ch + batch*dims[1]) * oxSize;
+
+            for (std::size_t ox = 0; ox < oxSize; ++ox) {
+                const std::size_t oIndexFull = oIndex + ox;
+
+                O outputValue = std::get<2>(attrs);
+
+                if (std::get<1>(attrs) == PadBorderType::Constant) {
+                    int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1]);
+
+                    if (ix >= 0 && ix < static_cast<int>(dims[2])) {
+                        outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                    }
+                }
+                else if (std::get<1>(attrs) == PadBorderType::Edge) {
+                    int ix = std::max(0, std::min(static_cast<int>(dims[2]) - 1, static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1])));
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+                else if (std::get<1>(attrs) == PadBorderType::Reflect) {
+                    int ix = static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1]);
+
+                    if (ix < 0)
+                        ix = 0 - ix;
+                    if (ix >= static_cast<int>(dims[2]))
+                        ix = 2 * static_cast<int>(dims[2]) - 2 - ix;
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+                else if (std::get<1>(attrs) == PadBorderType::Wrap) {
+                    int ix = (static_cast<int>(dims[2]) + static_cast<int>(ox) - static_cast<int>(std::get<0>(attrs)[1])) % static_cast<int>(dims[2]);
+
+                    outputValue = input[iIndex + static_cast<std::size_t>(ix)];
+                }
+
+                output[oIndexFull] = outputValue;
+            }
+        }
+    }
+}
+
+namespace {
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float32(
+        {DataType::Float32, DataType::Float32},
+        Aidge::PadImpl1D_cpu_forward_kernel<float, float>);
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Int32(
+        {DataType::Int32, DataType::Int32},
+        Aidge::PadImpl1D_cpu_forward_kernel<int, int>);
+static Registrar<PadImpl1DForward_cpu> registrarPadImpl1DForward_cpu_Float64(
+        {DataType::Float64, DataType::Float64},
+        Aidge::PadImpl1D_cpu_forward_kernel<double, double>);
+} // namespace
+
+
 /**
  * @brief Forward kernel for 2D Padding on CPU backend.
  * @tparam I Input data type.
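Reviewer note (not part of the patch): a standalone sketch of how the four `PadBorderType` branches resolve an output position `ox` to an input index, on a made-up 4-element line with a left padding of 2 and a right padding of 1. The Reflect lines mirror the corrected `2 * dims - 2 - ix` arithmetic used above.

```cpp
// Illustration of the four 1D padding border policies (hypothetical data).
#include <algorithm>
#include <array>
#include <iostream>

int main() {
    const std::array<int, 4> input{10, 20, 30, 40};
    const int dimX = static_cast<int>(input.size());
    const int padLeft = 2, padRight = 1;
    const int borderValue = 0;  // stands in for std::get<2>(attrs)

    for (int ox = 0; ox < dimX + padLeft + padRight; ++ox) {
        const int ix = ox - padLeft;  // position in the unpadded input

        const int constant = (ix >= 0 && ix < dimX) ? input[ix] : borderValue;
        const int edge = input[std::max(0, std::min(dimX - 1, ix))];

        int rx = ix;                              // Reflect: mirror around both ends
        if (rx < 0) rx = -rx;
        if (rx >= dimX) rx = 2 * dimX - 2 - rx;
        const int reflect = input[rx];

        const int wrap = input[(dimX + ix) % dimX];  // Wrap: modulo indexing

        std::cout << "ox=" << ox << "  Constant=" << constant << "  Edge=" << edge
                  << "  Reflect=" << reflect << "  Wrap=" << wrap << '\n';
    }
    return 0;
}
```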
diff --git a/src/operator/ConvDepthWiseImpl.cpp b/src/operator/ConvDepthWiseImpl.cpp
index 51677f05081f12dd48708f1537943e1799238cd7..9e0daa46d714a820902289c5c5a3929f6f80fe71 100644
--- a/src/operator/ConvDepthWiseImpl.cpp
+++ b/src/operator/ConvDepthWiseImpl.cpp
@@ -22,6 +22,58 @@
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl.hpp"
 #include "aidge/backend/cpu/operator/ConvDepthWiseImpl_forward_kernels.hpp"
 
+Aidge::Elts_t Aidge::ConvDepthWiseImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvDepthWiseImpl1D_cpu::forward() {
+    const auto& opTensor = static_cast<const OperatorTensor&>(mOp);
+
+    assert(mOp.getRawInput(0) && "missing input #0");
+    assert(mOp.getRawInput(1) && "missing input #1");
+    // input #2 (bias) is optional: a zero bias is used when it is absent
+
+    assert((std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->nbDims() == 3) && "support for 3-dimensional tensors only");
+
+    // Find the correct kernel type
+    const auto outputDataType = opTensor.getOutput(0)->dataType();
+    const Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_key registrarKey = {
+        opTensor.getInput(0)->dataType(),
+        opTensor.getInput(1)->dataType(),
+        ((opTensor.getInput(2)) ? opTensor.getInput(2)->dataType() : opTensor.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvDepthWiseImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvDepthWiseImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fall back to the kernel with all types matching the output type
+        kernelFunc = Registrar<ConvDepthWiseImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = opTensor.getInput(0)->refCastFrom(input0Fallback, *opTensor.getOutput(0));
+    const auto& input1 = opTensor.getInput(1)->refCastFrom(input1Fallback, *opTensor.getOutput(0));
+    const auto& input2 = (opTensor.getInput(2)) ? opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const ConvDepthWise_Op<1>&>(mOp).getStaticAttributes(), // ConvDepthWise attributes
+               opTensor.getInput(0)->template dims<3>(), // input dimensions
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               (opTensor.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+    );
+}
+
 Aidge::Elts_t Aidge::ConvDepthWiseImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return Elts_t::DataElts(0);
diff --git a/src/operator/ConvImpl.cpp b/src/operator/ConvImpl.cpp
index b69bbc0725eeaac1e7aef4099d2bcc798bd7eb5d..b7deb6b84c16ad3b0865ceed674d260f7be542cc 100644
--- a/src/operator/ConvImpl.cpp
+++ b/src/operator/ConvImpl.cpp
@@ -22,6 +22,58 @@
 #include "aidge/operator/Conv.hpp"
 #include "aidge/utils/Types.h"
 
+Aidge::Elts_t Aidge::ConvImpl1D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
+    // this implementation can be in-place
+    return Elts_t::DataElts(0);
+}
+
+void Aidge::ConvImpl1D_cpu::forward() {
+    const auto& opTensor = static_cast<const OperatorTensor&>(mOp);
+
+    assert(mOp.getRawInput(0) && "missing input #0");
+    assert(mOp.getRawInput(1) && "missing input #1");
+    // input #2 (bias) is optional: the kernel falls back to a zero bias
+    // when it is absent
+
+    // Find the correct kernel type
+    const auto outputDataType = opTensor.getOutput(0)->dataType();
+    const Registrar<ConvImpl1DForward_cpu>::registrar_key registrarKey = {
+        opTensor.getInput(0)->dataType(),
+        opTensor.getInput(1)->dataType(),
+        ((opTensor.getInput(2)) ? opTensor.getInput(2)->dataType() : opTensor.getInput(1)->dataType()),
+        outputDataType};
+
+    Registrar<ConvImpl1DForward_cpu>::registrar_type kernelFunc;
+    if (Registrar<ConvImpl1DForward_cpu>::exists(registrarKey)) {
+        // One exists with the right inputs/output types
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create(registrarKey);
+    }
+    else {
+        // Otherwise, fall back to the kernel with all types matching the output type
+        kernelFunc = Registrar<ConvImpl1DForward_cpu>::create({
+            outputDataType, outputDataType, outputDataType, outputDataType});
+    }
+
+    // Convert input data (no overhead if not needed!)
+    // TODO: right now, if needed, memory will be allocated/deallocated at each
+    // call to forward(). We might put the following shared_ptr as members of
+    // this class to avoid that.
+    std::shared_ptr<Tensor> input0Fallback, input1Fallback, input2Fallback;
+    const auto& input0 = opTensor.getInput(0)->refCastFrom(input0Fallback, *opTensor.getOutput(0));
+    const auto& input1 = opTensor.getInput(1)->refCastFrom(input1Fallback, *opTensor.getOutput(0));
+    const auto& input2 = (opTensor.getInput(2)) ? opTensor.getInput(2)->refCastFrom(input2Fallback, *opTensor.getOutput(0)) : Tensor();
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Conv_Op<1>&>(mOp).getStaticAttributes(), // Conv attributes
+               opTensor.getInput(0)->template dims<3>(), // input dimensions
+               dynamic_cast<const Conv_Op<1>&>(mOp).outChannels(), // outChannels
+               input0.getImpl()->rawPtr(), // input
+               input1.getImpl()->rawPtr(), // weight
+               (opTensor.getInput(2)) ? input2.getImpl()->rawPtr() : nullptr, // bias
+               getCPUPtr(mOp.getRawOutput(0)) // output
+    );
+}
+
 Aidge::Elts_t Aidge::ConvImpl2D_cpu::getNbRequiredProtected(IOIndex_t /*inputIdx*/) const {
     // this implementation can be in-place
     return Elts_t::DataElts(0);
diff --git a/src/operator/PadImpl.cpp b/src/operator/PadImpl.cpp
index cd420a6241723c5d3fa5836838f84ce6bfe965d1..f7ac36428536b88da73736cc7f3898bb16578b10 100644
--- a/src/operator/PadImpl.cpp
+++ b/src/operator/PadImpl.cpp
@@ -22,6 +22,31 @@
 #include "aidge/backend/cpu/operator/PadImpl.hpp"
 #include "aidge/backend/cpu/operator/PadImpl_forward_kernels.hpp"
 
+Aidge::Elts_t Aidge::PadImpl1D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
+    assert(inputIdx == 0 && "operator has only one input");
+    (void) inputIdx;
+
+    // Padding cannot be in-place!
+    // We must ensure that we do not overwrite data that has not been consumed yet.
+    const auto inputSize = std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->size();
+    const auto outputSize = std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->size();
+    return Elts_t::DataElts(outputSize - inputSize);
+}
+
+void Aidge::PadImpl1D_cpu::forward() {
+    assert(std::static_pointer_cast<Tensor>(mOp.getRawInput(0)) && "missing input #0");
+
+    // Find the correct kernel type
+    auto kernelFunc =
+        Registrar<PadImpl1DForward_cpu>::create({std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->dataType(), std::static_pointer_cast<Tensor>(mOp.getRawOutput(0))->dataType()});
+
+    // Call kernel
+    kernelFunc(dynamic_cast<const Pad_Op<1>&>(mOp).getStaticAttributes(),
+               std::static_pointer_cast<Tensor>(mOp.getRawInput(0))->template dims<3>(),
+               getCPUPtr(mOp.getRawInput(0)),
+               getCPUPtr(mOp.getRawOutput(0)));
+}
+
 Aidge::Elts_t Aidge::PadImpl2D_cpu::getNbRequiredProtected(IOIndex_t inputIdx) const {
     assert(inputIdx == 0 && "operator has only one input");
     (void) inputIdx;
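Reviewer note (not part of the patch): the three `forward()` implementations above share one dispatch policy: first look up a kernel registered for the exact (input, weight, bias, output) data-type tuple, and if none exists, fall back to the kernel whose types all match the output type, relying on `refCastFrom` to convert the inputs beforehand. A minimal sketch of that policy with a plain `std::map` and hypothetical kernel names (not the actual Registrar API):

```cpp
// Sketch of the exact-key-then-fallback kernel dispatch (illustrative only).
#include <iostream>
#include <map>
#include <string>
#include <tuple>

enum class DT { Float32, Float64, Int32 };
using Key = std::tuple<DT, DT, DT, DT>;  // (input, weight, bias, output)

int main() {
    std::map<Key, std::string> registry;
    registry[std::make_tuple(DT::Float32, DT::Float32, DT::Float32, DT::Float32)] = "conv1d_f32";
    registry[std::make_tuple(DT::Float64, DT::Float64, DT::Float64, DT::Float64)] = "conv1d_f64";

    // Requested types: Int32 input but Float32 everywhere else -> no exact match
    Key key = std::make_tuple(DT::Int32, DT::Float32, DT::Float32, DT::Float32);
    if (registry.count(key) == 0) {
        // Fallback: pick the kernel where every type matches the output type;
        // the inputs are expected to be cast to that type first (refCastFrom)
        const DT out = std::get<3>(key);
        key = std::make_tuple(out, out, out, out);
    }
    std::cout << "selected kernel: " << registry.at(key) << '\n';  // conv1d_f32
    return 0;
}
```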